OmniSciDB  c0231cc57d
import_export Namespace Reference

Namespaces

 anonymous_namespace{ExpressionParser.cpp}
 
 anonymous_namespace{ForeignDataImporter.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterCSV.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 anonymous_namespace{RasterImporter.cpp}
 
 delimited_parser
 

Classes

class  AbstractImporter
 
struct  CopyParams
 
class  ExpressionParser
 
class  ForeignDataImporter
 
class  ImportBatchResult
 
struct  GeoImportException
 
class  ColumnNotGeoError
 
struct  BadRowsTracker
 
class  ImporterUtils
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  Importer
 
struct  MetadataColumnInfo
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 
class  GCPTransformer
 
class  RasterImporter
 
class  RenderGroupAnalyzer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< Geospatial::GDAL::FeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 
using MetadataColumnInfos = std::vector< MetadataColumnInfo >
 

Enumerations

enum  ImportHeaderRow { ImportHeaderRow::kAutoDetect, ImportHeaderRow::kNoHeader, ImportHeaderRow::kHasHeader }
 
enum  RasterPointType {
  RasterPointType::kNone, RasterPointType::kAuto, RasterPointType::kSmallInt, RasterPointType::kInt,
  RasterPointType::kFloat, RasterPointType::kDouble, RasterPointType::kPoint
}
 
enum  RasterPointTransform { RasterPointTransform::kNone, RasterPointTransform::kAuto, RasterPointTransform::kFile, RasterPointTransform::kWorld }
 
enum  SourceType {
  SourceType::kUnknown, SourceType::kUnsupported, SourceType::kDelimitedFile, SourceType::kGeoFile,
  SourceType::kRasterFile, SourceType::kParquetFile, SourceType::kOdbc, SourceType::kRegexParsedFile
}
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer, const Catalog_Namespace::SessionInfo *session_info, Executor *executor)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRCoordinateTransformation *coordinate_transformation, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, const Catalog_Namespace::SessionInfo *session_info, Executor *executor, const MetadataColumnInfos &metadata_column_infos)
 
template<class T >
bool try_cast (const std::string &str)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns (const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
 
std::unique_ptr< AbstractImporter > create_importer (Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
 
MetadataColumnInfos parse_add_metadata_columns (const std::string &add_metadata_columns, const std::string &file_path)
 
size_t num_import_threads (const int32_t copy_params_threads)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static heavyai::shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 
static const size_t kImportRowLimit = 10000
 

Typedef Documentation

using import_export::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 75 of file Importer.h.

using import_export::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 154 of file Importer.cpp.

using import_export::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 152 of file Importer.cpp.

using import_export::FeaturePtrVector = typedef std::vector<Geospatial::GDAL::FeatureUqPtr>

Definition at line 155 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 151 of file Importer.cpp.

using import_export::MetadataColumnInfos = typedef std::vector<MetadataColumnInfo>

Definition at line 36 of file MetadataColumn.h.

Enumeration Type Documentation

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 453 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

453  {
454  const auto& arr = datum.val.arr_val;
455  for (const auto& elem_datum : arr) {
456  string_vec.push_back(elem_datum.val.str_val);
457  }
458 }

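A minimal usage sketch (assuming a Thrift TDatum whose val.arr_val was populated with string elements elsewhere, as on the binary import path):

    TDatum datum;                          // assumed to be filled in by the Thrift layer
    std::vector<std::string> string_vec;
    import_export::addBinaryStringArray(datum, string_vec);
    // string_vec now holds one std::string per element of datum.val.arr_val.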

std::unique_ptr< AbstractImporter > import_export::create_importer ( Catalog_Namespace::Catalog &  catalog,
const TableDescriptor *  td,
const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params 
)

Definition at line 6331 of file Importer.cpp.

References g_enable_fsi_regex_import, g_enable_legacy_delimited_import, kDelimitedFile, kParquetFile, kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by Parser::CopyTableStmt::execute(), and DBHandler::import_table().

6335  {
6336  if (copy_params.source_type == import_export::SourceType::kParquetFile) {
6337 #ifdef ENABLE_IMPORT_PARQUET
6338  if (!g_enable_legacy_parquet_import) {
6339  return std::make_unique<import_export::ForeignDataImporter>(
6340  copy_from_source, copy_params, td);
6341  }
6342 #else
6343  throw std::runtime_error("Parquet not supported!");
6344 #endif
6345  }
6346 
6347  if (copy_params.source_type == import_export::SourceType::kDelimitedFile &&
6348  !g_enable_legacy_delimited_import) {
6349  return std::make_unique<import_export::ForeignDataImporter>(
6350  copy_from_source, copy_params, td);
6351  }
6352 
6353  if (copy_params.source_type == import_export::SourceType::kRegexParsedFile) {
6354  if (g_enable_fsi_regex_import) {
6355  return std::make_unique<import_export::ForeignDataImporter>(
6356  copy_from_source, copy_params, td);
6357  } else {
6358  throw std::runtime_error(
6359  "Regex parsed import only supported using 'fsi-regex-import' flag");
6360  }
6361  }
6362 
6363  return std::make_unique<import_export::Importer>(
6364  catalog, td, copy_from_source, copy_params);
6365 }
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:81
import_export::SourceType source_type
Definition: CopyParams.h:57
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:85

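A hedged call-site sketch, mirroring how Parser::CopyTableStmt::execute() or DBHandler::import_table() might obtain an importer; the import(session_info) entry point on AbstractImporter is an assumption here, as are the file path and the in-scope catalog, td and session_info:

    // Sketch only: catalog, td and session_info are assumed to be in scope.
    import_export::CopyParams copy_params;
    copy_params.source_type = import_export::SourceType::kParquetFile;  // routes to ForeignDataImporter
    auto importer = import_export::create_importer(
        catalog, td, "/data/trips.parquet", copy_params);
    auto import_status = importer->import(&session_info);  // assumed AbstractImporter interface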

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::fill_missing_columns ( const Catalog_Namespace::Catalog *  cat,
Fragmenter_Namespace::InsertData &  insert_data 
)

Definition at line 6259 of file Importer.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, ColumnDescriptor::columnId, Fragmenter_Namespace::InsertData::columnIds, ColumnDescriptor::columnName, ColumnDescriptor::columnType, Fragmenter_Namespace::InsertData::data, ColumnDescriptor::default_value, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), import_export::TypedImportBuffer::get_data_block_pointers(), SQLTypeInfo::get_physical_cols(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Geospatial::GeoTypesFactory::getGeoColumns(), Catalog_Namespace::Catalog::getMetadataForDict(), Fragmenter_Namespace::InsertData::is_default, SQLTypeInfo::is_geometry(), IS_STRING, kARRAY, kENCODING_DICT, import_export::Importer::set_geo_physical_import_buffer(), gpu_enabled::sort(), and Fragmenter_Namespace::InsertData::tableId.

Referenced by RelAlgExecutor::executeSimpleInsert(), DBHandler::insert_data(), and Parser::InsertIntoTableAsSelectStmt::populateData().

6261  {
6262  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> defaults_buffers;
6263  if (insert_data.is_default.size() == 0) {
6264  insert_data.is_default.resize(insert_data.columnIds.size(), false);
6265  }
6266  CHECK(insert_data.is_default.size() == insert_data.is_default.size());
6267  auto cds = cat->getAllColumnMetadataForTable(insert_data.tableId, false, false, true);
6268  if (cds.size() == insert_data.columnIds.size()) {
6269  // all columns specified
6270  return defaults_buffers;
6271  }
6272  for (auto cd : cds) {
6273  if (std::find(insert_data.columnIds.begin(),
6274  insert_data.columnIds.end(),
6275  cd->columnId) == insert_data.columnIds.end()) {
6276  StringDictionary* dict = nullptr;
6277  if (cd->columnType.get_type() == kARRAY &&
6278  IS_STRING(cd->columnType.get_subtype()) && !cd->default_value.has_value()) {
6279  throw std::runtime_error("Cannot omit column \"" + cd->columnName +
6280  "\": omitting TEXT arrays is not supported yet");
6281  }
6282  if (cd->columnType.get_compression() == kENCODING_DICT) {
6283  dict = cat->getMetadataForDict(cd->columnType.get_comp_param())->stringDict.get();
6284  }
6285  defaults_buffers.emplace_back(std::make_unique<TypedImportBuffer>(cd, dict));
6286  }
6287  }
6288  // put buffers in order to fill geo sub-columns properly
6289  std::sort(defaults_buffers.begin(),
6290  defaults_buffers.end(),
6291  [](decltype(defaults_buffers[0])& a, decltype(defaults_buffers[0])& b) {
6292  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
6293  });
6294  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6295  auto cd = defaults_buffers[i]->getColumnDesc();
6296  std::string default_value = cd->default_value.value_or("NULL");
6297  defaults_buffers[i]->add_value(
6298  cd, default_value, !cd->default_value.has_value(), import_export::CopyParams());
6299  if (cd->columnType.is_geometry()) {
6300  std::vector<double> coords, bounds;
6301  std::vector<int> ring_sizes, poly_rings;
6302  int render_group = 0;
6303  SQLTypeInfo tinfo{cd->columnType};
6304  CHECK(Geospatial::GeoTypesFactory::getGeoColumns(
6305  default_value, tinfo, coords, bounds, ring_sizes, poly_rings, false));
6306  // set physical columns starting with the following ID
6307  auto next_col = i + 1;
6308  import_export::Importer::set_geo_physical_import_buffer(*cat,
6309  cd,
6310  defaults_buffers,
6311  next_col,
6312  coords,
6313  bounds,
6314  ring_sizes,
6315  poly_rings,
6316  render_group);
6317  // skip physical columns filled with the call above
6318  i += cd->columnType.get_physical_cols();
6319  }
6320  }
6321  auto data = import_export::TypedImportBuffer::get_data_block_pointers(defaults_buffers);
6322  CHECK(data.size() == defaults_buffers.size());
6323  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6324  insert_data.data.push_back(data[i]);
6325  insert_data.columnIds.push_back(defaults_buffers[i]->getColumnDesc()->columnId);
6326  insert_data.is_default.push_back(true);
6327  }
6328  return defaults_buffers;
6329 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
std::vector< bool > is_default
Definition: Fragmenter.h:75
constexpr double a
Definition: Utm.h:32
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:70
static std::vector< DataBlockPtr > get_data_block_pointers(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
Definition: Importer.cpp:3053
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1960
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:73
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2228
#define IS_STRING(T)
Definition: sqltypes.h:322
#define CHECK(condition)
Definition: Logger.h:222
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1666
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:71

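A hedged sketch of the insert-path usage (as in DBHandler::insert_data()); the table id and client-provided column ids are assumptions. Note that the returned buffers must outlive the insert, since insert_data.data ends up pointing into them via get_data_block_pointers():

    // Sketch only: catalog and table_id are assumed to be in scope.
    Fragmenter_Namespace::InsertData insert_data;
    insert_data.tableId = table_id;
    insert_data.columnIds = {1, 3};   // columns the client actually supplied
    // ... populate insert_data.data for those columns ...
    auto default_buffers = import_export::fill_missing_columns(&catalog, insert_data);
    // insert_data now also carries default/NULL blocks for every omitted column;
    // keep default_buffers alive until the fragmenter has consumed insert_data.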

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 5192 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

5193  {
5194  // prepare to gather subdirectories
5195  std::vector<std::string> subdirectories;
5196 
5197  // get entries
5198  char** entries = VSIReadDir(archive_path.c_str());
5199  if (!entries) {
5200  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
5201  return;
5202  }
5203 
5204  // force scope
5205  {
5206  // request clean-up
5207  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
5208 
5209  // check all the entries
5210  int index = 0;
5211  while (true) {
5212  // get next entry, or drop out if there isn't one
5213  char* entry_c = entries[index++];
5214  if (!entry_c) {
5215  break;
5216  }
5217  std::string entry(entry_c);
5218 
5219  // ignore '.' and '..'
5220  if (entry == "." || entry == "..") {
5221  continue;
5222  }
5223 
5224  // build the full path
5225  std::string entry_path = archive_path + std::string("/") + entry;
5226 
5227  // is it a file or a sub-folder
5228  VSIStatBufL sb;
5229  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
5230  if (result < 0) {
5231  break;
5232  }
5233 
5234  if (VSI_ISDIR(sb.st_mode)) {
5235  // a directory that ends with .gdb could be a Geodatabase bundle
5236  // arguably dangerous to decide this purely by name, but any further
5237  // validation would be very complex especially at this scope
5238  if (boost::iends_with(entry_path, ".gdb")) {
5239  // add the directory as if it was a file and don't recurse into it
5240  files.push_back(entry_path);
5241  } else {
5242  // add subdirectory to be recursed into
5243  subdirectories.push_back(entry_path);
5244  }
5245  } else {
5246  // add this file
5247  files.push_back(entry_path);
5248  }
5249  }
5250  }
5251 
5252  // recurse into each subdirectories we found
5253  for (const auto& subdirectory : subdirectories) {
5254  gdalGatherFilesInArchiveRecursive(subdirectory, files);
5255  }
5256 }
#define LOG(tag)
Definition: Logger.h:216
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:5192

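A minimal call sketch, assuming the archive is addressed through GDAL's virtual file system (the /vsizip/ prefix is GDAL's convention for zip archives; the path is illustrative):

    std::vector<std::string> files;
    import_export::gdalGatherFilesInArchiveRecursive("/vsizip//tmp/parcels.zip", files);
    for (const auto& f : files) {
      LOG(INFO) << "archive member: " << f;
    }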

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor 
)
static

Definition at line 2039 of file Importer.cpp.

References CHECK, CHECK_LT, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), Catalog_Namespace::SessionInfo::get_session_id(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, is_null(), ImportHelpers::is_null_datum(), kDelimitedFile, kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, import_export::CopyParams::source_type, sv_strip(), gpu_enabled::swap(), import_export::ImportStatus::thread_id, logger::thread_id(), to_string(), import_export::CopyParams::trim_spaces, and UNLIKELY.

Referenced by import_export::Importer::importDelimited().

2049  {
2050  ImportStatus thread_import_status;
2051  int64_t total_get_row_time_us = 0;
2052  int64_t total_str_to_val_time_us = 0;
2053  auto query_session = session_info ? session_info->get_session_id() : "";
2054  CHECK(scratch_buffer);
2055  auto buffer = scratch_buffer.get();
2056  auto load_ms = measure<>::execution([]() {});
2057 
2058  thread_import_status.thread_id = thread_id;
2059 
2060  auto ms = measure<>::execution([&]() {
2061  const CopyParams& copy_params = importer->get_copy_params();
2062  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2063  size_t begin =
2064  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
2065  const char* thread_buf = buffer + begin_pos + begin;
2066  const char* thread_buf_end = buffer + end_pos;
2067  const char* buf_end = buffer + total_size;
2068  bool try_single_thread = false;
2069  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2070  importer->get_import_buffers(thread_id);
2071  auto us = measure<std::chrono::microseconds>::execution([&]() {});
2072  int phys_cols = 0;
2073  int point_cols = 0;
2074  for (const auto cd : col_descs) {
2075  const auto& col_ti = cd->columnType;
2076  phys_cols += col_ti.get_physical_cols();
2077  if (cd->columnType.get_type() == kPOINT) {
2078  point_cols++;
2079  }
2080  }
2081  auto num_cols = col_descs.size() - phys_cols;
2082  for (const auto& p : import_buffers) {
2083  p->clear();
2084  }
2085  std::vector<std::string_view> row;
2086  size_t row_index_plus_one = 0;
2087  for (const char* p = thread_buf; p < thread_buf_end; p++) {
2088  row.clear();
2089  std::vector<std::unique_ptr<char[]>>
2090  tmp_buffers; // holds string w/ removed escape chars, etc
2091  if (DEBUG_TIMING) {
2092  us = measure<std::chrono::microseconds>::execution([&]() {
2093  p = import_export::delimited_parser::get_row(p,
2094  thread_buf_end,
2095  buf_end,
2096  copy_params,
2097  importer->get_is_array(),
2098  row,
2099  tmp_buffers,
2100  try_single_thread,
2101  true);
2102  });
2103  total_get_row_time_us += us;
2104  } else {
2105  p = import_export::delimited_parser::get_row(p,
2106  thread_buf_end,
2107  buf_end,
2108  copy_params,
2109  importer->get_is_array(),
2110  row,
2111  tmp_buffers,
2112  try_single_thread,
2113  true);
2114  }
2115  row_index_plus_one++;
2116  // Each POINT could consume two separate coords instead of a single WKT
2117  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
2118  thread_import_status.rows_rejected++;
2119  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
2120  << row.size() << "): " << shared::printContainer(row);
2121  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2122  break;
2123  }
2124  continue;
2125  }
2126 
2127  //
2128  // lambda for importing a row (perhaps multiple times if exploding a collection)
2129  //
2130 
2131  auto execute_import_row = [&](OGRGeometry* import_geometry) {
2132  size_t import_idx = 0;
2133  size_t col_idx = 0;
2134  try {
2135  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2136  auto cd = *cd_it;
2137  const auto& col_ti = cd->columnType;
2138 
2139  bool is_null =
2140  ImportHelpers::is_null_datum(row[import_idx], copy_params.null_str);
2141  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
2142  // So initially nullness may be missed and not passed to add_value,
2143  // which then might also check and still decide it's actually a NULL, e.g.
2144  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
2145  // So "NULL" is not recognized as NULL but then it's not recognized as
2146  // a valid kINT, so it's a NULL after all.
2147  // Checking for "NULL" here too, as a widely accepted notation for NULL.
2148 
2149  // Treating empty as NULL
2150  if (!cd->columnType.is_string() && row[import_idx].empty()) {
2151  is_null = true;
2152  }
2153  if (!cd->columnType.is_string() && !copy_params.trim_spaces) {
2154  // everything but strings should be always trimmed
2155  row[import_idx] = sv_strip(row[import_idx]);
2156  }
2157 
2158  if (col_ti.get_physical_cols() == 0) {
2159  // not geo
2160 
2161  import_buffers[col_idx]->add_value(
2162  cd, row[import_idx], is_null, copy_params);
2163 
2164  // next
2165  ++import_idx;
2166  ++col_idx;
2167  } else {
2168  // geo
2169 
2170  // store null string in the base column
2171  import_buffers[col_idx]->add_value(
2172  cd, copy_params.null_str, true, copy_params);
2173 
2174  // WKT from string we're not storing
2175  auto const& geo_string = row[import_idx];
2176 
2177  // next
2178  ++import_idx;
2179  ++col_idx;
2180 
2181  SQLTypes col_type = col_ti.get_type();
2182  CHECK(IS_GEO(col_type));
2183 
2184  std::vector<double> coords;
2185  std::vector<double> bounds;
2186  std::vector<int> ring_sizes;
2187  std::vector<int> poly_rings;
2188  int render_group = 0;
2189 
2190  // if this is a POINT column, and the field is not null, and
2191  // looks like a scalar numeric value (and not a hex blob)
2192  // attempt to import two columns as lon/lat (or lat/lon)
2193  if (col_type == kPOINT && !is_null && geo_string.size() > 0 &&
2194  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
2195  geo_string[0] == '-') &&
2196  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
2197  double lon = std::atof(std::string(geo_string).c_str());
2198  double lat = NAN;
2199  auto lat_str = row[import_idx];
2200  ++import_idx;
2201  if (lat_str.size() > 0 &&
2202  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
2203  lat = std::atof(std::string(lat_str).c_str());
2204  }
2205  // Swap coordinates if this table uses a reverse order: lat/lon
2206  if (!copy_params.lonlat) {
2207  std::swap(lat, lon);
2208  }
2209  // TODO: should check if POINT column should have been declared with
2210  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
2211  // throw std::runtime_error("POINT column " + cd->columnName + " is
2212  // not WGS84, cannot insert lon/lat");
2213  // }
2214  SQLTypeInfo import_ti{col_ti};
2215  if (copy_params.source_type ==
2216  import_export::SourceType::kDelimitedFile &&
2217  import_ti.get_output_srid() == 4326) {
2218  auto srid0 = copy_params.source_srid;
2219  if (srid0 > 0) {
2220  // srid0 -> 4326 transform is requested on import
2221  import_ti.set_input_srid(srid0);
2222  }
2223  }
2224  if (!importGeoFromLonLat(lon, lat, coords, import_ti)) {
2225  throw std::runtime_error(
2226  "Cannot read lon/lat to insert into POINT column " +
2227  cd->columnName);
2228  }
2229  } else {
2230  // import it
2231  SQLTypeInfo import_ti{col_ti};
2232  if (copy_params.source_type ==
2233  import_export::SourceType::kDelimitedFile &&
2234  import_ti.get_output_srid() == 4326) {
2235  auto srid0 = copy_params.source_srid;
2236  if (srid0 > 0) {
2237  // srid0 -> 4326 transform is requested on import
2238  import_ti.set_input_srid(srid0);
2239  }
2240  }
2241  if (is_null) {
2242  if (col_ti.get_notnull()) {
2243  throw std::runtime_error("NULL geo for column " + cd->columnName);
2244  }
2245  Geospatial::GeoTypesFactory::getNullGeoColumns(
2246  import_ti,
2247  coords,
2248  bounds,
2249  ring_sizes,
2250  poly_rings,
2251  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2252  } else {
2253  if (import_geometry) {
2254  // geometry already exploded
2255  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2256  import_geometry,
2257  import_ti,
2258  coords,
2259  bounds,
2260  ring_sizes,
2261  poly_rings,
2262  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2263  std::string msg =
2264  "Failed to extract valid geometry from exploded row " +
2265  std::to_string(first_row_index_this_buffer +
2266  row_index_plus_one) +
2267  " for column " + cd->columnName;
2268  throw std::runtime_error(msg);
2269  }
2270  } else {
2271  // extract geometry directly from WKT
2272  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2273  std::string(geo_string),
2274  import_ti,
2275  coords,
2276  bounds,
2277  ring_sizes,
2278  poly_rings,
2279  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2280  std::string msg = "Failed to extract valid geometry from row " +
2281  std::to_string(first_row_index_this_buffer +
2282  row_index_plus_one) +
2283  " for column " + cd->columnName;
2284  throw std::runtime_error(msg);
2285  }
2286  }
2287 
2288  // validate types
2289  if (col_type != import_ti.get_type()) {
2290  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2291  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2292  col_type == SQLTypes::kMULTIPOLYGON)) {
2293  throw std::runtime_error(
2294  "Imported geometry doesn't match the type of column " +
2295  cd->columnName);
2296  }
2297  }
2298  }
2299 
2300  // assign render group?
2301  if (columnIdToRenderGroupAnalyzerMap.size()) {
2302  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2303  if (ring_sizes.size()) {
2304  // get a suitable render group for these poly coords
2305  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2306  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2307  render_group =
2308  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2309  } else {
2310  // empty poly
2311  render_group = -1;
2312  }
2313  }
2314  }
2315  }
2316 
2317  // import extracted geo
2318  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2319  cd,
2320  import_buffers,
2321  col_idx,
2322  coords,
2323  bounds,
2324  ring_sizes,
2325  poly_rings,
2326  render_group);
2327 
2328  // skip remaining physical columns
2329  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2330  ++cd_it;
2331  }
2332  }
2333  }
2334  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2335  check_session_interrupted(query_session, executor))) {
2336  thread_import_status.load_failed = true;
2337  thread_import_status.load_msg =
2338  "Table load was cancelled via Query Interrupt";
2339  return;
2340  }
2341  thread_import_status.rows_completed++;
2342  } catch (const std::exception& e) {
2343  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2344  import_buffers[col_idx_to_pop]->pop_value();
2345  }
2346  thread_import_status.rows_rejected++;
2347  LOG(ERROR) << "Input exception thrown: " << e.what()
2348  << ". Row discarded. Data: " << shared::printContainer(row);
2349  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2350  LOG(ERROR) << "Load was cancelled due to max reject rows being reached";
2351  thread_import_status.load_failed = true;
2352  thread_import_status.load_msg =
2353  "Load was cancelled due to max reject rows being reached";
2354  }
2355  }
2356  }; // End of lambda
2357 
2358  if (copy_params.geo_explode_collections) {
2359  // explode and import
2360  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2361  explode_collections_step1(col_descs);
2362  // pull out the collection WKT or WKB hex
2363  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2364  auto const& collection_geo_string = row[collection_col_idx];
2365  // convert to OGR
2366  OGRGeometry* ogr_geometry = nullptr;
2367  ScopeGuard destroy_ogr_geometry = [&] {
2368  if (ogr_geometry) {
2369  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2370  }
2371  };
2372  ogr_geometry = Geospatial::GeoTypesFactory::createOGRGeometry(
2373  std::string(collection_geo_string));
2374  // do the explode and import
2375  us = explode_collections_step2(ogr_geometry,
2376  collection_child_type,
2377  collection_col_name,
2378  first_row_index_this_buffer + row_index_plus_one,
2379  execute_import_row);
2380  } else {
2381  // import non-collection row just once
2382  us = measure<std::chrono::microseconds>::execution(
2383  [&] { execute_import_row(nullptr); });
2384  }
2385 
2386  if (thread_import_status.load_failed) {
2387  break;
2388  }
2389  } // end thread
2390  total_str_to_val_time_us += us;
2391  if (!thread_import_status.load_failed && thread_import_status.rows_completed > 0) {
2392  load_ms = measure<>::execution([&]() {
2393  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2394  });
2395  }
2396  }); // end execution
2397 
2398  if (DEBUG_TIMING && !thread_import_status.load_failed &&
2399  thread_import_status.rows_completed > 0) {
2400  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2401  << thread_import_status.rows_completed << " rows inserted in "
2402  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2403  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2404  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2405  << "sec" << std::endl;
2406  }
2407 
2408  return thread_import_status;
2409 }
bool is_null_datum(const DatumStringType &datum, const std::string &null_indicator)
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:125
SQLTypes
Definition: sqltypes.h:52
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
std::string_view sv_strip(std::string_view str)
return trimmed string_view
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1949
#define LOG(tag)
Definition: Logger.h:216
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:157
void set_input_srid(int d)
Definition: sqltypes.h:518
CONSTEXPR DEVICE bool is_null(const T &value)
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread, bool filter_empty_lines)
Parses the first row in the given buffer and inserts fields into given vector.
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
#define CHECK_LT(x, y)
Definition: Logger.h:232
static OGRGeometry * createOGRGeometry(const std::string &wkt_or_wkb_hex)
Definition: Types.cpp:1044
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1915
ThreadId thread_id()
Definition: Logger.cpp:820
#define CHECK(condition)
Definition: Logger.h:222
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
Definition: Importer.cpp:1646
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:107
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
#define IS_GEO(T)
Definition: sqltypes.h:323

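The POINT fast path above decides whether a field looks like a scalar longitude (rather than WKT or a WKB hex blob) before consuming the next field as latitude. A standalone restatement of that heuristic, for illustration only:

    #include <cctype>
    #include <string_view>

    // Mirrors the check in import_thread_delimited: the field starts with '.', '-' or a
    // digit, and contains no hex letters (which would indicate a WKB hex blob).
    bool looks_like_scalar_coordinate(std::string_view field) {
      if (field.empty()) {
        return false;
      }
      const unsigned char c = static_cast<unsigned char>(field.front());
      const bool numeric_start = (c == '.' || c == '-' || std::isdigit(c));
      return numeric_start && field.find_first_of("ABCDEFabcdef") == std::string_view::npos;
    }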

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRCoordinateTransformation *  coordinate_transformation,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor,
const MetadataColumnInfos &  metadata_column_infos 
)
static

Definition at line 2417 of file Importer.cpp.

References CHECK, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::compress_coords(), DEBUG_TIMING, Executor::ERR_INTERRUPTED, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Catalog_Namespace::SessionInfo::get_session_id(), QueryExecutionError::getErrorCode(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), logger::INFO, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, generate_TableFunctionsFactory_init::separator, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), TIMER_STOP, to_string(), and UNLIKELY.

Referenced by import_export::Importer::importGDALGeo().

2429  {
2430  ImportStatus thread_import_status;
2431  const CopyParams& copy_params = importer->get_copy_params();
2432  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2433  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2434  importer->get_import_buffers(thread_id);
2435  auto query_session = session_info ? session_info->get_session_id() : "";
2436  for (const auto& p : import_buffers) {
2437  p->clear();
2438  }
2439 
2440  auto convert_timer = timer_start();
2441 
2442  // for all the features in this chunk...
2443  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2444  // ignore null features
2445  if (!features[iFeature]) {
2446  continue;
2447  }
2448 
2449  // get this feature's geometry
2450  // for geodatabase, we need to consider features with no geometry
2451  // as we still want to create a table, even if it has no geo column
2452  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2453  if (pGeometry && coordinate_transformation) {
2454  pGeometry->transform(coordinate_transformation);
2455  }
2456 
2457  //
2458  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2459  //
2460 
2461  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2462  size_t col_idx = 0;
2463  try {
2464  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2465  check_session_interrupted(query_session, executor))) {
2466  thread_import_status.load_failed = true;
2467  thread_import_status.load_msg = "Table load was cancelled via Query Interrupt";
2468  throw QueryExecutionError(Executor::ERR_INTERRUPTED);
2469  }
2470 
2471  uint32_t field_column_count{0u};
2472  uint32_t metadata_column_count{0u};
2473 
2474  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2475  auto cd = *cd_it;
2476 
2477  // is this a geo column?
2478  const auto& col_ti = cd->columnType;
2479  if (col_ti.is_geometry()) {
2480  // Note that this assumes there is one and only one geo column in the
2481  // table. Currently, the importer only supports reading a single
2482  // geospatial feature from an input shapefile / geojson file, but this
2483  // code will need to be modified if that changes
2484  SQLTypes col_type = col_ti.get_type();
2485 
2486  // store null string in the base column
2487  import_buffers[col_idx]->add_value(
2488  cd, copy_params.null_str, true, copy_params);
2489  ++col_idx;
2490 
2491  // the data we now need to extract for the other columns
2492  std::vector<double> coords;
2493  std::vector<double> bounds;
2494  std::vector<int> ring_sizes;
2495  std::vector<int> poly_rings;
2496  int render_group = 0;
2497 
2498  // extract it
2499  SQLTypeInfo import_ti{col_ti};
2500  bool is_null_geo = !import_geometry;
2501  if (is_null_geo) {
2502  if (col_ti.get_notnull()) {
2503  throw std::runtime_error("NULL geo for column " + cd->columnName);
2504  }
2505  Geospatial::GeoTypesFactory::getNullGeoColumns(
2506  import_ti,
2507  coords,
2508  bounds,
2509  ring_sizes,
2510  poly_rings,
2511  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2512  } else {
2513  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2514  import_geometry,
2515  import_ti,
2516  coords,
2517  bounds,
2518  ring_sizes,
2519  poly_rings,
2520  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2521  std::string msg = "Failed to extract valid geometry from feature " +
2522  std::to_string(firstFeature + iFeature + 1) +
2523  " for column " + cd->columnName;
2524  throw std::runtime_error(msg);
2525  }
2526 
2527  // validate types
2528  if (col_type != import_ti.get_type()) {
2529  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2530  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2531  col_type == SQLTypes::kMULTIPOLYGON)) {
2532  throw std::runtime_error(
2533  "Imported geometry doesn't match the type of column " +
2534  cd->columnName);
2535  }
2536  }
2537  }
2538 
2539  if (columnIdToRenderGroupAnalyzerMap.size()) {
2540  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2541  if (ring_sizes.size()) {
2542  // get a suitable render group for these poly coords
2543  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2544  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2545  render_group =
2546  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2547  } else {
2548  // empty poly
2549  render_group = -1;
2550  }
2551  }
2552  }
2553 
2554  // create coords array value and add it to the physical column
2555  ++cd_it;
2556  auto cd_coords = *cd_it;
2557  std::vector<TDatum> td_coord_data;
2558  if (!is_null_geo) {
2559  std::vector<uint8_t> compressed_coords =
2560  Geospatial::compress_coords(coords, col_ti);
2561  for (auto cc : compressed_coords) {
2562  TDatum td_byte;
2563  td_byte.val.int_val = cc;
2564  td_coord_data.push_back(td_byte);
2565  }
2566  }
2567  TDatum tdd_coords;
2568  tdd_coords.val.arr_val = td_coord_data;
2569  tdd_coords.is_null = is_null_geo;
2570  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2571  ++col_idx;
2572 
2573  if (col_type == kMULTILINESTRING || col_type == kPOLYGON ||
2574  col_type == kMULTIPOLYGON) {
2575  // Create [linest]ring_sizes array value and add it to the physical column
2576  ++cd_it;
2577  auto cd_ring_sizes = *cd_it;
2578  std::vector<TDatum> td_ring_sizes;
2579  if (!is_null_geo) {
2580  for (auto ring_size : ring_sizes) {
2581  TDatum td_ring_size;
2582  td_ring_size.val.int_val = ring_size;
2583  td_ring_sizes.push_back(td_ring_size);
2584  }
2585  }
2586  TDatum tdd_ring_sizes;
2587  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2588  tdd_ring_sizes.is_null = is_null_geo;
2589  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2590  ++col_idx;
2591  }
2592 
2593  if (col_type == kMULTIPOLYGON) {
2594  // Create poly_rings array value and add it to the physical column
2595  ++cd_it;
2596  auto cd_poly_rings = *cd_it;
2597  std::vector<TDatum> td_poly_rings;
2598  if (!is_null_geo) {
2599  for (auto num_rings : poly_rings) {
2600  TDatum td_num_rings;
2601  td_num_rings.val.int_val = num_rings;
2602  td_poly_rings.push_back(td_num_rings);
2603  }
2604  }
2605  TDatum tdd_poly_rings;
2606  tdd_poly_rings.val.arr_val = td_poly_rings;
2607  tdd_poly_rings.is_null = is_null_geo;
2608  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2609  ++col_idx;
2610  }
2611 
2612  if (col_type == kLINESTRING || col_type == kMULTILINESTRING ||
2613  col_type == kPOLYGON || col_type == kMULTIPOLYGON ||
2614  col_type == kMULTIPOINT) {
2615  // Create bounds array value and add it to the physical column
2616  ++cd_it;
2617  auto cd_bounds = *cd_it;
2618  std::vector<TDatum> td_bounds_data;
2619  if (!is_null_geo) {
2620  for (auto b : bounds) {
2621  TDatum td_double;
2622  td_double.val.real_val = b;
2623  td_bounds_data.push_back(td_double);
2624  }
2625  }
2626  TDatum tdd_bounds;
2627  tdd_bounds.val.arr_val = td_bounds_data;
2628  tdd_bounds.is_null = is_null_geo;
2629  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2630  ++col_idx;
2631  }
2632 
2633  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2634  // Create render_group value and add it to the physical column
2635  ++cd_it;
2636  auto cd_render_group = *cd_it;
2637  TDatum td_render_group;
2638  td_render_group.val.int_val = render_group;
2639  td_render_group.is_null = is_null_geo;
2640  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2641  ++col_idx;
2642  }
2643  } else if (field_column_count < fieldNameToIndexMap.size()) {
2644  //
2645  // field column
2646  //
2647  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2648  CHECK(cit != columnNameToSourceNameMap.end());
2649  auto const& field_name = cit->second;
2650 
2651  auto const fit = fieldNameToIndexMap.find(field_name);
2652  if (fit == fieldNameToIndexMap.end()) {
2653  throw ColumnNotGeoError(cd->columnName);
2654  }
2655 
2656  auto const& field_index = fit->second;
2657  CHECK(field_index < fieldNameToIndexMap.size());
2658 
2659  auto const& feature = features[iFeature];
2660 
2661  auto field_defn = feature->GetFieldDefnRef(field_index);
2662  CHECK(field_defn);
2663 
2664  // OGRFeature::GetFieldAsString() can only return 80 characters
2665  // so for array columns, we are obliged to fetch the actual values
2666  // and construct the concatenated string ourselves
2667 
2668  std::string value_string;
2669  int array_index = 0, array_size = 0;
2670 
2671  auto stringify_numeric_list = [&](auto* values) {
2672  value_string = "{";
2673  while (array_index < array_size) {
2674  auto separator = (array_index > 0) ? "," : "";
2675  value_string += separator + std::to_string(values[array_index]);
2676  array_index++;
2677  }
2678  value_string += "}";
2679  };
2680 
2681  auto field_type = field_defn->GetType();
2682  switch (field_type) {
2683  case OFTInteger:
2684  case OFTInteger64:
2685  case OFTReal:
2686  case OFTString:
2687  case OFTBinary:
2688  case OFTDate:
2689  case OFTTime:
2690  case OFTDateTime: {
2691  value_string = feature->GetFieldAsString(field_index);
2692  } break;
2693  case OFTIntegerList: {
2694  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2695  stringify_numeric_list(values);
2696  } break;
2697  case OFTInteger64List: {
2698  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2699  stringify_numeric_list(values);
2700  } break;
2701  case OFTRealList: {
2702  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2703  stringify_numeric_list(values);
2704  } break;
2705  case OFTStringList: {
2706  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2707  value_string = "{";
2708  if (array_of_strings) {
2709  while (auto* this_string = array_of_strings[array_index]) {
2710  auto separator = (array_index > 0) ? "," : "";
2711  value_string += separator + std::string(this_string);
2712  array_index++;
2713  }
2714  }
2715  value_string += "}";
2716  } break;
2717  default:
2718  throw std::runtime_error("Unsupported geo file field type (" +
2719  std::to_string(static_cast<int>(field_type)) +
2720  ")");
2721  }
2722 
2723  import_buffers[col_idx]->add_value(cd, value_string, false, copy_params);
2724  ++col_idx;
2725  field_column_count++;
2726  } else if (metadata_column_count < metadata_column_infos.size()) {
2727  //
2728  // metadata column
2729  //
2730  auto const& mci = metadata_column_infos[metadata_column_count];
2731  if (mci.column_descriptor.columnName != cd->columnName) {
2732  throw std::runtime_error("Metadata column name mismatch");
2733  }
2734  import_buffers[col_idx]->add_value(cd, mci.value, false, copy_params);
2735  ++col_idx;
2736  metadata_column_count++;
2737  } else {
2738  throw std::runtime_error("Column count mismatch");
2739  }
2740  }
2741  thread_import_status.rows_completed++;
2742  } catch (QueryExecutionError& e) {
2743  if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
2744  throw e;
2745  }
2746  } catch (ColumnNotGeoError& e) {
2747  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import.";
2748  throw std::runtime_error(e.what());
2749  } catch (const std::exception& e) {
2750  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2751  import_buffers[col_idx_to_pop]->pop_value();
2752  }
2753  thread_import_status.rows_rejected++;
2754  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2755  }
2756  };
2757 
2758  if (pGeometry && copy_params.geo_explode_collections) {
2759  // explode and import
2760  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2761  explode_collections_step1(col_descs);
2762  explode_collections_step2(pGeometry,
2763  collection_child_type,
2764  collection_col_name,
2765  firstFeature + iFeature + 1,
2766  execute_import_feature);
2767  } else {
2768  // import non-collection or null feature just once
2769  execute_import_feature(pGeometry);
2770  }
2771  } // end features
2772 
2773  float convert_s = TIMER_STOP(convert_timer);
2774 
2775  float load_s = 0.0f;
2776  if (thread_import_status.rows_completed > 0) {
2777  auto load_timer = timer_start();
2778  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2779  load_s = TIMER_STOP(load_timer);
2780  }
2781 
2782  if (DEBUG_TIMING && thread_import_status.rows_completed > 0) {
2783  LOG(INFO) << "DEBUG: Process " << convert_s << "s";
2784  LOG(INFO) << "DEBUG: Load " << load_s << "s";
2785  LOG(INFO) << "DEBUG: Total " << (convert_s + load_s) << "s";
2786  }
2787 
2788  thread_import_status.thread_id = thread_id;
2789 
2790  return thread_import_status;
2791 }
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:125
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1378
SQLTypes
Definition: sqltypes.h:52
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1949
#define LOG(tag)
Definition: Logger.h:216
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:157
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define TIMER_STOP(t)
Definition: Importer.cpp:101
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1915
ThreadId thread_id()
Definition: Logger.cpp:820
#define CHECK(condition)
Definition: Logger.h:222
Type timer_start()
Definition: measure.h:42

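Because OGRFeature::GetFieldAsString() truncates long values (the comment in the fragment cites an 80-character limit), array-typed fields are re-stringified from the raw list values. A standalone sketch of that "{a,b,c}" formatting:

    #include <iostream>
    #include <string>
    #include <vector>

    // Joins numeric list values into the brace-delimited array literal the importer expects.
    template <typename T>
    std::string stringify_numeric_list(const std::vector<T>& values) {
      std::string out = "{";
      for (size_t i = 0; i < values.size(); ++i) {
        out += (i > 0 ? "," : "") + std::to_string(values[i]);
      }
      return out + "}";
    }

    int main() {
      std::cout << stringify_numeric_list(std::vector<int>{1, 2, 3}) << '\n';  // prints {1,2,3}
    }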

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords,
SQLTypeInfo &  ti 
)

Definition at line 1646 of file Importer.cpp.

References Geospatial::GeoPoint::getColumns(), and SQLTypeInfo::transforms().

Referenced by import_thread_delimited().

1649  {
1650  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1651  return false;
1652  }
1653  if (ti.transforms()) {
1654  Geospatial::GeoPoint pt{std::vector<double>{lon, lat}};
1655  if (!pt.transform(ti)) {
1656  return false;
1657  }
1658  pt.getColumns(coords);
1659  return true;
1660  }
1661  coords.push_back(lon);
1662  coords.push_back(lat);
1663  return true;
1664 }
void getColumns(std::vector< double > &coords) const
Definition: Types.cpp:567
bool transforms() const
Definition: sqltypes.h:635

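A minimal call sketch, assuming ti is the SQLTypeInfo of a geo POINT column (for example GEOMETRY(POINT, 4326)) taken from its ColumnDescriptor:

    std::vector<double> coords;
    if (!import_export::importGeoFromLonLat(-122.41, 37.77, coords, ti)) {
      throw std::runtime_error("invalid lon/lat");
    }
    // coords now holds the point, transformed to the column's output SRID when ti.transforms().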

ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti )

Definition at line 409 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::TypedImportBuffer::addDefaultValues(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

409  {
410  SQLTypeInfo elem_ti = ti.get_elem_type();
411  auto len = ti.get_size();
412 
413  if (len > 0) {
414  // Compose a NULL fixlen array
415  int8_t* buf = (int8_t*)checked_malloc(len);
416  // First scalar is a NULL_ARRAY sentinel
417  Datum d = NullArrayDatum(elem_ti);
418  int8_t* p = append_datum(buf, d, elem_ti);
419  CHECK(p);
420  // Rest is filled with normal NULL sentinels
421  Datum d0 = NullDatum(elem_ti);
422  while ((p - buf) < len) {
423  p = append_datum(p, d0, elem_ti);
424  CHECK(p);
425  }
426  CHECK((p - buf) == len);
427  return ArrayDatum(len, buf, true);
428  }
429  // NULL varlen array
430  return ArrayDatum(0, NULL, true);
431 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:518
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:228
Datum NullDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:268
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
#define CHECK(condition)
Definition: Logger.h:222
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:311
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:981

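A minimal sketch, assuming array_ti is the SQLTypeInfo of the target array column; for fixed-length arrays the result is a fully materialized buffer whose first element is the NULL_ARRAY sentinel:

    ArrayDatum null_value = import_export::NullArray(array_ti);
    // null_value.is_null == true; for fixed-length arrays null_value.length == array_ti.get_size().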

Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti )

Definition at line 311 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_array_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

311  {
312  Datum d;
313  const auto type = get_type_for_datum(ti);
314  switch (type) {
315  case kBOOLEAN:
316  d.boolval = inline_fixed_encoding_null_array_val(ti);
317  break;
318  case kBIGINT:
319  d.bigintval = inline_fixed_encoding_null_array_val(ti);
320  break;
321  case kINT:
322  d.intval = inline_fixed_encoding_null_array_val(ti);
323  break;
324  case kSMALLINT:
325  d.smallintval = inline_fixed_encoding_null_array_val(ti);
326  break;
327  case kTINYINT:
328  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
329  break;
330  case kFLOAT:
331  d.floatval = NULL_ARRAY_FLOAT;
332  break;
333  case kDOUBLE:
334  d.doubleval = NULL_ARRAY_DOUBLE;
335  break;
336  case kTIME:
337  case kTIMESTAMP:
338  case kDATE:
339  d.bigintval = inline_fixed_encoding_null_array_val(ti);
340  break;
341  case kPOINT:
342  case kMULTIPOINT:
343  case kLINESTRING:
344  case kMULTILINESTRING:
345  case kPOLYGON:
346  case kMULTIPOLYGON:
347  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
348  default:
349  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
350  }
351  return d;
352 }
int8_t tinyintval
Definition: sqltypes.h:232
Definition: sqltypes.h:63
int8_t boolval
Definition: sqltypes.h:231
int32_t intval
Definition: sqltypes.h:234
float floatval
Definition: sqltypes.h:236
int64_t bigintval
Definition: sqltypes.h:235
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: sqltypes.h:233
Definition: sqltypes.h:67
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:59
SQLTypes get_type_for_datum(const SQLTypeInfo &ti)
Definition: Importer.cpp:255
double doubleval
Definition: sqltypes.h:237


Datum import_export::NullDatum ( SQLTypeInfo &  ti )

Definition at line 268 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

268  {
269  Datum d;
270  const auto type = get_type_for_datum(ti);
271  switch (type) {
272  case kBOOLEAN:
273  d.boolval = inline_fixed_encoding_null_val(ti);
274  break;
275  case kBIGINT:
276  d.bigintval = inline_fixed_encoding_null_val(ti);
277  break;
278  case kINT:
279  d.intval = inline_fixed_encoding_null_val(ti);
280  break;
281  case kSMALLINT:
282  d.smallintval = inline_fixed_encoding_null_val(ti);
283  break;
284  case kTINYINT:
285  d.tinyintval = inline_fixed_encoding_null_val(ti);
286  break;
287  case kFLOAT:
288  d.floatval = NULL_FLOAT;
289  break;
290  case kDOUBLE:
291  d.doubleval = NULL_DOUBLE;
292  break;
293  case kTIME:
294  case kTIMESTAMP:
295  case kDATE:
296  d.bigintval = inline_fixed_encoding_null_val(ti);
297  break;
298  case kPOINT:
299  case kMULTIPOINT:
300  case kLINESTRING:
301  case kMULTILINESTRING:
302  case kPOLYGON:
303  case kMULTIPOLYGON:
304  throw std::runtime_error("Internal error: geometry type in NullDatum.");
305  default:
306  throw std::runtime_error("Internal error: invalid type in NullDatum.");
307  }
308  return d;
309 }
int8_t tinyintval
Definition: sqltypes.h:232
#define NULL_DOUBLE
Definition: sqltypes.h:63
#define NULL_FLOAT
int8_t boolval
Definition: sqltypes.h:231
int32_t intval
Definition: sqltypes.h:234
float floatval
Definition: sqltypes.h:236
int64_t bigintval
Definition: sqltypes.h:235
int16_t smallintval
Definition: sqltypes.h:233
Definition: sqltypes.h:67
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:59
SQLTypes get_type_for_datum(const SQLTypeInfo &ti)
Definition: Importer.cpp:255
double doubleval
Definition: sqltypes.h:237

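A minimal sketch, assuming elem_ti (a scalar SQLTypeInfo such as a nullable INT) is already in scope; the function throws for geometry and other unsupported types:

    Datum d = import_export::NullDatum(elem_ti);
    // For kINT, d.intval now holds the inline fixed-encoding NULL sentinel.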

size_t import_export::num_import_threads ( const int32_t  copy_params_threads)
inline

Definition at line 31 of file thread_count.h.

References g_max_import_threads.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), foreign_storage::get_num_threads(), import_export::Importer::importDelimited(), import_export::Importer::importGDALGeo(), and import_export::Importer::importGDALRaster().

31  {
32  if (copy_params_threads > 0) {
33  return static_cast<size_t>(copy_params_threads);
34  }
35  return std::min(static_cast<size_t>(std::thread::hardware_concurrency()),
36  g_max_import_threads);
37 }
size_t g_max_import_threads
Definition: Importer.cpp:106

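The clamping rule can be restated standalone; g_max_import_threads is the global declared in Importer.cpp, and its default value of 32 is an assumption used here for illustration:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <thread>

    size_t g_max_import_threads = 32;  // assumed default; see Importer.cpp:106

    size_t num_import_threads(const int32_t copy_params_threads) {
      if (copy_params_threads > 0) {
        return static_cast<size_t>(copy_params_threads);  // explicit thread count requested by the COPY options
      }
      return std::min(static_cast<size_t>(std::thread::hardware_concurrency()),
                      g_max_import_threads);
    }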

MetadataColumnInfos import_export::parse_add_metadata_columns ( const std::string &  add_metadata_columns,
const std::string &  file_path 
)

Definition at line 35 of file MetadataColumn.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::ExpressionParser::evalAsString(), IS_INTEGER, join(), kBIGINT, kDATE, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kNULLT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, run_benchmark_import::parser, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_type(), import_export::ExpressionParser::setExpression(), import_export::ExpressionParser::setStringConstant(), ColumnDescriptor::sourceName, split(), strip(), to_lower(), and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptorsGeo(), import_export::Importer::gdalToColumnDescriptorsRaster(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), and import_export::Importer::readMetadataSampleGDAL().

36  {
37  //
38  // each string is "column_name,column_type,expression"
39  //
40  // column_type can be:
41  // tinyint
42  // smallint
43  // int
44  // bigint
45  // float
46  // double
47  // date
48  // time
49  // timestamp
50  // text
51  //
52  // expression can be in terms of:
53  // filename
54  // filedir
55  // filepath
56  // etc.
57  //
58 
59  // anything to do?
60  if (add_metadata_columns.length() == 0u) {
61  return {};
62  }
63 
64  // split by ";"
65  // @TODO(se) is this safe?
66  // probably won't appear in a file name/path or a date/time string
67  std::vector<std::string> add_metadata_column_strings;
68  boost::split(add_metadata_column_strings, add_metadata_columns, boost::is_any_of(";"));
69  if (add_metadata_column_strings.size() == 0u) {
70  return {};
71  }
72 
73  ExpressionParser parser;
74 
75  // known string constants
76  auto const fn = boost::filesystem::path(file_path).filename().string();
77  auto const fd = boost::filesystem::path(file_path).parent_path().string();
78  auto const fp = file_path;
79  parser.setStringConstant("filename", fn);
80  parser.setStringConstant("filedir", fd);
81  parser.setStringConstant("filepath", fp);
82 
83  MetadataColumnInfos metadata_column_infos;
84 
85  // for each requested column...
86  for (auto const& add_metadata_column_string : add_metadata_column_strings) {
87  // strip
88  auto const add_metadata_column = strip(add_metadata_column_string);
89 
90  // tokenize and extract
91  std::vector<std::string> tokens;
92  boost::split(tokens, add_metadata_column, boost::is_any_of(","));
93  if (tokens.size() < 3u) {
94  throw std::runtime_error("Invalid metadata column info '" + add_metadata_column +
95  "' (must be of the form 'name,type,expression')");
96  }
97  auto token_itr = tokens.begin();
98  auto const column_name = strip(*token_itr++);
99  auto const data_type = strip(to_lower(*token_itr++));
100  tokens.erase(tokens.begin(), token_itr);
101  auto const expression = strip(boost::join(tokens, ","));
102 
103  // get column type
104  SQLTypes sql_type{kNULLT};
105  double range_min{0.0}, range_max{0.0};
106  if (data_type == "tinyint") {
107  sql_type = kTINYINT;
108  range_min = static_cast<double>(std::numeric_limits<int8_t>::min());
109  range_max = static_cast<double>(std::numeric_limits<int8_t>::max());
110  } else if (data_type == "smallint") {
111  sql_type = kSMALLINT;
112  range_min = static_cast<double>(std::numeric_limits<int16_t>::min());
113  range_max = static_cast<double>(std::numeric_limits<int16_t>::max());
114  } else if (data_type == "int") {
115  sql_type = kINT;
116  range_min = static_cast<double>(std::numeric_limits<int32_t>::min());
117  range_max = static_cast<double>(std::numeric_limits<int32_t>::max());
118  } else if (data_type == "bigint") {
119  sql_type = kBIGINT;
120  range_min = static_cast<double>(std::numeric_limits<int64_t>::min());
121  range_max = static_cast<double>(std::numeric_limits<int64_t>::max());
122  } else if (data_type == "float") {
123  sql_type = kFLOAT;
124  range_min = static_cast<double>(std::numeric_limits<float>::min());
125  range_max = static_cast<double>(std::numeric_limits<float>::max());
126  } else if (data_type == "double") {
127  sql_type = kDOUBLE;
128  range_min = static_cast<double>(std::numeric_limits<double>::min());
129  range_max = static_cast<double>(std::numeric_limits<double>::max());
130  } else if (data_type == "date") {
131  sql_type = kDATE;
132  } else if (data_type == "time") {
133  sql_type = kTIME;
134  } else if (data_type == "timestamp") {
135  sql_type = kTIMESTAMP;
136  } else if (data_type == "text") {
137  sql_type = kTEXT;
138  } else {
139  throw std::runtime_error("Invalid metadata column data type '" + data_type +
140  "' for column '" + column_name + "'");
141  }
142 
143  // set expression with force cast back to string
144  parser.setExpression("str(" + expression + ")");
145 
146  // evaluate
147  auto value = parser.evalAsString();
148 
149  // validate date/time/timestamp value now
150  // @TODO(se) do we need to provide for non-zero dimension?
151  try {
152  if (sql_type == kDATE) {
153  dateTimeParse<kDATE>(value, 0);
154  } else if (sql_type == kTIME) {
155  dateTimeParse<kTIME>(value, 0);
156  } else if (sql_type == kTIMESTAMP) {
157  dateTimeParse<kTIMESTAMP>(value, 0);
158  }
159  } catch (std::runtime_error& e) {
160  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
161  " value '" + value + "' for column '" + column_name + "'");
162  }
163 
164  // validate int/float/double
165  try {
166  if (IS_INTEGER(sql_type) || sql_type == kFLOAT || sql_type == kDOUBLE) {
167  size_t num_chars{0u};
168  auto const v = static_cast<double>(std::stod(value, &num_chars));
169  if (v < range_min || v > range_max) {
170  throw std::out_of_range(to_string(sql_type));
171  }
172  if (num_chars == 0u) {
173  throw std::invalid_argument("empty value");
174  }
175  }
176  } catch (std::invalid_argument& e) {
177  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
178  " value '" + value + "' for column '" + column_name +
179  "' (" + e.what() + ")");
180  } catch (std::out_of_range& e) {
181  throw std::runtime_error("Out-of-range metadata column " + to_string(sql_type) +
182  " value '" + value + "' for column '" + column_name +
183  "' (" + e.what() + ")");
184  }
185 
186  // build column descriptor
187  ColumnDescriptor cd;
188  cd.columnName = cd.sourceName = column_name;
189  cd.columnType.set_type(sql_type);
190  cd.columnType.set_fixed_size();
191  if (sql_type == kTEXT) {
192  cd.columnType.set_compression(kENCODING_DICT);
193  cd.columnType.set_comp_param(32);
194  }
195 
196  // add to result
197  metadata_column_infos.push_back({std::move(cd), std::move(value)});
198  }
199 
200  // done
201  return metadata_column_infos;
202 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
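
A hedged usage sketch of the option string format documented above; the path and column names are illustrative only:

// Each ';'-separated entry is "name,type,expression"; expressions can reference the
// registered string constants filename, filedir and filepath.
auto infos = import_export::parse_add_metadata_columns(
    "source_file,text,filename;source_dir,text,filedir",
    "/data/batch1/cities.csv");
// Returns one MetadataColumnInfo per entry, each pairing a ready ColumnDescriptor
// (here TEXT with dictionary encoding) with the evaluated string value
// ("cities.csv" and "/data/batch1" respectively).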

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor *  td,
Loader *  loader 
)

Definition at line 6244 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by DBHandler::prepare_loader_generic().

6246  {
6247  CHECK(td);
6248  auto col_descs = loader->get_column_descs();
6249 
6250  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
6251  for (auto cd : col_descs) {
6252  import_buffers.emplace_back(
6253  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
6254  }
6255 
6256  return import_buffers;
6257 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
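
A short sketch of the intended call pattern, assuming a valid TableDescriptor* td and a constructed Loader are already available:

auto import_buffers = import_export::setup_column_loaders(td, loader.get());
// One TypedImportBuffer per column descriptor reported by the loader; dictionary-encoded
// string columns are handed the loader's StringDictionary so generated ids stay consistent.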

ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo &  ti,
const CopyParams &  copy_params 
)

Definition at line 354 of file Importer.cpp.

References append_datum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value(), and import_export::TypedImportBuffer::addDefaultValues().

356  {
357  SQLTypeInfo elem_ti = ti.get_elem_type();
358  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
359  return ArrayDatum(0, NULL, true);
360  }
361  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
362  LOG(WARNING) << "Malformed array: " << s;
363  return ArrayDatum(0, NULL, true);
364  }
365  std::vector<std::string> elem_strs;
366  size_t last = 1;
367  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
368  i = s.find(copy_params.array_delim, last)) {
369  elem_strs.push_back(s.substr(last, i - last));
370  last = i + 1;
371  }
372  if (last + 1 <= s.size()) {
373  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
374  }
375  if (elem_strs.size() == 1) {
376  auto str = elem_strs.front();
377  auto str_trimmed = trim_space(str.c_str(), str.length());
378  if (str_trimmed == "") {
379  elem_strs.clear(); // Empty array
380  }
381  }
382  if (!elem_ti.is_string()) {
383  size_t len = elem_strs.size() * elem_ti.get_size();
384  std::unique_ptr<int8_t, FreeDeleter> buf(
385  reinterpret_cast<int8_t*>(checked_malloc(len)));
386  int8_t* p = buf.get();
387  for (auto& es : elem_strs) {
388  auto e = trim_space(es.c_str(), es.length());
389  bool is_null = (e == copy_params.null_str) || e == "NULL";
390  if (!elem_ti.is_string() && e == "") {
391  is_null = true;
392  }
393  if (elem_ti.is_number() || elem_ti.is_time()) {
394  if (!isdigit(e[0]) && e[0] != '-') {
395  is_null = true;
396  }
397  }
398  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
399  p = append_datum(p, d, elem_ti);
400  CHECK(p);
401  }
402  return ArrayDatum(len, buf.release(), false);
403  }
404  // must not be called for array of strings
405  CHECK(false);
406  return ArrayDatum(0, NULL, true);
407 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
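
A hedged sketch of parsing an array literal, assuming default CopyParams (array_begin '{', array_end '}', array_delim ',') and an existing SQLTypeInfo arr_ti describing an INT array:

import_export::CopyParams copy_params;
ArrayDatum arr = import_export::StringToArray("{1, 2, 3}", arr_ti, copy_params);
// arr now holds three appended INT datums (3 * sizeof(int32_t) bytes); the configured
// null string, "NULL", an empty string, or input without the begin/end markers all
// yield a null ArrayDatum rather than throwing.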

ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo &  ti 
)

Definition at line 507 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

507  {
508  SQLTypeInfo elem_ti = ti.get_elem_type();
509 
510  CHECK(!elem_ti.is_string());
511 
512  if (datum.is_null) {
513  return NullArray(ti);
514  }
515 
516  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
517  int8_t* buf = (int8_t*)checked_malloc(len);
518  int8_t* p = buf;
519  for (auto& e : datum.val.arr_val) {
520  p = append_datum(p, TDatumToDatum(e, elem_ti), elem_ti);
521  CHECK(p);
522  }
523 
524  return ArrayDatum(len, buf, false);
525 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
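
A sketch of converting a Thrift-supplied array value, assuming the generated TDatum layout (val.arr_val, val.int_val) and a SQLTypeInfo arr_ti with a non-string element type such as INT:

TDatum tdatum;
tdatum.is_null = false;
tdatum.val.arr_val.resize(2);
tdatum.val.arr_val[0].val.int_val = 10;
tdatum.val.arr_val[1].val.int_val = 20;
ArrayDatum arr = import_export::TDatumToArrayDatum(tdatum, arr_ti);
// Each element goes through TDatumToDatum(); a TDatum with is_null == true would
// instead be returned as NullArray(arr_ti).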

Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo &  ti 
)

Definition at line 460 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

460  {
461  Datum d;
462  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
463  switch (type) {
464  case kBOOLEAN:
465  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
466  break;
467  case kBIGINT:
468  d.bigintval =
469  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
470  break;
471  case kINT:
472  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
473  break;
474  case kSMALLINT:
475  d.smallintval =
476  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
477  break;
478  case kTINYINT:
479  d.tinyintval =
480  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
481  break;
482  case kFLOAT:
483  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
484  break;
485  case kDOUBLE:
486  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
487  break;
488  case kTIME:
489  case kTIMESTAMP:
490  case kDATE:
491  d.bigintval =
492  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
493  break;
494  case kPOINT:
495  case kMULTIPOINT:
496  case kLINESTRING:
497  case kMULTILINESTRING:
498  case kPOLYGON:
499  case kMULTIPOLYGON:
500  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
501  default:
502  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
503  }
504  return d;
505 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 242 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

242  {
243  size_t i = 0;
244  size_t j = len;
245  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
246  i++;
247  }
248  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
249  j--;
250  }
251  return std::string(field + i, j - i);
252 }

+ Here is the caller graph for this function:
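
A behavioral illustration (the function is static to Importer.cpp, so this is not a public call site): only spaces and carriage returns are stripped, so tabs and other whitespace survive:

std::string field = "  42\r";
// trim_space(field.c_str(), field.size()) returns "42";
// trim_space("\t42", 3) would return "\t42" unchanged, because '\t' is not trimmed.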

template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 3323 of file Importer.cpp.

References heavydb.dtypes::T.

3323  {
3324  try {
3325  boost::lexical_cast<T>(str);
3326  } catch (const boost::bad_lexical_cast& e) {
3327  return false;
3328  }
3329  return true;
3330 }

Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static
constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 34 of file CopyParams.h.

const size_t import_export::kImportRowLimit = 10000
static
constexpr size_t import_export::max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
static

Definition at line 37 of file CopyParams.h.

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static
heavyai::shared_mutex import_export::status_mutex
static