OmniSciDB  c1a53651b2
import_export Namespace Reference

Namespaces

 anonymous_namespace{ExpressionParser.cpp}
 
 anonymous_namespace{ForeignDataImporter.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterCSV.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 anonymous_namespace{RasterImporter.cpp}
 
 delimited_parser
 

Classes

class  AbstractImporter
 
struct  CopyParams
 
class  ExpressionParser
 
class  ForeignDataImporter
 
class  ImportBatchResult
 
struct  GeoImportException
 
class  ColumnNotGeoError
 
struct  BadRowsTracker
 
class  ImporterUtils
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  Importer
 
struct  MetadataColumnInfo
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 
class  GCPTransformer
 
class  RasterImporter
 
class  RenderGroupAnalyzer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< Geospatial::GDAL::FeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 
using MetadataColumnInfos = std::vector< MetadataColumnInfo >
 

Enumerations

enum  ImportHeaderRow { ImportHeaderRow::kAutoDetect, ImportHeaderRow::kNoHeader, ImportHeaderRow::kHasHeader }
 
enum  RasterPointType {
  RasterPointType::kNone, RasterPointType::kAuto, RasterPointType::kSmallInt, RasterPointType::kInt,
  RasterPointType::kFloat, RasterPointType::kDouble, RasterPointType::kPoint
}
 
enum  RasterPointTransform { RasterPointTransform::kNone, RasterPointTransform::kAuto, RasterPointTransform::kFile, RasterPointTransform::kWorld }
 
enum  SourceType {
  SourceType::kUnknown, SourceType::kUnsupported, SourceType::kDelimitedFile, SourceType::kGeoFile,
  SourceType::kRasterFile, SourceType::kParquetFile, SourceType::kOdbc, SourceType::kRegexParsedFile
}
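
These enumerations are typically consumed through CopyParams when configuring an import. A minimal, hedged sketch (the has_header, raster_point_type, and raster_point_transform member names are assumed from CopyParams.h and are not documented on this page):

  import_export::CopyParams copy_params;
  copy_params.source_type = import_export::SourceType::kGeoFile;                    // what kind of source to read
  copy_params.has_header = import_export::ImportHeaderRow::kAutoDetect;             // assumed member name
  copy_params.raster_point_type = import_export::RasterPointType::kAuto;            // assumed member name
  copy_params.raster_point_transform = import_export::RasterPointTransform::kAuto;  // assumed member name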
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer, const Catalog_Namespace::SessionInfo *session_info, Executor *executor)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRCoordinateTransformation *coordinate_transformation, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, const Catalog_Namespace::SessionInfo *session_info, Executor *executor, const MetadataColumnInfos &metadata_column_infos)
 
template<class T >
bool try_cast (const std::string &str)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns (const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
 
std::unique_ptr< AbstractImporter > create_importer (Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
 
MetadataColumnInfos parse_add_metadata_columns (const std::string &add_metadata_columns, const std::string &file_path)
 
size_t num_import_threads (const int32_t copy_params_threads)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static heavyai::shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 
static const size_t kImportRowLimit = 10000
 

Typedef Documentation

using import_export::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 75 of file Importer.h.

using import_export::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 154 of file Importer.cpp.

using import_export::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 152 of file Importer.cpp.

using import_export::FeaturePtrVector = typedef std::vector<Geospatial::GDAL::FeatureUqPtr>

Definition at line 155 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 151 of file Importer.cpp.

using import_export::MetadataColumnInfos = typedef std::vector<MetadataColumnInfo>

Definition at line 36 of file MetadataColumn.h.

Enumeration Type Documentation

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 414 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

414  {
415  const auto& arr = datum.val.arr_val;
416  for (const auto& elem_datum : arr) {
417  string_vec.push_back(elem_datum.val.str_val);
418  }
419 }

+ Here is the caller graph for this function:
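
A minimal usage sketch (the TDatum value is hypothetical; the function simply copies each array element's str_val):

  TDatum tdatum;  // assume tdatum.val.arr_val was populated from a Thrift request
  std::vector<std::string> string_vec;
  import_export::addBinaryStringArray(tdatum, string_vec);
  // string_vec now holds elem.val.str_val for every element of tdatum.val.arr_val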

std::unique_ptr< AbstractImporter > import_export::create_importer ( Catalog_Namespace::Catalog &  catalog,
const TableDescriptor *  td,
const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params 
)

Definition at line 6302 of file Importer.cpp.

References g_enable_fsi_regex_import, g_enable_legacy_delimited_import, kDelimitedFile, kParquetFile, kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by Parser::CopyTableStmt::execute(), and DBHandler::import_table().

6306  {
6307  if (copy_params.source_type == import_export::SourceType::kParquetFile) {
6308 #ifdef ENABLE_IMPORT_PARQUET
6309  if (!g_enable_legacy_parquet_import) {
6310  return std::make_unique<import_export::ForeignDataImporter>(
6311  copy_from_source, copy_params, td);
6312  }
6313 #else
6314  throw std::runtime_error("Parquet not supported!");
6315 #endif
6316  }
6317 
6318  if (copy_params.source_type == import_export::SourceType::kDelimitedFile &&
6319  !g_enable_legacy_delimited_import) {
6320  return std::make_unique<import_export::ForeignDataImporter>(
6321  copy_from_source, copy_params, td);
6322  }
6323 
6324  if (copy_params.source_type == import_export::SourceType::kRegexParsedFile) {
6325  if (g_enable_fsi_regex_import) {
6326  return std::make_unique<import_export::ForeignDataImporter>(
6327  copy_from_source, copy_params, td);
6328  } else {
6329  throw std::runtime_error(
6330  "Regex parsed import only supported using 'fsi-regex-import' flag");
6331  }
6332  }
6333 
6334  return std::make_unique<import_export::Importer>(
6335  catalog, td, copy_from_source, copy_params);
6336 }
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:82
import_export::SourceType source_type
Definition: CopyParams.h:57
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:86

+ Here is the caller graph for this function:
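
A hedged usage sketch of the dispatch above, along the lines of what Parser::CopyTableStmt::execute() does (variable names are illustrative, and it assumes AbstractImporter exposes an import(session_info) entry point):

  import_export::CopyParams copy_params;
  copy_params.source_type = import_export::SourceType::kDelimitedFile;
  auto importer =
      import_export::create_importer(catalog, td, "/path/to/data.csv", copy_params);
  // Depending on source_type and the g_enable_* flags, 'importer' is either a
  // ForeignDataImporter or a legacy Importer; both implement AbstractImporter.
  auto import_status = importer->import(session_info_ptr);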

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::fill_missing_columns ( const Catalog_Namespace::Catalog *  cat,
Fragmenter_Namespace::InsertData &  insert_data 
)

Definition at line 6230 of file Importer.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, ColumnDescriptor::columnId, Fragmenter_Namespace::InsertData::columnIds, ColumnDescriptor::columnName, ColumnDescriptor::columnType, Fragmenter_Namespace::InsertData::data, ColumnDescriptor::default_value, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), import_export::TypedImportBuffer::get_data_block_pointers(), SQLTypeInfo::get_physical_cols(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Geospatial::GeoTypesFactory::getGeoColumns(), Catalog_Namespace::Catalog::getMetadataForDict(), Fragmenter_Namespace::InsertData::is_default, SQLTypeInfo::is_geometry(), IS_STRING, kARRAY, kENCODING_DICT, import_export::Importer::set_geo_physical_import_buffer(), gpu_enabled::sort(), and Fragmenter_Namespace::InsertData::tableId.

Referenced by RelAlgExecutor::executeSimpleInsert(), DBHandler::insert_data(), and Parser::InsertIntoTableAsSelectStmt::populateData().

6232  {
6233  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> defaults_buffers;
6234  if (insert_data.is_default.size() == 0) {
6235  insert_data.is_default.resize(insert_data.columnIds.size(), false);
6236  }
6237  CHECK(insert_data.is_default.size() == insert_data.is_default.size());
6238  auto cds = cat->getAllColumnMetadataForTable(insert_data.tableId, false, false, true);
6239  if (cds.size() == insert_data.columnIds.size()) {
6240  // all columns specified
6241  return defaults_buffers;
6242  }
6243  for (auto cd : cds) {
6244  if (std::find(insert_data.columnIds.begin(),
6245  insert_data.columnIds.end(),
6246  cd->columnId) == insert_data.columnIds.end()) {
6247  StringDictionary* dict = nullptr;
6248  if (cd->columnType.get_type() == kARRAY &&
6249  IS_STRING(cd->columnType.get_subtype()) && !cd->default_value.has_value()) {
6250  throw std::runtime_error("Cannot omit column \"" + cd->columnName +
6251  "\": omitting TEXT arrays is not supported yet");
6252  }
6253  if (cd->columnType.get_compression() == kENCODING_DICT) {
6254  dict = cat->getMetadataForDict(cd->columnType.get_comp_param())->stringDict.get();
6255  }
6256  defaults_buffers.emplace_back(std::make_unique<TypedImportBuffer>(cd, dict));
6257  }
6258  }
6259  // put buffers in order to fill geo sub-columns properly
6260  std::sort(defaults_buffers.begin(),
6261  defaults_buffers.end(),
6262  [](decltype(defaults_buffers[0])& a, decltype(defaults_buffers[0])& b) {
6263  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
6264  });
6265  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6266  auto cd = defaults_buffers[i]->getColumnDesc();
6267  std::string default_value = cd->default_value.value_or("NULL");
6268  defaults_buffers[i]->add_value(
6269  cd, default_value, !cd->default_value.has_value(), import_export::CopyParams());
6270  if (cd->columnType.is_geometry()) {
6271  std::vector<double> coords, bounds;
6272  std::vector<int> ring_sizes, poly_rings;
6273  int render_group = 0;
6274  SQLTypeInfo tinfo{cd->columnType};
6275  CHECK(Geospatial::GeoTypesFactory::getGeoColumns(
6276  default_value, tinfo, coords, bounds, ring_sizes, poly_rings, false));
6277  // set physical columns starting with the following ID
6278  auto next_col = i + 1;
6279  import_export::Importer::set_geo_physical_import_buffer(*cat,
6280  cd,
6281  defaults_buffers,
6282  next_col,
6283  coords,
6284  bounds,
6285  ring_sizes,
6286  poly_rings,
6287  render_group);
6288  // skip physical columns filled with the call above
6289  i += cd->columnType.get_physical_cols();
6290  }
6291  }
6292  auto data = import_export::TypedImportBuffer::get_data_block_pointers(defaults_buffers);
6293  CHECK(data.size() == defaults_buffers.size());
6294  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6295  insert_data.data.push_back(data[i]);
6296  insert_data.columnIds.push_back(defaults_buffers[i]->getColumnDesc()->columnId);
6297  insert_data.is_default.push_back(true);
6298  }
6299  return defaults_buffers;
6300 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
std::vector< bool > is_default
Definition: Fragmenter.h:75
constexpr double a
Definition: Utm.h:32
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:70
static std::vector< DataBlockPtr > get_data_block_pointers(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
Definition: Importer.cpp:3014
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1999
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:73
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2267
#define IS_STRING(T)
Definition: sqltypes.h:299
#define CHECK(condition)
Definition: Logger.h:291
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1627
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:71

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
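
A hedged sketch of the intended call pattern, per the Referenced by list above (the databaseId field name and the surrounding setup are assumptions, not shown on this page):

  Fragmenter_Namespace::InsertData insert_data;
  insert_data.databaseId = catalog.getCurrentDB().dbId;  // assumed field/API names
  insert_data.tableId = td->tableId;
  insert_data.columnIds = provided_column_ids;           // only the columns the caller supplies
  // ... fill insert_data.data for those columns ...
  auto default_buffers = import_export::fill_missing_columns(&catalog, insert_data);
  // insert_data.columnIds / data / is_default now also cover every omitted column,
  // using each column's default value (or NULL). Keep default_buffers alive until
  // the insert completes, since insert_data.data points into those buffers.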

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 5163 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

5164  {
5165  // prepare to gather subdirectories
5166  std::vector<std::string> subdirectories;
5167 
5168  // get entries
5169  char** entries = VSIReadDir(archive_path.c_str());
5170  if (!entries) {
5171  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
5172  return;
5173  }
5174 
5175  // force scope
5176  {
5177  // request clean-up
5178  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
5179 
5180  // check all the entries
5181  int index = 0;
5182  while (true) {
5183  // get next entry, or drop out if there isn't one
5184  char* entry_c = entries[index++];
5185  if (!entry_c) {
5186  break;
5187  }
5188  std::string entry(entry_c);
5189 
5190  // ignore '.' and '..'
5191  if (entry == "." || entry == "..") {
5192  continue;
5193  }
5194 
5195  // build the full path
5196  std::string entry_path = archive_path + std::string("/") + entry;
5197 
5198  // is it a file or a sub-folder
5199  VSIStatBufL sb;
5200  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
5201  if (result < 0) {
5202  break;
5203  }
5204 
5205  if (VSI_ISDIR(sb.st_mode)) {
5206  // a directory that ends with .gdb could be a Geodatabase bundle
5207  // arguably dangerous to decide this purely by name, but any further
5208  // validation would be very complex especially at this scope
5209  if (boost::iends_with(entry_path, ".gdb")) {
5210  // add the directory as if it was a file and don't recurse into it
5211  files.push_back(entry_path);
5212  } else {
5213  // add subdirectory to be recursed into
5214  subdirectories.push_back(entry_path);
5215  }
5216  } else {
5217  // add this file
5218  files.push_back(entry_path);
5219  }
5220  }
5221  }
5222 
5223  // recurse into each subdirectories we found
5224  for (const auto& subdirectory : subdirectories) {
5225  gdalGatherFilesInArchiveRecursive(subdirectory, files);
5226  }
5227 }
#define LOG(tag)
Definition: Logger.h:285
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:5163

+ Here is the caller graph for this function:
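
A minimal sketch (hypothetical archive path) of gathering an archive's contents through GDAL's VSI virtual filesystem, which is how Importer::gdalGetAllFilesInArchive drives this function:

  std::vector<std::string> files;
  import_export::gdalGatherFilesInArchiveRecursive("/vsizip//data/shapes.zip", files);
  // 'files' now lists every regular file found recursively, plus any *.gdb bundle
  // directory, which is added as a single entry and not recursed into.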

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor 
)
static

Definition at line 2000 of file Importer.cpp.

References CHECK, CHECK_LT, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), Catalog_Namespace::SessionInfo::get_session_id(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, is_null(), ImportHelpers::is_null_datum(), kDelimitedFile, kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, import_export::CopyParams::source_type, sv_strip(), gpu_enabled::swap(), import_export::ImportStatus::thread_id, logger::thread_id(), to_string(), import_export::CopyParams::trim_spaces, and UNLIKELY.

Referenced by import_export::Importer::importDelimited().

2010  {
2011  ImportStatus thread_import_status;
2012  int64_t total_get_row_time_us = 0;
2013  int64_t total_str_to_val_time_us = 0;
2014  auto query_session = session_info ? session_info->get_session_id() : "";
2015  CHECK(scratch_buffer);
2016  auto buffer = scratch_buffer.get();
2017  auto load_ms = measure<>::execution([]() {});
2018 
2019  thread_import_status.thread_id = thread_id;
2020 
2021  auto ms = measure<>::execution([&]() {
2022  const CopyParams& copy_params = importer->get_copy_params();
2023  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2024  size_t begin =
2025  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
2026  const char* thread_buf = buffer + begin_pos + begin;
2027  const char* thread_buf_end = buffer + end_pos;
2028  const char* buf_end = buffer + total_size;
2029  bool try_single_thread = false;
2030  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2031  importer->get_import_buffers(thread_id);
2033  int phys_cols = 0;
2034  int point_cols = 0;
2035  for (const auto cd : col_descs) {
2036  const auto& col_ti = cd->columnType;
2037  phys_cols += col_ti.get_physical_cols();
2038  if (cd->columnType.get_type() == kPOINT) {
2039  point_cols++;
2040  }
2041  }
2042  auto num_cols = col_descs.size() - phys_cols;
2043  for (const auto& p : import_buffers) {
2044  p->clear();
2045  }
2046  std::vector<std::string_view> row;
2047  size_t row_index_plus_one = 0;
2048  for (const char* p = thread_buf; p < thread_buf_end; p++) {
2049  row.clear();
2050  std::vector<std::unique_ptr<char[]>>
2051  tmp_buffers; // holds string w/ removed escape chars, etc
2052  if (DEBUG_TIMING) {
2055  thread_buf_end,
2056  buf_end,
2057  copy_params,
2058  importer->get_is_array(),
2059  row,
2060  tmp_buffers,
2061  try_single_thread,
2062  true);
2063  });
2064  total_get_row_time_us += us;
2065  } else {
2067  thread_buf_end,
2068  buf_end,
2069  copy_params,
2070  importer->get_is_array(),
2071  row,
2072  tmp_buffers,
2073  try_single_thread,
2074  true);
2075  }
2076  row_index_plus_one++;
2077  // Each POINT could consume two separate coords instead of a single WKT
2078  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
2079  thread_import_status.rows_rejected++;
2080  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
2081  << row.size() << "): " << shared::printContainer(row);
2082  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2083  break;
2084  }
2085  continue;
2086  }
2087 
2088  //
2089  // lambda for importing a row (perhaps multiple times if exploding a collection)
2090  //
2091 
2092  auto execute_import_row = [&](OGRGeometry* import_geometry) {
2093  size_t import_idx = 0;
2094  size_t col_idx = 0;
2095  try {
2096  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2097  auto cd = *cd_it;
2098  const auto& col_ti = cd->columnType;
2099 
2100  bool is_null =
2101  ImportHelpers::is_null_datum(row[import_idx], copy_params.null_str);
2102  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
2103  // So initially nullness may be missed and not passed to add_value,
2104  // which then might also check and still decide it's actually a NULL, e.g.
2105  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
2106  // So "NULL" is not recognized as NULL but then it's not recognized as
2107  // a valid kINT, so it's a NULL after all.
2108  // Checking for "NULL" here too, as a widely accepted notation for NULL.
2109 
2110  // Treating empty as NULL
2111  if (!cd->columnType.is_string() && row[import_idx].empty()) {
2112  is_null = true;
2113  }
2114  if (!cd->columnType.is_string() && !copy_params.trim_spaces) {
2115  // everything but strings should be always trimmed
2116  row[import_idx] = sv_strip(row[import_idx]);
2117  }
2118 
2119  if (col_ti.get_physical_cols() == 0) {
2120  // not geo
2121 
2122  import_buffers[col_idx]->add_value(
2123  cd, row[import_idx], is_null, copy_params);
2124 
2125  // next
2126  ++import_idx;
2127  ++col_idx;
2128  } else {
2129  // geo
2130 
2131  // store null string in the base column
2132  import_buffers[col_idx]->add_value(
2133  cd, copy_params.null_str, true, copy_params);
2134 
2135  // WKT from string we're not storing
2136  auto const& geo_string = row[import_idx];
2137 
2138  // next
2139  ++import_idx;
2140  ++col_idx;
2141 
2142  SQLTypes col_type = col_ti.get_type();
2143  CHECK(IS_GEO(col_type));
2144 
2145  std::vector<double> coords;
2146  std::vector<double> bounds;
2147  std::vector<int> ring_sizes;
2148  std::vector<int> poly_rings;
2149  int render_group = 0;
2150 
2151  // if this is a POINT column, and the field is not null, and
2152  // looks like a scalar numeric value (and not a hex blob)
2153  // attempt to import two columns as lon/lat (or lat/lon)
2154  if (col_type == kPOINT && !is_null && geo_string.size() > 0 &&
2155  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
2156  geo_string[0] == '-') &&
2157  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
2158  double lon = std::atof(std::string(geo_string).c_str());
2159  double lat = NAN;
2160  auto lat_str = row[import_idx];
2161  ++import_idx;
2162  if (lat_str.size() > 0 &&
2163  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
2164  lat = std::atof(std::string(lat_str).c_str());
2165  }
2166  // Swap coordinates if this table uses a reverse order: lat/lon
2167  if (!copy_params.lonlat) {
2168  std::swap(lat, lon);
2169  }
2170  // TODO: should check if POINT column should have been declared with
2171  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
2172  // throw std::runtime_error("POINT column " + cd->columnName + " is
2173  // not WGS84, cannot insert lon/lat");
2174  // }
2175  SQLTypeInfo import_ti{col_ti};
2176  if (copy_params.source_type ==
2178  import_ti.get_output_srid() == 4326) {
2179  auto srid0 = copy_params.source_srid;
2180  if (srid0 > 0) {
2181  // srid0 -> 4326 transform is requested on import
2182  import_ti.set_input_srid(srid0);
2183  }
2184  }
2185  if (!importGeoFromLonLat(lon, lat, coords, import_ti)) {
2186  throw std::runtime_error(
2187  "Cannot read lon/lat to insert into POINT column " +
2188  cd->columnName);
2189  }
2190  } else {
2191  // import it
2192  SQLTypeInfo import_ti{col_ti};
2193  if (copy_params.source_type ==
2195  import_ti.get_output_srid() == 4326) {
2196  auto srid0 = copy_params.source_srid;
2197  if (srid0 > 0) {
2198  // srid0 -> 4326 transform is requested on import
2199  import_ti.set_input_srid(srid0);
2200  }
2201  }
2202  if (is_null) {
2203  if (col_ti.get_notnull()) {
2204  throw std::runtime_error("NULL geo for column " + cd->columnName);
2205  }
2207  import_ti,
2208  coords,
2209  bounds,
2210  ring_sizes,
2211  poly_rings,
2213  } else {
2214  if (import_geometry) {
2215  // geometry already exploded
2217  import_geometry,
2218  import_ti,
2219  coords,
2220  bounds,
2221  ring_sizes,
2222  poly_rings,
2224  std::string msg =
2225  "Failed to extract valid geometry from exploded row " +
2226  std::to_string(first_row_index_this_buffer +
2227  row_index_plus_one) +
2228  " for column " + cd->columnName;
2229  throw std::runtime_error(msg);
2230  }
2231  } else {
2232  // extract geometry directly from WKT
2234  std::string(geo_string),
2235  import_ti,
2236  coords,
2237  bounds,
2238  ring_sizes,
2239  poly_rings,
2241  std::string msg = "Failed to extract valid geometry from row " +
2242  std::to_string(first_row_index_this_buffer +
2243  row_index_plus_one) +
2244  " for column " + cd->columnName;
2245  throw std::runtime_error(msg);
2246  }
2247  }
2248 
2249  // validate types
2250  if (col_type != import_ti.get_type()) {
2252  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2253  col_type == SQLTypes::kMULTIPOLYGON)) {
2254  throw std::runtime_error(
2255  "Imported geometry doesn't match the type of column " +
2256  cd->columnName);
2257  }
2258  }
2259  }
2260 
2261  // assign render group?
2262  if (columnIdToRenderGroupAnalyzerMap.size()) {
2263  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2264  if (ring_sizes.size()) {
2265  // get a suitable render group for these poly coords
2266  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2267  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2268  render_group =
2269  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2270  } else {
2271  // empty poly
2272  render_group = -1;
2273  }
2274  }
2275  }
2276  }
2277 
2278  // import extracted geo
2279  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2280  cd,
2281  import_buffers,
2282  col_idx,
2283  coords,
2284  bounds,
2285  ring_sizes,
2286  poly_rings,
2287  render_group);
2288 
2289  // skip remaining physical columns
2290  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2291  ++cd_it;
2292  }
2293  }
2294  }
2295  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2296  check_session_interrupted(query_session, executor))) {
2297  thread_import_status.load_failed = true;
2298  thread_import_status.load_msg =
2299  "Table load was cancelled via Query Interrupt";
2300  return;
2301  }
2302  thread_import_status.rows_completed++;
2303  } catch (const std::exception& e) {
2304  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2305  import_buffers[col_idx_to_pop]->pop_value();
2306  }
2307  thread_import_status.rows_rejected++;
2308  LOG(ERROR) << "Input exception thrown: " << e.what()
2309  << ". Row discarded. Data: " << shared::printContainer(row);
2310  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2311  LOG(ERROR) << "Load was cancelled due to max reject rows being reached";
2312  thread_import_status.load_failed = true;
2313  thread_import_status.load_msg =
2314  "Load was cancelled due to max reject rows being reached";
2315  }
2316  }
2317  }; // End of lambda
2318 
2319  if (copy_params.geo_explode_collections) {
2320  // explode and import
2321  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2322  explode_collections_step1(col_descs);
2323  // pull out the collection WKT or WKB hex
2324  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2325  auto const& collection_geo_string = row[collection_col_idx];
2326  // convert to OGR
2327  OGRGeometry* ogr_geometry = nullptr;
2328  ScopeGuard destroy_ogr_geometry = [&] {
2329  if (ogr_geometry) {
2330  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2331  }
2332  };
2334  std::string(collection_geo_string));
2335  // do the explode and import
2336  us = explode_collections_step2(ogr_geometry,
2337  collection_child_type,
2338  collection_col_name,
2339  first_row_index_this_buffer + row_index_plus_one,
2340  execute_import_row);
2341  } else {
2342  // import non-collection row just once
2344  [&] { execute_import_row(nullptr); });
2345  }
2346 
2347  if (thread_import_status.load_failed) {
2348  break;
2349  }
2350  } // end thread
2351  total_str_to_val_time_us += us;
2352  if (!thread_import_status.load_failed && thread_import_status.rows_completed > 0) {
2353  load_ms = measure<>::execution([&]() {
2354  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2355  });
2356  }
2357  }); // end execution
2358 
2359  if (DEBUG_TIMING && !thread_import_status.load_failed &&
2360  thread_import_status.rows_completed > 0) {
2361  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2362  << thread_import_status.rows_completed << " rows inserted in "
2363  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2364  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2365  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2366  << "sec" << std::endl;
2367  }
2368 
2369  return thread_import_status;
2370 }
bool is_null_datum(const DatumStringType &datum, const std::string &null_indicator)
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:125
SQLTypes
Definition: sqltypes.h:55
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
std::string_view sv_strip(std::string_view str)
return trimmed string_view
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1910
#define LOG(tag)
Definition: Logger.h:285
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:157
void set_input_srid(int d)
Definition: sqltypes.h:497
CONSTEXPR DEVICE bool is_null(const T &value)
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread, bool filter_empty_lines)
Parses the first row in the given buffer and inserts fields into given vector.
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
#define CHECK_LT(x, y)
Definition: Logger.h:303
static OGRGeometry * createOGRGeometry(const std::string &wkt_or_wkb_hex)
Definition: Types.cpp:1044
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1876
ThreadId thread_id()
Definition: Logger.cpp:871
#define CHECK(condition)
Definition: Logger.h:291
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
Definition: Importer.cpp:1607
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:107
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
#define IS_GEO(T)
Definition: sqltypes.h:300

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRCoordinateTransformation *  coordinate_transformation,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor,
const MetadataColumnInfos &  metadata_column_infos 
)
static

Definition at line 2378 of file Importer.cpp.

References CHECK, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::compress_coords(), DEBUG_TIMING, Executor::ERR_INTERRUPTED, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Catalog_Namespace::SessionInfo::get_session_id(), QueryExecutionError::getErrorCode(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), logger::INFO, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, generate_TableFunctionsFactory_init::separator, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), TIMER_STOP, to_string(), and UNLIKELY.

Referenced by import_export::Importer::importGDALGeo().

2390  {
2391  ImportStatus thread_import_status;
2392  const CopyParams& copy_params = importer->get_copy_params();
2393  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2394  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2395  importer->get_import_buffers(thread_id);
2396  auto query_session = session_info ? session_info->get_session_id() : "";
2397  for (const auto& p : import_buffers) {
2398  p->clear();
2399  }
2400 
2401  auto convert_timer = timer_start();
2402 
2403  // for all the features in this chunk...
2404  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2405  // ignore null features
2406  if (!features[iFeature]) {
2407  continue;
2408  }
2409 
2410  // get this feature's geometry
2411  // for geodatabase, we need to consider features with no geometry
2412  // as we still want to create a table, even if it has no geo column
2413  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2414  if (pGeometry && coordinate_transformation) {
2415  pGeometry->transform(coordinate_transformation);
2416  }
2417 
2418  //
2419  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2420  //
2421 
2422  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2423  size_t col_idx = 0;
2424  try {
2425  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2426  check_session_interrupted(query_session, executor))) {
2427  thread_import_status.load_failed = true;
2428  thread_import_status.load_msg = "Table load was cancelled via Query Interrupt";
2430  }
2431 
2432  uint32_t field_column_count{0u};
2433  uint32_t metadata_column_count{0u};
2434 
2435  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2436  auto cd = *cd_it;
2437 
2438  // is this a geo column?
2439  const auto& col_ti = cd->columnType;
2440  if (col_ti.is_geometry()) {
2441  // Note that this assumes there is one and only one geo column in the
2442  // table. Currently, the importer only supports reading a single
2443  // geospatial feature from an input shapefile / geojson file, but this
2444  // code will need to be modified if that changes
2445  SQLTypes col_type = col_ti.get_type();
2446 
2447  // store null string in the base column
2448  import_buffers[col_idx]->add_value(
2449  cd, copy_params.null_str, true, copy_params);
2450  ++col_idx;
2451 
2452  // the data we now need to extract for the other columns
2453  std::vector<double> coords;
2454  std::vector<double> bounds;
2455  std::vector<int> ring_sizes;
2456  std::vector<int> poly_rings;
2457  int render_group = 0;
2458 
2459  // extract it
2460  SQLTypeInfo import_ti{col_ti};
2461  bool is_null_geo = !import_geometry;
2462  if (is_null_geo) {
2463  if (col_ti.get_notnull()) {
2464  throw std::runtime_error("NULL geo for column " + cd->columnName);
2465  }
2467  import_ti,
2468  coords,
2469  bounds,
2470  ring_sizes,
2471  poly_rings,
2473  } else {
2475  import_geometry,
2476  import_ti,
2477  coords,
2478  bounds,
2479  ring_sizes,
2480  poly_rings,
2482  std::string msg = "Failed to extract valid geometry from feature " +
2483  std::to_string(firstFeature + iFeature + 1) +
2484  " for column " + cd->columnName;
2485  throw std::runtime_error(msg);
2486  }
2487 
2488  // validate types
2489  if (col_type != import_ti.get_type()) {
2491  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2492  col_type == SQLTypes::kMULTIPOLYGON)) {
2493  throw std::runtime_error(
2494  "Imported geometry doesn't match the type of column " +
2495  cd->columnName);
2496  }
2497  }
2498  }
2499 
2500  if (columnIdToRenderGroupAnalyzerMap.size()) {
2501  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2502  if (ring_sizes.size()) {
2503  // get a suitable render group for these poly coords
2504  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2505  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2506  render_group =
2507  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2508  } else {
2509  // empty poly
2510  render_group = -1;
2511  }
2512  }
2513  }
2514 
2515  // create coords array value and add it to the physical column
2516  ++cd_it;
2517  auto cd_coords = *cd_it;
2518  std::vector<TDatum> td_coord_data;
2519  if (!is_null_geo) {
2520  std::vector<uint8_t> compressed_coords =
2521  Geospatial::compress_coords(coords, col_ti);
2522  for (auto cc : compressed_coords) {
2523  TDatum td_byte;
2524  td_byte.val.int_val = cc;
2525  td_coord_data.push_back(td_byte);
2526  }
2527  }
2528  TDatum tdd_coords;
2529  tdd_coords.val.arr_val = td_coord_data;
2530  tdd_coords.is_null = is_null_geo;
2531  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2532  ++col_idx;
2533 
2534  if (col_type == kMULTILINESTRING || col_type == kPOLYGON ||
2535  col_type == kMULTIPOLYGON) {
2536  // Create [linest]ring_sizes array value and add it to the physical column
2537  ++cd_it;
2538  auto cd_ring_sizes = *cd_it;
2539  std::vector<TDatum> td_ring_sizes;
2540  if (!is_null_geo) {
2541  for (auto ring_size : ring_sizes) {
2542  TDatum td_ring_size;
2543  td_ring_size.val.int_val = ring_size;
2544  td_ring_sizes.push_back(td_ring_size);
2545  }
2546  }
2547  TDatum tdd_ring_sizes;
2548  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2549  tdd_ring_sizes.is_null = is_null_geo;
2550  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2551  ++col_idx;
2552  }
2553 
2554  if (col_type == kMULTIPOLYGON) {
2555  // Create poly_rings array value and add it to the physical column
2556  ++cd_it;
2557  auto cd_poly_rings = *cd_it;
2558  std::vector<TDatum> td_poly_rings;
2559  if (!is_null_geo) {
2560  for (auto num_rings : poly_rings) {
2561  TDatum td_num_rings;
2562  td_num_rings.val.int_val = num_rings;
2563  td_poly_rings.push_back(td_num_rings);
2564  }
2565  }
2566  TDatum tdd_poly_rings;
2567  tdd_poly_rings.val.arr_val = td_poly_rings;
2568  tdd_poly_rings.is_null = is_null_geo;
2569  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2570  ++col_idx;
2571  }
2572 
2573  if (col_type == kLINESTRING || col_type == kMULTILINESTRING ||
2574  col_type == kPOLYGON || col_type == kMULTIPOLYGON ||
2575  col_type == kMULTIPOINT) {
2576  // Create bounds array value and add it to the physical column
2577  ++cd_it;
2578  auto cd_bounds = *cd_it;
2579  std::vector<TDatum> td_bounds_data;
2580  if (!is_null_geo) {
2581  for (auto b : bounds) {
2582  TDatum td_double;
2583  td_double.val.real_val = b;
2584  td_bounds_data.push_back(td_double);
2585  }
2586  }
2587  TDatum tdd_bounds;
2588  tdd_bounds.val.arr_val = td_bounds_data;
2589  tdd_bounds.is_null = is_null_geo;
2590  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2591  ++col_idx;
2592  }
2593 
2594  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2595  // Create render_group value and add it to the physical column
2596  ++cd_it;
2597  auto cd_render_group = *cd_it;
2598  TDatum td_render_group;
2599  td_render_group.val.int_val = render_group;
2600  td_render_group.is_null = is_null_geo;
2601  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2602  ++col_idx;
2603  }
2604  } else if (field_column_count < fieldNameToIndexMap.size()) {
2605  //
2606  // field column
2607  //
2608  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2609  CHECK(cit != columnNameToSourceNameMap.end());
2610  auto const& field_name = cit->second;
2611 
2612  auto const fit = fieldNameToIndexMap.find(field_name);
2613  if (fit == fieldNameToIndexMap.end()) {
2614  throw ColumnNotGeoError(cd->columnName);
2615  }
2616 
2617  auto const& field_index = fit->second;
2618  CHECK(field_index < fieldNameToIndexMap.size());
2619 
2620  auto const& feature = features[iFeature];
2621 
2622  auto field_defn = feature->GetFieldDefnRef(field_index);
2623  CHECK(field_defn);
2624 
2625  // OGRFeature::GetFieldAsString() can only return 80 characters
2626  // so for array columns, we are obliged to fetch the actual values
2627  // and construct the concatenated string ourselves
2628 
2629  std::string value_string;
2630  int array_index = 0, array_size = 0;
2631 
2632  auto stringify_numeric_list = [&](auto* values) {
2633  value_string = "{";
2634  while (array_index < array_size) {
2635  auto separator = (array_index > 0) ? "," : "";
2636  value_string += separator + std::to_string(values[array_index]);
2637  array_index++;
2638  }
2639  value_string += "}";
2640  };
2641 
2642  auto field_type = field_defn->GetType();
2643  switch (field_type) {
2644  case OFTInteger:
2645  case OFTInteger64:
2646  case OFTReal:
2647  case OFTString:
2648  case OFTBinary:
2649  case OFTDate:
2650  case OFTTime:
2651  case OFTDateTime: {
2652  value_string = feature->GetFieldAsString(field_index);
2653  } break;
2654  case OFTIntegerList: {
2655  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2656  stringify_numeric_list(values);
2657  } break;
2658  case OFTInteger64List: {
2659  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2660  stringify_numeric_list(values);
2661  } break;
2662  case OFTRealList: {
2663  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2664  stringify_numeric_list(values);
2665  } break;
2666  case OFTStringList: {
2667  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2668  value_string = "{";
2669  if (array_of_strings) {
2670  while (auto* this_string = array_of_strings[array_index]) {
2671  auto separator = (array_index > 0) ? "," : "";
2672  value_string += separator + std::string(this_string);
2673  array_index++;
2674  }
2675  }
2676  value_string += "}";
2677  } break;
2678  default:
2679  throw std::runtime_error("Unsupported geo file field type (" +
2680  std::to_string(static_cast<int>(field_type)) +
2681  ")");
2682  }
2683 
2684  import_buffers[col_idx]->add_value(cd, value_string, false, copy_params);
2685  ++col_idx;
2686  field_column_count++;
2687  } else if (metadata_column_count < metadata_column_infos.size()) {
2688  //
2689  // metadata column
2690  //
2691  auto const& mci = metadata_column_infos[metadata_column_count];
2692  if (mci.column_descriptor.columnName != cd->columnName) {
2693  throw std::runtime_error("Metadata column name mismatch");
2694  }
2695  import_buffers[col_idx]->add_value(cd, mci.value, false, copy_params);
2696  ++col_idx;
2697  metadata_column_count++;
2698  } else {
2699  throw std::runtime_error("Column count mismatch");
2700  }
2701  }
2702  thread_import_status.rows_completed++;
2703  } catch (QueryExecutionError& e) {
2705  throw e;
2706  }
2707  } catch (ColumnNotGeoError& e) {
2708  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import.";
2709  throw std::runtime_error(e.what());
2710  } catch (const std::exception& e) {
2711  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2712  import_buffers[col_idx_to_pop]->pop_value();
2713  }
2714  thread_import_status.rows_rejected++;
2715  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2716  }
2717  };
2718 
2719  if (pGeometry && copy_params.geo_explode_collections) {
2720  // explode and import
2721  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2722  explode_collections_step1(col_descs);
2723  explode_collections_step2(pGeometry,
2724  collection_child_type,
2725  collection_col_name,
2726  firstFeature + iFeature + 1,
2727  execute_import_feature);
2728  } else {
2729  // import non-collection or null feature just once
2730  execute_import_feature(pGeometry);
2731  }
2732  } // end features
2733 
2734  float convert_s = TIMER_STOP(convert_timer);
2735 
2736  float load_s = 0.0f;
2737  if (thread_import_status.rows_completed > 0) {
2738  auto load_timer = timer_start();
2739  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2740  load_s = TIMER_STOP(load_timer);
2741  }
2742 
2743  if (DEBUG_TIMING && thread_import_status.rows_completed > 0) {
2744  LOG(INFO) << "DEBUG: Process " << convert_s << "s";
2745  LOG(INFO) << "DEBUG: Load " << load_s << "s";
2746  LOG(INFO) << "DEBUG: Total " << (convert_s + load_s) << "s";
2747  }
2748 
2749  thread_import_status.thread_id = thread_id;
2750 
2751  return thread_import_status;
2752 }
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:125
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1436
SQLTypes
Definition: sqltypes.h:55
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1910
#define LOG(tag)
Definition: Logger.h:285
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:157
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define TIMER_STOP(t)
Definition: Importer.cpp:101
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1876
ThreadId thread_id()
Definition: Logger.cpp:871
#define CHECK(condition)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords,
SQLTypeInfo &  ti 
)

Definition at line 1607 of file Importer.cpp.

References Geospatial::GeoPoint::getColumns(), and SQLTypeInfo::transforms().

Referenced by import_thread_delimited().

1610  {
1611  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1612  return false;
1613  }
1614  if (ti.transforms()) {
1615  Geospatial::GeoPoint pt{std::vector<double>{lon, lat}};
1616  if (!pt.transform(ti)) {
1617  return false;
1618  }
1619  pt.getColumns(coords);
1620  return true;
1621  }
1622  coords.push_back(lon);
1623  coords.push_back(lat);
1624  return true;
1625 }
void getColumns(std::vector< double > &coords) const
Definition: Types.cpp:567
bool transforms() const
Definition: sqltypes.h:615

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
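
A minimal sketch (hypothetical coordinates; point_ti is assumed to be the SQLTypeInfo of the target POINT column):

  std::vector<double> coords;
  if (!import_export::importGeoFromLonLat(-122.41, 37.77, coords, point_ti)) {
    // NaN/inf input or a failed SRID transform: treat the value as a bad row
  }
  // On success, coords holds the (possibly transformed) lon/lat pair, ready to be
  // handed to Importer::set_geo_physical_import_buffer().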

ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti)

Definition at line 370 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::TypedImportBuffer::addDefaultValues(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

370  {
371  SQLTypeInfo elem_ti = ti.get_elem_type();
372  auto len = ti.get_size();
373 
374  if (len > 0) {
375  // Compose a NULL fixlen array
376  int8_t* buf = (int8_t*)checked_malloc(len);
377  // First scalar is a NULL_ARRAY sentinel
378  Datum d = NullArrayDatum(elem_ti);
379  int8_t* p = append_datum(buf, d, elem_ti);
380  CHECK(p);
381  // Rest is filled with normal NULL sentinels
382  Datum d0 = NullDatum(elem_ti);
383  while ((p - buf) < len) {
384  p = append_datum(p, d0, elem_ti);
385  CHECK(p);
386  }
387  CHECK((p - buf) == len);
388  return ArrayDatum(len, buf, true);
389  }
390  // NULL varlen array
391  return ArrayDatum(0, NULL, true);
392 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:578
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:219
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
Datum NullDatum(const SQLTypeInfo &ti)
Definition: Datum.cpp:286
#define CHECK(condition)
Definition: Logger.h:291
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:272
Definition: Datum.h:67
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:963

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
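
A short sketch of what the function produces (array_ti is assumed to be an ARRAY column's SQLTypeInfo):

  ArrayDatum null_array = import_export::NullArray(array_ti);
  // Fixed-length array type: the buffer is array_ti.get_size() bytes, the first
  // element is the NULL_ARRAY sentinel and the rest are ordinary NULL sentinels.
  // Variable-length array type: a zero-length datum. The null flag is set either way.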

Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti)

Definition at line 272 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_array_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

272  {
273  Datum d;
274  const auto type = get_type_for_datum(ti);
275  switch (type) {
276  case kBOOLEAN:
277  d.boolval = inline_fixed_encoding_null_array_val(ti);
278  break;
279  case kBIGINT:
280  d.bigintval = inline_fixed_encoding_null_array_val(ti);
281  break;
282  case kINT:
283  d.intval = inline_fixed_encoding_null_array_val(ti);
284  break;
285  case kSMALLINT:
286  d.smallintval = inline_fixed_encoding_null_array_val(ti);
287  break;
288  case kTINYINT:
289  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
290  break;
291  case kFLOAT:
292  d.floatval = NULL_ARRAY_FLOAT;
293  break;
294  case kDOUBLE:
295  d.doubleval = NULL_ARRAY_DOUBLE;
296  break;
297  case kTIME:
298  case kTIMESTAMP:
299  case kDATE:
300  d.bigintval = inline_fixed_encoding_null_array_val(ti);
301  break;
302  case kPOINT:
303  case kMULTIPOINT:
304  case kLINESTRING:
305  case kMULTILINESTRING:
306  case kPOLYGON:
307  case kMULTIPOLYGON:
308  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
309  default:
310  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
311  }
312  return d;
313 }
int8_t tinyintval
Definition: Datum.h:69
Definition: sqltypes.h:66
int8_t boolval
Definition: Datum.h:68
int32_t intval
Definition: Datum.h:71
float floatval
Definition: Datum.h:73
int64_t bigintval
Definition: Datum.h:72
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: Datum.h:70
Definition: sqltypes.h:70
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:62
SQLTypes get_type_for_datum(const SQLTypeInfo &ti)
Definition: Importer.cpp:259
Definition: Datum.h:67
double doubleval
Definition: Datum.h:74

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t import_export::num_import_threads ( const int32_t  copy_params_threads)
inline

Definition at line 31 of file thread_count.h.

References g_max_import_threads.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), foreign_storage::get_num_threads(), import_export::Importer::importDelimited(), import_export::Importer::importGDALGeo(), and import_export::Importer::importGDALRaster().

31  {
32  if (copy_params_threads > 0) {
33  return static_cast<size_t>(copy_params_threads);
34  }
35  return std::min(static_cast<size_t>(std::thread::hardware_concurrency()),
36  g_max_import_threads);
37 }
size_t g_max_import_threads
Definition: Importer.cpp:106

+ Here is the caller graph for this function:
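
Usage sketch, assuming CopyParams carries the user-requested thread count in a 'threads' member where 0 means "auto":

  const size_t worker_count = import_export::num_import_threads(copy_params.threads);
  // worker_count is either the explicit request, or
  // min(hardware_concurrency(), g_max_import_threads) when the request is 0.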

MetadataColumnInfos import_export::parse_add_metadata_columns ( const std::string &  add_metadata_columns,
const std::string &  file_path 
)

Definition at line 35 of file MetadataColumn.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::ExpressionParser::evalAsString(), IS_INTEGER, join(), kBIGINT, kDATE, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kNULLT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, run_benchmark_import::parser, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_type(), import_export::ExpressionParser::setExpression(), import_export::ExpressionParser::setStringConstant(), ColumnDescriptor::sourceName, split(), strip(), to_lower(), and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptorsGeo(), import_export::Importer::gdalToColumnDescriptorsRaster(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), and import_export::Importer::readMetadataSampleGDAL().

36  {
37  //
38  // each string is "column_name,column_type,expression"
39  //
40  // column_type can be:
41  // tinyint
42  // smallint
43  // int
44  // bigint
45  // float
46  // double
47  // date
48  // time
49  // timestamp
50  // text
51  //
52  // expression can be in terms of:
53  // filename
54  // filedir
55  // filepath
56  // etc.
57  //
58 
59  // anything to do?
60  if (add_metadata_columns.length() == 0u) {
61  return {};
62  }
63 
64  // split by ";"
65  // @TODO(se) is this safe?
66  // probably won't appear in a file name/path or a date/time string
67  std::vector<std::string> add_metadata_column_strings;
68  boost::split(add_metadata_column_strings, add_metadata_columns, boost::is_any_of(";"));
69  if (add_metadata_column_strings.size() == 0u) {
70  return {};
71  }
72 
73  ExpressionParser parser;
74 
75  // known string constants
76  auto const fn = boost::filesystem::path(file_path).filename().string();
77  auto const fd = boost::filesystem::path(file_path).parent_path().string();
78  auto const fp = file_path;
79  parser.setStringConstant("filename", fn);
80  parser.setStringConstant("filedir", fd);
81  parser.setStringConstant("filepath", fp);
82 
83  MetadataColumnInfos metadata_column_infos;
84 
85  // for each requested column...
86  for (auto const& add_metadata_column_string : add_metadata_column_strings) {
87  // strip
88  auto const add_metadata_column = strip(add_metadata_column_string);
89 
90  // tokenize and extract
91  std::vector<std::string> tokens;
92  boost::split(tokens, add_metadata_column, boost::is_any_of(","));
93  if (tokens.size() < 3u) {
94  throw std::runtime_error("Invalid metadata column info '" + add_metadata_column +
95  "' (must be of the form 'name,type,expression')");
96  }
97  auto token_itr = tokens.begin();
98  auto const column_name = strip(*token_itr++);
99  auto const data_type = strip(to_lower(*token_itr++));
100  tokens.erase(tokens.begin(), token_itr);
101  auto const expression = strip(boost::join(tokens, ","));
102 
103  // get column type
104  SQLTypes sql_type{kNULLT};
105  double range_min{0.0}, range_max{0.0};
106  if (data_type == "tinyint") {
107  sql_type = kTINYINT;
108  range_min = static_cast<double>(std::numeric_limits<int8_t>::min());
109  range_max = static_cast<double>(std::numeric_limits<int8_t>::max());
110  } else if (data_type == "smallint") {
111  sql_type = kSMALLINT;
112  range_min = static_cast<double>(std::numeric_limits<int16_t>::min());
113  range_max = static_cast<double>(std::numeric_limits<int16_t>::max());
114  } else if (data_type == "int") {
115  sql_type = kINT;
116  range_min = static_cast<double>(std::numeric_limits<int32_t>::min());
117  range_max = static_cast<double>(std::numeric_limits<int32_t>::max());
118  } else if (data_type == "bigint") {
119  sql_type = kBIGINT;
120  range_min = static_cast<double>(std::numeric_limits<int64_t>::min());
121  range_max = static_cast<double>(std::numeric_limits<int64_t>::max());
122  } else if (data_type == "float") {
123  sql_type = kFLOAT;
124  range_min = static_cast<double>(std::numeric_limits<float>::min());
125  range_max = static_cast<double>(std::numeric_limits<float>::max());
126  } else if (data_type == "double") {
127  sql_type = kDOUBLE;
128  range_min = static_cast<double>(std::numeric_limits<double>::min());
129  range_max = static_cast<double>(std::numeric_limits<double>::max());
130  } else if (data_type == "date") {
131  sql_type = kDATE;
132  } else if (data_type == "time") {
133  sql_type = kTIME;
134  } else if (data_type == "timestamp") {
135  sql_type = kTIMESTAMP;
136  } else if (data_type == "text") {
137  sql_type = kTEXT;
138  } else {
139  throw std::runtime_error("Invalid metadata column data type '" + data_type +
140  "' for column '" + column_name + "'");
141  }
142 
143  // set expression with force cast back to string
144  parser.setExpression("str(" + expression + ")");
145 
146  // evaluate
147  auto value = parser.evalAsString();
148 
149  // validate date/time/timestamp value now
150  // @TODO(se) do we need to provide for non-zero dimension?
151  try {
152  if (sql_type == kDATE) {
153  dateTimeParse<kDATE>(value, 0);
154  } else if (sql_type == kTIME) {
155  dateTimeParse<kTIME>(value, 0);
156  } else if (sql_type == kTIMESTAMP) {
157  dateTimeParse<kTIMESTAMP>(value, 0);
158  }
159  } catch (std::runtime_error& e) {
160  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
161  " value '" + value + "' for column '" + column_name + "'");
162  }
163 
164  // validate int/float/double
165  try {
166  if (IS_INTEGER(sql_type) || sql_type == kFLOAT || sql_type == kDOUBLE) {
167  size_t num_chars{0u};
168  auto const v = static_cast<double>(std::stod(value, &num_chars));
169  if (v < range_min || v > range_max) {
170  throw std::out_of_range(to_string(sql_type));
171  }
172  if (num_chars == 0u) {
173  throw std::invalid_argument("empty value");
174  }
175  }
176  } catch (std::invalid_argument& e) {
177  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
178  " value '" + value + "' for column '" + column_name +
179  "' (" + e.what() + ")");
180  } catch (std::out_of_range& e) {
181  throw std::runtime_error("Out-of-range metadata column " + to_string(sql_type) +
182  " value '" + value + "' for column '" + column_name +
183  "' (" + e.what() + ")");
184  }
185 
186  // build column descriptor
187  ColumnDescriptor cd;
188  cd.columnName = cd.sourceName = column_name;
189  cd.columnType.set_type(sql_type);
190  cd.columnType.set_fixed_size();
191  if (sql_type == kTEXT) {
192  cd.columnType.set_compression(kENCODING_DICT);
193  cd.columnType.set_comp_param(32);
194  }
195 
196  // add to result
197  metadata_column_infos.push_back({std::move(cd), std::move(value)});
198  }
199 
200  // done
201  return metadata_column_infos;
202 }
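As a usage illustration, below is a simplified standalone sketch of the same tokenization: columns are separated by ';', each column is 'name,type,expression', and any extra commas are folded back into the expression. ExpressionParser, ColumnDescriptor, and the type/range validation are OmniSci internals and are deliberately omitted; the option string in main() is only an assumed example.

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/join.hpp>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Simplified stand-in for one parsed metadata column: name, type keyword, raw expression.
struct MetadataColumnSpec {
  std::string name;
  std::string type;
  std::string expression;
};

// Follows the same split scheme as parse_add_metadata_columns(); the expression is
// kept as raw text here instead of being evaluated.
std::vector<MetadataColumnSpec> parse_specs(const std::string& add_metadata_columns) {
  std::vector<MetadataColumnSpec> specs;
  if (add_metadata_columns.empty()) {
    return specs;
  }
  std::vector<std::string> column_strings;
  boost::split(column_strings, add_metadata_columns, boost::is_any_of(";"));
  for (auto const& column_string : column_strings) {
    std::vector<std::string> tokens;
    boost::split(tokens, column_string, boost::is_any_of(","));
    if (tokens.size() < 3u) {
      throw std::runtime_error("Invalid metadata column info '" + column_string +
                               "' (must be of the form 'name,type,expression')");
    }
    MetadataColumnSpec spec;
    spec.name = boost::trim_copy(tokens[0]);
    spec.type = boost::to_lower_copy(boost::trim_copy(tokens[1]));
    spec.expression = boost::trim_copy(boost::join(
        std::vector<std::string>(tokens.begin() + 2, tokens.end()), std::string(",")));
    specs.push_back(spec);
  }
  return specs;
}

int main() {
  // Assumed example option value, e.g. "tract,text,filename;year,smallint,2020".
  for (auto const& s : parse_specs("tract,text,filename;year,smallint,2020")) {
    std::cout << s.name << " [" << s.type << "] = " << s.expression << "\n";
  }
}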

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor td,
Loader *  loader 
)

Definition at line 6215 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by DBHandler::prepare_loader_generic().

6217  {
6218  CHECK(td);
6219  auto col_descs = loader->get_column_descs();
6220 
6221  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
6222  for (auto cd : col_descs) {
6223  import_buffers.emplace_back(
6224  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
6225  }
6226 
6227  return import_buffers;
6228 }
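The pattern here is simply one import buffer per column descriptor, in column order, so that field i of each incoming row is appended to buffer i. A minimal sketch with stand-in types (TypedImportBuffer and the string-dictionary lookup are not reproduced):

#include <memory>
#include <vector>

// Stand-ins for the OmniSci types; only the shape of the pattern is shown.
struct ColumnDescriptorStub { int column_id; };
struct ImportBufferStub {
  explicit ImportBufferStub(const ColumnDescriptorStub* cd) : cd_(cd) {}
  const ColumnDescriptorStub* cd_;
};

// One per-column buffer, in column order, mirroring setup_column_loaders().
std::vector<std::unique_ptr<ImportBufferStub>> make_buffers(
    const std::vector<ColumnDescriptorStub>& col_descs) {
  std::vector<std::unique_ptr<ImportBufferStub>> buffers;
  buffers.reserve(col_descs.size());
  for (auto const& cd : col_descs) {
    buffers.emplace_back(std::make_unique<ImportBufferStub>(&cd));
  }
  return buffers;
}

int main() {
  std::vector<ColumnDescriptorStub> cols{{1}, {2}, {3}};
  auto buffers = make_buffers(cols);
  return buffers.size() == cols.size() ? 0 : 1;
}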

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo ti,
const CopyParams &  copy_params 
)

Definition at line 315 of file Importer.cpp.

References append_datum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value(), and import_export::TypedImportBuffer::addDefaultValues().

317  {
318  SQLTypeInfo elem_ti = ti.get_elem_type();
319  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
320  return ArrayDatum(0, NULL, true);
321  }
322  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
323  LOG(WARNING) << "Malformed array: " << s;
324  return ArrayDatum(0, NULL, true);
325  }
326  std::vector<std::string> elem_strs;
327  size_t last = 1;
328  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
329  i = s.find(copy_params.array_delim, last)) {
330  elem_strs.push_back(s.substr(last, i - last));
331  last = i + 1;
332  }
333  if (last + 1 <= s.size()) {
334  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
335  }
336  if (elem_strs.size() == 1) {
337  auto str = elem_strs.front();
338  auto str_trimmed = trim_space(str.c_str(), str.length());
339  if (str_trimmed == "") {
340  elem_strs.clear(); // Empty array
341  }
342  }
343  if (!elem_ti.is_string()) {
344  size_t len = elem_strs.size() * elem_ti.get_size();
345  std::unique_ptr<int8_t, FreeDeleter> buf(
346  reinterpret_cast<int8_t*>(checked_malloc(len)));
347  int8_t* p = buf.get();
348  for (auto& es : elem_strs) {
349  auto e = trim_space(es.c_str(), es.length());
350  bool is_null = (e == copy_params.null_str) || e == "NULL";
351  if (!elem_ti.is_string() && e == "") {
352  is_null = true;
353  }
354  if (elem_ti.is_number() || elem_ti.is_time()) {
355  if (!isdigit(e[0]) && e[0] != '-') {
356  is_null = true;
357  }
358  }
359  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
360  p = append_datum(p, d, elem_ti);
361  CHECK(p);
362  }
363  return ArrayDatum(len, buf.release(), false);
364  }
365  // must not be called for array of strings
366  CHECK(false);
367  return ArrayDatum(0, NULL, true);
368 }
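A standalone sketch of the element-splitting step only, using assumed CopyParams defaults ('{', '}', ',' and "\N"); per-element NULL detection, whitespace trimming, and datum conversion are left to the real function:

#include <iostream>
#include <string>
#include <vector>

// Assumed defaults for illustration; the real values come from CopyParams
// (array_begin, array_end, array_delim, null_str).
struct ArrayFormat {
  char begin = '{';
  char end = '}';
  char delim = ',';
  std::string null_str = "\\N";
};

// Splits "{a,b,c}" into element strings using the same scan as StringToArray():
// find each delimiter starting after the opening brace, then take the tail up to
// the closing brace.
std::vector<std::string> split_array_literal(const std::string& s,
                                             const ArrayFormat& fmt) {
  std::vector<std::string> elems;
  if (s.empty() || s == fmt.null_str || s == "NULL") {
    return elems;  // treated as a NULL array by the importer
  }
  if (s.front() != fmt.begin || s.back() != fmt.end) {
    return elems;  // malformed array literal
  }
  size_t last = 1;
  for (size_t i = s.find(fmt.delim, 1); i != std::string::npos;
       i = s.find(fmt.delim, last)) {
    elems.push_back(s.substr(last, i - last));
    last = i + 1;
  }
  if (last + 1 <= s.size()) {
    elems.push_back(s.substr(last, s.size() - 1 - last));
  }
  return elems;
}

int main() {
  for (auto const& e : split_array_literal("{1, 2, 3}", ArrayFormat{})) {
    std::cout << "[" << e << "]\n";  // elements keep surrounding spaces; trimmed later
  }
}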

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo ti 
)

Definition at line 468 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

468  {
469  SQLTypeInfo elem_ti = ti.get_elem_type();
470 
471  CHECK(!elem_ti.is_string());
472 
473  if (datum.is_null) {
474  return NullArray(ti);
475  }
476 
477  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
478  int8_t* buf = (int8_t*)checked_malloc(len);
479  int8_t* p = buf;
480  for (auto& e : datum.val.arr_val) {
481  p = append_datum(p, TDatumToDatum(e, elem_ti), elem_ti);
482  CHECK(p);
483  }
484 
485  return ArrayDatum(len, buf, false);
486 }
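The buffer it produces is just the fixed-size elements packed back-to-back (len = element_count * element_size). A minimal sketch of that packing for a hypothetical int32 element type, with std::vector standing in for the checked_malloc'd buffer and memcpy for append_datum():

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Pack fixed-size elements contiguously, mirroring the layout built by
// TDatumToArrayDatum() for non-string element types.
std::vector<int8_t> pack_int32_array(const std::vector<int32_t>& values) {
  std::vector<int8_t> buf(values.size() * sizeof(int32_t));
  int8_t* p = buf.data();
  for (int32_t v : values) {
    std::memcpy(p, &v, sizeof(v));  // append_datum() does the per-type equivalent
    p += sizeof(v);
  }
  return buf;
}

int main() {
  auto buf = pack_int32_array({10, 20, 30});
  std::cout << buf.size() << " bytes\n";  // 12 bytes: 3 elements * 4 bytes each
}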

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo ti 
)

Definition at line 421 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

421  {
422  Datum d;
423  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
424  switch (type) {
425  case kBOOLEAN:
426  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
427  break;
428  case kBIGINT:
429  d.bigintval =
430  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
431  break;
432  case kINT:
433  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
434  break;
435  case kSMALLINT:
436  d.smallintval =
437  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
438  break;
439  case kTINYINT:
440  d.tinyintval =
441  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
442  break;
443  case kFLOAT:
444  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
445  break;
446  case kDOUBLE:
447  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
448  break;
449  case kTIME:
450  case kTIMESTAMP:
451  case kDATE:
452  d.bigintval =
453  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
454  break;
455  case kPOINT:
456  case kMULTIPOINT:
457  case kLINESTRING:
458  case kMULTILINESTRING:
459  case kPOLYGON:
460  case kMULTIPOLYGON:
461  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
462  default:
463  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
464  }
465  return d;
466 }
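Structurally this is a per-type dispatch that either forwards the incoming Thrift value or substitutes a null sentinel. A reduced sketch with two illustrative type tags; the sentinel constants here are assumed stand-ins, not the actual NULL_FLOAT/NULL_DOUBLE or inline_fixed_encoding_null_val() values:

#include <cstdint>
#include <cstdio>
#include <limits>
#include <stdexcept>

// Assumed stand-ins for illustration only; the real sentinels depend on type and encoding.
constexpr int64_t kNullIntSentinel = std::numeric_limits<int64_t>::min();
constexpr double kNullDoubleSentinel = std::numeric_limits<double>::lowest();

enum class Tag { kInt64, kFloat64 };

union Value {
  int64_t i64;
  double f64;
};

// Dispatch on the target type and either forward the value or substitute the
// per-type null sentinel, mirroring the shape of TDatumToDatum().
Value to_value(Tag tag, bool is_null, int64_t int_val, double real_val) {
  Value v{};
  switch (tag) {
    case Tag::kInt64:
      v.i64 = is_null ? kNullIntSentinel : int_val;
      break;
    case Tag::kFloat64:
      v.f64 = is_null ? kNullDoubleSentinel : real_val;
      break;
    default:
      throw std::runtime_error("invalid type");
  }
  return v;
}

int main() {
  std::printf("%lld\n", static_cast<long long>(to_value(Tag::kInt64, true, 0, 0.0).i64));
  std::printf("%f\n", to_value(Tag::kFloat64, false, 0, 2.5).f64);
}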

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 246 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

246  {
247  size_t i = 0;
248  size_t j = len;
249  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
250  i++;
251  }
252  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
253  j--;
254  }
255  return std::string(field + i, j - i);
256 }
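A self-contained copy for experimentation; note that only spaces and carriage returns are stripped, not tabs or newlines:

#include <cassert>
#include <cstddef>
#include <string>

// Same behavior as the static trim_space() above: strip leading and trailing
// spaces and carriage returns from a length-delimited field.
static std::string trim_space_copy(const char* field, std::size_t len) {
  std::size_t i = 0;
  std::size_t j = len;
  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
    ++i;
  }
  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
    --j;
  }
  return std::string(field + i, j - i);
}

int main() {
  const std::string field = "  42\r";
  assert(trim_space_copy(field.c_str(), field.size()) == "42");
}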

+ Here is the caller graph for this function:

template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 3284 of file Importer.cpp.

References heavydb.dtypes::T.

3284  {
3285  try {
3286  boost::lexical_cast<T>(str);
3287  } catch (const boost::bad_lexical_cast& e) {
3288  return false;
3289  }
3290  return true;
3291 }

Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static

constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 34 of file CopyParams.h.

const size_t import_export::kImportRowLimit = 10000
static

constexpr size_t import_export::max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
static

Definition at line 37 of file CopyParams.h.

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static

heavyai::shared_mutex import_export::status_mutex
static