OmniSciDB  ca0c39ec8f
import_export Namespace Reference

Namespaces

 anonymous_namespace{ExpressionParser.cpp}
 
 anonymous_namespace{ForeignDataImporter.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterCSV.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 anonymous_namespace{RasterImporter.cpp}
 
 delimited_parser
 

Classes

class  AbstractImporter
 
struct  CopyParams
 
class  ExpressionParser
 
class  ForeignDataImporter
 
class  ImportBatchResult
 
struct  GeoImportException
 
class  ColumnNotGeoError
 
struct  BadRowsTracker
 
class  ImporterUtils
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  Importer
 
struct  MetadataColumnInfo
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 
class  GCPTransformer
 
class  RasterImporter
 
class  RenderGroupAnalyzer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< Geospatial::GDAL::FeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 
using MetadataColumnInfos = std::vector< MetadataColumnInfo >
 

Enumerations

enum  ImportHeaderRow { ImportHeaderRow::kAutoDetect, ImportHeaderRow::kNoHeader, ImportHeaderRow::kHasHeader }
 
enum  RasterPointType {
  RasterPointType::kNone, RasterPointType::kAuto, RasterPointType::kSmallInt, RasterPointType::kInt,
  RasterPointType::kFloat, RasterPointType::kDouble, RasterPointType::kPoint
}
 
enum  RasterPointTransform { RasterPointTransform::kNone, RasterPointTransform::kAuto, RasterPointTransform::kFile, RasterPointTransform::kWorld }
 
enum  SourceType {
  SourceType::kUnknown, SourceType::kUnsupported, SourceType::kDelimitedFile, SourceType::kGeoFile,
  SourceType::kRasterFile, SourceType::kParquetFile, SourceType::kOdbc, SourceType::kRegexParsedFile
}
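
A minimal usage sketch (not from the source): these enums are typically carried on import_export::CopyParams. Only source_type is documented on this page; the has_header member name is an assumption.

 // Hedged sketch: configuring an import of a headered CSV file.
 import_export::CopyParams copy_params;
 copy_params.source_type = import_export::SourceType::kDelimitedFile;
 copy_params.has_header = import_export::ImportHeaderRow::kHasHeader;  // assumed member name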
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer, const Catalog_Namespace::SessionInfo *session_info, Executor *executor)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRCoordinateTransformation *coordinate_transformation, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, const Catalog_Namespace::SessionInfo *session_info, Executor *executor, const MetadataColumnInfos &metadata_column_infos)
 
template<class T >
bool try_cast (const std::string &str)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns (const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
 
std::unique_ptr< AbstractImporter > create_importer (Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
 
MetadataColumnInfos parse_add_metadata_columns (const std::string &add_metadata_columns, const std::string &file_path)
 
size_t num_import_threads (const int32_t copy_params_threads)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static heavyai::shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 
static const size_t kImportRowLimit = 10000
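 
For reference, kImportFileBufferSize is 1 << 23 bytes (8 MiB) and max_import_buffer_resize_byte_size is 1024 * 1024 * 1024 bytes (1 GiB).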
 

Typedef Documentation

using import_export::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 75 of file Importer.h.

using import_export::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 154 of file Importer.cpp.

using import_export::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 152 of file Importer.cpp.

using import_export::FeaturePtrVector = typedef std::vector<Geospatial::GDAL::FeatureUqPtr>

Definition at line 155 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 151 of file Importer.cpp.

using import_export::MetadataColumnInfos = typedef std::vector<MetadataColumnInfo>

Definition at line 36 of file MetadataColumn.h.

Enumeration Type Documentation

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 410 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

410  {
411  const auto& arr = datum.val.arr_val;
412  for (const auto& elem_datum : arr) {
413  string_vec.push_back(elem_datum.val.str_val);
414  }
415 }
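
A minimal usage sketch, assuming only the TDatum fields visible in the fragment above (val.arr_val and val.str_val):

 // Hedged example: flatten a Thrift TDatum that wraps an array of strings.
 TDatum array_datum;
 TDatum element;
 element.val.str_val = "red";
 array_datum.val.arr_val.push_back(element);
 element.val.str_val = "green";
 array_datum.val.arr_val.push_back(element);
 std::vector<std::string> strings;
 import_export::addBinaryStringArray(array_datum, strings);
 // strings now holds {"red", "green"}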


std::unique_ptr< AbstractImporter > import_export::create_importer ( Catalog_Namespace::Catalog &  catalog,
const TableDescriptor *  td,
const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params 
)

Definition at line 6288 of file Importer.cpp.

References g_enable_fsi_regex_import, g_enable_legacy_delimited_import, kDelimitedFile, kParquetFile, kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by Parser::CopyTableStmt::execute(), and DBHandler::import_table().

6292  {
6293  if (copy_params.source_type == import_export::SourceType::kParquetFile) {
6294 #ifdef ENABLE_IMPORT_PARQUET
6295  if (!g_enable_legacy_parquet_import) {
6296  return std::make_unique<import_export::ForeignDataImporter>(
6297  copy_from_source, copy_params, td);
6298  }
6299 #else
6300  throw std::runtime_error("Parquet not supported!");
6301 #endif
6302  }
6303 
6304  if (copy_params.source_type == import_export::SourceType::kDelimitedFile &&
6305  !g_enable_legacy_delimited_import) {
6306  return std::make_unique<import_export::ForeignDataImporter>(
6307  copy_from_source, copy_params, td);
6308  }
6309 
6310  if (copy_params.source_type == import_export::SourceType::kRegexParsedFile) {
6311  if (g_enable_fsi_regex_import) {
6312  return std::make_unique<import_export::ForeignDataImporter>(
6313  copy_from_source, copy_params, td);
6314  } else {
6315  throw std::runtime_error(
6316  "Regex parsed import only supported using 'fsi-regex-import' flag");
6317  }
6318  }
6319 
6320  return std::make_unique<import_export::Importer>(
6321  catalog, td, copy_from_source, copy_params);
6322 }
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:81
import_export::SourceType source_type
Definition: CopyParams.h:57
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:85
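
A hedged usage sketch (not from the source) showing how a caller such as COPY FROM handling might obtain an importer; the AbstractImporter interface itself is not documented on this page:

 import_export::CopyParams copy_params;
 copy_params.source_type = import_export::SourceType::kDelimitedFile;
 auto importer = import_export::create_importer(
     catalog, td, "/path/to/data.csv", copy_params);
 // Depending on the source type and the g_enable_legacy_delimited_import /
 // g_enable_fsi_regex_import flags, this returns either a ForeignDataImporter
 // or the legacy Importer.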


std::vector< std::unique_ptr< TypedImportBuffer > > import_export::fill_missing_columns ( const Catalog_Namespace::Catalog *  cat,
Fragmenter_Namespace::InsertData &  insert_data 
)

Definition at line 6216 of file Importer.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, ColumnDescriptor::columnId, Fragmenter_Namespace::InsertData::columnIds, ColumnDescriptor::columnName, ColumnDescriptor::columnType, Fragmenter_Namespace::InsertData::data, ColumnDescriptor::default_value, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), import_export::TypedImportBuffer::get_data_block_pointers(), SQLTypeInfo::get_physical_cols(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Geospatial::GeoTypesFactory::getGeoColumns(), Catalog_Namespace::Catalog::getMetadataForDict(), Fragmenter_Namespace::InsertData::is_default, SQLTypeInfo::is_geometry(), IS_STRING, kARRAY, kENCODING_DICT, import_export::Importer::set_geo_physical_import_buffer(), gpu_enabled::sort(), and Fragmenter_Namespace::InsertData::tableId.

Referenced by RelAlgExecutor::executeSimpleInsert(), DBHandler::insert_data(), and Parser::InsertIntoTableAsSelectStmt::populateData().

6218  {
6219  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> defaults_buffers;
6220  if (insert_data.is_default.size() == 0) {
6221  insert_data.is_default.resize(insert_data.columnIds.size(), false);
6222  }
6223  CHECK(insert_data.is_default.size() == insert_data.columnIds.size());
6224  auto cds = cat->getAllColumnMetadataForTable(insert_data.tableId, false, false, true);
6225  if (cds.size() == insert_data.columnIds.size()) {
6226  // all columns specified
6227  return defaults_buffers;
6228  }
6229  for (auto cd : cds) {
6230  if (std::find(insert_data.columnIds.begin(),
6231  insert_data.columnIds.end(),
6232  cd->columnId) == insert_data.columnIds.end()) {
6233  StringDictionary* dict = nullptr;
6234  if (cd->columnType.get_type() == kARRAY &&
6235  IS_STRING(cd->columnType.get_subtype()) && !cd->default_value.has_value()) {
6236  throw std::runtime_error("Cannot omit column \"" + cd->columnName +
6237  "\": omitting TEXT arrays is not supported yet");
6238  }
6239  if (cd->columnType.get_compression() == kENCODING_DICT) {
6240  dict = cat->getMetadataForDict(cd->columnType.get_comp_param())->stringDict.get();
6241  }
6242  defaults_buffers.emplace_back(std::make_unique<TypedImportBuffer>(cd, dict));
6243  }
6244  }
6245  // put buffers in order to fill geo sub-columns properly
6246  std::sort(defaults_buffers.begin(),
6247  defaults_buffers.end(),
6248  [](decltype(defaults_buffers[0])& a, decltype(defaults_buffers[0])& b) {
6249  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
6250  });
6251  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6252  auto cd = defaults_buffers[i]->getColumnDesc();
6253  std::string default_value = cd->default_value.value_or("NULL");
6254  defaults_buffers[i]->add_value(
6255  cd, default_value, !cd->default_value.has_value(), import_export::CopyParams());
6256  if (cd->columnType.is_geometry()) {
6257  std::vector<double> coords, bounds;
6258  std::vector<int> ring_sizes, poly_rings;
6259  int render_group = 0;
6260  SQLTypeInfo tinfo{cd->columnType};
6261  CHECK(Geospatial::GeoTypesFactory::getGeoColumns(
6262  default_value, tinfo, coords, bounds, ring_sizes, poly_rings, false));
6263  // set physical columns starting with the following ID
6264  auto next_col = i + 1;
6265  import_export::Importer::set_geo_physical_import_buffer(*cat,
6266  cd,
6267  defaults_buffers,
6268  next_col,
6269  coords,
6270  bounds,
6271  ring_sizes,
6272  poly_rings,
6273  render_group);
6274  // skip physical columns filled with the call above
6275  i += cd->columnType.get_physical_cols();
6276  }
6277  }
6278  auto data = import_export::TypedImportBuffer::get_data_block_pointers(defaults_buffers);
6279  CHECK(data.size() == defaults_buffers.size());
6280  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6281  insert_data.data.push_back(data[i]);
6282  insert_data.columnIds.push_back(defaults_buffers[i]->getColumnDesc()->columnId);
6283  insert_data.is_default.push_back(true);
6284  }
6285  return defaults_buffers;
6286 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
std::vector< bool > is_default
Definition: Fragmenter.h:75
constexpr double a
Definition: Utm.h:32
int tableId
identifies the table into which the data is being inserted
Definition: Fragmenter.h:70
static std::vector< DataBlockPtr > get_data_block_pointers(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
Definition: Importer.cpp:3010
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1960
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
std::vector< DataBlockPtr > data
points to the start of the data block per column for the row(s) being inserted
Definition: Fragmenter.h:73
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2228
#define IS_STRING(T)
Definition: sqltypes.h:297
#define CHECK(condition)
Definition: Logger.h:222
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1623
std::vector< int > columnIds
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:71
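
A hedged calling sketch: the returned TypedImportBuffers own the memory referenced by the DataBlockPtrs appended to insert_data, so they must outlive the insert. The surrounding setup is assumed, not taken from the source.

 Fragmenter_Namespace::InsertData insert_data;
 insert_data.tableId = td->tableId;
 // ... populate insert_data.columnIds / insert_data.data for the columns
 //     that were explicitly provided ...
 auto default_buffers = import_export::fill_missing_columns(&catalog, insert_data);
 // perform the insert while default_buffers is still in scope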


void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 5149 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

5150  {
5151  // prepare to gather subdirectories
5152  std::vector<std::string> subdirectories;
5153 
5154  // get entries
5155  char** entries = VSIReadDir(archive_path.c_str());
5156  if (!entries) {
5157  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
5158  return;
5159  }
5160 
5161  // force scope
5162  {
5163  // request clean-up
5164  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
5165 
5166  // check all the entries
5167  int index = 0;
5168  while (true) {
5169  // get next entry, or drop out if there isn't one
5170  char* entry_c = entries[index++];
5171  if (!entry_c) {
5172  break;
5173  }
5174  std::string entry(entry_c);
5175 
5176  // ignore '.' and '..'
5177  if (entry == "." || entry == "..") {
5178  continue;
5179  }
5180 
5181  // build the full path
5182  std::string entry_path = archive_path + std::string("/") + entry;
5183 
5184  // is it a file or a sub-folder
5185  VSIStatBufL sb;
5186  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
5187  if (result < 0) {
5188  break;
5189  }
5190 
5191  if (VSI_ISDIR(sb.st_mode)) {
5192  // a directory that ends with .gdb could be a Geodatabase bundle
5193  // arguably dangerous to decide this purely by name, but any further
5194  // validation would be very complex especially at this scope
5195  if (boost::iends_with(entry_path, ".gdb")) {
5196  // add the directory as if it was a file and don't recurse into it
5197  files.push_back(entry_path);
5198  } else {
5199  // add subdirectory to be recursed into
5200  subdirectories.push_back(entry_path);
5201  }
5202  } else {
5203  // add this file
5204  files.push_back(entry_path);
5205  }
5206  }
5207  }
5208 
5209  // recurse into each subdirectories we found
5210  for (const auto& subdirectory : subdirectories) {
5211  gdalGatherFilesInArchiveRecursive(subdirectory, files);
5212  }
5213 }
#define LOG(tag)
Definition: Logger.h:216
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:5149
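
A hedged usage sketch; the /vsizip/ prefix is standard GDAL virtual file system syntax, and GDAL is assumed to be initialized (e.g. via Geospatial::GDAL::init()) before calling:

 // Hedged example: recursively list importable files inside a zip archive.
 std::vector<std::string> files;
 import_export::gdalGatherFilesInArchiveRecursive("/vsizip//data/shapes.zip", files);
 for (const auto& file_path : files) {
   LOG(INFO) << "archive member: " << file_path;
 }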


static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor 
)
static

Definition at line 1996 of file Importer.cpp.

References CHECK, CHECK_LT, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), Catalog_Namespace::SessionInfo::get_session_id(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, is_null(), ImportHelpers::is_null_datum(), kDelimitedFile, kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, import_export::CopyParams::source_type, sv_strip(), gpu_enabled::swap(), import_export::ImportStatus::thread_id, logger::thread_id(), to_string(), import_export::CopyParams::trim_spaces, and UNLIKELY.

Referenced by import_export::Importer::importDelimited().

2006  {
2007  ImportStatus thread_import_status;
2008  int64_t total_get_row_time_us = 0;
2009  int64_t total_str_to_val_time_us = 0;
2010  auto query_session = session_info ? session_info->get_session_id() : "";
2011  CHECK(scratch_buffer);
2012  auto buffer = scratch_buffer.get();
2013  auto load_ms = measure<>::execution([]() {});
2014 
2015  thread_import_status.thread_id = thread_id;
2016 
2017  auto ms = measure<>::execution([&]() {
2018  const CopyParams& copy_params = importer->get_copy_params();
2019  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2020  size_t begin =
2021  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
2022  const char* thread_buf = buffer + begin_pos + begin;
2023  const char* thread_buf_end = buffer + end_pos;
2024  const char* buf_end = buffer + total_size;
2025  bool try_single_thread = false;
2026  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2027  importer->get_import_buffers(thread_id);
2029  int phys_cols = 0;
2030  int point_cols = 0;
2031  for (const auto cd : col_descs) {
2032  const auto& col_ti = cd->columnType;
2033  phys_cols += col_ti.get_physical_cols();
2034  if (cd->columnType.get_type() == kPOINT) {
2035  point_cols++;
2036  }
2037  }
2038  auto num_cols = col_descs.size() - phys_cols;
2039  for (const auto& p : import_buffers) {
2040  p->clear();
2041  }
2042  std::vector<std::string_view> row;
2043  size_t row_index_plus_one = 0;
2044  for (const char* p = thread_buf; p < thread_buf_end; p++) {
2045  row.clear();
2046  std::vector<std::unique_ptr<char[]>>
2047  tmp_buffers; // holds string w/ removed escape chars, etc
2048  if (DEBUG_TIMING) {
2049  us = measure<std::chrono::microseconds>::execution([&]() {
2050  p = import_export::delimited_parser::get_row(p,
2051  thread_buf_end,
2052  buf_end,
2053  copy_params,
2054  importer->get_is_array(),
2055  row,
2056  tmp_buffers,
2057  try_single_thread,
2058  true);
2059  });
2060  total_get_row_time_us += us;
2061  } else {
2062  p = import_export::delimited_parser::get_row(p,
2063  thread_buf_end,
2064  buf_end,
2065  copy_params,
2066  importer->get_is_array(),
2067  row,
2068  tmp_buffers,
2069  try_single_thread,
2070  true);
2071  }
2072  row_index_plus_one++;
2073  // Each POINT could consume two separate coords instead of a single WKT
2074  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
2075  thread_import_status.rows_rejected++;
2076  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
2077  << row.size() << "): " << shared::printContainer(row);
2078  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2079  break;
2080  }
2081  continue;
2082  }
2083 
2084  //
2085  // lambda for importing a row (perhaps multiple times if exploding a collection)
2086  //
2087 
2088  auto execute_import_row = [&](OGRGeometry* import_geometry) {
2089  size_t import_idx = 0;
2090  size_t col_idx = 0;
2091  try {
2092  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2093  auto cd = *cd_it;
2094  const auto& col_ti = cd->columnType;
2095 
2096  bool is_null =
2097  ImportHelpers::is_null_datum(row[import_idx], copy_params.null_str);
2098  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
2099  // So initially nullness may be missed and not passed to add_value,
2100  // which then might also check and still decide it's actually a NULL, e.g.
2101  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
2102  // So "NULL" is not recognized as NULL but then it's not recognized as
2103  // a valid kINT, so it's a NULL after all.
2104  // Checking for "NULL" here too, as a widely accepted notation for NULL.
2105 
2106  // Treating empty as NULL
2107  if (!cd->columnType.is_string() && row[import_idx].empty()) {
2108  is_null = true;
2109  }
2110  if (!cd->columnType.is_string() && !copy_params.trim_spaces) {
2111  // everything but strings should be always trimmed
2112  row[import_idx] = sv_strip(row[import_idx]);
2113  }
2114 
2115  if (col_ti.get_physical_cols() == 0) {
2116  // not geo
2117 
2118  import_buffers[col_idx]->add_value(
2119  cd, row[import_idx], is_null, copy_params);
2120 
2121  // next
2122  ++import_idx;
2123  ++col_idx;
2124  } else {
2125  // geo
2126 
2127  // store null string in the base column
2128  import_buffers[col_idx]->add_value(
2129  cd, copy_params.null_str, true, copy_params);
2130 
2131  // WKT from string we're not storing
2132  auto const& geo_string = row[import_idx];
2133 
2134  // next
2135  ++import_idx;
2136  ++col_idx;
2137 
2138  SQLTypes col_type = col_ti.get_type();
2139  CHECK(IS_GEO(col_type));
2140 
2141  std::vector<double> coords;
2142  std::vector<double> bounds;
2143  std::vector<int> ring_sizes;
2144  std::vector<int> poly_rings;
2145  int render_group = 0;
2146 
2147  // if this is a POINT column, and the field is not null, and
2148  // looks like a scalar numeric value (and not a hex blob)
2149  // attempt to import two columns as lon/lat (or lat/lon)
2150  if (col_type == kPOINT && !is_null && geo_string.size() > 0 &&
2151  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
2152  geo_string[0] == '-') &&
2153  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
2154  double lon = std::atof(std::string(geo_string).c_str());
2155  double lat = NAN;
2156  auto lat_str = row[import_idx];
2157  ++import_idx;
2158  if (lat_str.size() > 0 &&
2159  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
2160  lat = std::atof(std::string(lat_str).c_str());
2161  }
2162  // Swap coordinates if this table uses a reverse order: lat/lon
2163  if (!copy_params.lonlat) {
2164  std::swap(lat, lon);
2165  }
2166  // TODO: should check if POINT column should have been declared with
2167  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
2168  // throw std::runtime_error("POINT column " + cd->columnName + " is
2169  // not WGS84, cannot insert lon/lat");
2170  // }
2171  SQLTypeInfo import_ti{col_ti};
2172  if (copy_params.source_type ==
2173  import_export::SourceType::kDelimitedFile &&
2174  import_ti.get_output_srid() == 4326) {
2175  auto srid0 = copy_params.source_srid;
2176  if (srid0 > 0) {
2177  // srid0 -> 4326 transform is requested on import
2178  import_ti.set_input_srid(srid0);
2179  }
2180  }
2181  if (!importGeoFromLonLat(lon, lat, coords, import_ti)) {
2182  throw std::runtime_error(
2183  "Cannot read lon/lat to insert into POINT column " +
2184  cd->columnName);
2185  }
2186  } else {
2187  // import it
2188  SQLTypeInfo import_ti{col_ti};
2189  if (copy_params.source_type ==
2190  import_export::SourceType::kDelimitedFile &&
2191  import_ti.get_output_srid() == 4326) {
2192  auto srid0 = copy_params.source_srid;
2193  if (srid0 > 0) {
2194  // srid0 -> 4326 transform is requested on import
2195  import_ti.set_input_srid(srid0);
2196  }
2197  }
2198  if (is_null) {
2199  if (col_ti.get_notnull()) {
2200  throw std::runtime_error("NULL geo for column " + cd->columnName);
2201  }
2202  Geospatial::GeoTypesFactory::getNullGeoColumns(
2203  import_ti,
2204  coords,
2205  bounds,
2206  ring_sizes,
2207  poly_rings,
2208  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2209  } else {
2210  if (import_geometry) {
2211  // geometry already exploded
2212  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2213  import_geometry,
2214  import_ti,
2215  coords,
2216  bounds,
2217  ring_sizes,
2218  poly_rings,
2219  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2220  std::string msg =
2221  "Failed to extract valid geometry from exploded row " +
2222  std::to_string(first_row_index_this_buffer +
2223  row_index_plus_one) +
2224  " for column " + cd->columnName;
2225  throw std::runtime_error(msg);
2226  }
2227  } else {
2228  // extract geometry directly from WKT
2229  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2230  std::string(geo_string),
2231  import_ti,
2232  coords,
2233  bounds,
2234  ring_sizes,
2235  poly_rings,
2236  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2237  std::string msg = "Failed to extract valid geometry from row " +
2238  std::to_string(first_row_index_this_buffer +
2239  row_index_plus_one) +
2240  " for column " + cd->columnName;
2241  throw std::runtime_error(msg);
2242  }
2243  }
2244 
2245  // validate types
2246  if (col_type != import_ti.get_type()) {
2247  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2248  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2249  col_type == SQLTypes::kMULTIPOLYGON)) {
2250  throw std::runtime_error(
2251  "Imported geometry doesn't match the type of column " +
2252  cd->columnName);
2253  }
2254  }
2255  }
2256 
2257  // assign render group?
2258  if (columnIdToRenderGroupAnalyzerMap.size()) {
2259  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2260  if (ring_sizes.size()) {
2261  // get a suitable render group for these poly coords
2262  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2263  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2264  render_group =
2265  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2266  } else {
2267  // empty poly
2268  render_group = -1;
2269  }
2270  }
2271  }
2272  }
2273 
2274  // import extracted geo
2275  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2276  cd,
2277  import_buffers,
2278  col_idx,
2279  coords,
2280  bounds,
2281  ring_sizes,
2282  poly_rings,
2283  render_group);
2284 
2285  // skip remaining physical columns
2286  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2287  ++cd_it;
2288  }
2289  }
2290  }
2291  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2292  check_session_interrupted(query_session, executor))) {
2293  thread_import_status.load_failed = true;
2294  thread_import_status.load_msg =
2295  "Table load was cancelled via Query Interrupt";
2296  return;
2297  }
2298  thread_import_status.rows_completed++;
2299  } catch (const std::exception& e) {
2300  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2301  import_buffers[col_idx_to_pop]->pop_value();
2302  }
2303  thread_import_status.rows_rejected++;
2304  LOG(ERROR) << "Input exception thrown: " << e.what()
2305  << ". Row discarded. Data: " << shared::printContainer(row);
2306  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2307  LOG(ERROR) << "Load was cancelled due to max reject rows being reached";
2308  thread_import_status.load_failed = true;
2309  thread_import_status.load_msg =
2310  "Load was cancelled due to max reject rows being reached";
2311  }
2312  }
2313  }; // End of lambda
2314 
2315  if (copy_params.geo_explode_collections) {
2316  // explode and import
2317  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2318  explode_collections_step1(col_descs);
2319  // pull out the collection WKT or WKB hex
2320  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2321  auto const& collection_geo_string = row[collection_col_idx];
2322  // convert to OGR
2323  OGRGeometry* ogr_geometry = nullptr;
2324  ScopeGuard destroy_ogr_geometry = [&] {
2325  if (ogr_geometry) {
2326  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2327  }
2328  };
2329  ogr_geometry = Geospatial::GeoTypesFactory::createOGRGeometry(
2330  std::string(collection_geo_string));
2331  // do the explode and import
2332  us = explode_collections_step2(ogr_geometry,
2333  collection_child_type,
2334  collection_col_name,
2335  first_row_index_this_buffer + row_index_plus_one,
2336  execute_import_row);
2337  } else {
2338  // import non-collection row just once
2339  us = measure<std::chrono::microseconds>::execution(
2340  [&] { execute_import_row(nullptr); });
2341  }
2342 
2343  if (thread_import_status.load_failed) {
2344  break;
2345  }
2346  } // end thread
2347  total_str_to_val_time_us += us;
2348  if (!thread_import_status.load_failed && thread_import_status.rows_completed > 0) {
2349  load_ms = measure<>::execution([&]() {
2350  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2351  });
2352  }
2353  }); // end execution
2354 
2355  if (DEBUG_TIMING && !thread_import_status.load_failed &&
2356  thread_import_status.rows_completed > 0) {
2357  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2358  << thread_import_status.rows_completed << " rows inserted in "
2359  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2360  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2361  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2362  << "sec" << std::endl;
2363  }
2364 
2365  return thread_import_status;
2366 }
bool is_null_datum(const DatumStringType &datum, const std::string &null_indicator)
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:125
SQLTypes
Definition: sqltypes.h:53
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
std::string_view sv_strip(std::string_view str)
return trimmed string_view
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1906
#define LOG(tag)
Definition: Logger.h:216
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:157
void set_input_srid(int d)
Definition: sqltypes.h:493
CONSTEXPR DEVICE bool is_null(const T &value)
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread, bool filter_empty_lines)
Parses the first row in the given buffer and inserts fields into given vector.
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
#define CHECK_LT(x, y)
Definition: Logger.h:232
static OGRGeometry * createOGRGeometry(const std::string &wkt_or_wkb_hex)
Definition: Types.cpp:1044
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1872
ThreadId thread_id()
Definition: Logger.cpp:820
#define CHECK(condition)
Definition: Logger.h:222
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
Definition: Importer.cpp:1603
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:107
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
#define IS_GEO(T)
Definition: sqltypes.h:298


static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRCoordinateTransformation *  coordinate_transformation,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor,
const MetadataColumnInfos &  metadata_column_infos 
)
static

Definition at line 2374 of file Importer.cpp.

References CHECK, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::compress_coords(), DEBUG_TIMING, Executor::ERR_INTERRUPTED, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Catalog_Namespace::SessionInfo::get_session_id(), QueryExecutionError::getErrorCode(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), logger::INFO, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, generate_TableFunctionsFactory_init::separator, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), TIMER_STOP, to_string(), and UNLIKELY.

Referenced by import_export::Importer::importGDALGeo().

2386  {
2387  ImportStatus thread_import_status;
2388  const CopyParams& copy_params = importer->get_copy_params();
2389  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2390  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2391  importer->get_import_buffers(thread_id);
2392  auto query_session = session_info ? session_info->get_session_id() : "";
2393  for (const auto& p : import_buffers) {
2394  p->clear();
2395  }
2396 
2397  auto convert_timer = timer_start();
2398 
2399  // for all the features in this chunk...
2400  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2401  // ignore null features
2402  if (!features[iFeature]) {
2403  continue;
2404  }
2405 
2406  // get this feature's geometry
2407  // for geodatabase, we need to consider features with no geometry
2408  // as we still want to create a table, even if it has no geo column
2409  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2410  if (pGeometry && coordinate_transformation) {
2411  pGeometry->transform(coordinate_transformation);
2412  }
2413 
2414  //
2415  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2416  //
2417 
2418  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2419  size_t col_idx = 0;
2420  try {
2421  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2422  check_session_interrupted(query_session, executor))) {
2423  thread_import_status.load_failed = true;
2424  thread_import_status.load_msg = "Table load was cancelled via Query Interrupt";
2425  throw QueryExecutionError(Executor::ERR_INTERRUPTED);
2426  }
2427 
2428  uint32_t field_column_count{0u};
2429  uint32_t metadata_column_count{0u};
2430 
2431  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2432  auto cd = *cd_it;
2433 
2434  // is this a geo column?
2435  const auto& col_ti = cd->columnType;
2436  if (col_ti.is_geometry()) {
2437  // Note that this assumes there is one and only one geo column in the
2438  // table. Currently, the importer only supports reading a single
2439  // geospatial feature from an input shapefile / geojson file, but this
2440  // code will need to be modified if that changes
2441  SQLTypes col_type = col_ti.get_type();
2442 
2443  // store null string in the base column
2444  import_buffers[col_idx]->add_value(
2445  cd, copy_params.null_str, true, copy_params);
2446  ++col_idx;
2447 
2448  // the data we now need to extract for the other columns
2449  std::vector<double> coords;
2450  std::vector<double> bounds;
2451  std::vector<int> ring_sizes;
2452  std::vector<int> poly_rings;
2453  int render_group = 0;
2454 
2455  // extract it
2456  SQLTypeInfo import_ti{col_ti};
2457  bool is_null_geo = !import_geometry;
2458  if (is_null_geo) {
2459  if (col_ti.get_notnull()) {
2460  throw std::runtime_error("NULL geo for column " + cd->columnName);
2461  }
2462  Geospatial::GeoTypesFactory::getNullGeoColumns(
2463  import_ti,
2464  coords,
2465  bounds,
2466  ring_sizes,
2467  poly_rings,
2468  PROMOTE_POLYGON_TO_MULTIPOLYGON);
2469  } else {
2470  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2471  import_geometry,
2472  import_ti,
2473  coords,
2474  bounds,
2475  ring_sizes,
2476  poly_rings,
2477  PROMOTE_POLYGON_TO_MULTIPOLYGON)) {
2478  std::string msg = "Failed to extract valid geometry from feature " +
2479  std::to_string(firstFeature + iFeature + 1) +
2480  " for column " + cd->columnName;
2481  throw std::runtime_error(msg);
2482  }
2483 
2484  // validate types
2485  if (col_type != import_ti.get_type()) {
2486  if (!PROMOTE_POLYGON_TO_MULTIPOLYGON ||
2487  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2488  col_type == SQLTypes::kMULTIPOLYGON)) {
2489  throw std::runtime_error(
2490  "Imported geometry doesn't match the type of column " +
2491  cd->columnName);
2492  }
2493  }
2494  }
2495 
2496  if (columnIdToRenderGroupAnalyzerMap.size()) {
2497  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2498  if (ring_sizes.size()) {
2499  // get a suitable render group for these poly coords
2500  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2501  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2502  render_group =
2503  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2504  } else {
2505  // empty poly
2506  render_group = -1;
2507  }
2508  }
2509  }
2510 
2511  // create coords array value and add it to the physical column
2512  ++cd_it;
2513  auto cd_coords = *cd_it;
2514  std::vector<TDatum> td_coord_data;
2515  if (!is_null_geo) {
2516  std::vector<uint8_t> compressed_coords =
2517  Geospatial::compress_coords(coords, col_ti);
2518  for (auto cc : compressed_coords) {
2519  TDatum td_byte;
2520  td_byte.val.int_val = cc;
2521  td_coord_data.push_back(td_byte);
2522  }
2523  }
2524  TDatum tdd_coords;
2525  tdd_coords.val.arr_val = td_coord_data;
2526  tdd_coords.is_null = is_null_geo;
2527  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2528  ++col_idx;
2529 
2530  if (col_type == kMULTILINESTRING || col_type == kPOLYGON ||
2531  col_type == kMULTIPOLYGON) {
2532  // Create [linest]ring_sizes array value and add it to the physical column
2533  ++cd_it;
2534  auto cd_ring_sizes = *cd_it;
2535  std::vector<TDatum> td_ring_sizes;
2536  if (!is_null_geo) {
2537  for (auto ring_size : ring_sizes) {
2538  TDatum td_ring_size;
2539  td_ring_size.val.int_val = ring_size;
2540  td_ring_sizes.push_back(td_ring_size);
2541  }
2542  }
2543  TDatum tdd_ring_sizes;
2544  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2545  tdd_ring_sizes.is_null = is_null_geo;
2546  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2547  ++col_idx;
2548  }
2549 
2550  if (col_type == kMULTIPOLYGON) {
2551  // Create poly_rings array value and add it to the physical column
2552  ++cd_it;
2553  auto cd_poly_rings = *cd_it;
2554  std::vector<TDatum> td_poly_rings;
2555  if (!is_null_geo) {
2556  for (auto num_rings : poly_rings) {
2557  TDatum td_num_rings;
2558  td_num_rings.val.int_val = num_rings;
2559  td_poly_rings.push_back(td_num_rings);
2560  }
2561  }
2562  TDatum tdd_poly_rings;
2563  tdd_poly_rings.val.arr_val = td_poly_rings;
2564  tdd_poly_rings.is_null = is_null_geo;
2565  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2566  ++col_idx;
2567  }
2568 
2569  if (col_type == kLINESTRING || col_type == kMULTILINESTRING ||
2570  col_type == kPOLYGON || col_type == kMULTIPOLYGON ||
2571  col_type == kMULTIPOINT) {
2572  // Create bounds array value and add it to the physical column
2573  ++cd_it;
2574  auto cd_bounds = *cd_it;
2575  std::vector<TDatum> td_bounds_data;
2576  if (!is_null_geo) {
2577  for (auto b : bounds) {
2578  TDatum td_double;
2579  td_double.val.real_val = b;
2580  td_bounds_data.push_back(td_double);
2581  }
2582  }
2583  TDatum tdd_bounds;
2584  tdd_bounds.val.arr_val = td_bounds_data;
2585  tdd_bounds.is_null = is_null_geo;
2586  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2587  ++col_idx;
2588  }
2589 
2590  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2591  // Create render_group value and add it to the physical column
2592  ++cd_it;
2593  auto cd_render_group = *cd_it;
2594  TDatum td_render_group;
2595  td_render_group.val.int_val = render_group;
2596  td_render_group.is_null = is_null_geo;
2597  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2598  ++col_idx;
2599  }
2600  } else if (field_column_count < fieldNameToIndexMap.size()) {
2601  //
2602  // field column
2603  //
2604  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2605  CHECK(cit != columnNameToSourceNameMap.end());
2606  auto const& field_name = cit->second;
2607 
2608  auto const fit = fieldNameToIndexMap.find(field_name);
2609  if (fit == fieldNameToIndexMap.end()) {
2610  throw ColumnNotGeoError(cd->columnName);
2611  }
2612 
2613  auto const& field_index = fit->second;
2614  CHECK(field_index < fieldNameToIndexMap.size());
2615 
2616  auto const& feature = features[iFeature];
2617 
2618  auto field_defn = feature->GetFieldDefnRef(field_index);
2619  CHECK(field_defn);
2620 
2621  // OGRFeature::GetFieldAsString() can only return 80 characters
2622  // so for array columns, we are obliged to fetch the actual values
2623  // and construct the concatenated string ourselves
2624 
2625  std::string value_string;
2626  int array_index = 0, array_size = 0;
2627 
2628  auto stringify_numeric_list = [&](auto* values) {
2629  value_string = "{";
2630  while (array_index < array_size) {
2631  auto separator = (array_index > 0) ? "," : "";
2632  value_string += separator + std::to_string(values[array_index]);
2633  array_index++;
2634  }
2635  value_string += "}";
2636  };
2637 
2638  auto field_type = field_defn->GetType();
2639  switch (field_type) {
2640  case OFTInteger:
2641  case OFTInteger64:
2642  case OFTReal:
2643  case OFTString:
2644  case OFTBinary:
2645  case OFTDate:
2646  case OFTTime:
2647  case OFTDateTime: {
2648  value_string = feature->GetFieldAsString(field_index);
2649  } break;
2650  case OFTIntegerList: {
2651  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2652  stringify_numeric_list(values);
2653  } break;
2654  case OFTInteger64List: {
2655  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2656  stringify_numeric_list(values);
2657  } break;
2658  case OFTRealList: {
2659  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2660  stringify_numeric_list(values);
2661  } break;
2662  case OFTStringList: {
2663  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2664  value_string = "{";
2665  if (array_of_strings) {
2666  while (auto* this_string = array_of_strings[array_index]) {
2667  auto separator = (array_index > 0) ? "," : "";
2668  value_string += separator + std::string(this_string);
2669  array_index++;
2670  }
2671  }
2672  value_string += "}";
2673  } break;
2674  default:
2675  throw std::runtime_error("Unsupported geo file field type (" +
2676  std::to_string(static_cast<int>(field_type)) +
2677  ")");
2678  }
2679 
2680  import_buffers[col_idx]->add_value(cd, value_string, false, copy_params);
2681  ++col_idx;
2682  field_column_count++;
2683  } else if (metadata_column_count < metadata_column_infos.size()) {
2684  //
2685  // metadata column
2686  //
2687  auto const& mci = metadata_column_infos[metadata_column_count];
2688  if (mci.column_descriptor.columnName != cd->columnName) {
2689  throw std::runtime_error("Metadata column name mismatch");
2690  }
2691  import_buffers[col_idx]->add_value(cd, mci.value, false, copy_params);
2692  ++col_idx;
2693  metadata_column_count++;
2694  } else {
2695  throw std::runtime_error("Column count mismatch");
2696  }
2697  }
2698  thread_import_status.rows_completed++;
2699  } catch (QueryExecutionError& e) {
2700  if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
2701  throw e;
2702  }
2703  } catch (ColumnNotGeoError& e) {
2704  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import.";
2705  throw std::runtime_error(e.what());
2706  } catch (const std::exception& e) {
2707  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2708  import_buffers[col_idx_to_pop]->pop_value();
2709  }
2710  thread_import_status.rows_rejected++;
2711  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2712  }
2713  };
2714 
2715  if (pGeometry && copy_params.geo_explode_collections) {
2716  // explode and import
2717  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2718  explode_collections_step1(col_descs);
2719  explode_collections_step2(pGeometry,
2720  collection_child_type,
2721  collection_col_name,
2722  firstFeature + iFeature + 1,
2723  execute_import_feature);
2724  } else {
2725  // import non-collection or null feature just once
2726  execute_import_feature(pGeometry);
2727  }
2728  } // end features
2729 
2730  float convert_s = TIMER_STOP(convert_timer);
2731 
2732  float load_s = 0.0f;
2733  if (thread_import_status.rows_completed > 0) {
2734  auto load_timer = timer_start();
2735  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2736  load_s = TIMER_STOP(load_timer);
2737  }
2738 
2739  if (DEBUG_TIMING && thread_import_status.rows_completed > 0) {
2740  LOG(INFO) << "DEBUG: Process " << convert_s << "s";
2741  LOG(INFO) << "DEBUG: Load " << load_s << "s";
2742  LOG(INFO) << "DEBUG: Total " << (convert_s + load_s) << "s";
2743  }
2744 
2745  thread_import_status.thread_id = thread_id;
2746 
2747  return thread_import_status;
2748 }
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:125
int32_t getErrorCode() const
Definition: ErrorHandling.h:55
static const int32_t ERR_INTERRUPTED
Definition: Execute.h:1388
SQLTypes
Definition: sqltypes.h:53
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1906
#define LOG(tag)
Definition: Logger.h:216
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:157
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define TIMER_STOP(t)
Definition: Importer.cpp:101
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1872
ThreadId thread_id()
Definition: Logger.cpp:820
#define CHECK(condition)
Definition: Logger.h:222
Type timer_start()
Definition: measure.h:42


bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords,
SQLTypeInfo &  ti 
)

Definition at line 1603 of file Importer.cpp.

References Geospatial::GeoPoint::getColumns(), and SQLTypeInfo::transforms().

Referenced by import_thread_delimited().

1606  {
1607  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1608  return false;
1609  }
1610  if (ti.transforms()) {
1611  Geospatial::GeoPoint pt{std::vector<double>{lon, lat}};
1612  if (!pt.transform(ti)) {
1613  return false;
1614  }
1615  pt.getColumns(coords);
1616  return true;
1617  }
1618  coords.push_back(lon);
1619  coords.push_back(lat);
1620  return true;
1621 }
void getColumns(std::vector< double > &coords) const
Definition: Types.cpp:567
bool transforms() const
Definition: sqltypes.h:610
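
A minimal sketch of the non-transforming path (ti.transforms() returns false), which simply appends lon and lat to coords:

 std::vector<double> coords;
 SQLTypeInfo ti;  // default-constructed, no SRID transform requested
 if (import_export::importGeoFromLonLat(-122.41, 37.77, coords, ti)) {
   // coords == {-122.41, 37.77}
 }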


ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti )

Definition at line 366 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::TypedImportBuffer::addDefaultValues(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

366  {
367  SQLTypeInfo elem_ti = ti.get_elem_type();
368  auto len = ti.get_size();
369 
370  if (len > 0) {
371  // Compose a NULL fixlen array
372  int8_t* buf = (int8_t*)checked_malloc(len);
373  // First scalar is a NULL_ARRAY sentinel
374  Datum d = NullArrayDatum(elem_ti);
375  int8_t* p = append_datum(buf, d, elem_ti);
376  CHECK(p);
377  // Rest is filled with normal NULL sentinels
378  Datum d0 = NullDatum(elem_ti);
379  while ((p - buf) < len) {
380  p = append_datum(p, d0, elem_ti);
381  CHECK(p);
382  }
383  CHECK((p - buf) == len);
384  return ArrayDatum(len, buf, true);
385  }
386  // NULL varlen array
387  return ArrayDatum(0, NULL, true);
388 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:389
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:578
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
Datum NullDatum(const SQLTypeInfo &ti)
Definition: Datum.cpp:286
#define CHECK(condition)
Definition: Logger.h:222
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:268
Definition: Datum.h:44
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:956
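
A hedged sketch of the variable-length branch: for an array type with no fixed size, get_size() is negative and a NULL varlen ArrayDatum is returned. The setter calls are assumptions based on SQLTypeInfo mutators used elsewhere in the codebase.

 SQLTypeInfo array_ti;
 array_ti.set_type(kARRAY);
 array_ti.set_subtype(kINT);  // element type
 ArrayDatum null_array = import_export::NullArray(array_ti);
 // null_array.is_null == true, length 0, no buffer allocated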


Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti )

Definition at line 268 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_array_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

268  {
269  Datum d;
270  const auto type = get_type_for_datum(ti);
271  switch (type) {
272  case kBOOLEAN:
273  d.boolval = inline_fixed_encoding_null_array_val(ti);
274  break;
275  case kBIGINT:
276  d.bigintval = inline_fixed_encoding_null_array_val(ti);
277  break;
278  case kINT:
279  d.intval = inline_fixed_encoding_null_array_val(ti);
280  break;
281  case kSMALLINT:
282  d.smallintval = inline_fixed_encoding_null_array_val(ti);
283  break;
284  case kTINYINT:
285  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
286  break;
287  case kFLOAT:
288  d.floatval = NULL_ARRAY_FLOAT;
289  break;
290  case kDOUBLE:
291  d.doubleval = NULL_ARRAY_DOUBLE;
292  break;
293  case kTIME:
294  case kTIMESTAMP:
295  case kDATE:
296  d.bigintval = inline_fixed_encoding_null_array_val(ti);
297  break;
298  case kPOINT:
299  case kMULTIPOINT:
300  case kLINESTRING:
301  case kMULTILINESTRING:
302  case kPOLYGON:
303  case kMULTIPOLYGON:
304  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
305  default:
306  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
307  }
308  return d;
309 }
int8_t tinyintval
Definition: Datum.h:46
Definition: sqltypes.h:64
int8_t boolval
Definition: Datum.h:45
int32_t intval
Definition: Datum.h:48
float floatval
Definition: Datum.h:50
int64_t bigintval
Definition: Datum.h:49
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: Datum.h:47
Definition: sqltypes.h:68
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:60
SQLTypes get_type_for_datum(const SQLTypeInfo &ti)
Definition: Importer.cpp:255
Definition: Datum.h:44
double doubleval
Definition: Datum.h:51


size_t import_export::num_import_threads ( const int32_t  copy_params_threads)
inline

Definition at line 31 of file thread_count.h.

References g_max_import_threads.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), foreign_storage::get_num_threads(), import_export::Importer::importDelimited(), import_export::Importer::importGDALGeo(), and import_export::Importer::importGDALRaster().

31  {
32  if (copy_params_threads > 0) {
33  return static_cast<size_t>(copy_params_threads);
34  }
35  return std::min(static_cast<size_t>(std::thread::hardware_concurrency()),
36  g_max_import_threads);
37 }
size_t g_max_import_threads
Definition: Importer.cpp:106
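
A brief sketch of the resolution logic for the COPY threads option:

 size_t n_explicit = import_export::num_import_threads(8);  // returns 8
 size_t n_auto = import_export::num_import_threads(0);
 // n_auto == std::min(std::thread::hardware_concurrency(), g_max_import_threads)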


MetadataColumnInfos import_export::parse_add_metadata_columns ( const std::string &  add_metadata_columns,
const std::string &  file_path 
)

Definition at line 35 of file MetadataColumn.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::ExpressionParser::evalAsString(), IS_INTEGER, join(), kBIGINT, kDATE, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kNULLT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, run_benchmark_import::parser, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_type(), import_export::ExpressionParser::setExpression(), import_export::ExpressionParser::setStringConstant(), ColumnDescriptor::sourceName, split(), strip(), to_lower(), and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptorsGeo(), import_export::Importer::gdalToColumnDescriptorsRaster(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), and import_export::Importer::readMetadataSampleGDAL().
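
A hedged usage sketch based on the "name,type,expression" format and the filename/filedir/filepath constants described in the comments of the listing below; the expected values are assumptions, not taken from the source:

 auto infos = import_export::parse_add_metadata_columns(
     "src_file,text,filename; src_dir,text,filedir",
     "/data/imports/trips_2021.csv");
 // Expected: infos[0].value == "trips_2021.csv"
 //           infos[1].value == "/data/imports"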

36  {
37  //
38  // each string is "column_name,column_type,expression"
39  //
40  // column_type can be:
41  // tinyint
42  // smallint
43  // int
44  // bigint
45  // float
46  // double
47  // date
48  // time
49  // timestamp
50  // text
51  //
52  // expression can be in terms of:
53  // filename
54  // filedir
55  // filepath
56  // etc.
57  //
58 
59  // anything to do?
60  if (add_metadata_columns.length() == 0u) {
61  return {};
62  }
63 
64  // split by ";"
65  // @TODO(se) is this safe?
66  // probably won't appear in a file name/path or a date/time string
67  std::vector<std::string> add_metadata_column_strings;
68  boost::split(add_metadata_column_strings, add_metadata_columns, boost::is_any_of(";"));
69  if (add_metadata_column_strings.size() == 0u) {
70  return {};
71  }
72 
73  ExpressionParser parser;
74 
75  // known string constants
76  auto const fn = boost::filesystem::path(file_path).filename().string();
77  auto const fd = boost::filesystem::path(file_path).parent_path().string();
78  auto const fp = file_path;
79  parser.setStringConstant("filename", fn);
80  parser.setStringConstant("filedir", fd);
81  parser.setStringConstant("filepath", fp);
82 
83  MetadataColumnInfos metadata_column_infos;
84 
85  // for each requested column...
86  for (auto const& add_metadata_column_string : add_metadata_column_strings) {
87  // strip
88  auto const add_metadata_column = strip(add_metadata_column_string);
89 
90  // tokenize and extract
91  std::vector<std::string> tokens;
92  boost::split(tokens, add_metadata_column, boost::is_any_of(","));
93  if (tokens.size() < 3u) {
94  throw std::runtime_error("Invalid metadata column info '" + add_metadata_column +
95  "' (must be of the form 'name,type,expression')");
96  }
97  auto token_itr = tokens.begin();
98  auto const column_name = strip(*token_itr++);
99  auto const data_type = strip(to_lower(*token_itr++));
100  tokens.erase(tokens.begin(), token_itr);
101  auto const expression = strip(boost::join(tokens, ","));
102 
103  // get column type
104  SQLTypes sql_type{kNULLT};
105  double range_min{0.0}, range_max{0.0};
106  if (data_type == "tinyint") {
107  sql_type = kTINYINT;
108  range_min = static_cast<double>(std::numeric_limits<int8_t>::min());
109  range_max = static_cast<double>(std::numeric_limits<int8_t>::max());
110  } else if (data_type == "smallint") {
111  sql_type = kSMALLINT;
112  range_min = static_cast<double>(std::numeric_limits<int16_t>::min());
113  range_max = static_cast<double>(std::numeric_limits<int16_t>::max());
114  } else if (data_type == "int") {
115  sql_type = kINT;
116  range_min = static_cast<double>(std::numeric_limits<int32_t>::min());
117  range_max = static_cast<double>(std::numeric_limits<int32_t>::max());
118  } else if (data_type == "bigint") {
119  sql_type = kBIGINT;
120  range_min = static_cast<double>(std::numeric_limits<int64_t>::min());
121  range_max = static_cast<double>(std::numeric_limits<int64_t>::max());
122  } else if (data_type == "float") {
123  sql_type = kFLOAT;
124  range_min = static_cast<double>(std::numeric_limits<float>::min());
125  range_max = static_cast<double>(std::numeric_limits<float>::max());
126  } else if (data_type == "double") {
127  sql_type = kDOUBLE;
128  range_min = static_cast<double>(std::numeric_limits<double>::min());
129  range_max = static_cast<double>(std::numeric_limits<double>::max());
130  } else if (data_type == "date") {
131  sql_type = kDATE;
132  } else if (data_type == "time") {
133  sql_type = kTIME;
134  } else if (data_type == "timestamp") {
135  sql_type = kTIMESTAMP;
136  } else if (data_type == "text") {
137  sql_type = kTEXT;
138  } else {
139  throw std::runtime_error("Invalid metadata column data type '" + data_type +
140  "' for column '" + column_name + "'");
141  }
142 
143  // set expression with force cast back to string
144  parser.setExpression("str(" + expression + ")");
145 
146  // evaluate
147  auto value = parser.evalAsString();
148 
149  // validate date/time/timestamp value now
150  // @TODO(se) do we need to provide for non-zero dimension?
151  try {
152  if (sql_type == kDATE) {
153  dateTimeParse<kDATE>(value, 0);
154  } else if (sql_type == kTIME) {
155  dateTimeParse<kTIME>(value, 0);
156  } else if (sql_type == kTIMESTAMP) {
157  dateTimeParse<kTIMESTAMP>(value, 0);
158  }
159  } catch (std::runtime_error& e) {
160  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
161  " value '" + value + "' for column '" + column_name + "'");
162  }
163 
164  // validate int/float/double
165  try {
166  if (IS_INTEGER(sql_type) || sql_type == kFLOAT || sql_type == kDOUBLE) {
167  size_t num_chars{0u};
168  auto const v = static_cast<double>(std::stod(value, &num_chars));
169  if (v < range_min || v > range_max) {
170  throw std::out_of_range(to_string(sql_type));
171  }
172  if (num_chars == 0u) {
173  throw std::invalid_argument("empty value");
174  }
175  }
176  } catch (std::invalid_argument& e) {
177  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
178  " value '" + value + "' for column '" + column_name +
179  "' (" + e.what() + ")");
180  } catch (std::out_of_range& e) {
181  throw std::runtime_error("Out-of-range metadata column " + to_string(sql_type) +
182  " value '" + value + "' for column '" + column_name +
183  "' (" + e.what() + ")");
184  }
185 
186  // build column descriptor
187  ColumnDescriptor cd;
188  cd.columnName = cd.sourceName = column_name;
189  cd.columnType.set_type(sql_type);
190  cd.columnType.set_fixed_size();
191  if (sql_type == kTEXT) {
192  cd.columnType.set_compression(kENCODING_DICT);
193  cd.columnType.set_comp_param(32);
194  }
195 
196  // add to result
197  metadata_column_infos.push_back({std::move(cd), std::move(value)});
198  }
199 
200  // done
201  return metadata_column_infos;
202 }

Here is the call graph for this function:

Here is the caller graph for this function:

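As an illustration of the "name,type,expression" option format documented in the comment block above, a hedged sketch (the include path and the input file path are assumptions; filename and filedir are the string constants the function registers):

#include <string>
#include "ImportExport/MetadataColumn.h"  // assumed include path

void add_metadata_columns_example() {
  // Two metadata columns, entries separated by ';'; the expressions simply
  // reuse the predefined string constants.
  const std::string spec = "source_file,text,filename;source_dir,text,filedir";
  const auto infos = import_export::parse_add_metadata_columns(
      spec, "/data/imports/trips_2021.csv");
  // Each returned entry pairs a TEXT ColumnDescriptor with the evaluated
  // constant value: "trips_2021.csv" and "/data/imports" respectively.
}
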
std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor td,
Loader *  loader 
)

Definition at line 6201 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by DBHandler::prepare_loader_generic().

6203  {
6204  CHECK(td);
6205  auto col_descs = loader->get_column_descs();
6206 
6207  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
6208  for (auto cd : col_descs) {
6209  import_buffers.emplace_back(
6210  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
6211  }
6212 
6213  return import_buffers;
6214 }

Here is the call graph for this function:

Here is the caller graph for this function:

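A minimal caller-side sketch, with an assumed include path; the TableDescriptor and Loader are presumed to already exist, as in DBHandler::prepare_loader_generic():

#include <memory>
#include <vector>
#include "ImportExport/Importer.h"  // assumed include path

std::vector<std::unique_ptr<import_export::TypedImportBuffer>> make_import_buffers(
    const TableDescriptor* td, import_export::Loader* loader) {
  // One TypedImportBuffer per column, each wired to that column's string
  // dictionary (if any) via loader->getStringDict().
  return import_export::setup_column_loaders(td, loader);
}
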
ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo ti,
const CopyParams &  copy_params 
)

Definition at line 311 of file Importer.cpp.

References append_datum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value(), and import_export::TypedImportBuffer::addDefaultValues().

313  {
314  SQLTypeInfo elem_ti = ti.get_elem_type();
315  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
316  return ArrayDatum(0, NULL, true);
317  }
318  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
319  LOG(WARNING) << "Malformed array: " << s;
320  return ArrayDatum(0, NULL, true);
321  }
322  std::vector<std::string> elem_strs;
323  size_t last = 1;
324  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
325  i = s.find(copy_params.array_delim, last)) {
326  elem_strs.push_back(s.substr(last, i - last));
327  last = i + 1;
328  }
329  if (last + 1 <= s.size()) {
330  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
331  }
332  if (elem_strs.size() == 1) {
333  auto str = elem_strs.front();
334  auto str_trimmed = trim_space(str.c_str(), str.length());
335  if (str_trimmed == "") {
336  elem_strs.clear(); // Empty array
337  }
338  }
339  if (!elem_ti.is_string()) {
340  size_t len = elem_strs.size() * elem_ti.get_size();
341  std::unique_ptr<int8_t, FreeDeleter> buf(
342  reinterpret_cast<int8_t*>(checked_malloc(len)));
343  int8_t* p = buf.get();
344  for (auto& es : elem_strs) {
345  auto e = trim_space(es.c_str(), es.length());
346  bool is_null = (e == copy_params.null_str) || e == "NULL";
347  if (!elem_ti.is_string() && e == "") {
348  is_null = true;
349  }
350  if (elem_ti.is_number() || elem_ti.is_time()) {
351  if (!isdigit(e[0]) && e[0] != '-') {
352  is_null = true;
353  }
354  }
355  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
356  p = append_datum(p, d, elem_ti);
357  CHECK(p);
358  }
359  return ArrayDatum(len, buf.release(), false);
360  }
361  // must not be called for array of strings
362  CHECK(false);
363  return ArrayDatum(0, NULL, true);
364 }

Here is the call graph for this function:

Here is the caller graph for this function:

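For illustration, a hedged sketch of a call such as TypedImportBuffer::add_value() makes, assuming the default CopyParams array markers '{', '}' and ',' and that the passed SQLTypeInfo describes an INT array column (include paths assumed):

#include "ImportExport/CopyParams.h"  // assumed include path
#include "ImportExport/Importer.h"    // assumed include path

void string_to_array_example(const SQLTypeInfo& int_array_ti) {
  import_export::CopyParams copy_params;  // default array_begin/array_end/array_delim assumed
  // Parses the textual array literal into a packed ArrayDatum whose buffer
  // holds three int32_t values; the null string or a malformed literal
  // yields a null ArrayDatum instead.
  auto datum = import_export::StringToArray("{1, 2, 3}", int_array_ti, copy_params);
  (void)datum;
}
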
ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo ti 
)

Definition at line 464 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

464  {
465  SQLTypeInfo elem_ti = ti.get_elem_type();
466 
467  CHECK(!elem_ti.is_string());
468 
469  if (datum.is_null) {
470  return NullArray(ti);
471  }
472 
473  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
474  int8_t* buf = (int8_t*)checked_malloc(len);
475  int8_t* p = buf;
476  for (auto& e : datum.val.arr_val) {
477  p = append_datum(p, TDatumToDatum(e, elem_ti), elem_ti);
478  CHECK(p);
479  }
480 
481  return ArrayDatum(len, buf, false);
482 }

Here is the call graph for this function:

Here is the caller graph for this function:

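A brief sketch of building a Thrift TDatum for a two-element integer array and converting it; field names follow the listing above, the SQLTypeInfo is assumed to describe a non-string array column, and the Thrift-generated and Importer.h includes are omitted because their paths vary by build:

ArrayDatum tdatum_array_example(const SQLTypeInfo& int_array_ti) {
  TDatum elem0, elem1, arr;
  elem0.val.int_val = 10;
  elem1.val.int_val = 20;
  arr.is_null = false;
  arr.val.arr_val = {elem0, elem1};
  // Both elements are packed contiguously into one buffer; a TDatum with
  // is_null set would return NullArray(ti) instead.
  return import_export::TDatumToArrayDatum(arr, int_array_ti);
}
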
Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo ti 
)

Definition at line 417 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

417  {
418  Datum d;
419  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
420  switch (type) {
421  case kBOOLEAN:
422  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
423  break;
424  case kBIGINT:
425  d.bigintval =
426  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
427  break;
428  case kINT:
429  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
430  break;
431  case kSMALLINT:
432  d.smallintval =
433  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
434  break;
435  case kTINYINT:
436  d.tinyintval =
437  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
438  break;
439  case kFLOAT:
440  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
441  break;
442  case kDOUBLE:
443  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
444  break;
445  case kTIME:
446  case kTIMESTAMP:
447  case kDATE:
448  d.bigintval =
449  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
450  break;
451  case kPOINT:
452  case kMULTIPOINT:
453  case kLINESTRING:
454  case kMULTILINESTRING:
455  case kPOLYGON:
456  case kMULTIPOLYGON:
457  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
458  default:
459  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
460  }
461  return d;
462 }

Here is the call graph for this function:

Here is the caller graph for this function:

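For illustration of the null handling visible above: FLOAT and DOUBLE columns receive the NULL_FLOAT / NULL_DOUBLE sentinels, while boolean, integer, and time-like columns receive the fixed-encoding null of their SQLTypeInfo. A minimal sketch (includes omitted as above):

Datum tdatum_null_example(SQLTypeInfo& double_ti, SQLTypeInfo& int_ti) {
  TDatum null_value;
  null_value.is_null = true;
  Datum d_double = import_export::TDatumToDatum(null_value, double_ti);
  // d_double.doubleval == NULL_DOUBLE
  Datum d_int = import_export::TDatumToDatum(null_value, int_ti);
  // d_int.intval == inline_fixed_encoding_null_val(int_ti)
  return d_int;
}
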
static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 242 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

242  {
243  size_t i = 0;
244  size_t j = len;
245  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
246  i++;
247  }
248  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
249  j--;
250  }
251  return std::string(field + i, j - i);
252 }

Here is the caller graph for this function:

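Since trim_space() is file-static in Importer.cpp, the sketch below is a standalone mirror of the same rule for illustration: only spaces and carriage returns are stripped, and only from the ends of the field.

#include <cstddef>
#include <string>

// Standalone illustration, not the library function itself.
std::string trim_space_like(const std::string& field) {
  size_t i = 0, j = field.size();
  while (i < j && (field[i] == ' ' || field[i] == '\r')) ++i;
  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) --j;
  return field.substr(i, j - i);
}
// trim_space_like("  42\r") == "42"; interior spaces ("a b") are preserved.
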
template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 3280 of file Importer.cpp.

References heavydb.dtypes::T.

3280  {
3281  try {
3282  boost::lexical_cast<T>(str);
3283  } catch (const boost::bad_lexical_cast& e) {
3284  return false;
3285  }
3286  return true;
3287 }

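try_cast<T> reports whether boost::lexical_cast<T> would accept the whole string, without letting the exception escape; a standalone equivalent for illustration:

#include <boost/lexical_cast.hpp>
#include <string>

template <class T>
bool try_cast_like(const std::string& str) {
  try {
    boost::lexical_cast<T>(str);
  } catch (const boost::bad_lexical_cast&) {
    return false;
  }
  return true;
}
// try_cast_like<int>("123") is true, try_cast_like<int>("12.5") is false,
// try_cast_like<double>("12.5") is true.
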
Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static

constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 34 of file CopyParams.h.

const size_t import_export::kImportRowLimit = 10000
static

constexpr size_t import_export::max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
static

Definition at line 37 of file CopyParams.h.

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static

heavyai::shared_mutex import_export::status_mutex
static