OmniSciDB  085a039ca4
import_export Namespace Reference

Namespaces

 anonymous_namespace{ExpressionParser.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterCSV.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 anonymous_namespace{RasterImporter.cpp}
 
 delimited_parser
 

Classes

class  AbstractImporter
 
struct  CopyParams
 
class  ExpressionParser
 
class  ForeignDataImporter
 
class  ImportBatchResult
 
struct  GeoImportException
 
class  ColumnNotGeoError
 
struct  BadRowsTracker
 
class  ImporterUtils
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  Importer
 
struct  MetadataColumnInfo
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 
class  GCPTransformer
 
class  RasterImporter
 
class  RenderGroupAnalyzer
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using FeaturePtrVector = std::vector< Geospatial::GDAL::FeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 
using MetadataColumnInfos = std::vector< MetadataColumnInfo >
 

Enumerations

enum  ImportHeaderRow { ImportHeaderRow::kAutoDetect, ImportHeaderRow::kNoHeader, ImportHeaderRow::kHasHeader }
 
enum  RasterPointType {
  RasterPointType::kNone, RasterPointType::kAuto, RasterPointType::kSmallInt, RasterPointType::kInt,
  RasterPointType::kFloat, RasterPointType::kDouble, RasterPointType::kPoint
}
 
enum  RasterPointTransform { RasterPointTransform::kNone, RasterPointTransform::kAuto, RasterPointTransform::kFile, RasterPointTransform::kWorld }
 
enum  SourceType {
  SourceType::kUnknown, SourceType::kUnsupported, SourceType::kDelimitedFile, SourceType::kGeoFile,
  SourceType::kRasterFile, SourceType::kParquetFile, SourceType::kOdbc, SourceType::kRegexParsedFile
}
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullDatum (SQLTypeInfo &ti)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer, const Catalog_Namespace::SessionInfo *session_info, Executor *executor)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRCoordinateTransformation *coordinate_transformation, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, const Catalog_Namespace::SessionInfo *session_info, Executor *executor, const MetadataColumnInfos &metadata_column_infos)
 
template<class T >
bool try_cast (const std::string &str)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns (const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
 
std::unique_ptr< AbstractImporter > create_importer (Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
 
MetadataColumnInfos parse_add_metadata_columns (const std::string &add_metadata_columns, const std::string &file_path)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static mapd_shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 

Typedef Documentation

using import_export::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 74 of file Importer.h.

using import_export::ColumnIdToRenderGroupAnalyzerMapType = typedef std::map<int, std::shared_ptr<RenderGroupAnalyzer>>

Definition at line 162 of file Importer.cpp.

using import_export::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 160 of file Importer.cpp.

using import_export::FeaturePtrVector = typedef std::vector<Geospatial::GDAL::FeatureUqPtr>

Definition at line 163 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 159 of file Importer.cpp.

using import_export::MetadataColumnInfos = typedef std::vector<MetadataColumnInfo>

Definition at line 36 of file MetadataColumn.h.

Enumeration Type Documentation
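The enumerations summarized above are consumed through CopyParams when an import is configured. A minimal sketch follows, assuming the header path and the has_header field name; CopyParams::source_type is documented at CopyParams.h:57, everything else here is illustrative:

#include "ImportExport/CopyParams.h"  // assumed header path

import_export::CopyParams make_delimited_copy_params() {
  import_export::CopyParams copy_params;
  // SourceType selects the parser used by create_importer() below
  copy_params.source_type = import_export::SourceType::kDelimitedFile;
  // ImportHeaderRow controls header handling; the field name is an assumption
  copy_params.has_header = import_export::ImportHeaderRow::kAutoDetect;
  return copy_params;
}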

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 454 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

454  void addBinaryStringArray(const TDatum& datum, std::vector<std::string>& string_vec) {
455  const auto& arr = datum.val.arr_val;
456  for (const auto& elem_datum : arr) {
457  string_vec.push_back(elem_datum.val.str_val);
458  }
459 }


std::unique_ptr< AbstractImporter > import_export::create_importer ( Catalog_Namespace::Catalog &  catalog,
const TableDescriptor *  td,
const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params 
)

Definition at line 6270 of file Importer.cpp.

References g_enable_fsi_regex_import, g_enable_legacy_delimited_import, kDelimitedFile, kParquetFile, kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by Parser::CopyTableStmt::execute(), and DBHandler::import_table().

6270  std::unique_ptr<AbstractImporter> create_importer(
6271      Catalog_Namespace::Catalog& catalog,
6272      const TableDescriptor* td,
6273      const std::string& copy_from_source,
6274      const import_export::CopyParams& copy_params) {
6275    if (copy_params.source_type == import_export::SourceType::kParquetFile) {
6276  #ifdef ENABLE_IMPORT_PARQUET
6277      if (!g_enable_legacy_parquet_import) {
6278        return std::make_unique<import_export::ForeignDataImporter>(
6279            copy_from_source, copy_params, td);
6280      }
6281  #else
6282      throw std::runtime_error("Parquet not supported!");
6283  #endif
6284    }
6285 
6286    if (copy_params.source_type == import_export::SourceType::kDelimitedFile &&
6287        !g_enable_legacy_delimited_import) {
6288      return std::make_unique<import_export::ForeignDataImporter>(
6289          copy_from_source, copy_params, td);
6290    }
6291 
6292    if (copy_params.source_type == import_export::SourceType::kRegexParsedFile) {
6293      if (g_enable_fsi_regex_import) {
6294        return std::make_unique<import_export::ForeignDataImporter>(
6295            copy_from_source, copy_params, td);
6296      } else {
6297        throw std::runtime_error(
6298            "Regex parsed import only supported using 'fsi-regex-import' flag");
6299      }
6300    }
6301 
6302    return std::make_unique<import_export::Importer>(
6303        catalog, td, copy_from_source, copy_params);
6304  }
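A hedged usage sketch: create_importer() is the factory used by COPY FROM (see the callers above). Only the factory signature is documented on this page; the import(session_info) entry point on the returned AbstractImporter is assumed here.

#include "ImportExport/Importer.h"  // assumed header path

import_export::ImportStatus copy_from(Catalog_Namespace::Catalog& catalog,
                                      const TableDescriptor* td,
                                      const std::string& copy_from_source,
                                      const import_export::CopyParams& copy_params,
                                      const Catalog_Namespace::SessionInfo* session_info) {
  auto importer =
      import_export::create_importer(catalog, td, copy_from_source, copy_params);
  return importer->import(session_info);  // assumed AbstractImporter interface
}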

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::fill_missing_columns ( const Catalog_Namespace::Catalog *  cat,
Fragmenter_Namespace::InsertData &  insert_data 
)

Definition at line 6198 of file Importer.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, ColumnDescriptor::columnId, Fragmenter_Namespace::InsertData::columnIds, ColumnDescriptor::columnName, ColumnDescriptor::columnType, Fragmenter_Namespace::InsertData::data, ColumnDescriptor::default_value, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), import_export::TypedImportBuffer::get_data_block_pointers(), SQLTypeInfo::get_physical_cols(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Geospatial::GeoTypesFactory::getGeoColumns(), Catalog_Namespace::Catalog::getMetadataForDict(), Fragmenter_Namespace::InsertData::is_default, SQLTypeInfo::is_geometry(), IS_STRING, kARRAY, kENCODING_DICT, import_export::Importer::set_geo_physical_import_buffer(), gpu_enabled::sort(), and Fragmenter_Namespace::InsertData::tableId.

Referenced by RelAlgExecutor::executeSimpleInsert(), DBHandler::insert_data(), and Parser::InsertIntoTableAsSelectStmt::populateData().

6198  std::vector<std::unique_ptr<TypedImportBuffer>> fill_missing_columns(
6199      const Catalog_Namespace::Catalog* cat,
6200      Fragmenter_Namespace::InsertData& insert_data) {
6201  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> defaults_buffers;
6202  if (insert_data.is_default.size() == 0) {
6203  insert_data.is_default.resize(insert_data.columnIds.size(), false);
6204  }
6205  CHECK(insert_data.is_default.size() == insert_data.is_default.size());
6206  auto cds = cat->getAllColumnMetadataForTable(insert_data.tableId, false, false, true);
6207  if (cds.size() == insert_data.columnIds.size()) {
6208  // all columns specified
6209  return defaults_buffers;
6210  }
6211  for (auto cd : cds) {
6212  if (std::find(insert_data.columnIds.begin(),
6213  insert_data.columnIds.end(),
6214  cd->columnId) == insert_data.columnIds.end()) {
6215  StringDictionary* dict = nullptr;
6216  if (cd->columnType.get_type() == kARRAY &&
6217  IS_STRING(cd->columnType.get_subtype()) && !cd->default_value.has_value()) {
6218  throw std::runtime_error("Cannot omit column \"" + cd->columnName +
6219  "\": omitting TEXT arrays is not supported yet");
6220  }
6221  if (cd->columnType.get_compression() == kENCODING_DICT) {
6222  dict = cat->getMetadataForDict(cd->columnType.get_comp_param())->stringDict.get();
6223  }
6224  defaults_buffers.emplace_back(std::make_unique<TypedImportBuffer>(cd, dict));
6225  }
6226  }
6227  // put buffers in order to fill geo sub-columns properly
6228  std::sort(defaults_buffers.begin(),
6229  defaults_buffers.end(),
6230  [](decltype(defaults_buffers[0])& a, decltype(defaults_buffers[0])& b) {
6231  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
6232  });
6233  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6234  auto cd = defaults_buffers[i]->getColumnDesc();
6235  std::string default_value = cd->default_value.value_or("NULL");
6236  defaults_buffers[i]->add_value(
6237  cd, default_value, !cd->default_value.has_value(), import_export::CopyParams());
6238  if (cd->columnType.is_geometry()) {
6239  std::vector<double> coords, bounds;
6240  std::vector<int> ring_sizes, poly_rings;
6241  int render_group = 0;
6242  SQLTypeInfo tinfo{cd->columnType};
6243  CHECK(Geospatial::GeoTypesFactory::getGeoColumns(
6244  default_value, tinfo, coords, bounds, ring_sizes, poly_rings, false));
6245  // set physical columns starting with the following ID
6246  auto next_col = i + 1;
6247  import_export::Importer::set_geo_physical_import_buffer(*cat,
6248  cd,
6249  defaults_buffers,
6250  next_col,
6251  coords,
6252  bounds,
6253  ring_sizes,
6254  poly_rings,
6255  render_group);
6256  // skip physical columns filled with the call above
6257  i += cd->columnType.get_physical_cols();
6258  }
6259  }
6260  auto data = import_export::TypedImportBuffer::get_data_block_pointers(defaults_buffers);
6261  CHECK(data.size() == defaults_buffers.size());
6262  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6263  insert_data.data.push_back(data[i]);
6264  insert_data.columnIds.push_back(defaults_buffers[i]->getColumnDesc()->columnId);
6265  insert_data.is_default.push_back(true);
6266  }
6267  return defaults_buffers;
6268 }
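A hedged usage sketch, assuming the caller has already populated insert_data for the explicitly supplied columns; only fill_missing_columns() itself and the InsertData members named in the References list are taken as given.

void append_default_columns(const Catalog_Namespace::Catalog& catalog,
                            Fragmenter_Namespace::InsertData& insert_data) {
  // insert_data.tableId, columnIds and data are assumed to already describe the
  // columns the caller provided explicitly
  auto default_buffers =
      import_export::fill_missing_columns(&catalog, insert_data);
  // default_buffers owns the storage now referenced from insert_data.data, so it
  // must stay alive until the fragmenter insert that consumes insert_data is done
}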

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 5142 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

5142  void gdalGatherFilesInArchiveRecursive(const std::string& archive_path,
5143                                         std::vector<std::string>& files) {
5144  // prepare to gather subdirectories
5145  std::vector<std::string> subdirectories;
5146 
5147  // get entries
5148  char** entries = VSIReadDir(archive_path.c_str());
5149  if (!entries) {
5150  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
5151  return;
5152  }
5153 
5154  // force scope
5155  {
5156  // request clean-up
5157  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
5158 
5159  // check all the entries
5160  int index = 0;
5161  while (true) {
5162  // get next entry, or drop out if there isn't one
5163  char* entry_c = entries[index++];
5164  if (!entry_c) {
5165  break;
5166  }
5167  std::string entry(entry_c);
5168 
5169  // ignore '.' and '..'
5170  if (entry == "." || entry == "..") {
5171  continue;
5172  }
5173 
5174  // build the full path
5175  std::string entry_path = archive_path + std::string("/") + entry;
5176 
5177  // is it a file or a sub-folder
5178  VSIStatBufL sb;
5179  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
5180  if (result < 0) {
5181  break;
5182  }
5183 
5184  if (VSI_ISDIR(sb.st_mode)) {
5185  // a directory that ends with .gdb could be a Geodatabase bundle
5186  // arguably dangerous to decide this purely by name, but any further
5187  // validation would be very complex especially at this scope
5188  if (boost::iends_with(entry_path, ".gdb")) {
5189  // add the directory as if it was a file and don't recurse into it
5190  files.push_back(entry_path);
5191  } else {
5192  // add subdirectory to be recursed into
5193  subdirectories.push_back(entry_path);
5194  }
5195  } else {
5196  // add this file
5197  files.push_back(entry_path);
5198  }
5199  }
5200  }
5201 
5202  // recurse into each subdirectories we found
5203  for (const auto& subdirectory : subdirectories) {
5204  gdalGatherFilesInArchiveRecursive(subdirectory, files);
5205  }
5206 }
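A hedged usage sketch: the function walks a GDAL VSI path recursively, so an archive can be listed with GDAL's standard "/vsizip/" prefix. GDAL is assumed to already be initialized, as it is in the documented caller Importer::gdalGetAllFilesInArchive(); the archive path itself is illustrative.

#include <string>
#include <vector>

std::vector<std::string> gather_archive_contents() {
  std::vector<std::string> files;
  import_export::gdalGatherFilesInArchiveRecursive("/vsizip//data/shapefiles.zip",
                                                   files);
  return files;  // full VSI paths to files, plus any *.gdb bundle directories
}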

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
size_t  first_row_index_this_buffer,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor 
)
static

Definition at line 2024 of file Importer.cpp.

References CHECK, CHECK_LT, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), Catalog_Namespace::SessionInfo::get_session_id(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, is_null(), kDelimitedFile, kMULTIPOLYGON, kPOINT, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, import_export::CopyParams::source_type, gpu_enabled::swap(), import_export::ImportStatus::thread_id, logger::thread_id(), to_string(), and UNLIKELY.

Referenced by import_export::Importer::importDelimited().

2034  {
2035  ImportStatus thread_import_status;
2036  int64_t total_get_row_time_us = 0;
2037  int64_t total_str_to_val_time_us = 0;
2038  auto query_session = session_info ? session_info->get_session_id() : "";
2039  CHECK(scratch_buffer);
2040  auto buffer = scratch_buffer.get();
2041  auto load_ms = measure<>::execution([]() {});
2042 
2043  thread_import_status.thread_id = thread_id;
2044 
2045  auto ms = measure<>::execution([&]() {
2046  const CopyParams& copy_params = importer->get_copy_params();
2047  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2048  size_t begin =
2049  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
2050  const char* thread_buf = buffer + begin_pos + begin;
2051  const char* thread_buf_end = buffer + end_pos;
2052  const char* buf_end = buffer + total_size;
2053  bool try_single_thread = false;
2054  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2055  importer->get_import_buffers(thread_id);
2057  int phys_cols = 0;
2058  int point_cols = 0;
2059  for (const auto cd : col_descs) {
2060  const auto& col_ti = cd->columnType;
2061  phys_cols += col_ti.get_physical_cols();
2062  if (cd->columnType.get_type() == kPOINT) {
2063  point_cols++;
2064  }
2065  }
2066  auto num_cols = col_descs.size() - phys_cols;
2067  for (const auto& p : import_buffers) {
2068  p->clear();
2069  }
2070  std::vector<std::string_view> row;
2071  size_t row_index_plus_one = 0;
2072  for (const char* p = thread_buf; p < thread_buf_end; p++) {
2073  row.clear();
2074  std::vector<std::unique_ptr<char[]>>
2075  tmp_buffers; // holds string w/ removed escape chars, etc
2076  if (DEBUG_TIMING) {
2079  thread_buf_end,
2080  buf_end,
2081  copy_params,
2082  importer->get_is_array(),
2083  row,
2084  tmp_buffers,
2085  try_single_thread,
2086  true);
2087  });
2088  total_get_row_time_us += us;
2089  } else {
2091  thread_buf_end,
2092  buf_end,
2093  copy_params,
2094  importer->get_is_array(),
2095  row,
2096  tmp_buffers,
2097  try_single_thread,
2098  true);
2099  }
2100  row_index_plus_one++;
2101  // Each POINT could consume two separate coords instead of a single WKT
2102  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
2103  thread_import_status.rows_rejected++;
2104  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
2105  << row.size() << "): " << shared::printContainer(row);
2106  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2107  break;
2108  }
2109  continue;
2110  }
2111 
2112  //
2113  // lambda for importing a row (perhaps multiple times if exploding a collection)
2114  //
2115 
2116  auto execute_import_row = [&](OGRGeometry* import_geometry) {
2117  size_t import_idx = 0;
2118  size_t col_idx = 0;
2119  try {
2120  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2121  auto cd = *cd_it;
2122  const auto& col_ti = cd->columnType;
2123 
2124  bool is_null =
2125  (row[import_idx] == copy_params.null_str || row[import_idx] == "NULL");
2126  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
2127  // So initially nullness may be missed and not passed to add_value,
2128  // which then might also check and still decide it's actually a NULL, e.g.
2129  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
2130  // So "NULL" is not recognized as NULL but then it's not recognized as
2131  // a valid kINT, so it's a NULL after all.
2132  // Checking for "NULL" here too, as a widely accepted notation for NULL.
2133 
2134  // Treating empty as NULL
2135  if (!cd->columnType.is_string() && row[import_idx].empty()) {
2136  is_null = true;
2137  }
2138 
2139  if (col_ti.get_physical_cols() == 0) {
2140  // not geo
2141 
2142  import_buffers[col_idx]->add_value(
2143  cd, row[import_idx], is_null, copy_params);
2144 
2145  // next
2146  ++import_idx;
2147  ++col_idx;
2148  } else {
2149  // geo
2150 
2151  // store null string in the base column
2152  import_buffers[col_idx]->add_value(
2153  cd, copy_params.null_str, true, copy_params);
2154 
2155  // WKT from string we're not storing
2156  auto const& geo_string = row[import_idx];
2157 
2158  // next
2159  ++import_idx;
2160  ++col_idx;
2161 
2162  SQLTypes col_type = col_ti.get_type();
2163  CHECK(IS_GEO(col_type));
2164 
2165  std::vector<double> coords;
2166  std::vector<double> bounds;
2167  std::vector<int> ring_sizes;
2168  std::vector<int> poly_rings;
2169  int render_group = 0;
2170 
2171  // if this is a POINT column, and the field is not null, and
2172  // looks like a scalar numeric value (and not a hex blob)
2173  // attempt to import two columns as lon/lat (or lat/lon)
2174  if (col_type == kPOINT && !is_null && geo_string.size() > 0 &&
2175  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
2176  geo_string[0] == '-') &&
2177  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
2178  double lon = std::atof(std::string(geo_string).c_str());
2179  double lat = NAN;
2180  auto lat_str = row[import_idx];
2181  ++import_idx;
2182  if (lat_str.size() > 0 &&
2183  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
2184  lat = std::atof(std::string(lat_str).c_str());
2185  }
2186  // Swap coordinates if this table uses a reverse order: lat/lon
2187  if (!copy_params.lonlat) {
2188  std::swap(lat, lon);
2189  }
2190  // TODO: should check if POINT column should have been declared with
2191  // SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
2192  // throw std::runtime_error("POINT column " + cd->columnName + " is
2193  // not WGS84, cannot insert lon/lat");
2194  // }
2195  SQLTypeInfo import_ti{col_ti};
2196  if (copy_params.source_type ==
2198  import_ti.get_output_srid() == 4326) {
2199  auto srid0 = copy_params.source_srid;
2200  if (srid0 > 0) {
2201  // srid0 -> 4326 transform is requested on import
2202  import_ti.set_input_srid(srid0);
2203  }
2204  }
2205  if (!importGeoFromLonLat(lon, lat, coords, import_ti)) {
2206  throw std::runtime_error(
2207  "Cannot read lon/lat to insert into POINT column " +
2208  cd->columnName);
2209  }
2210  } else {
2211  // import it
2212  SQLTypeInfo import_ti{col_ti};
2213  if (copy_params.source_type ==
2215  import_ti.get_output_srid() == 4326) {
2216  auto srid0 = copy_params.source_srid;
2217  if (srid0 > 0) {
2218  // srid0 -> 4326 transform is requested on import
2219  import_ti.set_input_srid(srid0);
2220  }
2221  }
2222  if (is_null) {
2223  if (col_ti.get_notnull()) {
2224  throw std::runtime_error("NULL geo for column " + cd->columnName);
2225  }
2227  import_ti,
2228  coords,
2229  bounds,
2230  ring_sizes,
2231  poly_rings,
2233  } else {
2234  if (import_geometry) {
2235  // geometry already exploded
2237  import_geometry,
2238  import_ti,
2239  coords,
2240  bounds,
2241  ring_sizes,
2242  poly_rings,
2244  std::string msg =
2245  "Failed to extract valid geometry from exploded row " +
2246  std::to_string(first_row_index_this_buffer +
2247  row_index_plus_one) +
2248  " for column " + cd->columnName;
2249  throw std::runtime_error(msg);
2250  }
2251  } else {
2252  // extract geometry directly from WKT
2254  std::string(geo_string),
2255  import_ti,
2256  coords,
2257  bounds,
2258  ring_sizes,
2259  poly_rings,
2261  std::string msg = "Failed to extract valid geometry from row " +
2262  std::to_string(first_row_index_this_buffer +
2263  row_index_plus_one) +
2264  " for column " + cd->columnName;
2265  throw std::runtime_error(msg);
2266  }
2267  }
2268 
2269  // validate types
2270  if (col_type != import_ti.get_type()) {
2272  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2273  col_type == SQLTypes::kMULTIPOLYGON)) {
2274  throw std::runtime_error(
2275  "Imported geometry doesn't match the type of column " +
2276  cd->columnName);
2277  }
2278  }
2279  }
2280 
2281  // assign render group?
2282  if (columnIdToRenderGroupAnalyzerMap.size()) {
2283  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2284  if (ring_sizes.size()) {
2285  // get a suitable render group for these poly coords
2286  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2287  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2288  render_group =
2289  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2290  } else {
2291  // empty poly
2292  render_group = -1;
2293  }
2294  }
2295  }
2296  }
2297 
2298  // import extracted geo
2299  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2300  cd,
2301  import_buffers,
2302  col_idx,
2303  coords,
2304  bounds,
2305  ring_sizes,
2306  poly_rings,
2307  render_group);
2308 
2309  // skip remaining physical columns
2310  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2311  ++cd_it;
2312  }
2313  }
2314  }
2315  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2316  check_session_interrupted(query_session, executor))) {
2317  thread_import_status.load_failed = true;
2318  thread_import_status.load_msg =
2319  "Table load was cancelled via Query Interrupt";
2320  return;
2321  }
2322  thread_import_status.rows_completed++;
2323  } catch (const std::exception& e) {
2324  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2325  import_buffers[col_idx_to_pop]->pop_value();
2326  }
2327  thread_import_status.rows_rejected++;
2328  LOG(ERROR) << "Input exception thrown: " << e.what()
2329  << ". Row discarded. Data: " << shared::printContainer(row);
2330  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2331  LOG(ERROR) << "Load was cancelled due to max reject rows being reached";
2332  thread_import_status.load_failed = true;
2333  thread_import_status.load_msg =
2334  "Load was cancelled due to max reject rows being reached";
2335  }
2336  }
2337  }; // End of lambda
2338 
2339  if (copy_params.geo_explode_collections) {
2340  // explode and import
2341  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2342  explode_collections_step1(col_descs);
2343  // pull out the collection WKT or WKB hex
2344  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2345  auto const& collection_geo_string = row[collection_col_idx];
2346  // convert to OGR
2347  OGRGeometry* ogr_geometry = nullptr;
2348  ScopeGuard destroy_ogr_geometry = [&] {
2349  if (ogr_geometry) {
2350  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2351  }
2352  };
2354  std::string(collection_geo_string));
2355  // do the explode and import
2356  us = explode_collections_step2(ogr_geometry,
2357  collection_child_type,
2358  collection_col_name,
2359  first_row_index_this_buffer + row_index_plus_one,
2360  execute_import_row);
2361  } else {
2362  // import non-collection row just once
2364  [&] { execute_import_row(nullptr); });
2365  }
2366 
2367  if (thread_import_status.load_failed) {
2368  break;
2369  }
2370  } // end thread
2371  total_str_to_val_time_us += us;
2372  if (!thread_import_status.load_failed && thread_import_status.rows_completed > 0) {
2373  load_ms = measure<>::execution([&]() {
2374  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2375  });
2376  }
2377  }); // end execution
2378 
2379  if (DEBUG_TIMING && !thread_import_status.load_failed &&
2380  thread_import_status.rows_completed > 0) {
2381  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2382  << thread_import_status.rows_completed << " rows inserted in "
2383  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2384  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2385  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2386  << "sec" << std::endl;
2387  }
2388 
2389  return thread_import_status;
2390 }

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRCoordinateTransformation *  coordinate_transformation,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const ColumnIdToRenderGroupAnalyzerMapType &  columnIdToRenderGroupAnalyzerMap,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor,
const MetadataColumnInfos &  metadata_column_infos 
)
static

Definition at line 2398 of file Importer.cpp.

References CHECK, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::compress_coords(), DEBUG_TIMING, Executor::ERR_INTERRUPTED, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Catalog_Namespace::SessionInfo::get_session_id(), QueryExecutionError::getErrorCode(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), logger::INFO, kLINESTRING, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::null_str, PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, generate_TableFunctionsFactory_init::separator, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), TIMER_STOP, to_string(), and UNLIKELY.

Referenced by import_export::Importer::importGDALGeo().

2410  {
2411  ImportStatus thread_import_status;
2412  const CopyParams& copy_params = importer->get_copy_params();
2413  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2414  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2415  importer->get_import_buffers(thread_id);
2416  auto query_session = session_info ? session_info->get_session_id() : "";
2417  for (const auto& p : import_buffers) {
2418  p->clear();
2419  }
2420 
2421  auto convert_timer = timer_start();
2422 
2423  // for all the features in this chunk...
2424  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2425  // ignore null features
2426  if (!features[iFeature]) {
2427  continue;
2428  }
2429 
2430  // get this feature's geometry
2431  // for geodatabase, we need to consider features with no geometry
2432  // as we still want to create a table, even if it has no geo column
2433  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2434  if (pGeometry && coordinate_transformation) {
2435  pGeometry->transform(coordinate_transformation);
2436  }
2437 
2438  //
2439  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2440  //
2441 
2442  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2443  size_t col_idx = 0;
2444  try {
2445  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2446  check_session_interrupted(query_session, executor))) {
2447  thread_import_status.load_failed = true;
2448  thread_import_status.load_msg = "Table load was cancelled via Query Interrupt";
2450  }
2451 
2452  uint32_t field_column_count{0u};
2453  uint32_t metadata_column_count{0u};
2454 
2455  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2456  auto cd = *cd_it;
2457 
2458  // is this a geo column?
2459  const auto& col_ti = cd->columnType;
2460  if (col_ti.is_geometry()) {
2461  // Note that this assumes there is one and only one geo column in the
2462  // table. Currently, the importer only supports reading a single
2463  // geospatial feature from an input shapefile / geojson file, but this
2464  // code will need to be modified if that changes
2465  SQLTypes col_type = col_ti.get_type();
2466 
2467  // store null string in the base column
2468  import_buffers[col_idx]->add_value(
2469  cd, copy_params.null_str, true, copy_params);
2470  ++col_idx;
2471 
2472  // the data we now need to extract for the other columns
2473  std::vector<double> coords;
2474  std::vector<double> bounds;
2475  std::vector<int> ring_sizes;
2476  std::vector<int> poly_rings;
2477  int render_group = 0;
2478 
2479  // extract it
2480  SQLTypeInfo import_ti{col_ti};
2481  bool is_null_geo = !import_geometry;
2482  if (is_null_geo) {
2483  if (col_ti.get_notnull()) {
2484  throw std::runtime_error("NULL geo for column " + cd->columnName);
2485  }
2487  import_ti,
2488  coords,
2489  bounds,
2490  ring_sizes,
2491  poly_rings,
2493  } else {
2495  import_geometry,
2496  import_ti,
2497  coords,
2498  bounds,
2499  ring_sizes,
2500  poly_rings,
2502  std::string msg = "Failed to extract valid geometry from feature " +
2503  std::to_string(firstFeature + iFeature + 1) +
2504  " for column " + cd->columnName;
2505  throw std::runtime_error(msg);
2506  }
2507 
2508  // validate types
2509  if (col_type != import_ti.get_type()) {
2511  !(import_ti.get_type() == SQLTypes::kPOLYGON &&
2512  col_type == SQLTypes::kMULTIPOLYGON)) {
2513  throw std::runtime_error(
2514  "Imported geometry doesn't match the type of column " +
2515  cd->columnName);
2516  }
2517  }
2518  }
2519 
2520  if (columnIdToRenderGroupAnalyzerMap.size()) {
2521  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2522  if (ring_sizes.size()) {
2523  // get a suitable render group for these poly coords
2524  auto rga_it = columnIdToRenderGroupAnalyzerMap.find(cd->columnId);
2525  CHECK(rga_it != columnIdToRenderGroupAnalyzerMap.end());
2526  render_group =
2527  (*rga_it).second->insertBoundsAndReturnRenderGroup(bounds);
2528  } else {
2529  // empty poly
2530  render_group = -1;
2531  }
2532  }
2533  }
2534 
2535  // create coords array value and add it to the physical column
2536  ++cd_it;
2537  auto cd_coords = *cd_it;
2538  std::vector<TDatum> td_coord_data;
2539  if (!is_null_geo) {
2540  std::vector<uint8_t> compressed_coords =
2541  Geospatial::compress_coords(coords, col_ti);
2542  for (auto cc : compressed_coords) {
2543  TDatum td_byte;
2544  td_byte.val.int_val = cc;
2545  td_coord_data.push_back(td_byte);
2546  }
2547  }
2548  TDatum tdd_coords;
2549  tdd_coords.val.arr_val = td_coord_data;
2550  tdd_coords.is_null = is_null_geo;
2551  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2552  ++col_idx;
2553 
2554  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2555  // Create ring_sizes array value and add it to the physical column
2556  ++cd_it;
2557  auto cd_ring_sizes = *cd_it;
2558  std::vector<TDatum> td_ring_sizes;
2559  if (!is_null_geo) {
2560  for (auto ring_size : ring_sizes) {
2561  TDatum td_ring_size;
2562  td_ring_size.val.int_val = ring_size;
2563  td_ring_sizes.push_back(td_ring_size);
2564  }
2565  }
2566  TDatum tdd_ring_sizes;
2567  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2568  tdd_ring_sizes.is_null = is_null_geo;
2569  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2570  ++col_idx;
2571  }
2572 
2573  if (col_type == kMULTIPOLYGON) {
2574  // Create poly_rings array value and add it to the physical column
2575  ++cd_it;
2576  auto cd_poly_rings = *cd_it;
2577  std::vector<TDatum> td_poly_rings;
2578  if (!is_null_geo) {
2579  for (auto num_rings : poly_rings) {
2580  TDatum td_num_rings;
2581  td_num_rings.val.int_val = num_rings;
2582  td_poly_rings.push_back(td_num_rings);
2583  }
2584  }
2585  TDatum tdd_poly_rings;
2586  tdd_poly_rings.val.arr_val = td_poly_rings;
2587  tdd_poly_rings.is_null = is_null_geo;
2588  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2589  ++col_idx;
2590  }
2591 
2592  if (col_type == kLINESTRING || col_type == kPOLYGON ||
2593  col_type == kMULTIPOLYGON) {
2594  // Create bounds array value and add it to the physical column
2595  ++cd_it;
2596  auto cd_bounds = *cd_it;
2597  std::vector<TDatum> td_bounds_data;
2598  if (!is_null_geo) {
2599  for (auto b : bounds) {
2600  TDatum td_double;
2601  td_double.val.real_val = b;
2602  td_bounds_data.push_back(td_double);
2603  }
2604  }
2605  TDatum tdd_bounds;
2606  tdd_bounds.val.arr_val = td_bounds_data;
2607  tdd_bounds.is_null = is_null_geo;
2608  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2609  ++col_idx;
2610  }
2611 
2612  if (col_type == kPOLYGON || col_type == kMULTIPOLYGON) {
2613  // Create render_group value and add it to the physical column
2614  ++cd_it;
2615  auto cd_render_group = *cd_it;
2616  TDatum td_render_group;
2617  td_render_group.val.int_val = render_group;
2618  td_render_group.is_null = is_null_geo;
2619  import_buffers[col_idx]->add_value(cd_render_group, td_render_group, false);
2620  ++col_idx;
2621  }
2622  } else if (field_column_count < fieldNameToIndexMap.size()) {
2623  //
2624  // field column
2625  //
2626  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2627  CHECK(cit != columnNameToSourceNameMap.end());
2628  auto const& field_name = cit->second;
2629 
2630  auto const fit = fieldNameToIndexMap.find(field_name);
2631  if (fit == fieldNameToIndexMap.end()) {
2632  throw ColumnNotGeoError(cd->columnName);
2633  }
2634 
2635  auto const& field_index = fit->second;
2636  CHECK(field_index < fieldNameToIndexMap.size());
2637 
2638  auto const& feature = features[iFeature];
2639 
2640  auto field_defn = feature->GetFieldDefnRef(field_index);
2641  CHECK(field_defn);
2642 
2643  // OGRFeature::GetFieldAsString() can only return 80 characters
2644  // so for array columns, we are obliged to fetch the actual values
2645  // and construct the concatenated string ourselves
2646 
2647  std::string value_string;
2648  int array_index = 0, array_size = 0;
2649 
2650  auto stringify_numeric_list = [&](auto* values) {
2651  value_string = "{";
2652  while (array_index < array_size) {
2653  auto separator = (array_index > 0) ? "," : "";
2654  value_string += separator + std::to_string(values[array_index]);
2655  array_index++;
2656  }
2657  value_string += "}";
2658  };
2659 
2660  auto field_type = field_defn->GetType();
2661  switch (field_type) {
2662  case OFTInteger:
2663  case OFTInteger64:
2664  case OFTReal:
2665  case OFTString:
2666  case OFTBinary:
2667  case OFTDate:
2668  case OFTTime:
2669  case OFTDateTime: {
2670  value_string = feature->GetFieldAsString(field_index);
2671  } break;
2672  case OFTIntegerList: {
2673  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2674  stringify_numeric_list(values);
2675  } break;
2676  case OFTInteger64List: {
2677  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2678  stringify_numeric_list(values);
2679  } break;
2680  case OFTRealList: {
2681  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2682  stringify_numeric_list(values);
2683  } break;
2684  case OFTStringList: {
2685  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2686  value_string = "{";
2687  if (array_of_strings) {
2688  while (auto* this_string = array_of_strings[array_index]) {
2689  auto separator = (array_index > 0) ? "," : "";
2690  value_string += separator + std::string(this_string);
2691  array_index++;
2692  }
2693  }
2694  value_string += "}";
2695  } break;
2696  default:
2697  throw std::runtime_error("Unsupported geo file field type (" +
2698  std::to_string(static_cast<int>(field_type)) +
2699  ")");
2700  }
2701 
2702  import_buffers[col_idx]->add_value(cd, value_string, false, copy_params);
2703  ++col_idx;
2704  field_column_count++;
2705  } else if (metadata_column_count < metadata_column_infos.size()) {
2706  //
2707  // metadata column
2708  //
2709  auto const& mci = metadata_column_infos[metadata_column_count];
2710  if (mci.column_descriptor.columnName != cd->columnName) {
2711  throw std::runtime_error("Metadata column name mismatch");
2712  }
2713  import_buffers[col_idx]->add_value(cd, mci.value, false, copy_params);
2714  ++col_idx;
2715  metadata_column_count++;
2716  } else {
2717  throw std::runtime_error("Column count mismatch");
2718  }
2719  }
2720  thread_import_status.rows_completed++;
2721  } catch (QueryExecutionError& e) {
2723  throw e;
2724  }
2725  } catch (ColumnNotGeoError& e) {
2726  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import.";
2727  throw std::runtime_error(e.what());
2728  } catch (const std::exception& e) {
2729  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2730  import_buffers[col_idx_to_pop]->pop_value();
2731  }
2732  thread_import_status.rows_rejected++;
2733  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2734  }
2735  };
2736 
2737  if (pGeometry && copy_params.geo_explode_collections) {
2738  // explode and import
2739  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2740  explode_collections_step1(col_descs);
2741  explode_collections_step2(pGeometry,
2742  collection_child_type,
2743  collection_col_name,
2744  firstFeature + iFeature + 1,
2745  execute_import_feature);
2746  } else {
2747  // import non-collection or null feature just once
2748  execute_import_feature(pGeometry);
2749  }
2750  } // end features
2751 
2752  float convert_s = TIMER_STOP(convert_timer);
2753 
2754  float load_s = 0.0f;
2755  if (thread_import_status.rows_completed > 0) {
2756  auto load_timer = timer_start();
2757  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2758  load_s = TIMER_STOP(load_timer);
2759  }
2760 
2761  if (DEBUG_TIMING && thread_import_status.rows_completed > 0) {
2762  LOG(INFO) << "DEBUG: Process " << convert_s << "s";
2763  LOG(INFO) << "DEBUG: Load " << load_s << "s";
2764  LOG(INFO) << "DEBUG: Total " << (convert_s + load_s) << "s";
2765  }
2766 
2767  thread_import_status.thread_id = thread_id;
2768 
2769  return thread_import_status;
2770 }

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords,
SQLTypeInfo &  ti 
)

Definition at line 1633 of file Importer.cpp.

References Geospatial::GeoPoint::getColumns(), and SQLTypeInfo::transforms().

Referenced by import_thread_delimited().

1633  bool importGeoFromLonLat(double lon,
1634                           double lat,
1635                           std::vector<double>& coords,
1636                           SQLTypeInfo& ti) {
1637  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1638  return false;
1639  }
1640  if (ti.transforms()) {
1641  Geospatial::GeoPoint pt{std::vector<double>{lon, lat}};
1642  if (!pt.transform(ti)) {
1643  return false;
1644  }
1645  pt.getColumns(coords);
1646  return true;
1647  }
1648  coords.push_back(lon);
1649  coords.push_back(lat);
1650  return true;
1651 }
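A hedged usage sketch: point_ti is assumed to be the SQLTypeInfo of the target geo column (for example taken from its ColumnDescriptor); only the documented signature is relied on.

std::vector<double> lonlat_to_coords(double lon, double lat, SQLTypeInfo& point_ti) {
  std::vector<double> coords;
  if (!import_export::importGeoFromLonLat(lon, lat, coords, point_ti)) {
    throw std::runtime_error("lon/lat pair could not be imported");
  }
  return coords;  // raw or SRID-transformed pair, depending on point_ti.transforms()
}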

ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti )

Definition at line 410 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::TypedImportBuffer::addDefaultValues(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

410  ArrayDatum NullArray(const SQLTypeInfo& ti) {
411  SQLTypeInfo elem_ti = ti.get_elem_type();
412  auto len = ti.get_size();
413 
414  if (len > 0) {
415  // Compose a NULL fixlen array
416  int8_t* buf = (int8_t*)checked_malloc(len);
417  // First scalar is a NULL_ARRAY sentinel
418  Datum d = NullArrayDatum(elem_ti);
419  int8_t* p = append_datum(buf, d, elem_ti);
420  CHECK(p);
421  // Rest is filled with normal NULL sentinels
422  Datum d0 = NullDatum(elem_ti);
423  while ((p - buf) < len) {
424  p = append_datum(p, d0, elem_ti);
425  CHECK(p);
426  }
427  CHECK((p - buf) == len);
428  return ArrayDatum(len, buf, true);
429  }
430  // NULL varlen array
431  return ArrayDatum(0, NULL, true);
432 }

Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti )

Definition at line 314 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_array_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

314  Datum NullArrayDatum(SQLTypeInfo& ti) {
315  Datum d;
316  const auto type = get_type_for_datum(ti);
317  switch (type) {
318  case kBOOLEAN:
319  d.boolval = inline_fixed_encoding_null_array_val(ti);
320  break;
321  case kBIGINT:
322  d.bigintval = inline_fixed_encoding_null_array_val(ti);
323  break;
324  case kINT:
325  d.intval = inline_fixed_encoding_null_array_val(ti);
326  break;
327  case kSMALLINT:
328  d.smallintval = inline_fixed_encoding_null_array_val(ti);
329  break;
330  case kTINYINT:
331  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
332  break;
333  case kFLOAT:
334  d.floatval = NULL_ARRAY_FLOAT;
335  break;
336  case kDOUBLE:
337  d.doubleval = NULL_ARRAY_DOUBLE;
338  break;
339  case kTIME:
340  case kTIMESTAMP:
341  case kDATE:
342  d.bigintval = inline_fixed_encoding_null_array_val(ti);
343  break;
344  case kPOINT:
345  case kLINESTRING:
346  case kPOLYGON:
347  case kMULTIPOLYGON:
348  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
349  default:
350  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
351  }
352  return d;
353 }

Datum import_export::NullDatum ( SQLTypeInfo &  ti )

Definition at line 273 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray(), and StringToArray().

273  Datum NullDatum(SQLTypeInfo& ti) {
274  Datum d;
275  const auto type = get_type_for_datum(ti);
276  switch (type) {
277  case kBOOLEAN:
278  d.boolval = inline_fixed_encoding_null_val(ti);
279  break;
280  case kBIGINT:
281  d.bigintval = inline_fixed_encoding_null_val(ti);
282  break;
283  case kINT:
284  d.intval = inline_fixed_encoding_null_val(ti);
285  break;
286  case kSMALLINT:
287  d.smallintval = inline_fixed_encoding_null_val(ti);
288  break;
289  case kTINYINT:
290  d.tinyintval = inline_fixed_encoding_null_val(ti);
291  break;
292  case kFLOAT:
293  d.floatval = NULL_FLOAT;
294  break;
295  case kDOUBLE:
296  d.doubleval = NULL_DOUBLE;
297  break;
298  case kTIME:
299  case kTIMESTAMP:
300  case kDATE:
301  d.bigintval = inline_fixed_encoding_null_val(ti);
302  break;
303  case kPOINT:
304  case kLINESTRING:
305  case kPOLYGON:
306  case kMULTIPOLYGON:
307  throw std::runtime_error("Internal error: geometry type in NullDatum.");
308  default:
309  throw std::runtime_error("Internal error: invalid type in NullDatum.");
310  }
311  return d;
312 }
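Taken together, NullDatum() supplies the per-element NULL sentinel, NullArrayDatum() the distinguished first-element NULL_ARRAY sentinel, and NullArray() composes the two into a complete ArrayDatum. A minimal sketch, assuming array_ti describes some array column:

ArrayDatum make_null_array(const SQLTypeInfo& array_ti) {
  // fixed-length arrays get a buffer of sentinels, varlen arrays an empty,
  // null-flagged ArrayDatum (see NullArray() above)
  return import_export::NullArray(array_ti);
}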

MetadataColumnInfos import_export::parse_add_metadata_columns ( const std::string &  add_metadata_columns,
const std::string &  file_path 
)

Definition at line 35 of file MetadataColumn.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::ExpressionParser::evalAsString(), IS_INTEGER, join(), kBIGINT, kDATE, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kNULLT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, run_benchmark_import::parser, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_type(), import_export::ExpressionParser::setExpression(), import_export::ExpressionParser::setStringConstant(), ColumnDescriptor::sourceName, split(), strip(), to_lower(), and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptorsGeo(), import_export::Importer::gdalToColumnDescriptorsRaster(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), and import_export::Importer::readMetadataSampleGDAL().

35  MetadataColumnInfos parse_add_metadata_columns(const std::string& add_metadata_columns,
36                                                 const std::string& file_path) {
37  //
38  // each string is "column_name,column_type,expression"
39  //
40  // column_type can be:
41  // tinyint
42  // smallint
43  // int
44  // bigint
45  // float
46  // double
47  // date
48  // time
49  // timestamp
50  // text
51  //
52  // expression can be in terms of:
53  // filename
54  // filedir
55  // filepath
56  // etc.
57  //
58 
59  // anything to do?
60  if (add_metadata_columns.length() == 0u) {
61  return {};
62  }
63 
64  // split by ";"
65  // @TODO(se) is this safe?
66  // probably won't appear in a file name/path or a date/time string
67  std::vector<std::string> add_metadata_column_strings;
68  boost::split(add_metadata_column_strings, add_metadata_columns, boost::is_any_of(";"));
69  if (add_metadata_column_strings.size() == 0u) {
70  return {};
71  }
72 
73  ExpressionParser parser;
74 
75  // known string constants
76  auto const fn = boost::filesystem::path(file_path).filename().string();
77  auto const fd = boost::filesystem::path(file_path).parent_path().string();
78  auto const fp = file_path;
79  parser.setStringConstant("filename", fn);
80  parser.setStringConstant("filedir", fd);
81  parser.setStringConstant("filepath", fp);
82 
83  MetadataColumnInfos metadata_column_infos;
84 
85  // for each requested column...
86  for (auto const& add_metadata_column_string : add_metadata_column_strings) {
87  // strip
88  auto const add_metadata_column = strip(add_metadata_column_string);
89 
90  // tokenize and extract
91  std::vector<std::string> tokens;
92  boost::split(tokens, add_metadata_column, boost::is_any_of(","));
93  if (tokens.size() < 3u) {
94  throw std::runtime_error("Invalid metadata column info '" + add_metadata_column +
95  "' (must be of the form 'name,type,expression')");
96  }
97  auto token_itr = tokens.begin();
98  auto const column_name = strip(*token_itr++);
99  auto const data_type = strip(to_lower(*token_itr++));
100  tokens.erase(tokens.begin(), token_itr);
101  auto const expression = strip(boost::join(tokens, ","));
102 
103  // get column type
104  SQLTypes sql_type{kNULLT};
105  double range_min{0.0}, range_max{0.0};
106  if (data_type == "tinyint") {
107  sql_type = kTINYINT;
108  range_min = static_cast<double>(std::numeric_limits<int8_t>::min());
109  range_max = static_cast<double>(std::numeric_limits<int8_t>::max());
110  } else if (data_type == "smallint") {
111  sql_type = kSMALLINT;
112  range_min = static_cast<double>(std::numeric_limits<int16_t>::min());
113  range_max = static_cast<double>(std::numeric_limits<int16_t>::max());
114  } else if (data_type == "int") {
115  sql_type = kINT;
116  range_min = static_cast<double>(std::numeric_limits<int32_t>::min());
117  range_max = static_cast<double>(std::numeric_limits<int32_t>::max());
118  } else if (data_type == "bigint") {
119  sql_type = kBIGINT;
120  range_min = static_cast<double>(std::numeric_limits<int64_t>::min());
121  range_max = static_cast<double>(std::numeric_limits<int64_t>::max());
122  } else if (data_type == "float") {
123  sql_type = kFLOAT;
124  range_min = static_cast<double>(std::numeric_limits<float>::min());
125  range_max = static_cast<double>(std::numeric_limits<float>::max());
126  } else if (data_type == "double") {
127  sql_type = kDOUBLE;
128  range_min = static_cast<double>(std::numeric_limits<double>::min());
129  range_max = static_cast<double>(std::numeric_limits<double>::max());
130  } else if (data_type == "date") {
131  sql_type = kDATE;
132  } else if (data_type == "time") {
133  sql_type = kTIME;
134  } else if (data_type == "timestamp") {
135  sql_type = kTIMESTAMP;
136  } else if (data_type == "text") {
137  sql_type = kTEXT;
138  } else {
139  throw std::runtime_error("Invalid metadata column data type '" + data_type +
140  "' for column '" + column_name + "'");
141  }
142 
143  // set expression with force cast back to string
144  parser.setExpression("str(" + expression + ")");
145 
146  // evaluate
147  auto value = parser.evalAsString();
148 
149  // validate date/time/timestamp value now
150  // @TODO(se) do we need to provide for non-zero dimension?
151  try {
152  if (sql_type == kDATE) {
153  dateTimeParse<kDATE>(value, 0);
154  } else if (sql_type == kTIME) {
155  dateTimeParse<kTIME>(value, 0);
156  } else if (sql_type == kTIMESTAMP) {
157  dateTimeParse<kTIMESTAMP>(value, 0);
158  }
159  } catch (std::runtime_error& e) {
160  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
161  " value '" + value + "' for column '" + column_name + "'");
162  }
163 
164  // validate int/float/double
165  try {
166  if (IS_INTEGER(sql_type) || sql_type == kFLOAT || sql_type == kDOUBLE) {
167  size_t num_chars{0u};
168  auto const v = static_cast<double>(std::stod(value, &num_chars));
169  if (v < range_min || v > range_max) {
170  throw std::out_of_range(to_string(sql_type));
171  }
172  if (num_chars == 0u) {
173  throw std::invalid_argument("empty value");
174  }
175  }
176  } catch (std::invalid_argument& e) {
177  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
178  " value '" + value + "' for column '" + column_name +
179  "' (" + e.what() + ")");
180  } catch (std::out_of_range& e) {
181  throw std::runtime_error("Out-of-range metadata column " + to_string(sql_type) +
182  " value '" + value + "' for column '" + column_name +
183  "' (" + e.what() + ")");
184  }
185 
186  // build column descriptor
187  ColumnDescriptor cd;
188  cd.columnName = cd.sourceName = column_name;
189    cd.columnType.set_type(sql_type);
190    cd.columnType.set_fixed_size();
191    if (sql_type == kTEXT) {
192      cd.columnType.set_compression(kENCODING_DICT);
193      cd.columnType.set_comp_param(32);
194    }
195 
196  // add to result
197  metadata_column_infos.push_back({std::move(cd), std::move(value)});
198  }
199 
200  // done
201  return metadata_column_infos;
202 }
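A hedged usage sketch (the file path and column names are illustrative): each semicolon-separated spec is 'name,type,expression', and the expression may reference the built-in string constants filename, filedir and filepath registered above.

  auto const infos = import_export::parse_add_metadata_columns(
      "source_file,text,filename;source_dir,text,filedir",
      "/data/imports/trips_2022.csv");
  // infos[0]: TEXT column "source_file" with value "trips_2022.csv"
  // infos[1]: TEXT column "source_dir"  with value "/data/imports"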


std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor *  td,
Loader *  loader 
)

Definition at line 6183 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by DBHandler::prepare_loader_generic().

6185  {
6186  CHECK(td);
6187  auto col_descs = loader->get_column_descs();
6188 
6189  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
6190  for (auto cd : col_descs) {
6191  import_buffers.emplace_back(
6192  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
6193  }
6194 
6195  return import_buffers;
6196 }
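A hedged sketch of the typical call site (how catalog and td were obtained is omitted, and the Loader constructor shown here is an assumption of this sketch, not a quote):

  import_export::Loader loader(catalog, td);  // catalog: Catalog_Namespace::Catalog&
  auto import_buffers = import_export::setup_column_loaders(td, &loader);
  // One TypedImportBuffer per column of td, each wired to its string
  // dictionary where applicable, ready to be filled and handed to the loader.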


ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo &  ti,
const CopyParams &  copy_params 
)

Definition at line 355 of file Importer.cpp.

References append_datum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value(), and import_export::TypedImportBuffer::addDefaultValues().

357  {
358  SQLTypeInfo elem_ti = ti.get_elem_type();
359  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
360  return ArrayDatum(0, NULL, true);
361  }
362  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
363  LOG(WARNING) << "Malformed array: " << s;
364  return ArrayDatum(0, NULL, true);
365  }
366  std::vector<std::string> elem_strs;
367  size_t last = 1;
368  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
369  i = s.find(copy_params.array_delim, last)) {
370  elem_strs.push_back(s.substr(last, i - last));
371  last = i + 1;
372  }
373  if (last + 1 <= s.size()) {
374  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
375  }
376  if (elem_strs.size() == 1) {
377  auto str = elem_strs.front();
378  auto str_trimmed = trim_space(str.c_str(), str.length());
379  if (str_trimmed == "") {
380  elem_strs.clear(); // Empty array
381  }
382  }
383  if (!elem_ti.is_string()) {
384  size_t len = elem_strs.size() * elem_ti.get_size();
385  std::unique_ptr<int8_t, FreeDeleter> buf(
386  reinterpret_cast<int8_t*>(checked_malloc(len)));
387  int8_t* p = buf.get();
388  for (auto& es : elem_strs) {
389  auto e = trim_space(es.c_str(), es.length());
390  bool is_null = (e == copy_params.null_str) || e == "NULL";
391  if (!elem_ti.is_string() && e == "") {
392  is_null = true;
393  }
394  if (elem_ti.is_number() || elem_ti.is_time()) {
395  if (!isdigit(e[0]) && e[0] != '-') {
396  is_null = true;
397  }
398  }
399  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
400  p = append_datum(p, d, elem_ti);
401  CHECK(p);
402  }
403  return ArrayDatum(len, buf.release(), false);
404  }
405  // must not be called for array of strings
406  CHECK(false);
407  return ArrayDatum(0, NULL, true);
408 }
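A hedged usage sketch (illustrative, using the default CopyParams array markers '{', '}' and ','): parsing a delimited field that holds an integer array literal.

  import_export::CopyParams copy_params;
  SQLTypeInfo arr_ti(kARRAY, /*notnull=*/false);
  arr_ti.set_subtype(kINT);
  ArrayDatum ad = import_export::StringToArray("{1,2,3}", arr_ti, copy_params);
  // ad.is_null == false and ad.length == 3 * sizeof(int32_t); the configured
  // null_str, "NULL" or an empty string would instead yield a null ArrayDatum.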


ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo &  ti
)

Definition at line 506 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

506  {
507  SQLTypeInfo elem_ti = ti.get_elem_type();
508 
509  CHECK(!elem_ti.is_string());
510 
511  if (datum.is_null) {
512  return NullArray(ti);
513  }
514 
515  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
516  int8_t* buf = (int8_t*)checked_malloc(len);
517  int8_t* p = buf;
518  for (auto& e : datum.val.arr_val) {
519  p = append_datum(p, TDatumToDatum(e, elem_ti), elem_ti);
520  CHECK(p);
521  }
522 
523  return ArrayDatum(len, buf, false);
524 }
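A hedged sketch (TDatum/TDatumVal are the Thrift-generated wire types; values are illustrative): converting a two-element integer array received over Thrift into an ArrayDatum of BIGINTs.

  TDatum e0, e1;
  e0.val.int_val = 10;
  e1.val.int_val = 20;
  TDatum arr;
  arr.is_null = false;
  arr.val.arr_val = {e0, e1};
  SQLTypeInfo arr_ti(kARRAY, /*notnull=*/false);
  arr_ti.set_subtype(kBIGINT);
  ArrayDatum ad = import_export::TDatumToArrayDatum(arr, arr_ti);
  // ad.length == 2 * sizeof(int64_t); a null input would return NullArray(arr_ti).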


Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo &  ti
)

Definition at line 461 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

461  {
462  Datum d;
463  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
464  switch (type) {
465  case kBOOLEAN:
466  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
467  break;
468  case kBIGINT:
469  d.bigintval =
470  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
471  break;
472  case kINT:
473  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
474  break;
475  case kSMALLINT:
476  d.smallintval =
477  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
478  break;
479  case kTINYINT:
480  d.tinyintval =
481  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
482  break;
483  case kFLOAT:
484  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
485  break;
486  case kDOUBLE:
487  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
488  break;
489  case kTIME:
490  case kTIMESTAMP:
491  case kDATE:
492  d.bigintval =
493  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
494  break;
495  case kPOINT:
496  case kLINESTRING:
497  case kPOLYGON:
498  case kMULTIPOLYGON:
499  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
500  default:
501  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
502  }
503  return d;
504 }
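A hedged sketch (illustrative): converting a scalar Thrift datum into the engine's Datum union.

  TDatum wire_datum;
  wire_datum.is_null = false;
  wire_datum.val.int_val = 42;
  SQLTypeInfo int_ti(kINT, /*notnull=*/false);
  Datum d = import_export::TDatumToDatum(wire_datum, int_ti);
  // d.intval == 42; had wire_datum.is_null been true, d.intval would instead
  // hold the fixed-encoding NULL sentinel for kINT.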


static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 247 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

247  {
248  size_t i = 0;
249  size_t j = len;
250  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
251  i++;
252  }
253  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
254  j--;
255  }
256  return std::string(field + i, j - i);
257 }
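Because this helper has internal linkage (a file-local static in Importer.cpp), the lines below only sketch its behavior rather than public API usage:

  const char raw[] = "  42 \r";
  // trim_space(raw, 6) returns "42": leading and trailing spaces and carriage
  // returns are stripped; embedded characters are left untouched.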


template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 3299 of file Importer.cpp.

References heavydb.dtypes::T.

3299  {
3300  try {
3301  boost::lexical_cast<T>(str);
3302  } catch (const boost::bad_lexical_cast& e) {
3303  return false;
3304  }
3305  return true;
3306 }
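A hedged sketch of the detector-style probing this template enables (it is defined in Importer.cpp, so the calls are illustrative):

  import_export::try_cast<int64_t>("12345");  // true  - parses as an integer
  import_export::try_cast<double>("3.14");    // true  - parses as a double
  import_export::try_cast<int64_t>("3.14");   // false - lexical_cast<int64_t> rejects it
  import_export::try_cast<double>("n/a");     // false - not numeric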

Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static
constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 34 of file CopyParams.h.

constexpr size_t import_export::max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
static

Definition at line 37 of file CopyParams.h.
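For orientation, a small sketch relating the two buffer limits above (the numeric values follow from the definitions; the comments paraphrase their roles rather than quote the parser):

  constexpr size_t initial_buffer = size_t{1} << 23;      // 8 MiB initial read buffer
  constexpr size_t max_resize = 1024u * 1024u * 1024u;    // 1 GiB cap on buffer growth
  static_assert(initial_buffer < max_resize,
                "the initial import buffer must fit well under the resize cap");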

constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static
mapd_shared_mutex import_export::status_mutex
static