OmniSciDB
bf83d84833
|
#include <Importer.h>
Classes | |
struct | GeoFileLayerInfo |
Public Types | |
enum | GeoFileLayerContents { GeoFileLayerContents::EMPTY, GeoFileLayerContents::GEO, GeoFileLayerContents::NON_GEO, GeoFileLayerContents::UNSUPPORTED_GEO } |
Public Member Functions | |
Importer (Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p) | |
Importer (Loader *providedLoader, const std::string &f, const CopyParams &p) | |
~Importer () override | |
ImportStatus | import () |
ImportStatus | importDelimited (const std::string &file_path, const bool decompressed) override |
ImportStatus | importGDAL (std::map< std::string, std::string > colname_to_src) |
const CopyParams & | get_copy_params () const |
const std::list< const ColumnDescriptor * > & | get_column_descs () const |
void | load (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count) |
std::vector< std::vector < std::unique_ptr < TypedImportBuffer > > > & | get_import_buffers_vec () |
std::vector< std::unique_ptr < TypedImportBuffer > > & | get_import_buffers (int i) |
const bool * | get_is_array () const |
Catalog_Namespace::Catalog & | getCatalog () |
void | checkpoint (const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs) |
auto | getLoader () const |
Public Member Functions inherited from import_export::DataStreamSink | |
DataStreamSink () | |
DataStreamSink (const CopyParams &copy_params, const std::string file_path) | |
virtual | ~DataStreamSink () |
const CopyParams & | get_copy_params () const |
void | import_compressed (std::vector< std::string > &file_paths) |
Static Public Member Functions | |
static ImportStatus | get_import_status (const std::string &id) |
static void | set_import_status (const std::string &id, const ImportStatus is) |
static const std::list < ColumnDescriptor > | gdalToColumnDescriptors (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params) |
static void | readMetadataSampleGDAL (const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params) |
static bool | gdalFileExists (const std::string &path, const CopyParams &copy_params) |
static bool | gdalFileOrDirectoryExists (const std::string &path, const CopyParams &copy_params) |
static std::vector< std::string > | gdalGetAllFilesInArchive (const std::string &archive_path, const CopyParams &copy_params) |
static std::vector < GeoFileLayerInfo > | gdalGetLayersInGeoFile (const std::string &file_name, const CopyParams &copy_params) |
static void | set_geo_physical_import_buffer (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0) |
static void | set_geo_physical_import_buffer_columnar (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, int render_group, const int64_t replicate_count=0) |
Static Private Member Functions | |
static bool | gdalStatInternal (const std::string &path, const CopyParams &copy_params, bool also_dir) |
static OGRDataSource * | openGDALDataset (const std::string &fileName, const CopyParams &copy_params) |
static void | setGDALAuthorizationTokens (const CopyParams &copy_params) |
Private Attributes | |
std::string | import_id |
size_t | file_size |
size_t | max_threads |
char * | buffer [2] |
std::vector< std::vector < std::unique_ptr < TypedImportBuffer > > > | import_buffers_vec |
std::unique_ptr< Loader > | loader |
std::unique_ptr< bool[]> | is_array_a |
Additional Inherited Members | |
Protected Member Functions inherited from import_export::DataStreamSink | |
ImportStatus | archivePlumber () |
Protected Attributes inherited from import_export::DataStreamSink | |
CopyParams | copy_params |
const std::string | file_path |
FILE * | p_file = nullptr |
ImportStatus | import_status |
bool | load_failed = false |
size_t | total_file_size {0} |
std::vector< size_t > | file_offsets |
std::mutex | file_offsets_mutex |
Definition at line 730 of file Importer.h.
|
enum import_export::Importer::GeoFileLayerContents | strong |
Enumerator | |
---|---|
EMPTY | |
GEO | |
NON_GEO | |
UNSUPPORTED_GEO |
Definition at line 776 of file Importer.h.
import_export::Importer::Importer | ( | Catalog_Namespace::Catalog & | c, |
const TableDescriptor * | t, | ||
const std::string & | f, | ||
const CopyParams & | p | ||
) |
Definition at line 153 of file Importer.cpp.
import_export::Importer::Importer | ( | Loader * | providedLoader, |
const std::string & | f, | ||
const CopyParams & | p | ||
) |
Definition at line 159 of file Importer.cpp.
References buffer, import_export::DataStreamSink::file_path, file_size, import_id, is_array_a, kARRAY, loader, max_threads, and import_export::DataStreamSink::p_file.
|
import_export::Importer::~Importer () | override |
Definition at line 200 of file Importer.cpp.
References buffer, and import_export::DataStreamSink::p_file.
void import_export::Importer::checkpoint | ( | const std::vector< Catalog_Namespace::TableEpochInfo > & | table_epochs | ) |
Definition at line 3297 of file Importer.cpp.
References DEBUG_TIMING, Data_Namespace::DISK_LEVEL, logger::ERROR, measure< TimeT >::execution(), StorageType::FOREIGN_TABLE, import_buffers_vec, logger::INFO, import_export::DataStreamSink::load_failed, loader, and LOG.
Referenced by importDelimited(), and importGDAL().
|
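bool import_export::Importer::gdalFileExists (const std::string &path, const CopyParams &copy_params) | static |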
Definition at line 4599 of file Importer.cpp.
References gdalStatInternal().
Referenced by DBHandler::check_geospatial_files(), DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().
|
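bool import_export::Importer::gdalFileOrDirectoryExists (const std::string &path, const CopyParams &copy_params) | static |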
Definition at line 4604 of file Importer.cpp.
References gdalStatInternal().
Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().
|
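std::vector< std::string > import_export::Importer::gdalGetAllFilesInArchive (const std::string &archive_path, const CopyParams &copy_params) | static |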
Definition at line 4676 of file Importer.cpp.
References import_export::gdalGatherFilesInArchiveRecursive(), Geospatial::GDAL::init(), and setGDALAuthorizationTokens().
Referenced by anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive(), and DBHandler::get_all_files_in_archive().
|
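std::vector< GeoFileLayerInfo > import_export::Importer::gdalGetLayersInGeoFile (const std::string &file_name, const CopyParams &copy_params) | static |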
Definition at line 4701 of file Importer.cpp.
References CHECK, EMPTY, GEO, import_export::CopyParams::geo_explode_collections, Geospatial::GDAL::init(), NON_GEO, openGDALDataset(), setGDALAuthorizationTokens(), and UNSUPPORTED_GEO.
Referenced by DBHandler::get_layers_in_geo_file(), and DBHandler::import_geo_table().
|
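bool import_export::Importer::gdalStatInternal (const std::string &path, const CopyParams &copy_params, bool also_dir) | static private |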
Definition at line 4566 of file Importer.cpp.
References Geospatial::GDAL::init(), run_benchmark_import::result, and setGDALAuthorizationTokens().
Referenced by gdalFileExists(), and gdalFileOrDirectoryExists().
|
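const std::list< ColumnDescriptor > import_export::Importer::gdalToColumnDescriptors (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params) | static |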
Definition at line 4476 of file Importer.cpp.
References CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::CopyParams::geo_coords_comp_param, import_export::CopyParams::geo_coords_encoding, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_coords_type, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), kARRAY, kENCODING_DICT, kMULTIPOLYGON, kPOLYGON, kTEXT, import_export::ogr_to_type(), openGDALDataset(), import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_subtype(), SQLTypeInfo::set_type(), and ColumnDescriptor::sourceName.
Referenced by DBHandler::detect_column_types().
|
const std::list< const ColumnDescriptor * > & import_export::Importer::get_column_descs () const | inline |
Definition at line 743 of file Importer.h.
References loader.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
const CopyParams & import_export::Importer::get_copy_params () const | inline |
Definition at line 742 of file Importer.h.
References import_export::DataStreamSink::copy_params.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
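std::vector< std::unique_ptr< TypedImportBuffer > > & import_export::Importer::get_import_buffers (int i) | inline |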
Definition at line 751 of file Importer.h.
References import_buffers_vec.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
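std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > & import_export::Importer::get_import_buffers_vec () | inline |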
Definition at line 748 of file Importer.h.
References import_buffers_vec.
|
ImportStatus import_export::Importer::get_import_status (const std::string &id) | static |
Definition at line 212 of file Importer.cpp.
References import_export::import_status_map, and import_export::status_mutex.
Referenced by DBHandler::import_table_status().
|
const bool * import_export::Importer::get_is_array () const | inline |
Definition at line 754 of file Importer.h.
References is_array_a.
Referenced by import_export::import_thread_delimited().
|
Catalog_Namespace::Catalog & import_export::Importer::getCatalog () | inline |
Definition at line 786 of file Importer.h.
References loader.
Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), and import_export::import_thread_delimited().
|
auto import_export::Importer::getLoader () const | inline |
ImportStatus import_export::Importer::import | ( | ) |
Definition at line 3981 of file Importer.cpp.
References import_export::DataStreamSink::archivePlumber().
|
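ImportStatus import_export::Importer::importDelimited (const std::string &file_path, const bool decompressed) | override virtual |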
Implements import_export::DataStreamSink.
Definition at line 3985 of file Importer.cpp.
References import_export::CopyParams::buffer_size, CHECK, checkpoint(), ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::DataStreamSink::copy_params, logger::ERROR, import_export::DataStreamSink::file_offsets, import_export::DataStreamSink::file_offsets_mutex, file_size, import_export::delimited_parser::find_row_end_pos(), omnisci::fopen(), g_max_import_threads, import_export::CopyParams::geo_assign_render_groups, SQLTypeInfo::get_type(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_delimited(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, import_export::DataStreamSink::p_file, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), import_export::status_mutex, logger::thread_id(), import_export::CopyParams::threads, import_export::DataStreamSink::total_file_size, and VLOG.
ImportStatus import_export::Importer::importGDAL | ( | std::map< std::string, std::string > | colname_to_src | ) |
Definition at line 4768 of file Importer.cpp.
References CHECK, CHECK_EQ, checkpoint(), ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::DataStreamSink::copy_params, logger::ERROR, g_max_import_threads, import_export::CopyParams::geo_assign_render_groups, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_layer_name, SQLTypeInfo::get_type(), import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status, import_export::import_thread_shapefile(), kMULTIPOLYGON, kPOLYGON, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, loader, LOG, import_export::CopyParams::max_reject, max_threads, openGDALDataset(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, set_import_status(), logger::thread_id(), import_export::CopyParams::threads, and VLOG.
Referenced by QueryRunner::ImportDriver::importGeoTable().
void import_export::Importer::load | ( | const std::vector< std::unique_ptr< TypedImportBuffer >> & | import_buffers, |
size_t | row_count | ||
) |
Definition at line 3290 of file Importer.cpp.
References import_export::DataStreamSink::load_failed, and loader.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
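OGRDataSource * import_export::Importer::openGDALDataset (const std::string &fileName, const CopyParams &copy_params) | static private |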
Definition at line 4286 of file Importer.cpp.
References logger::ERROR, logger::INFO, Geospatial::GDAL::init(), LOG, and setGDALAuthorizationTokens().
Referenced by gdalGetLayersInGeoFile(), gdalToColumnDescriptors(), importGDAL(), and readMetadataSampleGDAL().
|
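void import_export::Importer::readMetadataSampleGDAL (const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params) | static |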
Definition at line 4342 of file Importer.cpp.
References CHECK, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), and openGDALDataset().
Referenced by DBHandler::detect_column_types().
|
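void import_export::Importer::set_geo_physical_import_buffer (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0) | static |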
Definition at line 1433 of file Importer.cpp.
References ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.
Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), Parser::AddColumnStmt::execute(), import_export::import_thread_delimited(), DBHandler::load_table(), and foreign_storage::csv_file_buffer_parser::process_geo_column().
|
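void import_export::Importer::set_geo_physical_import_buffer_columnar (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column, int render_group, const int64_t replicate_count=0) | static |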
Definition at line 1537 of file Importer.cpp.
References CHECK, ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.
Referenced by DBHandler::load_table_binary_columnar().
|
void import_export::Importer::set_import_status (const std::string &id, const ImportStatus is) | static |
Definition at line 217 of file Importer.cpp.
References import_export::ImportStatus::elapsed, import_export::ImportStatus::end, import_id, import_export::import_status_map, import_export::ImportStatus::start, and import_export::status_mutex.
Referenced by importDelimited(), and importGDAL().
|
void import_export::Importer::setGDALAuthorizationTokens (const CopyParams &copy_params) | static private |
Definition at line 4216 of file Importer.cpp.
References logger::INFO, LOG, import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, and import_export::CopyParams::s3_secret_key.
Referenced by gdalGetAllFilesInArchive(), gdalGetLayersInGeoFile(), gdalStatInternal(), and openGDALDataset().
|
char * import_export::Importer::buffer[2] | private |
Definition at line 822 of file Importer.h.
Referenced by Importer(), and ~Importer().
|
size_t import_export::Importer::file_size | private |
Definition at line 820 of file Importer.h.
Referenced by importDelimited(), and Importer().
|
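std::vector< std::vector< std::unique_ptr< TypedImportBuffer > > > import_export::Importer::import_buffers_vec | private |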
Definition at line 823 of file Importer.h.
Referenced by checkpoint(), get_import_buffers(), get_import_buffers_vec(), importDelimited(), and importGDAL().
|
std::string import_export::Importer::import_id | private |
Definition at line 819 of file Importer.h.
Referenced by importDelimited(), Importer(), importGDAL(), omnisci.thrift.OmniSci.import_table_status_args::read(), set_import_status(), and omnisci.thrift.OmniSci.import_table_status_args::write().
|
std::unique_ptr< bool[]> import_export::Importer::is_array_a | private |
Definition at line 825 of file Importer.h.
Referenced by get_is_array(), and Importer().
|
std::unique_ptr< Loader > import_export::Importer::loader | private |
Definition at line 824 of file Importer.h.
Referenced by checkpoint(), get_column_descs(), getCatalog(), getLoader(), importDelimited(), Importer(), importGDAL(), and load().
|
size_t import_export::Importer::max_threads | private |
Definition at line 821 of file Importer.h.
Referenced by importDelimited(), Importer(), and importGDAL().