OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Importer.cpp File Reference
#include "ImportExport/Importer.h"
#include <arrow/api.h>
#include <arrow/filesystem/localfs.h>
#include <arrow/io/api.h>
#include <gdal.h>
#include <ogrsf_frmts.h>
#include <boost/algorithm/string.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/filesystem.hpp>
#include <boost/geometry.hpp>
#include <boost/variant.hpp>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <future>
#include <iomanip>
#include <list>
#include <memory>
#include <mutex>
#include <numeric>
#include <stack>
#include <stdexcept>
#include <thread>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "Archive/PosixFileArchive.h"
#include "Archive/S3Archive.h"
#include "ArrowImporter.h"
#include "Catalog/os/UserMapping.h"
#include "Geospatial/Compression.h"
#include "Geospatial/GDAL.h"
#include "Geospatial/Transforms.h"
#include "Geospatial/Types.h"
#include "ImportExport/DelimitedParserUtils.h"
#include "ImportExport/ForeignDataImporter.h"
#include "ImportExport/MetadataColumn.h"
#include "ImportExport/RasterImporter.h"
#include "Logger/Logger.h"
#include "QueryEngine/ErrorHandling.h"
#include "QueryEngine/Execute.h"
#include "QueryEngine/TypePunning.h"
#include "RenderGroupAnalyzer.h"
#include "Shared/DateTimeParser.h"
#include "Shared/SqlTypesLayout.h"
#include "Shared/enable_assign_render_groups.h"
#include "Shared/file_path_util.h"
#include "Shared/import_helpers.h"
#include "Shared/likely.h"
#include "Shared/measure.h"
#include "Shared/misc.h"
#include "Shared/scope.h"
#include "Shared/shard_key.h"
#include "Shared/thread_count.h"
#include "Utils/ChunkAccessorTable.h"
#include "gen-cpp/Heavy.h"

Go to the source code of this file.

Classes

struct  import_export::GeoImportException
 
class  import_export::ColumnNotGeoError
 

Namespaces

 anonymous_namespace{Importer.cpp}
 
 boost
 
 boost::log
 
 import_export
 
 import_export::anonymous_namespace{Importer.cpp}
 

Macros

#define TIMER_STOP(t)
 
#define DEBUG_TIMING   false
 
#define DEBUG_RENDER_GROUP_ANALYZER   0
 
#define DEBUG_AWS_AUTHENTICATION   0
 
#define DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT   0
 

Typedefs

using import_export::FieldNameToIndexMapType = std::map< std::string, size_t >
 
using import_export::ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using import_export::ColumnIdToRenderGroupAnalyzerMapType = std::map< int, std::shared_ptr< RenderGroupAnalyzer >>
 
using import_export::FeaturePtrVector = std::vector< Geospatial::GDAL::FeatureUqPtr >
 

Functions

auto get_filesize (const std::string &file_path)
 
bool anonymous_namespace{Importer.cpp}::check_session_interrupted (const QuerySessionId &query_session, Executor *executor)
 
formatting_ostream & boost::log::operator<< (formatting_ostream &out, std::vector< std::string > &row)
 
static const std::string import_export::trim_space (const char *field, const size_t len)
 
SQLTypes import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum (const SQLTypeInfo &ti)
 
Datum import_export::NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum import_export::StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum import_export::NullArray (const SQLTypeInfo &ti)
 
void import_export::addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum import_export::TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum import_export::TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool import_export::importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, SQLTypeInfo &ti)
 
std::tuple< int, SQLTypes, std::string > import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1 (const std::list< const ColumnDescriptor * > &col_descs)
 
int64_t import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2 (OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
 
static ImportStatus import_export::import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, size_t first_row_index_this_buffer, const Catalog_Namespace::SessionInfo *session_info, Executor *executor)
 
static ImportStatus import_export::import_thread_shapefile (int thread_id, Importer *importer, OGRCoordinateTransformation *coordinate_transformation, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const ColumnIdToRenderGroupAnalyzerMapType &columnIdToRenderGroupAnalyzerMap, const Catalog_Namespace::SessionInfo *session_info, Executor *executor, const MetadataColumnInfos &metadata_column_infos)
 
int64_t import_export::anonymous_namespace{Importer.cpp}::int_value_at (const TypedImportBuffer &import_buffer, const size_t index)
 
float import_export::anonymous_namespace{Importer.cpp}::float_value_at (const TypedImportBuffer &import_buffer, const size_t index)
 
double import_export::anonymous_namespace{Importer.cpp}::double_value_at (const TypedImportBuffer &import_buffer, const size_t index)
 
template<class T >
bool import_export::try_cast (const std::string &str)
 
OGRLayer & import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName (const std::string &geo_layer_name, const Geospatial::GDAL::DataSourceUqPtr &poDS, const std::string &file_name)
 
std::pair< SQLTypes, bool > import_export::anonymous_namespace{Importer.cpp}::ogr_to_type (const OGRFieldType &ogr_type)
 
SQLTypes import_export::anonymous_namespace{Importer.cpp}::ogr_to_type (const OGRwkbGeometryType &ogr_type)
 
RasterImporter::PointType import_export::anonymous_namespace{Importer.cpp}::convert_raster_point_type (const import_export::RasterPointType raster_point_type)
 
RasterImporter::PointTransform import_export::anonymous_namespace{Importer.cpp}::convert_raster_point_transform (const import_export::RasterPointTransform raster_point_transform)
 
void import_export::gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders (const TableDescriptor *td, Loader *loader)
 
std::vector< std::unique_ptr< TypedImportBuffer > > import_export::fill_missing_columns (const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
 
std::unique_ptr< AbstractImporter > import_export::create_importer (Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
 

Variables

size_t g_max_import_threads
 
size_t g_archive_read_buf_size = 1 << 20
 
std::optional< size_t > g_detect_test_sample_size = std::nullopt
 
static constexpr int kMaxRasterScanlinesPerThread = 32
 
static constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static heavyai::shared_mutex import_export::status_mutex
 
static std::map< std::string, ImportStatus > import_export::import_status_map
 
static const size_t import_export::kImportRowLimit = 10000
 

Macro Definition Documentation

#define DEBUG_AWS_AUTHENTICATION   0

Definition at line 159 of file Importer.cpp.

#define DEBUG_RENDER_GROUP_ANALYZER   0

Definition at line 158 of file Importer.cpp.

#define DISABLE_MULTI_THREADED_SHAPEFILE_IMPORT   0

Definition at line 161 of file Importer.cpp.

#define TIMER_STOP(t)
Value:
(float(timer_stop<std::chrono::steady_clock::time_point, std::chrono::microseconds>( \
t)) / \
1.0E6f)

Definition at line 101 of file Importer.cpp.

Referenced by import_export::import_thread_shapefile(), and import_export::Importer::importGDALRaster().

Function Documentation

inline auto get_filesize (const std::string &file_path)

Definition at line 116 of file Importer.cpp.

References heavyai::file_size().

Referenced by import_export::DataStreamSink::archivePlumber().

116  {
117  boost::filesystem::path boost_file_path{file_path};
118  boost::system::error_code ec;
119  const auto filesize = boost::filesystem::file_size(boost_file_path, ec);
120  return ec ? 0 : filesize;
121 }
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Variable Documentation

size_t g_archive_read_buf_size = 1 << 20

std::optional<size_t> g_detect_test_sample_size = std::nullopt

Definition at line 112 of file Importer.cpp.

size_t g_max_import_threads

static constexpr int kMaxRasterScanlinesPerThread = 32

Definition at line 114 of file Importer.cpp.

Referenced by import_export::Importer::importGDALRaster().