OmniSciDB
a575cb28ea
|
#include <Importer.h>
Public Member Functions | |
Detector (const boost::filesystem::path &fp, CopyParams &cp) | |
std::vector< std::string > | get_headers () |
std::vector< std::vector < std::string > > | get_sample_rows (size_t n) |
Public Member Functions inherited from import_export::DataStreamSink | |
DataStreamSink () | |
DataStreamSink (const CopyParams &copy_params, const std::string file_path) | |
virtual | ~DataStreamSink () |
const CopyParams & | get_copy_params () const |
void | import_compressed (std::vector< std::string > &file_paths) |
Static Public Member Functions | |
static SQLTypes | detect_sqltype (const std::string &str) |
Public Attributes | |
std::vector< std::vector < std::string > > | raw_rows |
std::vector< SQLTypes > | best_sqltypes |
std::vector< EncodingType > | best_encodings |
bool | has_headers = false |
Private Member Functions | |
void | init () |
void | read_file () |
void | detect_row_delimiter () |
void | split_raw_data () |
std::vector< SQLTypes > | detect_column_types (const std::vector< std::string > &row) |
void | find_best_sqltypes () |
std::vector< SQLTypes > | find_best_sqltypes (const std::vector< std::vector< std::string >> &raw_rows, const CopyParams &copy_params) |
std::vector< SQLTypes > | find_best_sqltypes (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const CopyParams &copy_params) |
std::vector< EncodingType > | find_best_encodings (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types) |
bool | detect_headers (const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types) |
void | find_best_sqltypes_and_headers () |
ImportStatus | importDelimited (const std::string &file_path, const bool decompressed) override |
Static Private Member Functions | |
static bool | more_restrictive_sqltype (const SQLTypes a, const SQLTypes b) |
Private Attributes | |
std::string | raw_data |
boost::filesystem::path | file_path |
std::chrono::duration< double > | timeout {1} |
std::string | line1 |
Additional Inherited Members | |
Member Functions inherited from import_export::DataStreamSink | |
ImportStatus | archivePlumber () |
Attributes inherited from import_export::DataStreamSink | |
CopyParams | copy_params |
const std::string | file_path |
FILE * | p_file = nullptr |
ImportStatus | import_status |
bool | load_failed = false |
size_t | total_file_size {0} |
std::vector< size_t > | file_offsets |
std::mutex | file_offsets_mutex |
Definition at line 657 of file Importer.h.
|
inline |
Definition at line 659 of file Importer.h.
References init(), and read_file().
|
private |
Definition at line 3090 of file Importer.cpp.
References detect_sqltype(), and generate_TableFunctionsFactory_init::i.
Referenced by find_best_sqltypes_and_headers().
|
private |
Definition at line 3232 of file Importer.cpp.
References has_headers, and kTEXT.
Referenced by find_best_sqltypes_and_headers().
|
private |
Definition at line 2962 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, import_export::CopyParams::delimiter, and file_path.
Referenced by init().
|
static |
Definition at line 3008 of file Importer.cpp.
References dateTimeParseOptional< kDATE >(), dateTimeParseOptional< kTIME >(), dateTimeParseOptional< kTIMESTAMP >(), kBIGINT, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, and run_benchmark_import::type.
Referenced by detect_column_types(), and find_best_sqltypes().
|
private |
Definition at line 3196 of file Importer.cpp.
References file_path, IS_STRING, kENCODING_DICT, kENCODING_NONE, and raw_rows.
Referenced by find_best_sqltypes_and_headers().
|
private |
Definition at line 3143 of file Importer.cpp.
References best_sqltypes, import_export::DataStreamSink::copy_params, and raw_rows.
Referenced by find_best_sqltypes(), and find_best_sqltypes_and_headers().
|
private |
Definition at line 3147 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, find_best_sqltypes(), and raw_rows.
|
private |
Definition at line 3153 of file Importer.cpp.
References detect_sqltype(), run_benchmark_import::end_time, file_path, kCHAR, kTEXT, more_restrictive_sqltype(), import_export::CopyParams::null_str, raw_rows, generate_TableFunctionsFactory_init::t, and timeout.
|
private |
Definition at line 3120 of file Importer.cpp.
References import_export::AUTODETECT, best_encodings, best_sqltypes, import_export::DataStreamSink::copy_params, detect_column_types(), detect_headers(), find_best_encodings(), find_best_sqltypes(), import_export::HAS_HEADER, import_export::CopyParams::has_header, has_headers, import_export::NO_HEADER, and raw_rows.
Referenced by init().
std::vector< std::string > import_export::Detector::get_headers | ( | ) |
Definition at line 3255 of file Importer.cpp.
References best_sqltypes, has_headers, generate_TableFunctionsFactory_init::i, raw_rows, and to_string().
Referenced by DBHandler::detect_column_types().
std::vector< std::vector< std::string > > import_export::Detector::get_sample_rows | ( | size_t | n | ) |
Definition at line 3247 of file Importer.cpp.
References has_headers, and raw_rows.
Referenced by DBHandler::detect_column_types().
|
override private virtual |
Implements import_export::DataStreamSink.
Definition at line 2895 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, run_benchmark_import::end_time, omnisci::fopen(), import_export::DataStreamSink::import_status, generate_TableFunctionsFactory_init::line, line1, import_export::CopyParams::line_delim, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, import_export::DataStreamSink::p_file, raw_data, import_export::ImportStatus::rows_completed, and timeout.
|
private |
Definition at line 2889 of file Importer.cpp.
References detect_row_delimiter(), find_best_sqltypes_and_headers(), and split_raw_data().
Referenced by Detector().
|
static private |
Definition at line 3098 of file Importer.cpp.
References kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, and kTIMESTAMP.
Referenced by find_best_sqltypes().
|
private |
Definition at line 2957 of file Importer.cpp.
References import_export::DataStreamSink::archivePlumber().
Referenced by Detector().
|
private |
Definition at line 2971 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, import_export::delimited_parser::get_row(), raw_data, raw_rows, and import_export::CopyParams::threads.
Referenced by init().
std::vector<EncodingType> import_export::Detector::best_encodings |
Definition at line 672 of file Importer.h.
Referenced by DBHandler::detect_column_types(), and find_best_sqltypes_and_headers().
std::vector<SQLTypes> import_export::Detector::best_sqltypes |
Definition at line 671 of file Importer.h.
Referenced by DBHandler::detect_column_types(), find_best_sqltypes(), find_best_sqltypes_and_headers(), and get_headers().
|
private |
Definition at line 702 of file Importer.h.
Referenced by detect_row_delimiter(), find_best_encodings(), and find_best_sqltypes().
bool import_export::Detector::has_headers = false |
Definition at line 673 of file Importer.h.
Referenced by detect_headers(), find_best_sqltypes_and_headers(), get_headers(), and get_sample_rows().
|
private |
Definition at line 704 of file Importer.h.
Referenced by importDelimited().
|
private |
Definition at line 701 of file Importer.h.
Referenced by importDelimited(), and split_raw_data().
std::vector<std::vector<std::string> > import_export::Detector::raw_rows |
Definition at line 669 of file Importer.h.
Referenced by find_best_encodings(), find_best_sqltypes(), find_best_sqltypes_and_headers(), get_headers(), get_sample_rows(), and split_raw_data().
|
private |
Definition at line 703 of file Importer.h.
Referenced by find_best_sqltypes(), and importDelimited().