OmniSciDB  a575cb28ea
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export::Detector Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::Detector:
+ Collaboration diagram for import_export::Detector:

Public Member Functions

 Detector (const boost::filesystem::path &fp, CopyParams &cp)
 
std::vector< std::string > get_headers ()
 
std::vector< std::vector
< std::string > > 
get_sample_rows (size_t n)
 
- Public Member Functions inherited from import_export::DataStreamSink
 DataStreamSink ()
 
 DataStreamSink (const CopyParams &copy_params, const std::string file_path)
 
virtual ~DataStreamSink ()
 
const CopyParams & get_copy_params () const
 
void import_compressed (std::vector< std::string > &file_paths)
 

Static Public Member Functions

static SQLTypes detect_sqltype (const std::string &str)
 

Public Attributes

std::vector< std::vector
< std::string > > 
raw_rows
 
std::vector< SQLTypes > best_sqltypes
 
std::vector< EncodingType > best_encodings
 
bool has_headers = false
 

Private Member Functions

void init ()
 
void read_file ()
 
void detect_row_delimiter ()
 
void split_raw_data ()
 
std::vector< SQLTypes > detect_column_types (const std::vector< std::string > &row)
 
void find_best_sqltypes ()
 
std::vector< SQLTypes > find_best_sqltypes (const std::vector< std::vector< std::string >> &raw_rows, const CopyParams &copy_params)
 
std::vector< SQLTypes > find_best_sqltypes (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const CopyParams &copy_params)
 
std::vector< EncodingType > find_best_encodings (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types)
 
bool detect_headers (const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types)
 
void find_best_sqltypes_and_headers ()
 
ImportStatus importDelimited (const std::string &file_path, const bool decompressed) override
 

Static Private Member Functions

static bool more_restrictive_sqltype (const SQLTypes a, const SQLTypes b)
 

Private Attributes

std::string raw_data
 
boost::filesystem::path file_path
 
std::chrono::duration< double > timeout {1}
 
std::string line1
 

Additional Inherited Members

- Protected Member Functions inherited from import_export::DataStreamSink
ImportStatus archivePlumber ()
 
- Protected Attributes inherited from import_export::DataStreamSink
CopyParams copy_params
 
const std::string file_path
 
FILE * p_file = nullptr
 
ImportStatus import_status
 
bool load_failed = false
 
size_t total_file_size {0}
 
std::vector< size_t > file_offsets
 
std::mutex file_offsets_mutex
 

Detailed Description

Definition at line 657 of file Importer.h.

Constructor & Destructor Documentation

import_export::Detector::Detector ( const boost::filesystem::path &  fp,
CopyParams &  cp
)
inline

Definition at line 659 of file Importer.h.

References init(), and read_file().

660  : DataStreamSink(cp, fp.string()), file_path(fp) {
661  read_file();
662  init();
663  };
boost::filesystem::path file_path
Definition: Importer.h:702

+ Here is the call graph for this function:

Member Function Documentation

std::vector< SQLTypes > import_export::Detector::detect_column_types ( const std::vector< std::string > &  row)
private

Definition at line 3090 of file Importer.cpp.

References detect_sqltype(), and generate_TableFunctionsFactory_init::i.

Referenced by find_best_sqltypes_and_headers().

3090  {
3091  std::vector<SQLTypes> types(row.size());
3092  for (size_t i = 0; i < row.size(); i++) {
3093  types[i] = detect_sqltype(row[i]);
3094  }
3095  return types;
3096 }
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:3008

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Detector::detect_headers ( const std::vector< SQLTypes > &  first_types,
const std::vector< SQLTypes > &  rest_types 
)
private

Definition at line 3232 of file Importer.cpp.

References has_headers, and kTEXT.

Referenced by find_best_sqltypes_and_headers().

3233  {
3234  if (head_types.size() != tail_types.size()) {
3235  return false;
3236  }
3237  bool has_headers = false;
3238  for (size_t col_idx = 0; col_idx < tail_types.size(); col_idx++) {
3239  if (head_types[col_idx] != kTEXT) {
3240  return false;
3241  }
3242  has_headers = has_headers || tail_types[col_idx] != kTEXT;
3243  }
3244  return has_headers;
3245 }
Definition: sqltypes.h:51

+ Here is the caller graph for this function:

void import_export::Detector::detect_row_delimiter ( )
private

Definition at line 2962 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, import_export::CopyParams::delimiter, and file_path.

Referenced by init().

2962  {
2963  if (copy_params.delimiter == '\0') {
2964  copy_params.delimiter = ',';
2965  if (boost::filesystem::extension(file_path) == ".tsv") {
2966  copy_params.delimiter = '\t';
2967  }
2968  }
2969 }
boost::filesystem::path file_path
Definition: Importer.h:702

+ Here is the caller graph for this function:

SQLTypes import_export::Detector::detect_sqltype ( const std::string &  str)
static

Definition at line 3008 of file Importer.cpp.

References dateTimeParseOptional< kDATE >(), dateTimeParseOptional< kTIME >(), dateTimeParseOptional< kTIMESTAMP >(), kBIGINT, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, and run_benchmark_import::type.

Referenced by detect_column_types(), and find_best_sqltypes().

3008  {
3009  SQLTypes type = kTEXT;
3010  if (try_cast<double>(str)) {
3011  type = kDOUBLE;
3012  /*if (try_cast<bool>(str)) {
3013  type = kBOOLEAN;
3014  }*/
3015  if (try_cast<int16_t>(str)) {
3016  type = kSMALLINT;
3017  } else if (try_cast<int32_t>(str)) {
3018  type = kINT;
3019  } else if (try_cast<int64_t>(str)) {
3020  type = kBIGINT;
3021  } else if (try_cast<float>(str)) {
3022  type = kFLOAT;
3023  }
3024  }
3025 
3026  // check for geo types
3027  if (type == kTEXT) {
3028  // convert to upper case
3029  std::string str_upper_case = str;
3030  std::transform(
3031  str_upper_case.begin(), str_upper_case.end(), str_upper_case.begin(), ::toupper);
3032 
3033  // then test for leading words
3034  if (str_upper_case.find("POINT") == 0) {
3035  type = kPOINT;
3036  } else if (str_upper_case.find("LINESTRING") == 0) {
3037  type = kLINESTRING;
3038  } else if (str_upper_case.find("POLYGON") == 0) {
3040  type = kMULTIPOLYGON;
3041  } else {
3042  type = kPOLYGON;
3043  }
3044  } else if (str_upper_case.find("MULTIPOLYGON") == 0) {
3045  type = kMULTIPOLYGON;
3046  } else if (str_upper_case.find_first_not_of("0123456789ABCDEF") ==
3047  std::string::npos &&
3048  (str_upper_case.size() % 2) == 0) {
3049  // simple hex blob (two characters per byte, not uu-encode or base64)
3050  if (str_upper_case.size() >= 10) {
3051  // match WKB blobs for supported geometry types
3052  // the first byte specifies if the data is big-endian or little-endian
3053  // the next four bytes are the geometry type (1 = POINT etc.)
3054  // @TODO support eWKB, which has extra bits set in the geometry type
3055  auto first_five_bytes = str_upper_case.substr(0, 10);
3056  if (first_five_bytes == "0000000001" || first_five_bytes == "0101000000") {
3057  type = kPOINT;
3058  } else if (first_five_bytes == "0000000002" || first_five_bytes == "0102000000") {
3059  type = kLINESTRING;
3060  } else if (first_five_bytes == "0000000003" || first_five_bytes == "0103000000") {
3061  type = kPOLYGON;
3062  } else if (first_five_bytes == "0000000006" || first_five_bytes == "0106000000") {
3063  type = kMULTIPOLYGON;
3064  } else {
3065  // unsupported WKB type
3066  return type;
3067  }
3068  } else {
3069  // too short to be WKB
3070  return type;
3071  }
3072  }
3073  }
3074 
3075  // check for time types
3076  if (type == kTEXT) {
3077  // This won't match unix timestamp, since floats and ints were checked above.
3078  if (dateTimeParseOptional<kTIME>(str, 0)) {
3079  type = kTIME;
3080  } else if (dateTimeParseOptional<kTIMESTAMP>(str, 0)) {
3081  type = kTIMESTAMP;
3082  } else if (dateTimeParseOptional<kDATE>(str, 0)) {
3083  type = kDATE;
3084  }
3085  }
3086 
3087  return type;
3088 }
Definition: sqltypes.h:48
SQLTypes
Definition: sqltypes.h:37
std::optional< int64_t > dateTimeParseOptional< kTIME >(std::string_view str, unsigned const dim)
std::optional< int64_t > dateTimeParseOptional< kDATE >(std::string_view str, unsigned const dim)
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:149
std::optional< int64_t > dateTimeParseOptional< kTIMESTAMP >(std::string_view str, unsigned const dim)
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Definition: sqltypes.h:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< EncodingType > import_export::Detector::find_best_encodings ( const std::vector< std::vector< std::string >>::const_iterator &  row_begin,
const std::vector< std::vector< std::string >>::const_iterator &  row_end,
const std::vector< SQLTypes > &  best_types 
)
private

Definition at line 3196 of file Importer.cpp.

References file_path, IS_STRING, kENCODING_DICT, kENCODING_NONE, and raw_rows.

Referenced by find_best_sqltypes_and_headers().

3199  {
3200  if (raw_rows.size() < 1) {
3201  throw std::runtime_error("No rows found in: " +
3202  boost::filesystem::basename(file_path));
3203  }
3204  size_t num_cols = best_types.size();
3205  std::vector<EncodingType> best_encodes(num_cols, kENCODING_NONE);
3206  std::vector<size_t> num_rows_per_col(num_cols, 1);
3207  std::vector<std::unordered_set<std::string>> count_set(num_cols);
3208  for (auto row = row_begin; row != row_end; row++) {
3209  for (size_t col_idx = 0; col_idx < row->size() && col_idx < num_cols; col_idx++) {
3210  if (IS_STRING(best_types[col_idx])) {
3211  count_set[col_idx].insert(row->at(col_idx));
3212  num_rows_per_col[col_idx]++;
3213  }
3214  }
3215  }
3216  for (size_t col_idx = 0; col_idx < num_cols; col_idx++) {
3217  if (IS_STRING(best_types[col_idx])) {
3218  float uniqueRatio =
3219  static_cast<float>(count_set[col_idx].size()) / num_rows_per_col[col_idx];
3220  if (uniqueRatio < 0.75) {
3221  best_encodes[col_idx] = kENCODING_DICT;
3222  }
3223  }
3224  }
3225  return best_encodes;
3226 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
boost::filesystem::path file_path
Definition: Importer.h:702
#define IS_STRING(T)
Definition: sqltypes.h:241

+ Here is the caller graph for this function:

void import_export::Detector::find_best_sqltypes ( )
private

Definition at line 3143 of file Importer.cpp.

References best_sqltypes, import_export::DataStreamSink::copy_params, and raw_rows.

Referenced by find_best_sqltypes(), and find_best_sqltypes_and_headers().

3143  {
3145 }
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:671
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the caller graph for this function:

std::vector< SQLTypes > import_export::Detector::find_best_sqltypes ( const std::vector< std::vector< std::string >> &  raw_rows,
const CopyParams &  copy_params
)
private

Definition at line 3147 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, find_best_sqltypes(), and raw_rows.

3149  {
3150  return find_best_sqltypes(raw_rows.begin(), raw_rows.end(), copy_params);
3151 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the call graph for this function:

std::vector< SQLTypes > import_export::Detector::find_best_sqltypes ( const std::vector< std::vector< std::string >>::const_iterator &  row_begin,
const std::vector< std::vector< std::string >>::const_iterator &  row_end,
const CopyParams &  copy_params
)
private

Definition at line 3153 of file Importer.cpp.

References detect_sqltype(), run_benchmark_import::end_time, file_path, kCHAR, kTEXT, more_restrictive_sqltype(), import_export::CopyParams::null_str, raw_rows, generate_TableFunctionsFactory_init::t, and timeout.

3156  {
3157  if (raw_rows.size() < 1) {
3158  throw std::runtime_error("No rows found in: " +
3159  boost::filesystem::basename(file_path));
3160  }
3161  auto end_time = std::chrono::steady_clock::now() + timeout;
3162  size_t num_cols = raw_rows.front().size();
3163  std::vector<SQLTypes> best_types(num_cols, kCHAR);
3164  std::vector<size_t> non_null_col_counts(num_cols, 0);
3165  for (auto row = row_begin; row != row_end; row++) {
3166  while (best_types.size() < row->size() || non_null_col_counts.size() < row->size()) {
3167  best_types.push_back(kCHAR);
3168  non_null_col_counts.push_back(0);
3169  }
3170  for (size_t col_idx = 0; col_idx < row->size(); col_idx++) {
3171  // do not count nulls
3172  if (row->at(col_idx) == "" || !row->at(col_idx).compare(copy_params.null_str)) {
3173  continue;
3174  }
3175  SQLTypes t = detect_sqltype(row->at(col_idx));
3176  non_null_col_counts[col_idx]++;
3177  if (!more_restrictive_sqltype(best_types[col_idx], t)) {
3178  best_types[col_idx] = t;
3179  }
3180  }
3181  if (std::chrono::steady_clock::now() > end_time) {
3182  break;
3183  }
3184  }
3185  for (size_t col_idx = 0; col_idx < num_cols; col_idx++) {
3186  // if we don't have any non-null values for this column make it text to be
3187  // safe b/c that is least restrictive type
3188  if (non_null_col_counts[col_idx] == 0) {
3189  best_types[col_idx] = kTEXT;
3190  }
3191  }
3192 
3193  return best_types;
3194 }
SQLTypes
Definition: sqltypes.h:37
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:3008
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
boost::filesystem::path file_path
Definition: Importer.h:702
Definition: sqltypes.h:51
static bool more_restrictive_sqltype(const SQLTypes a, const SQLTypes b)
Definition: Importer.cpp:3098
std::chrono::duration< double > timeout
Definition: Importer.h:703
Definition: sqltypes.h:40

+ Here is the call graph for this function:

void import_export::Detector::find_best_sqltypes_and_headers ( )
private

Definition at line 3120 of file Importer.cpp.

References import_export::AUTODETECT, best_encodings, best_sqltypes, import_export::DataStreamSink::copy_params, detect_column_types(), detect_headers(), find_best_encodings(), find_best_sqltypes(), import_export::HAS_HEADER, import_export::CopyParams::has_header, has_headers, import_export::NO_HEADER, and raw_rows.

Referenced by init().

3120  {
3122  best_encodings =
3123  find_best_encodings(raw_rows.begin() + 1, raw_rows.end(), best_sqltypes);
3124  std::vector<SQLTypes> head_types = detect_column_types(raw_rows.at(0));
3125  switch (copy_params.has_header) {
3127  has_headers = detect_headers(head_types, best_sqltypes);
3128  if (has_headers) {
3130  } else {
3132  }
3133  break;
3135  has_headers = false;
3136  break;
3138  has_headers = true;
3139  break;
3140  }
3141 }
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:671
ImportHeaderRow has_header
Definition: CopyParams.h:48
std::vector< EncodingType > find_best_encodings(const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types)
Definition: Importer.cpp:3196
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
std::vector< EncodingType > best_encodings
Definition: Importer.h:672
bool detect_headers(const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types)
Definition: Importer.cpp:3232
std::vector< SQLTypes > detect_column_types(const std::vector< std::string > &row)
Definition: Importer.cpp:3090

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::string > import_export::Detector::get_headers ( )

Definition at line 3255 of file Importer.cpp.

References best_sqltypes, has_headers, generate_TableFunctionsFactory_init::i, raw_rows, and to_string().

Referenced by DBHandler::detect_column_types().

3255  {
3256  std::vector<std::string> headers(best_sqltypes.size());
3257  for (size_t i = 0; i < best_sqltypes.size(); i++) {
3258  if (has_headers && i < raw_rows[0].size()) {
3259  headers[i] = raw_rows[0][i];
3260  } else {
3261  headers[i] = "column_" + std::to_string(i + 1);
3262  }
3263  }
3264  return headers;
3265 }
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:671
std::string to_string(char const *&&v)
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::vector< std::string > > import_export::Detector::get_sample_rows ( size_t  n)

Definition at line 3247 of file Importer.cpp.

References has_headers, and raw_rows.

Referenced by DBHandler::detect_column_types().

3247  {
3248  n = std::min(n, raw_rows.size());
3249  size_t offset = (has_headers && raw_rows.size() > 1) ? 1 : 0;
3250  std::vector<std::vector<std::string>> sample_rows(raw_rows.begin() + offset,
3251  raw_rows.begin() + n);
3252  return sample_rows;
3253 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the caller graph for this function:

ImportStatus import_export::Detector::importDelimited ( const std::string &  file_path,
const bool  decompressed 
)
override private virtual

Implements import_export::DataStreamSink.

Definition at line 2895 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, run_benchmark_import::end_time, omnisci::fopen(), import_export::DataStreamSink::import_status, generate_TableFunctionsFactory_init::line, line1, import_export::CopyParams::line_delim, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, import_export::DataStreamSink::p_file, raw_data, import_export::ImportStatus::rows_completed, and timeout.

2896  {
2897  if (!p_file) {
2898  p_file = fopen(file_path.c_str(), "rb");
2899  }
2900  if (!p_file) {
2901  throw std::runtime_error("failed to open file '" + file_path +
2902  "': " + strerror(errno));
2903  }
2904 
2905  // somehow clang does not support ext/stdio_filebuf.h, so
2906  // need to diy readline with customized copy_params.line_delim...
2907  std::string line;
2908  line.reserve(1 * 1024 * 1024);
2909  auto end_time = std::chrono::steady_clock::now() +
2910  timeout * (boost::istarts_with(file_path, "s3://") ? 3 : 1);
2911  try {
2912  while (!feof(p_file)) {
2913  int c;
2914  size_t n = 0;
2915  while (EOF != (c = fgetc(p_file)) && copy_params.line_delim != c) {
2916  if (n++ >= line.capacity()) {
2917  break;
2918  }
2919  line += c;
2920  }
2921  if (0 == n) {
2922  break;
2923  }
2924  // remember the first line, which is possibly a header line, to
2925  // ignore identical header line(s) in 2nd+ files of a archive;
2926  // otherwise, 2nd+ header may be mistaken as an all-string row
2927  // and so be final column types.
2928  if (line1.empty()) {
2929  line1 = line;
2930  } else if (line == line1) {
2931  line.clear();
2932  continue;
2933  }
2934 
2935  raw_data += line;
2937  line.clear();
2939  if (std::chrono::steady_clock::now() > end_time) {
2940  if (import_status.rows_completed > 10000) {
2941  break;
2942  }
2943  }
2944  }
2945  } catch (std::exception& e) {
2946  }
2947 
2948  // as if load truncated
2950  load_failed = true;
2951 
2952  fclose(p_file);
2953  p_file = nullptr;
2954  return import_status;
2955 }
::FILE * fopen(const char *filename, const char *mode)
Definition: omnisci_fs.cpp:72
boost::filesystem::path file_path
Definition: Importer.h:702
std::chrono::duration< double > timeout
Definition: Importer.h:703
std::string raw_data
Definition: Importer.h:701

+ Here is the call graph for this function:

void import_export::Detector::init ( )
private

Definition at line 2889 of file Importer.cpp.

References detect_row_delimiter(), find_best_sqltypes_and_headers(), and split_raw_data().

Referenced by Detector().

2889  {
2891  split_raw_data();
2893 }
void find_best_sqltypes_and_headers()
Definition: Importer.cpp:3120

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Detector::more_restrictive_sqltype ( const SQLTypes  a,
const SQLTypes  b 
)
static private

Definition at line 3098 of file Importer.cpp.

References kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, and kTIMESTAMP.

Referenced by find_best_sqltypes().

3098  {
3099  static std::array<int, kSQLTYPE_LAST> typeorder;
3100  typeorder[kCHAR] = 0;
3101  typeorder[kBOOLEAN] = 2;
3102  typeorder[kSMALLINT] = 3;
3103  typeorder[kINT] = 4;
3104  typeorder[kBIGINT] = 5;
3105  typeorder[kFLOAT] = 6;
3106  typeorder[kDOUBLE] = 7;
3107  typeorder[kTIMESTAMP] = 8;
3108  typeorder[kTIME] = 9;
3109  typeorder[kDATE] = 10;
3110  typeorder[kPOINT] = 11;
3111  typeorder[kLINESTRING] = 11;
3112  typeorder[kPOLYGON] = 11;
3113  typeorder[kMULTIPOLYGON] = 11;
3114  typeorder[kTEXT] = 12;
3115 
3116  // note: b < a instead of a < b because the map is ordered most to least restrictive
3117  return typeorder[b] < typeorder[a];
3118 }
Definition: sqltypes.h:48
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Definition: sqltypes.h:40
Definition: sqltypes.h:44

+ Here is the caller graph for this function:

void import_export::Detector::read_file ( )
private

Definition at line 2957 of file Importer.cpp.

References import_export::DataStreamSink::archivePlumber().

Referenced by Detector().

2957  {
2958  // this becomes analogous to Importer::import()
2960 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Detector::split_raw_data ( )
private

Definition at line 2971 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, import_export::delimited_parser::get_row(), raw_data, raw_rows, and import_export::CopyParams::threads.

Referenced by init().

2971  {
2972  const char* buf = raw_data.c_str();
2973  const char* buf_end = buf + raw_data.size();
2974  bool try_single_thread = false;
2975  for (const char* p = buf; p < buf_end; p++) {
2976  std::vector<std::string> row;
2977  std::vector<std::unique_ptr<char[]>> tmp_buffers;
2979  p, buf_end, buf_end, copy_params, nullptr, row, tmp_buffers, try_single_thread);
2980  raw_rows.push_back(row);
2981  if (try_single_thread) {
2982  break;
2983  }
2984  }
2985  if (try_single_thread) {
2986  copy_params.threads = 1;
2987  raw_rows.clear();
2988  for (const char* p = buf; p < buf_end; p++) {
2989  std::vector<std::string> row;
2990  std::vector<std::unique_ptr<char[]>> tmp_buffers;
2992  p, buf_end, buf_end, copy_params, nullptr, row, tmp_buffers, try_single_thread);
2993  raw_rows.push_back(row);
2994  }
2995  }
2996 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
std::string raw_data
Definition: Importer.h:701
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

std::vector<EncodingType> import_export::Detector::best_encodings

Definition at line 672 of file Importer.h.

Referenced by DBHandler::detect_column_types(), and find_best_sqltypes_and_headers().

std::vector<SQLTypes> import_export::Detector::best_sqltypes

Definition at line 671 of file Importer.h.

boost::filesystem::path import_export::Detector::file_path
private

Definition at line 702 of file Importer.h.

Referenced by detect_row_delimiter(), find_best_encodings(), and find_best_sqltypes().

bool import_export::Detector::has_headers = false
std::string import_export::Detector::line1
private

Definition at line 704 of file Importer.h.

Referenced by importDelimited().

std::string import_export::Detector::raw_data
private

Definition at line 701 of file Importer.h.

Referenced by importDelimited(), and split_raw_data().

std::vector<std::vector<std::string> > import_export::Detector::raw_rows

Definition at line 669 of file Importer.h.

std::chrono::duration<double> import_export::Detector::timeout {1}
private

Definition at line 703 of file Importer.h.

Referenced by find_best_sqltypes(), and importDelimited().


The documentation for this class was generated from the following files: