OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export::Detector Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::Detector:
+ Collaboration diagram for import_export::Detector:

Public Member Functions

 Detector (const boost::filesystem::path &fp, CopyParams &cp)
 
std::vector< std::string > get_headers ()
 
std::vector< std::vector
< std::string > > 
get_sample_rows (size_t n)
 
- Public Member Functions inherited from import_export::DataStreamSink
 DataStreamSink ()
 
 DataStreamSink (const CopyParams &copy_params, const std::string file_path)
 
virtual ~DataStreamSink ()
 
const CopyParams & get_copy_params () const
 
void import_compressed (std::vector< std::string > &file_paths)
 

Static Public Member Functions

static SQLTypes detect_sqltype (const std::string &str)
 

Public Attributes

std::vector< std::vector
< std::string > > 
raw_rows
 
std::vector< SQLTypes > best_sqltypes
 
std::vector< EncodingType > best_encodings
 
bool has_headers = false
 

Private Member Functions

void init ()
 
void read_file ()
 
void detect_row_delimiter ()
 
void split_raw_data ()
 
std::vector< SQLTypes > detect_column_types (const std::vector< std::string > &row)
 
void find_best_sqltypes ()
 
std::vector< SQLTypes > find_best_sqltypes (const std::vector< std::vector< std::string >> &raw_rows, const CopyParams &copy_params)
 
std::vector< SQLTypes > find_best_sqltypes (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const CopyParams &copy_params)
 
std::vector< EncodingType > find_best_encodings (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types)
 
bool detect_headers (const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types)
 
void find_best_sqltypes_and_headers ()
 
ImportStatus importDelimited (const std::string &file_path, const bool decompressed) override
 

Static Private Member Functions

static bool more_restrictive_sqltype (const SQLTypes a, const SQLTypes b)
 

Private Attributes

std::string raw_data
 
boost::filesystem::path file_path
 
std::chrono::duration< double > timeout {1}
 
std::string line1
 

Additional Inherited Members

- Protected Member Functions inherited from import_export::DataStreamSink
ImportStatus archivePlumber ()
 
- Protected Attributes inherited from import_export::DataStreamSink
CopyParams copy_params
 
const std::string file_path
 
FILE * p_file = nullptr
 
ImportStatus import_status
 
bool load_failed = false
 
size_t total_file_size {0}
 
std::vector< size_t > file_offsets
 
std::mutex file_offsets_mutex
 

Detailed Description

Definition at line 657 of file Importer.h.

Constructor & Destructor Documentation

import_export::Detector::Detector ( const boost::filesystem::path &  fp,
CopyParams &  cp 
)
inline

Definition at line 659 of file Importer.h.

References init(), and read_file().

660  : DataStreamSink(cp, fp.string()), file_path(fp) {
661  read_file();
662  init();
663  };
boost::filesystem::path file_path
Definition: Importer.h:702

+ Here is the call graph for this function:

Member Function Documentation

std::vector< SQLTypes > import_export::Detector::detect_column_types ( const std::vector< std::string > &  row)
private

Definition at line 3083 of file Importer.cpp.

References detect_sqltype().

Referenced by find_best_sqltypes_and_headers().

3083  {
3084  std::vector<SQLTypes> types(row.size());
3085  for (size_t i = 0; i < row.size(); i++) {
3086  types[i] = detect_sqltype(row[i]);
3087  }
3088  return types;
3089 }
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:2989

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Detector::detect_headers ( const std::vector< SQLTypes > &  first_types,
const std::vector< SQLTypes > &  rest_types 
)
private

Definition at line 3225 of file Importer.cpp.

References has_headers, and kTEXT.

Referenced by find_best_sqltypes_and_headers().

3226  {
3227  if (head_types.size() != tail_types.size()) {
3228  return false;
3229  }
3230  bool has_headers = false;
3231  for (size_t col_idx = 0; col_idx < tail_types.size(); col_idx++) {
3232  if (head_types[col_idx] != kTEXT) {
3233  return false;
3234  }
3235  has_headers = has_headers || tail_types[col_idx] != kTEXT;
3236  }
3237  return has_headers;
3238 }
Definition: sqltypes.h:54

+ Here is the caller graph for this function:

void import_export::Detector::detect_row_delimiter ( )
private

Definition at line 2931 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, import_export::CopyParams::delimiter, and file_path.

Referenced by init().

2931  {
2932  if (copy_params.delimiter == '\0') {
2933  copy_params.delimiter = ',';
2934  if (boost::filesystem::extension(file_path) == ".tsv") {
2935  copy_params.delimiter = '\t';
2936  }
2937  }
2938 }
boost::filesystem::path file_path
Definition: Importer.h:702

+ Here is the caller graph for this function:

SQLTypes import_export::Detector::detect_sqltype ( const std::string &  str)
static

Definition at line 2989 of file Importer.cpp.

References kBIGINT, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, import_export::try_strptimes(), and run_benchmark_import::type.

Referenced by detect_column_types(), and find_best_sqltypes().

2989  {
2990  SQLTypes type = kTEXT;
2991  if (try_cast<double>(str)) {
2992  type = kDOUBLE;
2993  /*if (try_cast<bool>(str)) {
2994  type = kBOOLEAN;
2995  }*/
2996  if (try_cast<int16_t>(str)) {
2997  type = kSMALLINT;
2998  } else if (try_cast<int32_t>(str)) {
2999  type = kINT;
3000  } else if (try_cast<int64_t>(str)) {
3001  type = kBIGINT;
3002  } else if (try_cast<float>(str)) {
3003  type = kFLOAT;
3004  }
3005  }
3006 
3007  // check for geo types
3008  if (type == kTEXT) {
3009  // convert to upper case
3010  std::string str_upper_case = str;
3011  std::transform(
3012  str_upper_case.begin(), str_upper_case.end(), str_upper_case.begin(), ::toupper);
3013 
3014  // then test for leading words
3015  if (str_upper_case.find("POINT") == 0) {
3016  type = kPOINT;
3017  } else if (str_upper_case.find("LINESTRING") == 0) {
3018  type = kLINESTRING;
3019  } else if (str_upper_case.find("POLYGON") == 0) {
3020  if (PROMOTE_POLYGON_TO_MULTIPOLYGON) {
3021  type = kMULTIPOLYGON;
3022  } else {
3023  type = kPOLYGON;
3024  }
3025  } else if (str_upper_case.find("MULTIPOLYGON") == 0) {
3026  type = kMULTIPOLYGON;
3027  } else if (str_upper_case.find_first_not_of("0123456789ABCDEF") ==
3028  std::string::npos &&
3029  (str_upper_case.size() % 2) == 0) {
3030  // simple hex blob (two characters per byte, not uu-encode or base64)
3031  if (str_upper_case.size() >= 10) {
3032  // match WKB blobs for supported geometry types
3033  // the first byte specifies if the data is big-endian or little-endian
3034  // the next four bytes are the geometry type (1 = POINT etc.)
3035  // @TODO support eWKB, which has extra bits set in the geometry type
3036  auto first_five_bytes = str_upper_case.substr(0, 10);
3037  if (first_five_bytes == "0000000001" || first_five_bytes == "0101000000") {
3038  type = kPOINT;
3039  } else if (first_five_bytes == "0000000002" || first_five_bytes == "0102000000") {
3040  type = kLINESTRING;
3041  } else if (first_five_bytes == "0000000003" || first_five_bytes == "0103000000") {
3042  type = kPOLYGON;
3043  } else if (first_five_bytes == "0000000006" || first_five_bytes == "0106000000") {
3044  type = kMULTIPOLYGON;
3045  } else {
3046  // unsupported WKB type
3047  return type;
3048  }
3049  } else {
3050  // too short to be WKB
3051  return type;
3052  }
3053  }
3054  }
3055 
3056  // check for time types
3057  if (type == kTEXT) {
3058  // @TODO
3059  // make these tests more robust so they don't match stuff they should not
3060  char* buf;
3061  buf = try_strptimes(str.c_str(),
3062  {"%Y-%m-%d", "%m/%d/%Y", "%Y/%m/%d", "%d-%b-%y", "%d/%b/%Y"});
3063  if (buf) {
3064  type = kDATE;
3065  if (*buf == 'T' || *buf == ' ' || *buf == ':') {
3066  buf++;
3067  }
3068  }
3069  buf = try_strptimes(buf == nullptr ? str.c_str() : buf,
3070  {"%T %z", "%T", "%H%M%S", "%R"});
3071  if (buf) {
3072  if (type == kDATE) {
3073  type = kTIMESTAMP;
3074  } else {
3075  type = kTIME;
3076  }
3077  }
3078  }
3079 
3080  return type;
3081 }
char * try_strptimes(const char *str, const std::vector< std::string > &formats)
Definition: Importer.cpp:2977
Definition: sqltypes.h:51
SQLTypes
Definition: sqltypes.h:40
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON
Definition: Importer.cpp:143
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:47

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< EncodingType > import_export::Detector::find_best_encodings ( const std::vector< std::vector< std::string >>::const_iterator &  row_begin,
const std::vector< std::vector< std::string >>::const_iterator &  row_end,
const std::vector< SQLTypes > &  best_types 
)
private

Definition at line 3189 of file Importer.cpp.

References file_path, IS_STRING, kENCODING_DICT, kENCODING_NONE, and raw_rows.

Referenced by find_best_sqltypes_and_headers().

3192  {
3193  if (raw_rows.size() < 1) {
3194  throw std::runtime_error("No rows found in: " +
3195  boost::filesystem::basename(file_path));
3196  }
3197  size_t num_cols = best_types.size();
3198  std::vector<EncodingType> best_encodes(num_cols, kENCODING_NONE);
3199  std::vector<size_t> num_rows_per_col(num_cols, 1);
3200  std::vector<std::unordered_set<std::string>> count_set(num_cols);
3201  for (auto row = row_begin; row != row_end; row++) {
3202  for (size_t col_idx = 0; col_idx < row->size() && col_idx < num_cols; col_idx++) {
3203  if (IS_STRING(best_types[col_idx])) {
3204  count_set[col_idx].insert(row->at(col_idx));
3205  num_rows_per_col[col_idx]++;
3206  }
3207  }
3208  }
3209  for (size_t col_idx = 0; col_idx < num_cols; col_idx++) {
3210  if (IS_STRING(best_types[col_idx])) {
3211  float uniqueRatio =
3212  static_cast<float>(count_set[col_idx].size()) / num_rows_per_col[col_idx];
3213  if (uniqueRatio < 0.75) {
3214  best_encodes[col_idx] = kENCODING_DICT;
3215  }
3216  }
3217  }
3218  return best_encodes;
3219 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
boost::filesystem::path file_path
Definition: Importer.h:702
#define IS_STRING(T)
Definition: sqltypes.h:244

+ Here is the caller graph for this function:

void import_export::Detector::find_best_sqltypes ( )
private

Definition at line 3136 of file Importer.cpp.

References best_sqltypes, import_export::DataStreamSink::copy_params, and raw_rows.

Referenced by find_best_sqltypes(), and find_best_sqltypes_and_headers().

3136  {
3138 }
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:671
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the caller graph for this function:

std::vector< SQLTypes > import_export::Detector::find_best_sqltypes ( const std::vector< std::vector< std::string >> &  raw_rows,
const CopyParams &  copy_params 
)
private

Definition at line 3140 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, find_best_sqltypes(), and raw_rows.

3142  {
3143  return find_best_sqltypes(raw_rows.begin(), raw_rows.end(), copy_params);
3144 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the call graph for this function:

std::vector< SQLTypes > import_export::Detector::find_best_sqltypes ( const std::vector< std::vector< std::string >>::const_iterator &  row_begin,
const std::vector< std::vector< std::string >>::const_iterator &  row_end,
const CopyParams &  copy_params 
)
private

Definition at line 3146 of file Importer.cpp.

References detect_sqltype(), run_benchmark_import::end_time, file_path, kCHAR, kTEXT, more_restrictive_sqltype(), import_export::CopyParams::null_str, raw_rows, and timeout.

3149  {
3150  if (raw_rows.size() < 1) {
3151  throw std::runtime_error("No rows found in: " +
3152  boost::filesystem::basename(file_path));
3153  }
3154  auto end_time = std::chrono::steady_clock::now() + timeout;
3155  size_t num_cols = raw_rows.front().size();
3156  std::vector<SQLTypes> best_types(num_cols, kCHAR);
3157  std::vector<size_t> non_null_col_counts(num_cols, 0);
3158  for (auto row = row_begin; row != row_end; row++) {
3159  while (best_types.size() < row->size() || non_null_col_counts.size() < row->size()) {
3160  best_types.push_back(kCHAR);
3161  non_null_col_counts.push_back(0);
3162  }
3163  for (size_t col_idx = 0; col_idx < row->size(); col_idx++) {
3164  // do not count nulls
3165  if (row->at(col_idx) == "" || !row->at(col_idx).compare(copy_params.null_str)) {
3166  continue;
3167  }
3168  SQLTypes t = detect_sqltype(row->at(col_idx));
3169  non_null_col_counts[col_idx]++;
3170  if (!more_restrictive_sqltype(best_types[col_idx], t)) {
3171  best_types[col_idx] = t;
3172  }
3173  }
3174  if (std::chrono::steady_clock::now() > end_time) {
3175  break;
3176  }
3177  }
3178  for (size_t col_idx = 0; col_idx < num_cols; col_idx++) {
3179  // if we don't have any non-null values for this column make it text to be
3180  // safe b/c that is least restrictive type
3181  if (non_null_col_counts[col_idx] == 0) {
3182  best_types[col_idx] = kTEXT;
3183  }
3184  }
3185 
3186  return best_types;
3187 }
SQLTypes
Definition: sqltypes.h:40
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:2989
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
boost::filesystem::path file_path
Definition: Importer.h:702
Definition: sqltypes.h:54
static bool more_restrictive_sqltype(const SQLTypes a, const SQLTypes b)
Definition: Importer.cpp:3091
std::chrono::duration< double > timeout
Definition: Importer.h:703
Definition: sqltypes.h:43

+ Here is the call graph for this function:

void import_export::Detector::find_best_sqltypes_and_headers ( )
private

Definition at line 3113 of file Importer.cpp.

References import_export::AUTODETECT, best_encodings, best_sqltypes, import_export::DataStreamSink::copy_params, detect_column_types(), detect_headers(), find_best_encodings(), find_best_sqltypes(), import_export::HAS_HEADER, import_export::CopyParams::has_header, has_headers, import_export::NO_HEADER, and raw_rows.

Referenced by init().

3113  {
3115  best_encodings =
3116  find_best_encodings(raw_rows.begin() + 1, raw_rows.end(), best_sqltypes);
3117  std::vector<SQLTypes> head_types = detect_column_types(raw_rows.at(0));
3118  switch (copy_params.has_header) {
3120  has_headers = detect_headers(head_types, best_sqltypes);
3121  if (has_headers) {
3123  } else {
3125  }
3126  break;
3128  has_headers = false;
3129  break;
3131  has_headers = true;
3132  break;
3133  }
3134 }
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:671
ImportHeaderRow has_header
Definition: CopyParams.h:48
std::vector< EncodingType > find_best_encodings(const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types)
Definition: Importer.cpp:3189
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
std::vector< EncodingType > best_encodings
Definition: Importer.h:672
bool detect_headers(const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types)
Definition: Importer.cpp:3225
std::vector< SQLTypes > detect_column_types(const std::vector< std::string > &row)
Definition: Importer.cpp:3083

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::string > import_export::Detector::get_headers ( )

Definition at line 3248 of file Importer.cpp.

References best_sqltypes, has_headers, raw_rows, and to_string().

Referenced by DBHandler::detect_column_types().

3248  {
3249  std::vector<std::string> headers(best_sqltypes.size());
3250  for (size_t i = 0; i < best_sqltypes.size(); i++) {
3251  if (has_headers && i < raw_rows[0].size()) {
3252  headers[i] = raw_rows[0][i];
3253  } else {
3254  headers[i] = "column_" + std::to_string(i + 1);
3255  }
3256  }
3257  return headers;
3258 }
std::vector< SQLTypes > best_sqltypes
Definition: Importer.h:671
std::string to_string(char const *&&v)
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< std::vector< std::string > > import_export::Detector::get_sample_rows ( size_t  n)

Definition at line 3240 of file Importer.cpp.

References has_headers, and raw_rows.

Referenced by DBHandler::detect_column_types().

3240  {
3241  n = std::min(n, raw_rows.size());
3242  size_t offset = (has_headers && raw_rows.size() > 1) ? 1 : 0;
3243  std::vector<std::vector<std::string>> sample_rows(raw_rows.begin() + offset,
3244  raw_rows.begin() + n);
3245  return sample_rows;
3246 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669

+ Here is the caller graph for this function:

ImportStatus import_export::Detector::importDelimited ( const std::string &  file_path,
const bool  decompressed 
)
override private virtual

Implements import_export::DataStreamSink.

Definition at line 2864 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, run_benchmark_import::end_time, omnisci::fopen(), import_export::DataStreamSink::import_status, parse_ast::line, line1, import_export::CopyParams::line_delim, import_export::DataStreamSink::load_failed, import_export::ImportStatus::load_truncated, import_export::DataStreamSink::p_file, raw_data, import_export::ImportStatus::rows_completed, and timeout.

2865  {
2866  if (!p_file) {
2867  p_file = fopen(file_path.c_str(), "rb");
2868  }
2869  if (!p_file) {
2870  throw std::runtime_error("failed to open file '" + file_path +
2871  "': " + strerror(errno));
2872  }
2873 
2874  // somehow clang does not support ext/stdio_filebuf.h, so
2875  // need to diy readline with customized copy_params.line_delim...
2876  std::string line;
2877  line.reserve(1 * 1024 * 1024);
2878  auto end_time = std::chrono::steady_clock::now() +
2879  timeout * (boost::istarts_with(file_path, "s3://") ? 3 : 1);
2880  try {
2881  while (!feof(p_file)) {
2882  int c;
2883  size_t n = 0;
2884  while (EOF != (c = fgetc(p_file)) && copy_params.line_delim != c) {
2885  if (n++ >= line.capacity()) {
2886  break;
2887  }
2888  line += c;
2889  }
2890  if (0 == n) {
2891  break;
2892  }
2893  // remember the first line, which is possibly a header line, to
2894  // ignore identical header line(s) in 2nd+ files of a archive;
2895  // otherwise, 2nd+ header may be mistaken as an all-string row
2896  // and so be final column types.
2897  if (line1.empty()) {
2898  line1 = line;
2899  } else if (line == line1) {
2900  line.clear();
2901  continue;
2902  }
2903 
2904  raw_data += line;
2906  line.clear();
2908  if (std::chrono::steady_clock::now() > end_time) {
2909  if (import_status.rows_completed > 10000) {
2910  break;
2911  }
2912  }
2913  }
2914  } catch (std::exception& e) {
2915  }
2916 
2917  // as if load truncated
2918  import_status.load_truncated = true;
2919  load_failed = true;
2920 
2921  fclose(p_file);
2922  p_file = nullptr;
2923  return import_status;
2924 }
tuple line
Definition: parse_ast.py:10
::FILE * fopen(const char *filename, const char *mode)
Definition: omnisci_fs.cpp:72
boost::filesystem::path file_path
Definition: Importer.h:702
std::chrono::duration< double > timeout
Definition: Importer.h:703
std::string raw_data
Definition: Importer.h:701

+ Here is the call graph for this function:

void import_export::Detector::init ( )
private

Definition at line 2858 of file Importer.cpp.

References detect_row_delimiter(), find_best_sqltypes_and_headers(), and split_raw_data().

Referenced by Detector().

2858  {
2859  detect_row_delimiter();
2860  split_raw_data();
2861  find_best_sqltypes_and_headers();
2862 }
void find_best_sqltypes_and_headers()
Definition: Importer.cpp:3113

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool import_export::Detector::more_restrictive_sqltype ( const SQLTypes  a,
const SQLTypes  b 
)
static private

Definition at line 3091 of file Importer.cpp.

References kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, and kTIMESTAMP.

Referenced by find_best_sqltypes().

3091  {
3092  static std::array<int, kSQLTYPE_LAST> typeorder;
3093  typeorder[kCHAR] = 0;
3094  typeorder[kBOOLEAN] = 2;
3095  typeorder[kSMALLINT] = 3;
3096  typeorder[kINT] = 4;
3097  typeorder[kBIGINT] = 5;
3098  typeorder[kFLOAT] = 6;
3099  typeorder[kDOUBLE] = 7;
3100  typeorder[kTIMESTAMP] = 8;
3101  typeorder[kTIME] = 9;
3102  typeorder[kDATE] = 10;
3103  typeorder[kPOINT] = 11;
3104  typeorder[kLINESTRING] = 11;
3105  typeorder[kPOLYGON] = 11;
3106  typeorder[kMULTIPOLYGON] = 11;
3107  typeorder[kTEXT] = 12;
3108 
3109  // note: b < a instead of a < b because the map is ordered most to least restrictive
3110  return typeorder[b] < typeorder[a];
3111 }
Definition: sqltypes.h:51
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
Definition: sqltypes.h:47

+ Here is the caller graph for this function:

void import_export::Detector::read_file ( )
private

Definition at line 2926 of file Importer.cpp.

References import_export::DataStreamSink::archivePlumber().

Referenced by Detector().

2926  {
2927  // this becomes analogous to Importer::import()
2929 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::Detector::split_raw_data ( )
private

Definition at line 2940 of file Importer.cpp.

References import_export::DataStreamSink::copy_params, import_export::delimited_parser::get_row(), raw_data, raw_rows, and import_export::CopyParams::threads.

Referenced by init().

2940  {
2941  const char* buf = raw_data.c_str();
2942  const char* buf_end = buf + raw_data.size();
2943  bool try_single_thread = false;
2944  for (const char* p = buf; p < buf_end; p++) {
2945  std::vector<std::string> row;
2946  std::vector<std::unique_ptr<char[]>> tmp_buffers;
2948  p, buf_end, buf_end, copy_params, nullptr, row, tmp_buffers, try_single_thread);
2949  raw_rows.push_back(row);
2950  if (try_single_thread) {
2951  break;
2952  }
2953  }
2954  if (try_single_thread) {
2955  copy_params.threads = 1;
2956  raw_rows.clear();
2957  for (const char* p = buf; p < buf_end; p++) {
2958  std::vector<std::string> row;
2959  std::vector<std::unique_ptr<char[]>> tmp_buffers;
2961  p, buf_end, buf_end, copy_params, nullptr, row, tmp_buffers, try_single_thread);
2962  raw_rows.push_back(row);
2963  }
2964  }
2965 }
std::vector< std::vector< std::string > > raw_rows
Definition: Importer.h:669
std::string raw_data
Definition: Importer.h:701
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread)
Parses the first row in the given buffer and inserts fields into given vector.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

std::vector<EncodingType> import_export::Detector::best_encodings

Definition at line 672 of file Importer.h.

Referenced by DBHandler::detect_column_types(), and find_best_sqltypes_and_headers().

std::vector<SQLTypes> import_export::Detector::best_sqltypes
boost::filesystem::path import_export::Detector::file_path
private

Definition at line 702 of file Importer.h.

Referenced by detect_row_delimiter(), find_best_encodings(), and find_best_sqltypes().

bool import_export::Detector::has_headers = false
std::string import_export::Detector::line1
private

Definition at line 704 of file Importer.h.

Referenced by importDelimited().

std::string import_export::Detector::raw_data
private

Definition at line 701 of file Importer.h.

Referenced by importDelimited(), and split_raw_data().

std::vector<std::vector<std::string> > import_export::Detector::raw_rows
std::chrono::duration<double> import_export::Detector::timeout {1}
private

Definition at line 703 of file Importer.h.

Referenced by find_best_sqltypes(), and importDelimited().


The documentation for this class was generated from the following files: