OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RowToColumnLoader.cpp File Reference

Based on the StreamInsert code, but uses a binary columnar format to insert a stream of rows, with optional transformations, from stdin into a MapD table. More...

#include "ImportExport/RowToColumnLoader.h"
#include "ImportExport/DelimitedParserUtils.h"
#include "Logger/Logger.h"
#include <chrono>
#include <thread>
+ Include dependency graph for RowToColumnLoader.cpp:

Go to the source code of this file.

Functions

SQLTypes get_sql_types (const TColumnType &ct)
 
SQLTypeInfo create_sql_type_info_from_col_type (const TColumnType &ct)
 
SQLTypeInfo create_array_sql_type_info_from_col_type (const TColumnType &ct)
 
void remove_partial_row (size_t failed_column, std::vector< SQLTypeInfo > column_type_info_vector, std::vector< TColumn > &input_col_vec)
 
void populate_TColumn (TStringValue ts, SQLTypeInfo column_type_info, TColumn &input_col, const import_export::CopyParams &copy_params)
 

Detailed Description

Based on the StreamInsert code, but uses a binary columnar format to insert a stream of rows, with optional transformations, from stdin into a MapD table.

Author
Michael <michael@mapd.com>. Copyright (c) 2017 MapD Technologies, Inc. All rights reserved.

Definition in file RowToColumnLoader.cpp.

Function Documentation

SQLTypeInfo create_array_sql_type_info_from_col_type ( const TColumnType &  ct)

Definition at line 106 of file RowToColumnLoader.cpp.

References get_sql_types(), kENCODING_NONE, and kNULLT.

Referenced by RowToColumnLoader::RowToColumnLoader().

106  {
107  return SQLTypeInfo(get_sql_types(ct),
108  ct.col_type.precision,
109  ct.col_type.scale,
110  ct.col_type.nullable,
112  0,
114 }
SQLTypes get_sql_types(const TColumnType &ct)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypeInfo create_sql_type_info_from_col_type ( const TColumnType &  ct)

Definition at line 80 of file RowToColumnLoader.cpp.

References get_sql_types(), kARRAY, kENCODING_NONE, and kNULLT.

Referenced by RowToColumnLoader::RowToColumnLoader().

80  {
81  if (ct.col_type.is_array) {
83  ct.col_type.precision,
84  ct.col_type.scale,
85  ct.col_type.nullable,
87  0,
88  get_sql_types(ct));
89  } else {
90  // normal column
91  // NOTE(se)
92  // for geo types, the values inserted for the other fields
93  // may not be valid, but only the type field is ever used
94  return SQLTypeInfo(get_sql_types(ct),
95  ct.col_type.precision,
96  ct.col_type.scale,
97  ct.col_type.nullable,
99  0,
101  }
102 }
SQLTypes get_sql_types(const TColumnType &ct)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

SQLTypes get_sql_types ( const TColumnType &  ct)

Definition at line 35 of file RowToColumnLoader.cpp.

References BIGINT, DATE, DECIMAL, DOUBLE, logger::FATAL, FLOAT, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNULLT, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, LINESTRING, LOG, MULTIPOLYGON, POINT, POLYGON, SMALLINT, TIME, TIMESTAMP, and TINYINT.

Referenced by create_array_sql_type_info_from_col_type(), and create_sql_type_info_from_col_type().

35  {
36  switch (ct.col_type.type) {
37  case TDatumType::BIGINT:
38  return SQLTypes::kBIGINT;
39  case TDatumType::BOOL:
40  return SQLTypes::kBOOLEAN;
41  case TDatumType::DATE:
42  return SQLTypes::kDATE;
44  return SQLTypes::kDECIMAL;
45  case TDatumType::DOUBLE:
46  return SQLTypes::kDOUBLE;
47  case TDatumType::FLOAT:
48  return SQLTypes::kFLOAT;
49  case TDatumType::INT:
50  return SQLTypes::kINT;
51  case TDatumType::STR:
52  // TDatumType is lossy here, so we need to look at the precision to see if it was defined
53  if (ct.col_type.precision == 0) {
54  return SQLTypes::kTEXT;
55  } else {
56  return SQLTypes::kVARCHAR;
57  }
58  case TDatumType::TIME:
59  return SQLTypes::kTIME;
61  return SQLTypes::kTIMESTAMP;
63  return SQLTypes::kSMALLINT;
65  return SQLTypes::kTINYINT;
66  case TDatumType::POINT:
67  return SQLTypes::kPOINT;
69  return SQLTypes::kLINESTRING;
71  return SQLTypes::kPOLYGON;
74  default:
75  LOG(FATAL) << "Unsupported TColumnType found, should not be possible";
76  return SQLTypes::kNULLT; // satisfy return-type warning
77  }
78 }
#define LINESTRING
Definition: sqltypes.h:49
#define LOG(tag)
Definition: Logger.h:203
#define SMALLINT
#define DOUBLE
#define BIGINT
#define DATE
#define MULTIPOLYGON
#define POINT
#define TIME
#define TINYINT
Definition: sqltypes.h:52
Definition: sqltypes.h:53
#define TIMESTAMP
#define DECIMAL
#define POLYGON
Definition: sqltypes.h:45
#define FLOAT

+ Here is the caller graph for this function:

void populate_TColumn ( TStringValue  ts,
SQLTypeInfo  column_type_info,
TColumn &  input_col,
const import_export::CopyParams &  copy_params
)

Definition at line 180 of file RowToColumnLoader.cpp.

References Datum::bigintval, Datum::doubleval, logger::FATAL, Datum::floatval, SQLTypeInfo::get_precision(), SQLTypeInfo::get_type(), Datum::intval, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, LOG, Datum::smallintval, StringToDatum(), and Datum::tinyintval.

Referenced by RowToColumnLoader::convert_string_to_column().

183  {
184  // create datum and push data to column structure from row data
185  switch (column_type_info.get_type()) {
186  case SQLTypes::kARRAY:
187  LOG(FATAL) << "Trying to process ARRAY at item level something is wrong";
188  break;
189  case SQLTypes::kTEXT:
190  case SQLTypes::kCHAR:
191  case SQLTypes::kVARCHAR:
192  case SQLTypes::kPOINT:
194  case SQLTypes::kPOLYGON:
196  if (ts.is_null) {
197  input_col.nulls.push_back(true);
198  input_col.data.str_col.emplace_back("");
199 
200  } else {
201  input_col.nulls.push_back(false);
202  switch (column_type_info.get_type()) {
203  case SQLTypes::kCHAR:
204  case SQLTypes::kVARCHAR:
205  input_col.data.str_col.push_back(
206  ts.str_val.substr(0, column_type_info.get_precision()));
207  break;
208  case SQLTypes::kTEXT:
209  case SQLTypes::kPOINT:
211  case SQLTypes::kPOLYGON:
213  input_col.data.str_col.push_back(ts.str_val);
214  break;
215  default:
216  LOG(FATAL) << " trying to process a STRING transport type not handled "
217  << column_type_info.get_type();
218  }
219  }
220  break;
221  case SQLTypes::kINT:
222  case SQLTypes::kBIGINT:
223  case SQLTypes::kSMALLINT:
224  case SQLTypes::kTINYINT:
225  case SQLTypes::kDATE:
226  case SQLTypes::kTIME:
228  case SQLTypes::kNUMERIC:
229  case SQLTypes::kDECIMAL:
230  case SQLTypes::kBOOLEAN:
231  if (ts.is_null) {
232  input_col.nulls.push_back(true);
233  input_col.data.int_col.push_back(0);
234  } else {
235  input_col.nulls.push_back(false);
236  Datum d = StringToDatum(ts.str_val, column_type_info);
237  switch (column_type_info.get_type()) {
238  case SQLTypes::kINT:
239  case SQLTypes::kBOOLEAN:
240  input_col.data.int_col.push_back(d.intval);
241  break;
242  case SQLTypes::kBIGINT:
243  case SQLTypes::kNUMERIC:
244  case SQLTypes::kDECIMAL:
245  input_col.data.int_col.push_back(d.bigintval);
246  break;
247  case SQLTypes::kSMALLINT:
248  input_col.data.int_col.push_back(d.smallintval);
249  break;
250  case SQLTypes::kTINYINT:
251  input_col.data.int_col.push_back(d.tinyintval);
252  break;
253  case SQLTypes::kDATE:
254  case SQLTypes::kTIME:
256  input_col.data.int_col.push_back(d.bigintval);
257  break;
258  default:
259  LOG(FATAL) << " trying to process an INT transport type not handled "
260  << column_type_info.get_type();
261  }
262  }
263  break;
264  case SQLTypes::kFLOAT:
265  case SQLTypes::kDOUBLE:
266  if (ts.is_null) {
267  input_col.nulls.push_back(true);
268  input_col.data.real_col.push_back(0);
269 
270  } else {
271  input_col.nulls.push_back(false);
272  Datum d = StringToDatum(ts.str_val, column_type_info);
273  switch (column_type_info.get_type()) {
274  case SQLTypes::kFLOAT:
275  input_col.data.real_col.push_back(d.floatval);
276  break;
277  case SQLTypes::kDOUBLE:
278  input_col.data.real_col.push_back(d.doubleval);
279  break;
280  default:
281  LOG(FATAL) << " trying to process a REAL transport type not handled "
282  << column_type_info.get_type();
283  }
284  }
285  break;
286  default:
287  LOG(FATAL) << "Trying to process an unsupported datatype, should be impossible";
288  }
289 }
int8_t tinyintval
Definition: sqltypes.h:212
Definition: sqltypes.h:49
#define LOG(tag)
Definition: Logger.h:203
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
int32_t intval
Definition: sqltypes.h:214
float floatval
Definition: sqltypes.h:216
int64_t bigintval
Definition: sqltypes.h:215
int16_t smallintval
Definition: sqltypes.h:213
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
int get_precision() const
Definition: sqltypes.h:332
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Definition: sqltypes.h:41
Definition: sqltypes.h:45
double doubleval
Definition: sqltypes.h:217

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void remove_partial_row ( size_t  failed_column,
std::vector< SQLTypeInfo >  column_type_info_vector,
std::vector< TColumn > &  input_col_vec 
)

Definition at line 134 of file RowToColumnLoader.cpp.

References logger::FATAL, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and LOG.

Referenced by RowToColumnLoader::convert_string_to_column().

136  {
137  for (size_t idx = 0; idx < failed_column; idx++) {
138  switch (column_type_info_vector[idx].get_type()) {
139  case SQLTypes::kARRAY:
140  input_col_vec[idx].nulls.pop_back();
141  input_col_vec[idx].data.arr_col.pop_back();
142  break;
143  case SQLTypes::kTEXT:
144  case SQLTypes::kCHAR:
145  case SQLTypes::kVARCHAR:
146  input_col_vec[idx].nulls.pop_back();
147  input_col_vec[idx].data.str_col.pop_back();
148  break;
149  case SQLTypes::kTINYINT:
150  case SQLTypes::kINT:
151  case SQLTypes::kBIGINT:
152  case SQLTypes::kSMALLINT:
153  case SQLTypes::kDATE:
154  case SQLTypes::kTIME:
156  case SQLTypes::kNUMERIC:
157  case SQLTypes::kDECIMAL:
158  case SQLTypes::kBOOLEAN:
159  input_col_vec[idx].nulls.pop_back();
160  input_col_vec[idx].data.int_col.pop_back();
161  break;
162  case SQLTypes::kFLOAT:
163  case SQLTypes::kDOUBLE:
164  input_col_vec[idx].nulls.pop_back();
165  input_col_vec[idx].data.real_col.pop_back();
166  break;
167  case SQLTypes::kPOINT:
169  case SQLTypes::kPOLYGON:
171  input_col_vec[idx].nulls.pop_back();
172  input_col_vec[idx].data.str_col.pop_back();
173  break;
174  default:
175  LOG(FATAL) << "Trying to process an unsupported datatype, should be impossible";
176  }
177  }
178 }
Definition: sqltypes.h:49
#define LOG(tag)
Definition: Logger.h:203
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Definition: sqltypes.h:41
Definition: sqltypes.h:45

+ Here is the caller graph for this function: