OmniSciDB  eb3a3d0a03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryExporterCSV.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <boost/variant/get.hpp>
20 
21 #include "QueryEngine/ResultSet.h"
22 #include "Shared/misc.h"
23 
24 namespace import_export {
25 
27 
29 
// Prepares the CSV/TSV output file for an export.
//
// Visible behavior:
//  - passes file_path to validateFileExtensions() with the allowed
//    extensions ".csv" and ".tsv";
//  - throws std::runtime_error when file_compression is anything other than
//    FileCompression::kNone (post-export compression is not implemented);
//  - opens outfile_ on the path and throws std::runtime_error if the stream
//    is not usable afterwards;
//  - writes the column names, separated by copy_params.delimiter and
//    terminated by copy_params.line_delim;
//  - stores copy_params in copy_params_.
//
// layer_name and array_null_handling are not referenced in the visible body.
//
// NOTE(review): listing line 52 is missing from this copy. The closing brace
// after the line_delim write has no visible opener, so the header block is
// presumably guarded by a condition on copy_params.has_header -- confirm
// against the original file before relying on "always writes a header".
30 void QueryExporterCSV::beginExport(const std::string& file_path,
31  const std::string& layer_name,
32  const CopyParams& copy_params,
33  const std::vector<TargetMetaInfo>& column_infos,
34  const FileCompression file_compression,
35  const ArrayNullHandling array_null_handling) {
36  validateFileExtensions(file_path, "CSV", {".csv", ".tsv"});
37 
38  // compression?
  // actual_file_path is a plain copy of file_path; no compression path rewrites it yet
39  auto actual_file_path{file_path};
40  if (file_compression != FileCompression::kNone) {
41  // @TODO(se) implement post-export compression
42  throw std::runtime_error("Compression not yet supported for this file type");
43  }
44 
45  // open file
46  outfile_.open(actual_file_path);
47  if (!outfile_) {
48  throw std::runtime_error("Failed to create file '" + actual_file_path + "'");
49  }
50 
51  // write header?
  // NOTE(review): the statement that opens this block (listing line 52) is missing here
  // not_first suppresses the delimiter before the first column name only
53  bool not_first{false};
54  int column_index = 0;
55  for (auto const& column_info : column_infos) {
56  // get name or default
  // safeColumnName() receives the result name plus a 1-based column position
57  auto column_name = safeColumnName(column_info.get_resname(), column_index + 1);
58  // output to header line
59  if (not_first) {
60  outfile_ << copy_params.delimiter;
61  } else {
62  not_first = true;
63  }
64  outfile_ << column_name;
65  column_index++;
66  }
67  outfile_ << copy_params.line_delim;
68  }
69 
70  // keep these
71  copy_params_ = copy_params;
72 }
73 
74 namespace {
75 
76 std::string nullable_str_to_string(const NullableString& str) {
77  auto nptr = boost::get<void*>(&str);
78  if (nptr) {
79  CHECK(!*nptr);
80  return "NULL";
81  }
82  auto sptr = boost::get<std::string>(&str);
83  CHECK(sptr);
84  return *sptr;
85 }
86 
87 std::string target_value_to_string(const TargetValue& tv,
88  const SQLTypeInfo& ti,
89  const std::string& delim) {
90  if (ti.is_array()) {
91  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
92  CHECK(array_tv);
93  if (array_tv->is_initialized()) {
94  const auto& vec = array_tv->get();
95  std::vector<std::string> elem_strs;
96  elem_strs.reserve(vec.size());
97  const auto& elem_ti = ti.get_elem_type();
98  for (const auto& elem_tv : vec) {
99  elem_strs.push_back(target_value_to_string(elem_tv, elem_ti, delim));
100  }
101  return "{" + boost::algorithm::join(elem_strs, delim) + "}";
102  }
103  return "NULL";
104  }
105  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
106  if (ti.is_time() || ti.is_decimal()) {
107  Datum datum;
108  datum.bigintval = *boost::get<int64_t>(scalar_tv);
109  if (datum.bigintval == NULL_BIGINT) {
110  return "NULL";
111  }
112  return DatumToString(datum, ti);
113  }
114  if (ti.is_boolean()) {
115  const auto bool_val = *boost::get<int64_t>(scalar_tv);
116  return bool_val == NULL_BOOLEAN ? "NULL" : (bool_val ? "true" : "false");
117  }
118  auto iptr = boost::get<int64_t>(scalar_tv);
119  if (iptr) {
120  return *iptr == inline_int_null_val(ti) ? "NULL" : std::to_string(*iptr);
121  }
122  auto fptr = boost::get<float>(scalar_tv);
123  if (fptr) {
124  return *fptr == inline_fp_null_val(ti) ? "NULL" : std::to_string(*fptr);
125  }
126  auto dptr = boost::get<double>(scalar_tv);
127  if (dptr) {
128  return *dptr == inline_fp_null_val(ti.is_decimal() ? SQLTypeInfo(kDOUBLE, false) : ti)
129  ? "NULL"
130  : std::to_string(*dptr);
131  }
132  auto sptr = boost::get<NullableString>(scalar_tv);
133  CHECK(sptr);
134  return nullable_str_to_string(*sptr);
135 }
136 
137 } // namespace
138 
// Writes every row of every aggregated result to outfile_.
//
// For each AggregatedResult, rows are pulled with getNextRow(true, true)
// until an empty row is returned. Each column value is then written according
// to the alternative it holds:
//  - non-scalar values are rendered by target_value_to_string() with " | "
//    as the element separator;
//  - int64_t values are compared with the null sentinel of the column type;
//    kTIME values are formatted through shared::formatHMS(), everything else
//    is streamed as an integer;
//  - double and float values are streamed with an explicit precision;
//  - string values are streamed as-is, or with copy_params_.escape inserted
//    before every copy_params_.quote character when copy_params_.quoted is set.
//
// NOTE(review): this copy of the listing is missing several statements
// (listing lines 155, 160, 166, 197, 215, 226, 235, 257 and 260). The empty
// branches below presumably write the delimiter, the quote character, the
// null marker and the line terminator -- confirm against the original file.
139 void QueryExporterCSV::exportResults(const std::vector<AggregatedResult>& query_results) {
140  for (auto& agg_result : query_results) {
141  auto results = agg_result.rs;
142  auto const& targets = agg_result.targets_meta;
143 
144  while (true) {
145  auto const crt_row = results->getNextRow(true, true);
  // an empty row marks the end of this result set
146  if (crt_row.empty()) {
147  break;
148  }
149  bool not_first = false;
150  for (size_t i = 0; i < results->colCount(); ++i) {
151  bool is_null{false};
  // NOTE(review): tv is a by-value copy of crt_row[i]; scalar_tv points into that copy
152  auto const tv = crt_row[i];
153  auto const scalar_tv = boost::get<ScalarTargetValue>(&tv);
154  if (not_first) {
  // NOTE(review): listing line 155 missing; presumably writes the field delimiter
156  } else {
157  not_first = true;
158  }
159  if (copy_params_.quoted) {
  // NOTE(review): listing line 160 missing; presumably writes the opening quote
161  }
162  auto const& ti = targets[i].get_type_info();
  // anything that is not a scalar is delegated to the generic string renderer
163  if (!scalar_tv) {
164  outfile_ << target_value_to_string(crt_row[i], ti, " | ");
165  if (copy_params_.quoted) {
  // NOTE(review): listing line 166 missing; presumably writes the closing quote
167  }
168  continue;
169  }
170  if (boost::get<int64_t>(scalar_tv)) {
171  auto int_val = *(boost::get<int64_t>(scalar_tv));
  // choose the null sentinel that matches the column's SQL type
172  switch (ti.get_type()) {
173  case kBOOLEAN:
174  is_null = (int_val == NULL_BOOLEAN);
175  break;
176  case kTINYINT:
177  is_null = (int_val == NULL_TINYINT);
178  break;
179  case kSMALLINT:
180  is_null = (int_val == NULL_SMALLINT);
181  break;
182  case kINT:
183  is_null = (int_val == NULL_INT);
184  break;
185  case kBIGINT:
186  is_null = (int_val == NULL_BIGINT);
187  break;
188  case kTIME:
189  case kTIMESTAMP:
190  case kDATE:
191  is_null = (int_val == NULL_BIGINT);
192  break;
193  default:
194  is_null = false;
195  }
196  if (is_null) {
  // NOTE(review): listing line 197 missing; presumably writes the null marker
198  } else if (ti.get_type() == kTIME) {
  // 9 bytes: the 8 characters of "HH:MM:SS" plus a terminator
199  constexpr size_t buf_size = 9;
200  char buf[buf_size];
201  size_t const len = shared::formatHMS(buf, buf_size, int_val);
202  CHECK_EQ(8u, len); // 8 == strlen("HH:MM:SS")
203  outfile_ << buf;
204  } else {
205  outfile_ << int_val;
206  }
207  } else if (boost::get<double>(scalar_tv)) {
208  auto real_val = *(boost::get<double>(scalar_tv));
  // a kFLOAT column carried as a double is compared against the float sentinel
209  if (ti.get_type() == kFLOAT) {
210  is_null = (real_val == NULL_FLOAT);
211  } else {
212  is_null = (real_val == NULL_DOUBLE);
213  }
214  if (is_null) {
  // NOTE(review): listing line 215 missing; presumably writes the null marker
216  } else if (ti.get_type() == kNUMERIC) {
  // the precision set here stays on outfile_ until the next setprecision call
217  outfile_ << std::setprecision(ti.get_precision()) << real_val;
218  } else {
219  outfile_ << std::setprecision(std::numeric_limits<double>::digits10 + 1)
220  << real_val;
221  }
222  } else if (boost::get<float>(scalar_tv)) {
223  CHECK_EQ(kFLOAT, ti.get_type());
224  auto real_val = *(boost::get<float>(scalar_tv));
225  if (real_val == NULL_FLOAT) {
  // NOTE(review): listing line 226 missing; presumably writes the null marker
227  } else {
228  outfile_ << std::setprecision(std::numeric_limits<float>::digits10 + 1)
229  << real_val;
230  }
231  } else {
232  auto s = boost::get<NullableString>(scalar_tv);
  // null when there is no string alternative at all, or it holds the void* marker
233  is_null = !s || boost::get<void*>(s);
234  if (is_null) {
  // NOTE(review): listing line 235 missing; presumably writes the null marker
236  } else {
237  auto s_notnull = boost::get<std::string>(s);
238  CHECK(s_notnull);
239  if (!copy_params_.quoted) {
240  outfile_ << *s_notnull;
241  } else {
  // only copy the string when it actually contains a quote character
242  size_t q = s_notnull->find(copy_params_.quote);
243  if (q == std::string::npos) {
244  outfile_ << *s_notnull;
245  } else {
246  std::string str(*s_notnull);
247  while (q != std::string::npos) {
248  str.insert(q, 1, copy_params_.escape);
  // q + 2 steps over both the inserted escape and the quote it protects
249  q = str.find(copy_params_.quote, q + 2);
250  }
251  outfile_ << str;
252  }
253  }
254  }
255  }
256  if (copy_params_.quoted) {
  // NOTE(review): listing line 257 missing; presumably writes the closing quote
258  }
259  }
  // NOTE(review): listing line 260 missing; presumably writes the line terminator
261  }
262  }
263 }
264 
// NOTE(review): the signature line of this function (listing line 265) is
// missing from this copy; the body below only closes outfile_. Presumably this
// is the exporter's end-of-export hook -- confirm against the original file.
266  // just close the file
267  outfile_.close();
268 }
269 
270 } // namespace import_export
#define CHECK_EQ(x, y)
Definition: Logger.h:217
#define NULL_DOUBLE
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:388
Definition: sqltypes.h:49
#define NULL_FLOAT
#define NULL_BIGINT
std::string safeColumnName(const std::string &resname, const int column_index)
std::string join(T const &container, std::string const &delim)
std::string target_value_to_string(const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:510
void exportResults(const std::vector< AggregatedResult > &query_results) final
std::string to_string(char const *&&v)
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:80
#define NULL_INT
std::string nullable_str_to_string(const NullableString &str)
ImportHeaderRow has_header
Definition: CopyParams.h:51
CONSTEXPR DEVICE bool is_null(const T &value)
int64_t bigintval
Definition: sqltypes.h:215
bool is_boolean() const
Definition: sqltypes.h:511
#define NULL_BOOLEAN
Definition: sqltypes.h:53
boost::variant< std::string, void * > NullableString
Definition: TargetValue.h:155
void beginExport(const std::string &file_path, const std::string &layer_name, const CopyParams &copy_params, const std::vector< TargetMetaInfo > &column_infos, const FileCompression file_compression, const ArrayNullHandling array_null_handling) final
#define NULL_TINYINT
#define CHECK(condition)
Definition: Logger.h:209
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
#define NULL_SMALLINT
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
Definition: sqltypes.h:45
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:732
bool is_decimal() const
Definition: sqltypes.h:507
void validateFileExtensions(const std::string &file_path, const std::string &file_type, const std::unordered_set< std::string > &valid_extensions) const
bool is_array() const
Definition: sqltypes.h:512