OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::Csv Namespace Reference

Namespaces

 anonymous_namespace{CsvShared.cpp}
 

Functions

bool validate_and_get_is_s3_select (const ForeignTable *foreign_table)
 
void validate_options (const ForeignTable *foreign_table)
 
import_export::CopyParams validate_and_get_copy_params (const ForeignTable *foreign_table)
 
void init_chunk_for_column (const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
 
std::shared_ptr< ChunkMetadata > get_placeholder_metadata (const ColumnDescriptor *column, size_t num_elements)
 

Function Documentation

std::shared_ptr<ChunkMetadata> foreign_storage::Csv::get_placeholder_metadata ( const ColumnDescriptor *  column,
size_t  num_elements 
)

Definition at line 254 of file CsvShared.cpp.

References ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().

255  {
256  ForeignStorageBuffer empty_buffer;
257  // Use default encoder metadata as in parquet wrapper
258  empty_buffer.initEncoder(column->columnType);
259  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
260  chunk_metadata->numElements = num_elements;
261 
262  if (!column->columnType.is_varlen_indeed()) {
263  chunk_metadata->numBytes = column->columnType.get_size() * num_elements;
264  }
265  // min/max not set by default for arrays, so get from elem type encoder
266  if (column->columnType.is_array()) {
267  ForeignStorageBuffer scalar_buffer;
268  scalar_buffer.initEncoder(column->columnType.get_elem_type());
269  auto scalar_metadata =
270  scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
271  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
272  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
273  }
274  chunk_metadata->chunkStats.has_nulls = true;
275  return chunk_metadata;
276 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:540
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
bool is_array() const
Definition: sqltypes.h:517

+ Here is the call graph for this function:

void foreign_storage::Csv::init_chunk_for_column ( const ChunkKey &  chunk_key,
const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &  chunk_metadata_map,
const std::map< ChunkKey, AbstractBuffer * > &  buffers,
Chunk_NS::Chunk &  chunk 
)

Definition at line 201 of file CsvShared.cpp.

References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::initEncoder(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Chunk_NS::Chunk::setColumnDesc(), Chunk_NS::Chunk::setIndexBuffer(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

205  {
206  auto catalog =
207  Catalog_Namespace::SysCatalog::instance().getCatalog(chunk_key[CHUNK_KEY_DB_IDX]);
208  CHECK(catalog);
209 
210  ChunkKey data_chunk_key = chunk_key;
211  AbstractBuffer* data_buffer = nullptr;
212  AbstractBuffer* index_buffer = nullptr;
213  const auto column = catalog->getMetadataForColumnUnlocked(
214  chunk_key[CHUNK_KEY_TABLE_IDX], chunk_key[CHUNK_KEY_COLUMN_IDX]);
215 
216  if (column->columnType.is_varlen_indeed()) {
217  data_chunk_key.push_back(1);
218  ChunkKey index_chunk_key = chunk_key;
219  index_chunk_key.push_back(2);
220 
221  CHECK(buffers.find(data_chunk_key) != buffers.end());
222  CHECK(buffers.find(index_chunk_key) != buffers.end());
223 
224  data_buffer = buffers.find(data_chunk_key)->second;
225  index_buffer = buffers.find(index_chunk_key)->second;
226  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
227  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
228 
229  size_t index_offset_size{0};
230  if (column->columnType.is_string() || column->columnType.is_geometry()) {
231  index_offset_size = sizeof(StringOffsetT);
232  } else if (column->columnType.is_array()) {
233  index_offset_size = sizeof(ArrayOffsetT);
234  } else {
235  UNREACHABLE();
236  }
237  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
238  index_buffer->reserve(index_offset_size *
239  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
240  } else {
241  data_chunk_key = chunk_key;
242  CHECK(buffers.find(data_chunk_key) != buffers.end());
243  data_buffer = buffers.find(data_chunk_key)->second;
244  }
245  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
246  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
247 
248  chunk.setColumnDesc(column);
249  chunk.setBuffer(data_buffer);
250  chunk.setIndexBuffer(index_buffer);
251  chunk.initEncoder();
252 }
#define CHECK_EQ(x, y)
Definition: Logger.h:217
std::vector< int > ChunkKey
Definition: types.h:37
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:113
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
#define UNREACHABLE()
Definition: Logger.h:253
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:111
int32_t StringOffsetT
Definition: sqltypes.h:1075
static SysCatalog & instance()
Definition: SysCatalog.h:325
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
An AbstractBuffer is a unit of data management for a data manager.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:1076
void initEncoder()
Definition: Chunk.cpp:225
#define CHECK(condition)
Definition: Logger.h:209
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:56
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

import_export::CopyParams foreign_storage::Csv::validate_and_get_copy_params ( const ForeignTable *  foreign_table)

Definition at line 146 of file CsvShared.cpp.

References import_export::HAS_HEADER, import_export::NO_HEADER, foreign_storage::OptionsContainer::options, import_export::CopyParams::plain_text, foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_and_get_bool_value(), foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_and_get_delimiter(), and foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_and_get_string_with_length().

Referenced by validate_options().

147  {
148  import_export::CopyParams copy_params{};
149  copy_params.plain_text = true;
150  if (const auto& value =
151  validate_and_get_string_with_length(foreign_table, "ARRAY_DELIMITER", 1);
152  !value.empty()) {
153  copy_params.array_delim = value[0];
154  }
155  if (const auto& value =
156  validate_and_get_string_with_length(foreign_table, "ARRAY_MARKER", 2);
157  !value.empty()) {
158  copy_params.array_begin = value[0];
159  copy_params.array_end = value[1];
160  }
161  if (auto it = foreign_table->options.find("BUFFER_SIZE");
162  it != foreign_table->options.end()) {
163  copy_params.buffer_size = std::stoi(it->second);
164  }
165  if (const auto& value = validate_and_get_delimiter(foreign_table, "DELIMITER");
166  !value.empty()) {
167  copy_params.delimiter = value[0];
168  }
169  if (const auto& value = validate_and_get_string_with_length(foreign_table, "ESCAPE", 1);
170  !value.empty()) {
171  copy_params.escape = value[0];
172  }
173  auto has_header = validate_and_get_bool_value(foreign_table, "HEADER");
174  if (has_header.has_value()) {
175  if (has_header.value()) {
176  copy_params.has_header = import_export::ImportHeaderRow::HAS_HEADER;
177  } else {
178  copy_params.has_header = import_export::ImportHeaderRow::NO_HEADER;
179  }
180  }
181  if (const auto& value = validate_and_get_delimiter(foreign_table, "LINE_DELIMITER");
182  !value.empty()) {
183  copy_params.line_delim = value[0];
184  }
185  copy_params.lonlat =
186  validate_and_get_bool_value(foreign_table, "LONLAT").value_or(copy_params.lonlat);
187 
188  if (auto it = foreign_table->options.find("NULLS");
189  it != foreign_table->options.end()) {
190  copy_params.null_str = it->second;
191  }
192  if (const auto& value = validate_and_get_string_with_length(foreign_table, "QUOTE", 1);
193  !value.empty()) {
194  copy_params.quote = value[0];
195  }
196  copy_params.quoted =
197  validate_and_get_bool_value(foreign_table, "QUOTED").value_or(copy_params.quoted);
198  return copy_params;
199 }
std::string validate_and_get_string_with_length(const ForeignTable *foreign_table, const std::string &option_name, const size_t expected_num_chars)
std::optional< bool > validate_and_get_bool_value(const ForeignTable *foreign_table, const std::string &option_name)
std::string validate_and_get_delimiter(const ForeignTable *foreign_table, const std::string &option_name)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::Csv::validate_and_get_is_s3_select ( const ForeignTable *  foreign_table)

Definition at line 113 of file CsvShared.cpp.

References foreign_storage::ForeignTable::foreign_server, foreign_storage::OptionsContainer::options, foreign_storage::AbstractFileStorageDataWrapper::S3_STORAGE_TYPE, and foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY.

Referenced by validate_options().

113  {
114  static constexpr const char* S3_DIRECT = "S3_DIRECT";
115  static constexpr const char* S3_SELECT = "S3_SELECT";
116  static constexpr const char* S3_ACCESS_TYPE = "S3_ACCESS_TYPE";
117  auto access_type = foreign_table->options.find(S3_ACCESS_TYPE);
118 
119  if (access_type != foreign_table->options.end()) {
120  auto& server_options = foreign_table->foreign_server->options;
121  if (server_options.find(AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY)->second !=
122  AbstractFileStorageDataWrapper::S3_STORAGE_TYPE) {
123  throw std::runtime_error{
124  "The \"" + std::string{S3_ACCESS_TYPE} +
125  "\" option is only valid for foreign tables using servers with \"" +
126  AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY + "\" option value of \"" +
127  AbstractFileStorageDataWrapper::S3_STORAGE_TYPE + "\"."};
128  }
129  if (access_type->second != S3_DIRECT && access_type->second != S3_SELECT) {
130  throw std::runtime_error{
131  "Invalid value provided for the \"" + std::string{S3_ACCESS_TYPE} +
132  "\" option. Value must be one of the following: " + S3_DIRECT + ", " +
133  S3_SELECT + "."};
134  }
135  return (access_type->second == S3_SELECT);
136  } else {
137  return false;
138  }
139 }

+ Here is the caller graph for this function:

void foreign_storage::Csv::validate_options ( const ForeignTable *  foreign_table)

Definition at line 141 of file CsvShared.cpp.

References validate_and_get_copy_params(), and validate_and_get_is_s3_select().

141  {
142  validate_and_get_copy_params(foreign_table);
143  validate_and_get_is_s3_select(foreign_table);
144 }
import_export::CopyParams validate_and_get_copy_params(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:146
bool validate_and_get_is_s3_select(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:113

+ Here is the call graph for this function: