OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
foreign_storage::Csv Namespace Reference

Namespaces

 anonymous_namespace{CsvShared.cpp}
 

Functions

bool validate_and_get_is_s3_select (const ForeignTable *foreign_table)
 
void validate_options (const ForeignTable *foreign_table)
 
import_export::CopyParams validate_and_get_copy_params (const ForeignTable *foreign_table)
 
Chunk_NS::Chunk make_chunk_for_column (const ChunkKey &chunk_key, std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers)
 
std::shared_ptr< ChunkMetadata > get_placeholder_metadata (const ColumnDescriptor *column, size_t num_elements)
 

Function Documentation

std::shared_ptr< ChunkMetadata > foreign_storage::Csv::get_placeholder_metadata ( const ColumnDescriptor *  column,
size_t  num_elements 
)

Definition at line 235 of file CsvShared.cpp.

References ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().

Referenced by foreign_storage::anonymous_namespace{CsvDataWrapper.cpp}::add_placeholder_metadata().

236  {
237  ForeignStorageBuffer empty_buffer;
238  // Use default encoder metadata as in parquet wrapper
239  empty_buffer.initEncoder(column->columnType);
240  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
241  chunk_metadata->numElements = num_elements;
242 
243  if (!column->columnType.is_varlen_indeed()) {
244  chunk_metadata->numBytes = column->columnType.get_size() * num_elements;
245  }
246  // min/max not set by default for arrays, so get from elem type encoder
247  if (column->columnType.is_array()) {
248  ForeignStorageBuffer scalar_buffer;
249  scalar_buffer.initEncoder(column->columnType.get_elem_type());
250  auto scalar_metadata =
251  scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
252  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
253  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
254  }
255  chunk_metadata->chunkStats.has_nulls = true;
256  return chunk_metadata;
257 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
void initEncoder(const SQLTypeInfo &tmp_sql_type)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:520
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:713
bool is_array() const
Definition: sqltypes.h:497

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Chunk_NS::Chunk foreign_storage::Csv::make_chunk_for_column ( const ChunkKey &  chunk_key,
std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &  chunk_metadata_map,
const std::map< ChunkKey, AbstractBuffer * > &  buffers 
)

Definition at line 183 of file CsvShared.cpp.

References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

Referenced by foreign_storage::CsvDataWrapper::populateChunkMapForColumns().

186  {
187  auto catalog =
188  Catalog_Namespace::SysCatalog::instance().getCatalog(chunk_key[CHUNK_KEY_DB_IDX]);
189  CHECK(catalog);
190 
191  ChunkKey data_chunk_key = chunk_key;
192  AbstractBuffer* data_buffer = nullptr;
193  AbstractBuffer* index_buffer = nullptr;
194  const auto column = catalog->getMetadataForColumnUnlocked(
195  chunk_key[CHUNK_KEY_TABLE_IDX], chunk_key[CHUNK_KEY_COLUMN_IDX]);
196 
197  if (column->columnType.is_varlen_indeed()) {
198  data_chunk_key.push_back(1);
199  ChunkKey index_chunk_key = chunk_key;
200  index_chunk_key.push_back(2);
201 
202  CHECK(buffers.find(data_chunk_key) != buffers.end());
203  CHECK(buffers.find(index_chunk_key) != buffers.end());
204 
205  data_buffer = buffers.find(data_chunk_key)->second;
206  index_buffer = buffers.find(index_chunk_key)->second;
207  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
208  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
209 
210  size_t index_offset_size{0};
211  if (column->columnType.is_string() || column->columnType.is_geometry()) {
212  index_offset_size = sizeof(StringOffsetT);
213  } else if (column->columnType.is_array()) {
214  index_offset_size = sizeof(ArrayOffsetT);
215  } else {
216  UNREACHABLE();
217  }
218  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
219  index_buffer->reserve(index_offset_size *
220  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
221  } else {
222  data_chunk_key = chunk_key;
223  CHECK(buffers.find(data_chunk_key) != buffers.end());
224  data_buffer = buffers.find(data_chunk_key)->second;
225  }
226  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
227 
228  auto retval = Chunk_NS::Chunk{column};
229  retval.setBuffer(data_buffer);
230  retval.setIndexBuffer(index_buffer);
231  retval.initEncoder();
232  return retval;
233 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::vector< int > ChunkKey
Definition: types.h:37
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
#define UNREACHABLE()
Definition: Logger.h:247
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:109
int32_t StringOffsetT
Definition: sqltypes.h:937
static SysCatalog & instance()
Definition: SysCatalog.h:292
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
An AbstractBuffer is a unit of data management for a data manager.
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:938
#define CHECK(condition)
Definition: Logger.h:203
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

import_export::CopyParams foreign_storage::Csv::validate_and_get_copy_params ( const ForeignTable *  foreign_table)

Definition at line 126 of file CsvShared.cpp.

References import_export::HAS_HEADER, import_export::NO_HEADER, foreign_storage::OptionsContainer::options, import_export::CopyParams::plain_text, foreign_storage::Csv::anonymous_namespace{CsvShared.cpp}::validate_and_get_bool_value(), and foreign_storage::Csv::anonymous_namespace{CsvShared.cpp}::validate_and_get_string_with_length().

Referenced by foreign_storage::CsvDataWrapper::populateChunkMetadata(), foreign_storage::CsvDataWrapper::populateChunks(), foreign_storage::CsvDataWrapper::restoreDataWrapperInternals(), and validate_options().

127  {
128  import_export::CopyParams copy_params{};
129  copy_params.plain_text = true;
130  if (const auto& value =
131  validate_and_get_string_with_length(foreign_table, "ARRAY_DELIMITER", 1);
132  !value.empty()) {
133  copy_params.array_delim = value[0];
134  }
135  if (const auto& value =
136  validate_and_get_string_with_length(foreign_table, "ARRAY_MARKER", 2);
137  !value.empty()) {
138  copy_params.array_begin = value[0];
139  copy_params.array_end = value[1];
140  }
141  if (auto it = foreign_table->options.find("BUFFER_SIZE");
142  it != foreign_table->options.end()) {
143  copy_params.buffer_size = std::stoi(it->second);
144  }
145  if (const auto& value =
146  validate_and_get_string_with_length(foreign_table, "DELIMITER", 1);
147  !value.empty()) {
148  copy_params.delimiter = value[0];
149  }
150  if (const auto& value = validate_and_get_string_with_length(foreign_table, "ESCAPE", 1);
151  !value.empty()) {
152  copy_params.escape = value[0];
153  }
154  auto has_header = validate_and_get_bool_value(foreign_table, "HEADER");
155  if (has_header.has_value()) {
156  if (has_header.value()) {
157  copy_params.has_header = import_export::ImportHeaderRow::HAS_HEADER;
158  } else {
159  copy_params.has_header = import_export::ImportHeaderRow::NO_HEADER;
160  }
161  }
162  if (const auto& value =
163  validate_and_get_string_with_length(foreign_table, "LINE_DELIMITER", 1);
164  !value.empty()) {
165  copy_params.line_delim = value[0];
166  }
167  copy_params.lonlat =
168  validate_and_get_bool_value(foreign_table, "LONLAT").value_or(copy_params.lonlat);
169 
170  if (auto it = foreign_table->options.find("NULLS");
171  it != foreign_table->options.end()) {
172  copy_params.null_str = it->second;
173  }
174  if (const auto& value = validate_and_get_string_with_length(foreign_table, "QUOTE", 1);
175  !value.empty()) {
176  copy_params.quote = value[0];
177  }
178  copy_params.quoted =
179  validate_and_get_bool_value(foreign_table, "QUOTED").value_or(copy_params.quoted);
180  return copy_params;
181 }
std::optional< bool > validate_and_get_bool_value(const ForeignTable *foreign_table, const std::string &option_name)
Definition: CsvShared.cpp:75
std::string validate_and_get_string_with_length(const ForeignTable *foreign_table, const std::string &option_name, const size_t expected_num_chars)
Definition: CsvShared.cpp:59

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::Csv::validate_and_get_is_s3_select ( const ForeignTable *  foreign_table)

Definition at line 93 of file CsvShared.cpp.

References foreign_storage::ForeignTable::foreign_server, foreign_storage::OptionsContainer::options, foreign_storage::AbstractFileStorageDataWrapper::S3_STORAGE_TYPE, and foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY.

Referenced by foreign_storage::ForeignDataWrapperFactory::create(), foreign_storage::ForeignDataWrapperFactory::createForValidation(), and validate_options().

93  {
94  static constexpr const char* S3_DIRECT = "S3_DIRECT";
95  static constexpr const char* S3_SELECT = "S3_SELECT";
96  static constexpr const char* S3_ACCESS_TYPE = "S3_ACCESS_TYPE";
97  auto access_type = foreign_table->options.find(S3_ACCESS_TYPE);
98 
99  if (access_type != foreign_table->options.end()) {
100  auto& server_options = foreign_table->foreign_server->options;
101  if (server_options.find(AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY)->second !=
102  AbstractFileStorageDataWrapper::S3_STORAGE_TYPE) {
103  throw std::runtime_error{
104  "The \"" + std::string{S3_ACCESS_TYPE} +
105  "\" option is only valid for foreign tables using servers with \"" +
106  AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY + "\" option value of \"" +
107  AbstractFileStorageDataWrapper::S3_STORAGE_TYPE + "\"."};
108  }
109  if (access_type->second != S3_DIRECT && access_type->second != S3_SELECT) {
110  throw std::runtime_error{
111  "Invalid value provided for the \"" + std::string{S3_ACCESS_TYPE} +
112  "\" option. Value must be one of the following: " + S3_DIRECT + ", " +
113  S3_SELECT + "."};
114  }
115  return (access_type->second == S3_SELECT);
116  } else {
117  return false;
118  }
119 }

+ Here is the caller graph for this function:

void foreign_storage::Csv::validate_options ( const ForeignTable *  foreign_table)

Definition at line 121 of file CsvShared.cpp.

References validate_and_get_copy_params(), and validate_and_get_is_s3_select().

Referenced by foreign_storage::CsvDataWrapper::validateTableOptions().

121  {
122  validate_and_get_copy_params(foreign_table);
123  validate_and_get_is_s3_select(foreign_table);
124 }
import_export::CopyParams validate_and_get_copy_params(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:126
bool validate_and_get_is_s3_select(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:93

+ Here is the call graph for this function:

+ Here is the caller graph for this function: