OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InternalSystemDataWrapper.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include "Catalog/Catalog.h"
20 #include "Catalog/SysCatalog.h"
21 #include "ForeignTableSchema.h"
22 #include "FsiChunkUtils.h"
23 #include "ImportExport/Importer.h"
24 #include "TextFileBufferParser.h"
25 #include "UserMapping.h"
26 
27 namespace foreign_storage {
29  : db_id_(-1), foreign_table_(nullptr) {}
30 
32  const ForeignTable* foreign_table)
33  : db_id_(db_id), foreign_table_(foreign_table) {}
34 
36  const ForeignServer* foreign_server) const {
37  CHECK(foreign_server->options.empty());
38 }
39 
41  const ForeignTable* foreign_table) const {}
42 
43 const std::set<std::string_view>& InternalSystemDataWrapper::getSupportedTableOptions()
44  const {
45  static const std::set<std::string_view> supported_table_options{};
46  return supported_table_options;
47 }
48 
50  const UserMapping* user_mapping,
51  const ForeignServer* foreign_server) const {
52  CHECK(user_mapping->options.empty());
53 }
54 
55 const std::set<std::string_view>&
57  static const std::set<std::string_view> supported_user_mapping_options{};
58  return supported_user_mapping_options;
59 }
60 
61 namespace {
62 void initialize_chunks(std::map<ChunkKey, Chunk_NS::Chunk>& chunks,
63  const ChunkToBufferMap& buffers,
64  size_t row_count,
65  std::set<const ColumnDescriptor*>& columns_to_parse,
66  int32_t fragment_id,
67  const Catalog_Namespace::Catalog& catalog) {
68  for (auto& [chunk_key, buffer] : buffers) {
69  CHECK_EQ(fragment_id, chunk_key[CHUNK_KEY_FRAGMENT_IDX]);
70  const auto column = catalog.getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
71  chunk_key[CHUNK_KEY_COLUMN_IDX]);
72  if (is_varlen_index_key(chunk_key)) {
73  continue;
74  }
75  chunks[chunk_key] = Chunk_NS::Chunk{column};
76  if (column->columnType.is_varlen_indeed()) {
77  CHECK(is_varlen_data_key(chunk_key));
78  size_t index_offset_size{0};
79  if (column->columnType.is_string()) {
80  index_offset_size = sizeof(StringOffsetT);
81  } else if (column->columnType.is_array()) {
82  index_offset_size = sizeof(ArrayOffsetT);
83  } else {
84  UNREACHABLE() << "Unexpected column type: " << column->columnType.to_string();
85  }
86  ChunkKey index_chunk_key = chunk_key;
87  index_chunk_key[CHUNK_KEY_VARLEN_IDX] = 2;
88  CHECK(buffers.find(index_chunk_key) != buffers.end());
89  AbstractBuffer* index_buffer = buffers.find(index_chunk_key)->second;
90  index_buffer->reserve(index_offset_size * row_count + 1);
91  chunks[chunk_key].setIndexBuffer(index_buffer);
92  }
93 
94  if (!column->columnType.is_varlen_indeed()) {
95  buffer->reserve(column->columnType.get_size() * row_count);
96  }
97  chunks[chunk_key].setBuffer(buffer);
98  chunks[chunk_key].initEncoder();
99  columns_to_parse.emplace(column);
100  }
101 }
102 
104  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
105  std::map<std::string, import_export::TypedImportBuffer*>& import_buffers_map,
106  const std::set<const ColumnDescriptor*>& columns_to_parse,
107  const Catalog_Namespace::Catalog& catalog) {
108  for (const auto column : columns_to_parse) {
109  StringDictionary* string_dictionary = nullptr;
110  if (column->columnType.is_dict_encoded_string() ||
111  (column->columnType.is_array() && IS_STRING(column->columnType.get_subtype()) &&
112  column->columnType.get_compression() == kENCODING_DICT)) {
113  auto dict_descriptor =
114  catalog.getMetadataForDict(column->columnType.get_comp_param(), true);
115  string_dictionary = dict_descriptor->stringDict.get();
116  }
117  import_buffers.emplace_back(
118  std::make_unique<import_export::TypedImportBuffer>(column, string_dictionary));
119  import_buffers_map[column->columnName] = import_buffers.back().get();
120  }
121 }
122 } // namespace
123 
125  ChunkMetadataVector& chunk_metadata_vector) {
126  auto& sys_catalog = Catalog_Namespace::SysCatalog::instance();
127  auto catalog = sys_catalog.getCatalog(db_id_);
128  CHECK(catalog);
129  CHECK_EQ(catalog->name(), INFORMATION_SCHEMA_DB);
130 
133  for (auto column : schema.getLogicalColumns()) {
134  ChunkKey chunk_key = {db_id_, foreign_table_->tableId, column->columnId, 0};
135  if (column->columnType.is_varlen_indeed()) {
136  chunk_key.emplace_back(1);
137  }
138  ForeignStorageBuffer empty_buffer;
139  // Use default encoder metadata
140  empty_buffer.initEncoder(column->columnType);
141  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
142  chunk_metadata->numElements = row_count_;
143  if (!column->columnType.is_varlen_indeed()) {
144  chunk_metadata->numBytes = column->columnType.get_size() * row_count_;
145  }
146  if (column->columnType.is_array()) {
147  ForeignStorageBuffer scalar_buffer;
148  scalar_buffer.initEncoder(column->columnType.get_elem_type());
149  auto scalar_metadata =
150  scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
151  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
152  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
153  }
154  chunk_metadata->chunkStats.has_nulls = true;
155  chunk_metadata_vector.emplace_back(chunk_key, chunk_metadata);
156  }
157 }
158 
160  const ChunkToBufferMap& required_buffers,
161  const ChunkToBufferMap& optional_buffers) {
162  auto timer = DEBUG_TIMER(__func__);
163  CHECK(optional_buffers.empty());
164 
165  auto& sys_catalog = Catalog_Namespace::SysCatalog::instance();
166  auto catalog = sys_catalog.getCatalog(db_id_);
167  CHECK(catalog);
168  CHECK_EQ(catalog->name(), INFORMATION_SCHEMA_DB);
169 
170  auto fragment_id = required_buffers.begin()->first[CHUNK_KEY_FRAGMENT_IDX];
171  CHECK_EQ(fragment_id, 0);
172 
173  std::map<ChunkKey, Chunk_NS::Chunk> chunks;
174  std::set<const ColumnDescriptor*> columns_to_parse;
176  chunks, required_buffers, row_count_, columns_to_parse, fragment_id, *catalog);
177 
178  // initialize import buffers from columns.
179  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
180  std::map<std::string, import_export::TypedImportBuffer*> import_buffers_map;
182  import_buffers, import_buffers_map, columns_to_parse, *catalog);
184 
185  auto column_id_to_data_blocks_map =
187  for (auto& [chunk_key, chunk] : chunks) {
188  auto data_block_entry =
189  column_id_to_data_blocks_map.find(chunk_key[CHUNK_KEY_COLUMN_IDX]);
190  CHECK(data_block_entry != column_id_to_data_blocks_map.end());
191  chunk.appendData(data_block_entry->second, row_count_, 0);
192  auto cd = chunk.getColumnDesc();
193  if (!cd->columnType.is_varlen_indeed()) {
195  auto metadata = chunk.getBuffer()->getEncoder()->getMetadata(cd->columnType);
196  foreign_table_->fragmenter->updateColumnChunkMetadata(cd, fragment_id, metadata);
197  }
198  chunk.setBuffer(nullptr);
199  chunk.setIndexBuffer(nullptr);
200  }
201 }
202 
204  return {};
205 }
206 
208  const std::string& file_path,
209  const ChunkMetadataVector& chunk_metadata) {}
210 
212  return false;
213 }
214 } // namespace foreign_storage
#define CHECK_EQ(x, y)
Definition: Logger.h:219
std::vector< int > ChunkKey
Definition: types.h:37
static std::map< int, DataBlockPtr > convertImportBuffersToDataBlocks(const std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers)
Class for a per-database catalog. Also includes metadata for the current database and the current user.
Definition: Catalog.h:114
std::string tableName
virtual void initializeObjectsForTable(const std::string &table_name)=0
bool is_varlen_data_key(const ChunkKey &key)
Definition: types.h:71
const std::set< std::string_view > & getSupportedTableOptions() const override
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:42
#define UNREACHABLE()
Definition: Logger.h:255
std::map< ChunkKey, AbstractBuffer * > ChunkToBufferMap
void initEncoder(const SQLTypeInfo &tmp_sql_type)
int32_t StringOffsetT
Definition: sqltypes.h:1090
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void validateTableOptions(const ForeignTable *foreign_table) const override
This file contains the class specification and related data structures for Catalog.
void populateChunkBuffers(const ChunkToBufferMap &required_buffers, const ChunkToBufferMap &optional_buffers) override
void restoreDataWrapperInternals(const std::string &file_path, const ChunkMetadataVector &chunk_metadata) override
static SysCatalog & instance()
Definition: SysCatalog.h:326
This file contains the class specification and related data structures for SysCatalog.
void validateUserMappingOptions(const UserMapping *user_mapping, const ForeignServer *foreign_server) const override
void initialize_chunks(std::map< ChunkKey, Chunk_NS::Chunk > &chunks, const ChunkToBufferMap &buffers, size_t row_count, std::set< const ColumnDescriptor * > &columns_to_parse, int32_t fragment_id, const Catalog_Namespace::Catalog &catalog)
bool is_varlen_index_key(const ChunkKey &key)
Definition: types.h:75
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
virtual void populateChunkBuffersForTable(const std::string &table_name, std::map< std::string, import_export::TypedImportBuffer * > &import_buffers)=0
An AbstractBuffer is a unit of data management for a data manager.
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1554
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void populateChunkMetadata(ChunkMetadataVector &chunk_metadata_vector) override
const std::list< const ColumnDescriptor * > & getLogicalColumns() const
int32_t ArrayOffsetT
Definition: sqltypes.h:1091
void validateServerOptions(const ForeignServer *foreign_server) const override
#define CHUNK_KEY_VARLEN_IDX
Definition: types.h:43
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:211
#define DEBUG_TIMER(name)
Definition: Logger.h:358
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
const std::set< std::string_view > & getSupportedUserMappingOptions() const override
void initialize_import_buffers(std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, std::map< std::string, import_export::TypedImportBuffer * > &import_buffers_map, const std::set< const ColumnDescriptor * > &columns_to_parse, const Catalog_Namespace::Catalog &catalog)
virtual void reserve(size_t num_bytes)=0
const std::string INFORMATION_SCHEMA_DB
Definition: SysCatalog.h:65