OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FsiChunkUtils.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FsiChunkUtils.h"
18 
19 #include "Catalog/Catalog.h"
20 
21 namespace foreign_storage {
23  const ChunkKey& chunk_key,
24  const std::map<ChunkKey, std::shared_ptr<ChunkMetadata>>& chunk_metadata_map,
25  const std::map<ChunkKey, AbstractBuffer*>& buffers,
26  Chunk_NS::Chunk& chunk) {
27  auto catalog =
29  CHECK(catalog);
30 
31  ChunkKey data_chunk_key = chunk_key;
32  AbstractBuffer* data_buffer = nullptr;
33  AbstractBuffer* index_buffer = nullptr;
34  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
35  chunk_key[CHUNK_KEY_COLUMN_IDX]);
36 
37  if (column->columnType.is_varlen_indeed()) {
38  data_chunk_key.push_back(1);
39  ChunkKey index_chunk_key = chunk_key;
40  index_chunk_key.push_back(2);
41 
42  CHECK(buffers.find(data_chunk_key) != buffers.end());
43  CHECK(buffers.find(index_chunk_key) != buffers.end());
44 
45  data_buffer = buffers.find(data_chunk_key)->second;
46  index_buffer = buffers.find(index_chunk_key)->second;
47  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
48  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
49 
50  size_t index_offset_size{0};
51  if (column->columnType.is_string() || column->columnType.is_geometry()) {
52  index_offset_size = sizeof(StringOffsetT);
53  } else if (column->columnType.is_array()) {
54  index_offset_size = sizeof(ArrayOffsetT);
55  } else {
56  UNREACHABLE();
57  }
58  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
59  index_buffer->reserve(index_offset_size *
60  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
61  } else {
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
66  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68 
69  chunk.setColumnDesc(column);
70  chunk.setBuffer(data_buffer);
71  chunk.setIndexBuffer(index_buffer);
72  chunk.initEncoder();
73 }
74 
75 std::shared_ptr<ChunkMetadata> get_placeholder_metadata(const ColumnDescriptor* column,
76  size_t num_elements) {
77  ForeignStorageBuffer empty_buffer;
78  // Use default encoder metadata as in parquet wrapper
79  empty_buffer.initEncoder(column->columnType);
80  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
81  chunk_metadata->numElements = num_elements;
82 
83  if (!column->columnType.is_varlen_indeed()) {
84  chunk_metadata->numBytes = column->columnType.get_size() * num_elements;
85  }
86  // min/max not set by default for arrays, so get from elem type encoder
87  if (column->columnType.is_array()) {
88  ForeignStorageBuffer scalar_buffer;
89  scalar_buffer.initEncoder(column->columnType.get_elem_type());
90  auto scalar_metadata =
91  scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
92  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
93  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
94  }
95  chunk_metadata->chunkStats.has_nulls = true;
96  return chunk_metadata;
97 }
98 } // namespace foreign_storage
#define CHECK_EQ(x, y)
Definition: Logger.h:217
std::vector< int > ChunkKey
Definition: types.h:37
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:113
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
#define UNREACHABLE()
Definition: Logger.h:253
void initEncoder(const SQLTypeInfo &tmp_sql_type)
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:111
int32_t StringOffsetT
Definition: sqltypes.h:1075
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
This file contains the class specification and related data structures for Catalog.
static SysCatalog & instance()
Definition: SysCatalog.h:325
std::shared_ptr< ChunkMetadata > get_placeholder_metadata(const ColumnDescriptor *column, size_t num_elements)
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
void init_chunk_for_column(const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t ArrayOffsetT
Definition: sqltypes.h:1076
void initEncoder()
Definition: Chunk.cpp:225
#define CHECK(condition)
Definition: Logger.h:209
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:56
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:540
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
virtual void reserve(size_t num_bytes)=0
bool is_array() const
Definition: sqltypes.h:517