OmniSciDB  4201147b46
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FsiChunkUtils.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FsiChunkUtils.h"
18 
19 #include "Catalog/Catalog.h"
20 
21 namespace foreign_storage {
// Prepares `chunk` for the column addressed by `chunk_key`: looks up the
// column descriptor, picks the data (and, for variable-length columns, index)
// buffer out of `buffers`, reserves space according to `chunk_metadata_map`,
// and wires the buffers and an encoder into `chunk`.
//
// NOTE(review): the declaration line that precedes these parameters is absent
// from this listing, so the function name and return type are not visible here.
23  const ChunkKey& chunk_key,
24  const std::map<ChunkKey, std::shared_ptr<ChunkMetadata>>& chunk_metadata_map,
25  const std::map<ChunkKey, AbstractBuffer*>& buffers,
26  Chunk_NS::Chunk& chunk) {
// NOTE(review): the initializer expression for `catalog` is absent from this
// listing; only the CHECK that it is non-null is visible.
27  auto catalog =
29  CHECK(catalog);
30 
31  ChunkKey data_chunk_key = chunk_key;
32  AbstractBuffer* data_buffer = nullptr;
// Remains nullptr for columns that are not variable-length.
33  AbstractBuffer* index_buffer = nullptr;
// Column descriptor is looked up by the table and column components of the key.
34  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
35  chunk_key[CHUNK_KEY_COLUMN_IDX]);
36 
37  if (column->columnType.is_varlen_indeed()) {
// Variable-length columns use two buffers: the data buffer is keyed by
// chunk_key with 1 appended, the index buffer by chunk_key with 2 appended.
38  data_chunk_key.push_back(1);
39  ChunkKey index_chunk_key = chunk_key;
40  index_chunk_key.push_back(2);
41 
// Both buffers must already be present in `buffers`.
42  CHECK(buffers.find(data_chunk_key) != buffers.end());
43  CHECK(buffers.find(index_chunk_key) != buffers.end());
44 
45  data_buffer = buffers.find(data_chunk_key)->second;
46  index_buffer = buffers.find(index_chunk_key)->second;
// Both buffers are required to be empty at this point.
47  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
48  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
49 
// Width of one index entry depends on the column type: StringOffsetT for
// string and geometry columns, ArrayOffsetT for array columns. Any other
// type reaching this branch hits UNREACHABLE().
50  size_t index_offset_size{0};
51  if (column->columnType.is_string() || column->columnType.is_geometry()) {
52  index_offset_size = sizeof(StringOffsetT);
53  } else if (column->columnType.is_array()) {
54  index_offset_size = sizeof(ArrayOffsetT);
55  } else {
56  UNREACHABLE();
57  }
// Reserve room for numElements + 1 index entries, using the element count
// recorded in the metadata of the data chunk key.
58  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
59  index_buffer->reserve(index_offset_size *
60  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
61  } else {
// Non-variable-length columns use a single buffer keyed by chunk_key itself.
// This assignment repeats the initial value given to data_chunk_key above.
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
// In both cases the data buffer is reserved to the byte count recorded in
// the metadata for the data chunk key, which must exist in the map.
66  CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68 
// Configure the chunk: mark it non-pinnable, attach the column descriptor and
// the buffers (index buffer may be nullptr), then initialize its encoder.
69  chunk.setPinnable(false);
70  chunk.setColumnDesc(column);
71  chunk.setBuffer(data_buffer);
72  chunk.setIndexBuffer(index_buffer);
73  chunk.initEncoder();
74 }
75 
76 std::shared_ptr<ChunkMetadata> get_placeholder_metadata(const ColumnDescriptor* column,
77  size_t num_elements) {
78  ForeignStorageBuffer empty_buffer;
79  // Use default encoder metadata as in parquet wrapper
80  empty_buffer.initEncoder(column->columnType);
81  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
82  chunk_metadata->numElements = num_elements;
83 
84  if (!column->columnType.is_varlen_indeed()) {
85  chunk_metadata->numBytes = column->columnType.get_size() * num_elements;
86  }
87  // min/max not set by default for arrays, so get from elem type encoder
88  if (column->columnType.is_array()) {
89  ForeignStorageBuffer scalar_buffer;
90  scalar_buffer.initEncoder(column->columnType.get_elem_type());
91  auto scalar_metadata =
92  scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
93  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
94  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
95  }
96  chunk_metadata->chunkStats.has_nulls = true;
97  return chunk_metadata;
98 }
99 
// Looks up the foreign table addressed by `key` and returns it by reference.
//
// NOTE(review): the declaration line for this function is absent from this
// listing, so its exact signature is not visible at this point.
//
// The key is split into database and table ids with get_table_prefix().
101  auto [db_id, tb_id] = get_table_prefix(key);
// The catalog for the database must exist; CHECK enforces this.
102  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
103  CHECK(catalog);
// The table id must resolve to a foreign table in that catalog.
104  auto table = catalog->getForeignTable(tb_id);
105  CHECK(table);
// The result refers to the object `table` points at; presumably it is owned
// by the catalog and outlives this call -- TODO confirm.
106  return *table;
107 }
108 
109 bool is_system_table_chunk_key(const ChunkKey& chunk_key) {
110  return get_foreign_table_for_key(chunk_key).is_system_table;
111 }
112 
// Predicate over a chunk key; by its name it reports whether the key belongs
// to a replicated table.
// NOTE(review): the body line of this function is absent from this listing,
// so how the result is computed cannot be documented here -- confirm against
// the original file.
113 bool is_replicated_table_chunk_key(const ChunkKey& chunk_key) {
115 }
116 
117 bool is_append_table_chunk_key(const ChunkKey& chunk_key) {
118  return get_foreign_table_for_key(chunk_key).isAppendMode();
119 }
120 
// Predicate over a chunk key. The visible part of the condition requires
// dist::is_distributed() to be true and dist::is_aggregator() to be false.
// NOTE(review): the continuation line of this expression is absent from this
// listing, so any further conditions cannot be documented here -- confirm
// against the original file.
121 bool is_shardable_key(const ChunkKey& key) {
122  return (dist::is_distributed() && !dist::is_aggregator() &&
124 }
125 
126 // If we want to change the way we shard foreign tables we can do it in this function.
// NOTE(review): the body lines of this function are absent from this listing,
// so the mapping rule itself cannot be documented here -- confirm against the
// original file.
127 bool fragment_maps_to_leaf(const ChunkKey& key) {
132 }
133 
// Returns true when is_shardable_key(key) holds and fragment_maps_to_leaf(key)
// does not; false otherwise (including for keys that are not shardable).
// NOTE(review): the declaration line for this function is absent from this
// listing, so its exact signature is not visible at this point.
135  return (is_shardable_key(key) && !fragment_maps_to_leaf(key));
136 }
137 } // namespace foreign_storage
#define CHECK_EQ(x, y)
Definition: Logger.h:230
void setPinnable(bool pinnable)
Definition: Chunk.h:63
std::vector< int > ChunkKey
Definition: types.h:36
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
bool is_system_table_chunk_key(const ChunkKey &chunk_key)
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:152
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define UNREACHABLE()
Definition: Logger.h:266
bool is_append_table_chunk_key(const ChunkKey &chunk_key)
void initEncoder(const SQLTypeInfo &tmp_sql_type)
bool is_replicated_table_chunk_key(const ChunkKey &chunk_key)
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:150
int32_t StringOffsetT
Definition: sqltypes.h:1113
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
This file contains the class specification and related data structures for Catalog.
bool key_does_not_shard_to_leaf(const ChunkKey &key)
static SysCatalog & instance()
Definition: SysCatalog.h:336
int get_fragment(const ChunkKey &key)
Definition: types.h:52
std::shared_ptr< ChunkMetadata > get_placeholder_metadata(const ColumnDescriptor *column, size_t num_elements)
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
void init_chunk_for_column(const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
bool fragment_maps_to_leaf(const ChunkKey &key)
bool isAppendMode() const
Checks if the table is in append mode.
bool is_shardable_key(const ChunkKey &key)
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:97
bool is_aggregator()
Definition: distributed.cpp:33
bool table_is_replicated(const TableDescriptor *td)
int32_t ArrayOffsetT
Definition: sqltypes.h:1114
void initEncoder()
Definition: Chunk.cpp:284
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:98
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:222
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:67
SQLTypeInfo columnType
bool is_varlen_indeed() const
Definition: sqltypes.h:542
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:865
virtual void reserve(size_t num_bytes)=0
bool is_array() const
Definition: sqltypes.h:518
bool is_distributed()
Definition: distributed.cpp:21