OmniSciDB  17c254d2f8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SortedOrderFragmenter.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <cstring>
17 #include <numeric>
18 
19 #include "../Catalog/Catalog.h"
20 #include "SortedOrderFragmenter.h"
21 
22 namespace Fragmenter_Namespace {
23 
24 template <typename T>
25 void shuffleByIndexesImpl(const std::vector<size_t>& indexes, T* buffer) {
26  std::vector<T> new_buffer;
27  new_buffer.reserve(indexes.size());
28  for (const auto i : indexes) {
29  new_buffer.push_back(buffer[i]);
30  }
31  std::memcpy(buffer, new_buffer.data(), indexes.size() * sizeof(T));
32 }
33 
34 template <typename T>
35 void shuffleByIndexesImpl(const std::vector<size_t>& indexes, std::vector<T>& buffer) {
36  std::vector<T> new_buffer;
37  new_buffer.reserve(indexes.size());
38  for (const auto i : indexes) {
39  new_buffer.push_back(buffer[i]);
40  }
41  buffer.swap(new_buffer);
42 }
43 
45  const std::vector<size_t>& indexes,
46  DataBlockPtr& data) {
47  const auto& ti = cd->columnType;
48  switch (ti.get_type()) {
49  case kBOOLEAN:
50  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
51  break;
52  case kTINYINT:
53  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
54  break;
55  case kSMALLINT:
56  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
57  break;
58  case kINT:
59  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
60  break;
61  case kBIGINT:
62  case kNUMERIC:
63  case kDECIMAL:
64  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
65  break;
66  case kFLOAT:
67  shuffleByIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
68  break;
69  case kDOUBLE:
70  shuffleByIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
71  break;
72  case kTEXT:
73  case kVARCHAR:
74  case kCHAR:
75  if (ti.is_varlen()) {
76  shuffleByIndexesImpl(indexes, *data.stringsPtr);
77  } else {
78  switch (ti.get_size()) {
79  case 1:
80  shuffleByIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
81  break;
82  case 2:
83  shuffleByIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
84  break;
85  case 4:
86  shuffleByIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
87  break;
88  default:
89  CHECK(false);
90  }
91  }
92  break;
93  case kDATE:
94  case kTIME:
95  case kTIMESTAMP:
96  shuffleByIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
97  break;
98  case kARRAY:
99  shuffleByIndexesImpl(indexes, *data.arraysPtr);
100  break;
101  case kPOINT:
102  case kLINESTRING:
103  case kPOLYGON:
104  case kMULTIPOLYGON:
105  shuffleByIndexesImpl(indexes, *data.stringsPtr);
106  break;
107  default:
108  CHECK(false);
109  }
110 }
111 
112 template <typename T>
113 void sortIndexesImpl(std::vector<size_t>& indexes, const T* buffer) {
114  CHECK(buffer);
115  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
116  return buffer[a] < buffer[b];
117  });
118 }
119 
120 void sortIndexesImpl(std::vector<size_t>& indexes,
121  const std::vector<std::string>& buffer) {
122  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
123  return buffer[a].size() < buffer[b].size() ||
124  (buffer[a].size() == buffer[b].size() && buffer[a] < buffer[b]);
125  });
126 }
127 
128 void sortIndexesImpl(std::vector<size_t>& indexes,
129  const std::vector<ArrayDatum>& buffer) {
130  std::sort(indexes.begin(), indexes.end(), [&](const auto a, const auto b) {
131  return buffer[a].is_null || buffer[a].length < buffer[b].length ||
132  (!buffer[b].is_null && buffer[a].length == buffer[b].length &&
133  memcmp(buffer[a].pointer, buffer[b].pointer, buffer[a].length) < 0);
134  });
135 }
136 
138  std::vector<size_t>& indexes,
139  const DataBlockPtr& data) {
140  const auto& ti = cd->columnType;
141  switch (ti.get_type()) {
142  case kBOOLEAN:
143  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
144  break;
145  case kTINYINT:
146  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
147  break;
148  case kSMALLINT:
149  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
150  break;
151  case kINT:
152  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
153  break;
154  case kBIGINT:
155  case kNUMERIC:
156  case kDECIMAL:
157  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
158  break;
159  case kFLOAT:
160  sortIndexesImpl(indexes, reinterpret_cast<float*>(data.numbersPtr));
161  break;
162  case kDOUBLE:
163  sortIndexesImpl(indexes, reinterpret_cast<double*>(data.numbersPtr));
164  break;
165  case kTEXT:
166  case kVARCHAR:
167  case kCHAR:
168  if (ti.is_varlen()) {
169  sortIndexesImpl(indexes, *data.stringsPtr);
170  } else {
171  switch (ti.get_size()) {
172  case 1:
173  sortIndexesImpl(indexes, reinterpret_cast<int8_t*>(data.numbersPtr));
174  break;
175  case 2:
176  sortIndexesImpl(indexes, reinterpret_cast<int16_t*>(data.numbersPtr));
177  break;
178  case 4:
179  sortIndexesImpl(indexes, reinterpret_cast<int32_t*>(data.numbersPtr));
180  break;
181  default:
182  CHECK(false);
183  }
184  }
185  break;
186  case kDATE:
187  case kTIME:
188  case kTIMESTAMP:
189  sortIndexesImpl(indexes, reinterpret_cast<int64_t*>(data.numbersPtr));
190  break;
191  case kARRAY:
192  sortIndexesImpl(indexes, *data.arraysPtr);
193  break;
194  default:
195  CHECK(false) << "invalid type '" << ti.get_type() << "' to sort";
196  }
197 }
198 
200  // coming here table must have defined a sort_column for mini sort
201  const auto table_desc = catalog_->getMetadataForTable(physicalTableId_);
202  CHECK(table_desc);
203  CHECK_GT(table_desc->sortedColumnId, 0);
204  const auto logical_cd =
205  catalog_->getMetadataForColumn(table_desc->tableId, table_desc->sortedColumnId);
206  CHECK(logical_cd);
207  const auto physical_cd = catalog_->getMetadataForColumn(
208  table_desc->tableId,
209  table_desc->sortedColumnId + (logical_cd->columnType.is_geometry() ? 1 : 0));
210  const auto it = std::find(insertDataStruct.columnIds.begin(),
211  insertDataStruct.columnIds.end(),
212  physical_cd->columnId);
213  CHECK(it != insertDataStruct.columnIds.end());
214  // sort row indexes of the sort column
215  std::vector<size_t> indexes(insertDataStruct.numRows);
216  std::iota(indexes.begin(), indexes.end(), 0);
217  const auto dist = std::distance(insertDataStruct.columnIds.begin(), it);
218  CHECK_LT(static_cast<size_t>(dist), insertDataStruct.data.size());
219  sortIndexes(physical_cd, indexes, insertDataStruct.data[dist]);
220  // shuffle rows of all columns
221  for (size_t i = 0; i < insertDataStruct.columnIds.size(); ++i) {
222  const auto cd = catalog_->getMetadataForColumn(table_desc->tableId,
223  insertDataStruct.columnIds[i]);
224  shuffleByIndexes(cd, indexes, insertDataStruct.data[i]);
225  }
226 }
227 
228 } // namespace Fragmenter_Namespace
Definition: sqltypes.h:50
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:148
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:149
void sortIndexesImpl(std::vector< size_t > &indexes, const T *buffer)
void shuffleByIndexes(const ColumnDescriptor *cd, const std::vector< size_t > &indexes, DataBlockPtr &data)
#define CHECK_GT(x, y)
Definition: Logger.h:209
virtual void sortData(InsertData &insertDataStruct)
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:63
CHECK(cgen_state)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
specifies the content in-memory of a row in the column metadata table
void sortIndexes(const ColumnDescriptor *cd, std::vector< size_t > &indexes, const DataBlockPtr &data)
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:53
Definition: sqltypes.h:54
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:64
Definition: sqltypes.h:42
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
Definition: sqltypes.h:46
SQLTypeInfo columnType
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
int8_t * numbersPtr
Definition: sqltypes.h:147
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:62
void shuffleByIndexesImpl(const std::vector< size_t > &indexes, T *buffer)