OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RenderGroupAnalyzer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file RenderGroupAnalyzer.cpp
19  * @brief Functions for RenderGroupAnalyzer class
20  *
21  */
22 
23 #include "RenderGroupAnalyzer.h"
24 
25 #include <boost/dynamic_bitset.hpp>
26 #include <boost/geometry.hpp>
27 #include <csignal>
28 #include <cstdio>
29 #include <cstdlib>
30 #include <memory>
31 #include <mutex>
32 #include <stdexcept>
33 #include <utility>
34 #include <vector>
35 #include "Catalog/Catalog.h"
36 #include "Shared/measure.h"
38 
39 namespace import_export {
40 
41 //
42 // class RenderGroupAnalyzer
43 //
44 
45 #define DEBUG_RENDER_GROUP_ANALYZER 0
46 
49  const std::string& tableName,
50  const std::string& geoColumnBaseName) {
51  // start timer
52  auto seedTimer = timer_start();
53 
54  // start with a fresh tree
55  _rtree = nullptr;
56  _numRenderGroups = 0;
57 
58  // get the table descriptor
59  auto const* td = cat.getMetadataForTable(tableName);
60  CHECK(td);
61 
62  // foreign tables not supported
63  if (td->storageType == StorageType::FOREIGN_TABLE) {
65  LOG(INFO) << "DEBUG: Table is a foreign table";
66  }
67  _rtree = std::make_unique<RTree>();
68  CHECK(_rtree);
69  return;
70  }
71 
72  // if the table is empty, just make an empty tree
73  CHECK(td->fragmenter);
74  if (td->fragmenter->getFragmentsForQuery().getPhysicalNumTuples() == 0) {
76  LOG(INFO) << "DEBUG: Table is empty!";
77  }
78  _rtree = std::make_unique<RTree>();
79  CHECK(_rtree);
80  return;
81  }
82 
83  // no seeding possible without these two columns
84  const auto cd_bounds =
85  cat.getMetadataForColumn(td->tableId, geoColumnBaseName + "_bounds");
86  const auto cd_render_group =
87  cat.getMetadataForColumn(td->tableId, geoColumnBaseName + "_render_group");
88  if (!cd_bounds || !cd_render_group) {
89  throw std::runtime_error("RenderGroupAnalyzer: Table " + tableName +
90  " doesn't have bounds or render_group columns!");
91  }
92 
93  // and validate their types
94  if (cd_bounds->columnType.get_type() != kARRAY ||
95  cd_bounds->columnType.get_subtype() != kDOUBLE) {
96  throw std::runtime_error("RenderGroupAnalyzer: Table " + tableName +
97  " bounds column is wrong type!");
98  }
99  if (cd_render_group->columnType.get_type() != kINT) {
100  throw std::runtime_error("RenderGroupAnalyzer: Table " + tableName +
101  " render_group column is wrong type!");
102  }
103 
104  // get chunk accessor table
105  auto chunkAccessorTable = getChunkAccessorTable(
106  cat, td, {geoColumnBaseName + "_bounds", geoColumnBaseName + "_render_group"});
107  const auto table_count = std::get<0>(chunkAccessorTable.back());
108 
110  LOG(INFO) << "DEBUG: Scanning existing table geo column set '" << geoColumnBaseName
111  << "'";
112  }
113 
114  std::vector<Node> nodes;
115  try {
116  nodes.resize(table_count);
117  } catch (const std::exception& e) {
118  throw std::runtime_error("RenderGroupAnalyzer failed to reserve memory for " +
119  std::to_string(table_count) + " rows");
120  }
121 
122  for (size_t row = 0; row < table_count; row++) {
123  ArrayDatum ad;
124  VarlenDatum vd;
125  bool is_end;
126 
127  // get ChunkIters and fragment row offset
128  size_t rowOffset = 0;
129  auto& chunkIters = getChunkItersAndRowOffset(chunkAccessorTable, row, rowOffset);
130  auto& boundsChunkIter = chunkIters[0];
131  auto& renderGroupChunkIter = chunkIters[1];
132 
133  // get bounds values
134  ChunkIter_get_nth(&boundsChunkIter, row - rowOffset, &ad, &is_end);
135  CHECK(!is_end);
136  CHECK(ad.pointer);
137  int numBounds = (int)(ad.length / sizeof(double));
138  CHECK(numBounds == 4);
139 
140  // convert to bounding box
141  double* bounds = reinterpret_cast<double*>(ad.pointer);
142  BoundingBox bounding_box;
143  boost::geometry::assign_inverse(bounding_box);
144  boost::geometry::expand(bounding_box, Point(bounds[0], bounds[1]));
145  boost::geometry::expand(bounding_box, Point(bounds[2], bounds[3]));
146 
147  // get render group
148  ChunkIter_get_nth(&renderGroupChunkIter, row - rowOffset, false, &vd, &is_end);
149  CHECK(!is_end);
150  CHECK(vd.pointer);
151  int renderGroup = *reinterpret_cast<int32_t*>(vd.pointer);
152 
153  // skip rows with invalid render groups (e.g. EMPTY geometry)
154  if (renderGroup < 0) {
155  continue;
156  }
157 
158  // store
159  nodes[row] = std::make_pair(bounding_box, renderGroup);
160 
161  // how many render groups do we have now?
162  if (renderGroup >= _numRenderGroups) {
163  _numRenderGroups = renderGroup + 1;
164  }
165 
167  LOG(INFO) << "DEBUG: Existing row " << row << " has Render Group " << renderGroup;
168  }
169  }
170 
171  // bulk-load the tree
172  auto bulk_load_timer = timer_start();
173  _rtree = std::make_unique<RTree>(nodes);
174  CHECK(_rtree);
175  LOG(INFO) << "Scanning render groups of poly column '" << geoColumnBaseName
176  << "' of table '" << tableName << "' took " << timer_stop(seedTimer) << "ms ("
177  << timer_stop(bulk_load_timer) << " ms for tree)";
178 
180  LOG(INFO) << "DEBUG: Done! Now have " << _numRenderGroups << " Render Groups";
181  }
182 }
183 
185  const std::vector<double>& bounds) {
186  // validate
187  CHECK(bounds.size() == 4);
188 
189  // get bounds
190  BoundingBox bounding_box;
191  boost::geometry::assign_inverse(bounding_box);
192  boost::geometry::expand(bounding_box, Point(bounds[0], bounds[1]));
193  boost::geometry::expand(bounding_box, Point(bounds[2], bounds[3]));
194 
195  // remainder under mutex to allow this to be multi-threaded
196  std::lock_guard<std::mutex> guard(_rtreeMutex);
197 
198  // get the intersecting nodes
199  std::vector<Node> intersects;
200  _rtree->query(boost::geometry::index::intersects(bounding_box),
201  std::back_inserter(intersects));
202 
203  // build bitset of render groups of the intersecting rectangles
204  // clear bit means available, allows use of find_first()
205  boost::dynamic_bitset<> bits(_numRenderGroups);
206  bits.set();
207  for (const auto& intersection : intersects) {
208  CHECK(intersection.second < _numRenderGroups);
209  bits.reset(intersection.second);
210  }
211 
212  // find first available group
213  int firstAvailableRenderGroup = 0;
214  size_t firstSetBit = bits.find_first();
215  if (firstSetBit == boost::dynamic_bitset<>::npos) {
216  // all known groups represented, add a new one
217  firstAvailableRenderGroup = _numRenderGroups;
219  } else {
220  firstAvailableRenderGroup = (int)firstSetBit;
221  }
222 
223  // insert new node
224  _rtree->insert(std::make_pair(bounding_box, firstAvailableRenderGroup));
225 
226  // return it
227  return firstAvailableRenderGroup;
228 }
229 
230 } // namespace import_export
ChunkAccessorTable getChunkAccessorTable(const Catalog_Namespace::Catalog &cat, const TableDescriptor *td, const std::vector< std::string > &columnNames)
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
#define LOG(tag)
Definition: Logger.h:285
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
std::string to_string(char const *&&v)
int8_t * pointer
Definition: Datum.h:54
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:219
This file contains the class specification and related data structures for Catalog.
ChunkIterVector & getChunkItersAndRowOffset(ChunkAccessorTable &table, size_t rowid, size_t &rowOffset)
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void seedFromExistingTableContents(const Catalog_Namespace::Catalog &cat, const std::string &tableName, const std::string &geoColumnBaseName)
boost::geometry::model::box< Point > BoundingBox
#define DEBUG_RENDER_GROUP_ANALYZER
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:62
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static constexpr char const * FOREIGN_TABLE
int insertBoundsAndReturnRenderGroup(const std::vector< double > &bounds)
Type timer_start()
Definition: measure.h:42
boost::geometry::model::point< double, 2, boost::geometry::cs::cartesian > Point