OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SpeculativeTopN.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "SpeculativeTopN.h"
18 
19 #include "RelAlgExecutor.h"
20 #include "ResultSet.h"
21 #include "Shared/Logger.h"
22 
24 
26  const std::vector<Analyzer::Expr*>& target_exprs,
27  const size_t truncate_n)
28  : unknown_(0) {
29  CHECK_EQ(rows.colCount(), target_exprs.size());
30  const bool count_first = dynamic_cast<const Analyzer::AggExpr*>(target_exprs[0]);
31  for (size_t i = 0; i < truncate_n + 1; ++i) {
32  const auto crt_row = rows.getNextRow(false, false);
33  if (crt_row.empty()) {
34  break;
35  }
36  int64_t key{0};
37  size_t val{0};
38  CHECK_EQ(rows.colCount(), crt_row.size());
39  {
40  auto scalar_r = boost::get<ScalarTargetValue>(&crt_row[0]);
41  CHECK(scalar_r);
42  auto p = boost::get<int64_t>(scalar_r);
43  CHECK(p);
44  if (count_first) {
45  val = *p;
46  } else {
47  key = *p;
48  }
49  }
50  {
51  auto scalar_r = boost::get<ScalarTargetValue>(&crt_row[1]);
52  CHECK(scalar_r);
53  auto p = boost::get<int64_t>(scalar_r);
54  CHECK(p);
55  if (count_first) {
56  key = *p;
57  } else {
58  val = *p;
59  }
60  }
61  if (i < truncate_n) {
62  const auto it_ok = map_.emplace(key, SpeculativeTopNVal{val, false});
63  CHECK(it_ok.second);
64  } else {
65  unknown_ = val;
66  }
67  }
68 }
69 
71  for (auto& kv : map_) {
72  auto& this_entry = kv.second;
73  const auto that_it = that.map_.find(kv.first);
74  if (that_it != that.map_.end()) {
75  const auto& that_entry = that_it->second;
76  CHECK(!that_entry.unknown);
77  this_entry.val += that_entry.val;
78  that.map_.erase(that_it);
79  } else {
80  this_entry.val += that.unknown_;
81  this_entry.unknown = that.unknown_;
82  }
83  }
84  for (const auto& kv : that.map_) {
85  const auto it_ok = map_.emplace(
86  kv.first, SpeculativeTopNVal{kv.second.val + unknown_, unknown_ != 0});
87  CHECK(it_ok.second);
88  }
89  unknown_ += that.unknown_;
90 }
91 
92 std::shared_ptr<ResultSet> SpeculativeTopNMap::asRows(
93  const RelAlgExecutionUnit& ra_exe_unit,
94  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
96  const Executor* executor,
97  const size_t top_n,
98  const bool desc) const {
99  std::vector<SpeculativeTopNEntry> vec;
100  for (const auto& kv : map_) {
101  vec.emplace_back(SpeculativeTopNEntry{kv.first, kv.second.val, kv.second.unknown});
102  }
103  if (desc) {
104  std::sort(vec.begin(), vec.end(), std::greater<SpeculativeTopNEntry>());
105  } else {
106  std::sort(vec.begin(), vec.end());
107  }
108  const auto num_rows = std::min(top_n, vec.size());
109  for (size_t i = 0; i < num_rows; ++i) {
110  if (vec[i].unknown) {
111  throw SpeculativeTopNFailed();
112  }
113  }
114  CHECK_EQ(size_t(2), ra_exe_unit.target_exprs.size());
115  auto query_mem_desc_rs = query_mem_desc;
116  query_mem_desc_rs.setQueryDescriptionType(QueryDescriptionType::GroupByBaselineHash);
117  query_mem_desc_rs.setOutputColumnar(false);
118  query_mem_desc_rs.setEntryCount(num_rows);
119  query_mem_desc_rs.clearSlotInfo();
120  query_mem_desc_rs.addColSlotInfo({std::make_tuple(8, 8)});
121  query_mem_desc_rs.addColSlotInfo({std::make_tuple(8, 8)});
122  auto rs = std::make_shared<ResultSet>(
123  target_exprs_to_infos(ra_exe_unit.target_exprs, query_mem_desc_rs),
125  query_mem_desc_rs,
126  row_set_mem_owner,
127  executor);
128  auto rs_storage = rs->allocateStorage();
129  auto rs_buff = reinterpret_cast<int64_t*>(rs_storage->getUnderlyingBuffer());
130  const bool count_first =
131  dynamic_cast<const Analyzer::AggExpr*>(ra_exe_unit.target_exprs[0]);
132  for (size_t i = 0; i < num_rows; ++i) {
133  rs_buff[0] = vec[i].key;
134  int64_t col0 = vec[i].key;
135  int64_t col1 = vec[i].val;
136  if (count_first) {
137  std::swap(col0, col1);
138  }
139  rs_buff[1] = col0;
140  rs_buff[2] = col1;
141  rs_buff += 3;
142  }
143  return rs;
144 }
145 
146 void SpeculativeTopNBlacklist::add(const std::shared_ptr<Analyzer::Expr> expr,
147  const bool desc) {
148  for (const auto e : blacklist_) {
149  CHECK(!(*e.first == *expr) || e.second != desc);
150  }
151  blacklist_.emplace_back(expr, desc);
152 }
153 
154 bool SpeculativeTopNBlacklist::contains(const std::shared_ptr<Analyzer::Expr> expr,
155  const bool desc) const {
156  for (const auto e : blacklist_) {
157  if (*e.first == *expr && e.second == desc) {
158  return true;
159  }
160  }
161  return false;
162 }
163 
166  if (g_cluster) {
167  return false;
168  }
169  if (ra_exe_unit.target_exprs.size() != 2) {
170  return false;
171  }
172  for (const auto target_expr : ra_exe_unit.target_exprs) {
173  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
174  if (agg_expr && agg_expr->get_aggtype() != kCOUNT) {
175  return false;
176  }
177  }
178  return query_mem_desc.sortOnGpu() && ra_exe_unit.sort_info.limit &&
180 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:198
void reduce(SpeculativeTopNMap &that)
bool contains(const std::shared_ptr< Analyzer::Expr > expr, const bool desc) const
const int8_t const int64_t * num_rows
bool g_cluster
const SortAlgorithm algorithm
bool use_speculative_top_n(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc)
const size_t limit
size_t val
CHECK(cgen_state)
const SortInfo sort_info
Speculative top N algorithm.
Definition: sqldefs.h:71
std::shared_ptr< ResultSet > asRows(const RelAlgExecutionUnit &ra_exe_unit, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const QueryMemoryDescriptor &query_mem_desc, const Executor *executor, const size_t top_n, const bool desc) const
std::vector< std::pair< std::shared_ptr< Analyzer::Expr >, bool > > blacklist_
void add(const std::shared_ptr< Analyzer::Expr > expr, const bool desc)
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
Basic constructors and methods of the row set interface.
std::unordered_map< int64_t, SpeculativeTopNVal > map_