void ResultSet::keepFirstN(const size_t n) {
  invalidateCachedRowCount();
  keep_first_ = n;
}

void ResultSet::dropFirstN(const size_t n) {
  invalidateCachedRowCount();
  drop_first_ = n;
}

ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const ExecutorDeviceType device_type,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const Catalog_Namespace::Catalog* catalog,
                     const unsigned block_size,
                     const unsigned grid_size)
    : targets_(targets)
    , device_type_(device_type)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , catalog_(catalog)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , can_use_speculative_top_n_sort(std::nullopt) {}
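// The constructors in this file differ only in how much execution state they
// carry (lazy-fetch metadata, estimator buffers, or just an explanation
// string). A minimal usage sketch; the executor, descriptor and owner objects
// are assumed to come from the surrounding query-runner plumbing and are not
// defined here:
//
//   auto rs = std::make_shared<ResultSet>(targets,
//                                         ExecutorDeviceType::CPU,
//                                         query_mem_desc,
//                                         row_set_mem_owner,
//                                         executor->getCatalog(),
//                                         executor->blockSize(),
//                                         executor->gridSize());
//   rs->allocateStorage();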
ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
                     const std::vector<std::vector<const int8_t*>>& col_buffers,
                     const std::vector<std::vector<int64_t>>& frag_offsets,
                     const std::vector<int64_t>& consistent_frag_sizes,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const Catalog_Namespace::Catalog* catalog,
                     const unsigned block_size,
                     const unsigned grid_size)
    : targets_(targets)
    , device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , catalog_(catalog)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , lazy_fetch_info_(lazy_fetch_info)
    , col_buffers_{col_buffers}
    , frag_offsets_{frag_offsets}
    , consistent_frag_sizes_{consistent_frag_sizes}
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , geo_return_type_(GeoReturnType::WktString)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     Data_Namespace::DataMgr* data_mgr)
    : device_type_(device_type)
    , device_id_(device_id)
    , crt_row_buff_idx_(0)
    , estimator_(estimator)
    , data_mgr_(data_mgr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , geo_return_type_(GeoReturnType::WktString)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {
  if (device_type == ExecutorDeviceType::GPU) {
    device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
        data_mgr_, estimator_->getBufferSize(), device_id_);
    data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
                                          estimator_->getBufferSize(),
                                          device_id_,
                                          getQueryEngineCudaStreamForDevice(device_id_));
  } else {
    host_estimator_buffer_ =
        static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  }
}
ResultSet::ResultSet(const std::string& explanation)
    : device_type_(ExecutorDeviceType::CPU)
    , separate_varlen_storage_valid_(false)
    , explanation_(explanation)
    , just_explain_(true)
    , for_validation_only_(false)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::ResultSet(int64_t queue_time_ms,
                     int64_t render_time_ms,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
    : device_type_(ExecutorDeviceType::CPU)
    , row_set_mem_owner_(row_set_mem_owner)
    , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
    , separate_varlen_storage_valid_(false)
    , just_explain_(true)
    , for_validation_only_(false)
    , geo_return_type_(GeoReturnType::WktString)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::~ResultSet() {
  if (storage_) {
    if (!storage_->buff_is_provided_) {
      CHECK(storage_->getUnderlyingBuffer());
      free(storage_->getUnderlyingBuffer());
    }
  }
  for (auto& storage : appended_storage_) {
    if (storage && !storage->buff_is_provided_) {
      free(storage->getUnderlyingBuffer());
    }
  }
  if (host_estimator_buffer_) {
    free(host_estimator_buffer_);
  }
  if (device_estimator_buffer_) {
    CHECK(data_mgr_);
    data_mgr_->free(device_estimator_buffer_);
  }
}
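// Ownership note for the destructor above: storage buffers handed out by the
// RowSetMemoryOwner are marked as "provided" and released by that owner, so
// only buffers the ResultSet malloc'd itself (plus the host/device estimator
// buffers) are freed here.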
std::string ResultSet::summaryToString() const {
  std::ostringstream oss;
  oss << "Result Set Info" << std::endl;
  oss << "\tLayout: " << query_mem_desc_.queryDescTypeToString() << std::endl;
  oss << "\tColumns: " << colCount() << std::endl;
  oss << "\tRows: " << rowCount() << std::endl;
  oss << "\tEntry count: " << entryCount() << std::endl;
  const std::string is_empty = isEmpty() ? "True" : "False";
  oss << "\tIs empty: " << is_empty << std::endl;
  const std::string did_output_columnar = didOutputColumnar() ? "True" : "False";
  oss << "\tColumnar: " << did_output_columnar << std::endl;
  oss << "\tLazy-fetched columns: " << getNumColumnsLazyFetched() << std::endl;
  const std::string is_direct_columnar_conversion_possible =
      isDirectColumnarConversionPossible() ? "True" : "False";
  oss << "\tDirect columnar conversion possible: "
      << is_direct_columnar_conversion_possible << std::endl;

  size_t num_columns_zero_copy_columnarizable{0};
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    if (isZeroCopyColumnarConversionPossible(target_idx)) {
      num_columns_zero_copy_columnarizable++;
    }
  }
  oss << "\tZero-copy columnar conversion columns: "
      << num_columns_zero_copy_columnarizable << std::endl;

  oss << "\tPermutation size: " << permutation_.size() << std::endl;
  oss << "\tLimit: " << keep_first_ << std::endl;
  oss << "\tOffset: " << drop_first_ << std::endl;
  return oss.str();
}
const ResultSetStorage* ResultSet::allocateStorage() const {
  CHECK(!storage_);
  CHECK(row_set_mem_owner_);
  auto buff = row_set_mem_owner_->allocate(
      query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
  storage_.reset(
      new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
  return storage_.get();
}

const ResultSetStorage* ResultSet::allocateStorage(
    int8_t* buff,
    const std::vector<int64_t>& target_init_vals,
    std::shared_ptr<VarlenOutputInfo> varlen_output_info) const {
  CHECK(buff);
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  if (varlen_output_info) {
    storage_->varlen_output_info_ = varlen_output_info;
  }
  return storage_.get();
}
const ResultSetStorage* ResultSet::allocateStorage(
    const std::vector<int64_t>& target_init_vals) const {
  CHECK(!storage_);
  CHECK(row_set_mem_owner_);
  auto buff = row_set_mem_owner_->allocate(
      query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
  storage_.reset(
      new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}
size_t ResultSet::getCurrentRowBufferIndex() const {
  if (crt_row_buff_idx_ == 0) {
    throw std::runtime_error("current row buffer iteration index is undefined");
  }
  return crt_row_buff_idx_ - 1;
}
void ResultSet::append(ResultSet& that) {
  invalidateCachedRowCount();
  if (!that.storage_) {
    return;
  }
  appended_storage_.push_back(std::move(that.storage_));
  query_mem_desc_.setEntryCount(
      query_mem_desc_.getEntryCount() +
      appended_storage_.back()->query_mem_desc_.getEntryCount());
  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
  col_buffers_.insert(
      col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
  frag_offsets_.insert(
      frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
                                that.consistent_frag_sizes_.begin(),
                                that.consistent_frag_sizes_.end());
  chunk_iters_.insert(
      chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
  if (separate_varlen_storage_valid_) {
    CHECK(that.separate_varlen_storage_valid_);
    serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
                                     that.serialized_varlen_buffer_.begin(),
                                     that.serialized_varlen_buffer_.end());
  }
  for (auto& buff : that.literal_buffers_) {
    literal_buffers_.push_back(std::move(buff));
  }
}
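// append() merges another ResultSet's storage into this one: the entry count
// grows by the appended storage's entry count and the chunk / fragment
// bookkeeping is concatenated. A minimal sketch of the intended use (the
// per_device_results container is illustrative, not from this file):
//
//   ResultSetPtr merged = per_device_results.front();
//   for (size_t i = 1; i < per_device_results.size(); ++i) {
//     merged->append(*per_device_results[i]);
//   }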
ResultSetPtr ResultSet::copy() {
  auto timer = DEBUG_TIMER(__func__);
  if (!storage_) {
    return nullptr;
  }
  auto executor = getExecutor();
  CHECK(executor);
  ResultSetPtr copied_rs = std::make_shared<ResultSet>(targets_,
                                                       device_type_,
                                                       query_mem_desc_,
                                                       row_set_mem_owner_,
                                                       executor->getCatalog(),
                                                       executor->blockSize(),
                                                       executor->gridSize());

  auto allocate_and_copy_storage =
      [&](const ResultSetStorage* prev_storage) -> std::unique_ptr<ResultSetStorage> {
    const auto& prev_qmd = prev_storage->query_mem_desc_;
    const auto storage_size = prev_qmd.getBufferSizeBytes(device_type_);
    auto buff = row_set_mem_owner_->allocate(storage_size, /*thread_idx=*/0);
    std::unique_ptr<ResultSetStorage> new_storage;
    new_storage.reset(new ResultSetStorage(
        prev_storage->targets_, prev_qmd, buff, /*buff_is_provided=*/true));
    new_storage->target_init_vals_ = prev_storage->target_init_vals_;
    if (prev_storage->varlen_output_info_) {
      new_storage->varlen_output_info_ = prev_storage->varlen_output_info_;
    }
    memcpy(new_storage->buff_, prev_storage->buff_, storage_size);
    new_storage->query_mem_desc_ = prev_qmd;
    return new_storage;
  };

  copied_rs->storage_ = allocate_and_copy_storage(storage_.get());
  if (!appended_storage_.empty()) {
    for (const auto& storage : appended_storage_) {
      copied_rs->appended_storage_.push_back(allocate_and_copy_storage(storage.get()));
    }
  }
  std::copy(chunks_.begin(), chunks_.end(), std::back_inserter(copied_rs->chunks_));
  std::copy(chunk_iters_.begin(),
            chunk_iters_.end(),
            std::back_inserter(copied_rs->chunk_iters_));
  std::copy(col_buffers_.begin(),
            col_buffers_.end(),
            std::back_inserter(copied_rs->col_buffers_));
  std::copy(frag_offsets_.begin(),
            frag_offsets_.end(),
            std::back_inserter(copied_rs->frag_offsets_));
  std::copy(consistent_frag_sizes_.begin(),
            consistent_frag_sizes_.end(),
            std::back_inserter(copied_rs->consistent_frag_sizes_));
  if (separate_varlen_storage_valid_) {
    std::copy(serialized_varlen_buffer_.begin(),
              serialized_varlen_buffer_.end(),
              std::back_inserter(copied_rs->serialized_varlen_buffer_));
  }
  std::copy(literal_buffers_.begin(),
            literal_buffers_.end(),
            std::back_inserter(copied_rs->literal_buffers_));
  std::copy(lazy_fetch_info_.begin(),
            lazy_fetch_info_.end(),
            std::back_inserter(copied_rs->lazy_fetch_info_));

  copied_rs->permutation_ = permutation_;
  copied_rs->drop_first_ = drop_first_;
  copied_rs->keep_first_ = keep_first_;
  copied_rs->separate_varlen_storage_valid_ = separate_varlen_storage_valid_;
  copied_rs->query_exec_time_ = query_exec_time_;
  copied_rs->input_table_keys_ = input_table_keys_;
  copied_rs->target_meta_info_ = target_meta_info_;
  copied_rs->geo_return_type_ = geo_return_type_;
  copied_rs->query_plan_ = query_plan_;
  if (can_use_speculative_top_n_sort) {
    copied_rs->can_use_speculative_top_n_sort = can_use_speculative_top_n_sort;
  }

  return copied_rs;
}
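// copy() produces a deep copy: each storage buffer is re-allocated through the
// RowSetMemoryOwner and memcpy'd, while chunk handles, fragment offsets and
// metadata are copied by value, so the clone can outlive mutations of (or a
// sort applied to) the original result set.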
const ResultSetStorage* ResultSet::getStorage() const {
  return storage_.get();
}

size_t ResultSet::colCount() const {
  return just_explain_ ? 1 : targets_.size();
}
SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
  CHECK_LT(col_idx, targets_.size());
  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
                                            : targets_[col_idx].sql_type;
}
StringDictionaryProxy* ResultSet::getStringDictionaryProxy(int const dict_id) const {
  constexpr bool with_generation = true;
  return catalog_ ? row_set_mem_owner_->getOrAddStringDictProxy(
                        dict_id, with_generation, catalog_)
                  : row_set_mem_owner_->getStringDictProxy(dict_id);
}
// ResultSet::CellCallback: rewrites a dictionary-encoded string id in place
// through the proxy id map (used by eachCellInColumn below).
void operator()(int8_t const* const cell_ptr) const {
  using StringId = int32_t;
  StringId* const string_id_ptr =
      const_cast<StringId*>(reinterpret_cast<StringId const*>(cell_ptr));
  if (*string_id_ptr != null_int_) {
    *string_id_ptr = id_map_[*string_id_ptr];
  }
}
void ResultSet::translateDictEncodedColumns(std::vector<TargetInfo> const& targets,
                                            size_t const start_idx) {
  if (storage_) {
    CHECK_EQ(targets.size(), storage_->targets_.size());
    RowIterationState state;
    for (size_t target_idx = start_idx; target_idx < targets.size(); ++target_idx) {
      auto const& type_lhs = targets[target_idx].sql_type;
      if (type_lhs.is_dict_encoded_string()) {
        auto& type_rhs =
            const_cast<SQLTypeInfo&>(storage_->targets_[target_idx].sql_type);
        CHECK(type_rhs.is_dict_encoded_string());
        if (type_lhs.get_comp_param() != type_rhs.get_comp_param()) {
          auto* const sdp_lhs = getStringDictionaryProxy(type_lhs.get_comp_param());
          CHECK(sdp_lhs);
          auto const* const sdp_rhs = getStringDictionaryProxy(type_rhs.get_comp_param());
          CHECK(sdp_rhs);
          state.cur_target_idx_ = target_idx;
          CellCallback const translate_string_ids(sdp_lhs->transientUnion(*sdp_rhs),
                                                  inline_int_null_val(type_rhs));
          eachCellInColumn(state, translate_string_ids);
          type_rhs.set_comp_param(type_lhs.get_comp_param());
        }
      }
    }
  }
}
void ResultSet::eachCellInColumn(RowIterationState& state, CellCallback const& func) {
  size_t const target_idx = state.cur_target_idx_;
  QueryMemoryDescriptor& storage_qmd = storage_->query_mem_desc_;
  CHECK_LT(target_idx, lazy_fetch_info_.size());
  auto& col_lazy_fetch = lazy_fetch_info_[target_idx];
  CHECK(col_lazy_fetch.is_lazily_fetched);
  int const target_size = storage_->targets_[target_idx].sql_type.get_size();
  CHECK_LT(0, target_size) << storage_->targets_[target_idx].toString();
  size_t const nrows = storage_->binSearchRowCount();
  if (storage_qmd.didOutputColumnar()) {
    // Advance the iteration state one target slot at a time until it points at
    // target_idx, then visit each of the nrows cells of that column.
    if (state.buf_ptr_ == nullptr) {
      state.buf_ptr_ = get_cols_ptr(storage_->buff_, storage_qmd);
      state.compact_sz1_ = storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
                               ? storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
                               : query_mem_desc_.getEffectiveKeyWidth();
    }
    for (size_t j = state.prev_target_idx_; j < target_idx; ++j) {
      size_t const next_target_idx = j + 1;
      state.buf_ptr_ = advance_to_next_columnar_target_buff(
          state.buf_ptr_, storage_qmd, state.agg_idx_);
      auto const& next_agg_info = storage_->targets_[next_target_idx];
      state.agg_idx_ =
          advance_slot(state.agg_idx_, next_agg_info, separate_varlen_storage_valid_);
      state.compact_sz1_ = storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
                               ? storage_qmd.getPaddedSlotWidthBytes(state.agg_idx_)
                               : query_mem_desc_.getEffectiveKeyWidth();
    }
    for (size_t i = 0; i < nrows; ++i) {
      int64_t const pos = read_int_from_buff(state.buf_ptr_, state.compact_sz1_);
      CHECK_GE(pos, 0);
      auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
      CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
      int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
      func(col_frag + pos * target_size);
      state.buf_ptr_ += state.compact_sz1_;
    }
  } else {
    size_t const key_bytes_with_padding =
        align_to_int64(get_key_bytes_rowwise(storage_qmd));
    for (size_t i = 0; i < nrows; ++i) {
      int8_t const* const keys_ptr = row_ptr_rowwise(storage_->buff_, storage_qmd, i);
      int8_t const* const rowwise_target_ptr = keys_ptr + key_bytes_with_padding;
      int64_t pos = *reinterpret_cast<int64_t const*>(rowwise_target_ptr);
      auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
      CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
      int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
      func(col_frag + pos * target_size);
    }
  }
}
size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
  if (total_row_count < offset) {
    return 0;
  }
  size_t total_truncated_row_count = total_row_count - offset;
  if (limit) {
    return std::min(total_truncated_row_count, limit);
  }
  return total_truncated_row_count;
}
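// Worked example of the truncation arithmetic above: with total_row_count =
// 100, offset = 30 and limit = 50, the offset leaves 70 rows and the limit
// caps the result at 50. With limit = 0 (no LIMIT clause) all 70 remaining
// rows are kept, and an offset past the end yields 0.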
size_t ResultSet::rowCountImpl(const bool force_parallel) const {
  if (!permutation_.empty()) {
    // keep_first_ / drop_first_ correspond to SQL LIMIT / OFFSET.
    return get_truncated_row_count(permutation_.size(), keep_first_, drop_first_);
  }
  if (!storage_) {
    return 0;
  }
  CHECK(permutation_.empty());
  if (query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
    return binSearchRowCount();
  }
  constexpr size_t auto_parallel_row_count_threshold{20000UL};
  if (force_parallel || entryCount() >= auto_parallel_row_count_threshold) {
    return parallelRowCount();
  }
  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
  moveToBegin();
  size_t row_count{0};
  while (true) {
    auto crt_row = getNextRowUnlocked(false, false);
    if (crt_row.empty()) {
      break;
    }
    ++row_count;
  }
  moveToBegin();
  return row_count;
}

size_t ResultSet::rowCount(const bool force_parallel) const {
  const int64_t cached_row_count = cached_row_count_;
  if (cached_row_count != uninitialized_cached_row_count) {
    return cached_row_count;
  }
  setCachedRowCount(rowCountImpl(force_parallel));
  return cached_row_count_;
}
void ResultSet::setCachedRowCount(const size_t row_count) const {
  const int64_t signed_row_count = static_cast<int64_t>(row_count);
  const int64_t old_cached_row_count = cached_row_count_.exchange(signed_row_count);
  CHECK(old_cached_row_count == uninitialized_cached_row_count ||
        old_cached_row_count == signed_row_count);
}
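// cached_row_count_ is an atomic initialized to the sentinel
// uninitialized_cached_row_count; rowCount() publishes the first computed
// value through setCachedRowCount() and any later store must agree with it.
// Illustrative call sequence (assumed, not from this file):
//
//   rs->invalidateCachedRowCount();   // e.g. after append()
//   const size_t n = rs->rowCount();  // recomputed and re-cached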
size_t ResultSet::binSearchRowCount() const {
  if (!storage_) {
    return 0;
  }
  size_t row_count = storage_->binSearchRowCount();
  for (auto& s : appended_storage_) {
    row_count += s->binSearchRowCount();
  }
  return row_count;
}

size_t ResultSet::parallelRowCount() const {
  using namespace threading;
  auto execute_parallel_row_count = [this](const blocked_range<size_t>& r,
                                           size_t row_count) {
    for (size_t i = r.begin(); i < r.end(); ++i) {
      if (!isRowAtEmpty(i)) {
        ++row_count;
      }
    }
    return row_count;
  };
  const auto row_count = parallel_reduce(blocked_range<size_t>(0, entryCount()),
                                         size_t(0),
                                         execute_parallel_row_count,
                                         std::plus<size_t>());
  return get_truncated_row_count(row_count, getLimit(), drop_first_);
}

bool ResultSet::isEmpty() const {
  // True iff the result set contains no valid (non-empty) entries.
  return rowCount() == size_t(0);
}
bool ResultSet::definitelyHasNoRows() const {
  return (!storage_ && !estimator_ && !just_explain_) || cached_row_count_ == 0;
}

const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
  CHECK(storage_);
  return storage_->query_mem_desc_;
}

const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
  CHECK(storage_);
  return storage_->target_init_vals_;
}
int8_t* ResultSet::getDeviceEstimatorBuffer() const {
  CHECK(device_estimator_buffer_);
  return device_estimator_buffer_->getMemoryPtr();
}

int8_t* ResultSet::getHostEstimatorBuffer() const {
  return host_estimator_buffer_;
}
void ResultSet::syncEstimatorBuffer() const {
  CHECK(!host_estimator_buffer_);
  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
  host_estimator_buffer_ =
      static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  CHECK(device_estimator_buffer_);
  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
  auto allocator = std::make_unique<CudaAllocator>(
      data_mgr_, device_id_, getQueryEngineCudaStreamForDevice(device_id_));
  allocator->copyFromDevice(
      host_estimator_buffer_, device_buffer_ptr, estimator_->getBufferSize());
}
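// syncEstimatorBuffer() is the GPU-side path for reading back an estimator
// result: it callocs a host mirror of the device buffer and copies the device
// contents into it. Hedged usage sketch (the estimator ResultSet is assumed to
// come from an estimator query launched on GPU):
//
//   rs->syncEstimatorBuffer();
//   const int8_t* estimator_bitmap = rs->getHostEstimatorBuffer();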
void ResultSet::setQueueTime(const int64_t queue_time) {
  timings_.executor_queue_time = queue_time;
}

void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
  timings_.kernel_queue_time = kernel_queue_time;
}

void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
  timings_.compilation_queue_time += compilation_queue_time;
}

int64_t ResultSet::getQueueTime() const {
  return timings_.executor_queue_time + timings_.kernel_queue_time +
         timings_.compilation_queue_time;
}

int64_t ResultSet::getRenderTime() const {
  return timings_.render_time;
}

void ResultSet::moveToBegin() const {
  crt_row_buff_idx_ = 0;
  fetched_so_far_ = 0;
}

bool ResultSet::isTruncated() const {
  return keep_first_ + drop_first_;
}

bool ResultSet::isExplain() const {
  return just_explain_;
}

void ResultSet::setValidationOnlyRes() {
  for_validation_only_ = true;
}

bool ResultSet::isValidationOnlyRes() const {
  return for_validation_only_;
}
QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
    const QueryMemoryDescriptor& query_mem_desc) {
  auto query_mem_desc_copy = query_mem_desc;
  query_mem_desc_copy.resetGroupColWidths(
      std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
  if (query_mem_desc.didOutputColumnar()) {
    return query_mem_desc_copy;
  }
  query_mem_desc_copy.alignPaddedSlots();
  return query_mem_desc_copy;
}
void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
                     size_t top_n,
                     const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  if (!storage_) {
    return;
  }
  invalidateCachedRowCount();
  CHECK(!targets_.empty());
  if (canUseFastBaselineSort(order_entries, top_n)) {
    baselineSort(order_entries, top_n, executor);
    return;
  }
  if (query_mem_desc_.sortOnGpu()) {
    try {
      radixSortOnGpu(order_entries);
    } catch (const OutOfMemory&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    } catch (const std::bad_alloc&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    }
    return;
  }
  // Not strictly required, but allows the index buffer to stay 32-bit.
  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
    throw std::runtime_error("Sorting more than 4B elements not supported");
  }

  CHECK(permutation_.empty());

  if (top_n && g_parallel_top_min < entryCount()) {
    if (g_enable_watchdog && g_parallel_top_max < entryCount()) {
      throw WatchdogException("Sorting the result would be too slow");
    }
    parallelTop(order_entries, top_n, executor);
  } else {
    if (g_enable_watchdog && Executor::baseline_threshold < entryCount()) {
      throw WatchdogException("Sorting the result would be too slow");
    }
    permutation_.resize(query_mem_desc_.getEntryCount());
    // PermutationView shares a common API with parallelTop().
    PermutationView pv(permutation_.data(), 0, permutation_.size());
    pv = initPermutationBuffer(pv, 0, permutation_.size());
    if (top_n == 0) {
      top_n = pv.size();  // top_n == 0 means a full sort
    }
    pv = topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
    if (pv.size() < permutation_.size()) {
      permutation_.resize(pv.size());
      permutation_.shrink_to_fit();
    }
  }
}
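// Sorting never reorders the underlying result buffers; it fills permutation_
// with the indices of non-empty entries and orders that index vector. A
// compact sketch of the strategy selection above (illustrative, simplified):
//
//   GPU-sortable layout            -> radixSortOnGpu (CPU fallback on OOM)
//   large entry count with top_n   -> parallelTop (per-thread top-n + merge)
//   otherwise                      -> initPermutationBuffer + topPermutation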
void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
                             const size_t top_n,
                             const Executor* executor) {
  // With a single GPU, a multi-threaded CPU sort is usually faster.
  if (getGpuCount() > 1) {
    try {
      doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n, executor);
    } catch (...) {
      doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
    }
  } else {
    doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
  }
}
// Append the indices of non-empty entries in [begin, end) to the permutation.
PermutationView ResultSet::initPermutationBuffer(PermutationView permutation,
                                                 PermutationIdx const begin,
                                                 PermutationIdx const end) const {
  for (PermutationIdx i = begin; i < end; ++i) {
    const auto storage_lookup_result = findStorage(i);
    const auto lhs_storage = storage_lookup_result.storage_ptr;
    const auto off = storage_lookup_result.fixedup_entry_idx;
    CHECK(lhs_storage);
    if (!lhs_storage->isEmptyEntry(off)) {
      permutation.push_back(i);
    }
  }
  return permutation;
}
void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
                            const size_t top_n,
                            const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  const size_t nthreads = cpu_threads();

  // Split permutation_ into nthreads subranges and top-sort each in parallel.
  permutation_.resize(query_mem_desc_.getEntryCount());
  std::vector<PermutationView> permutation_views(nthreads);
  threading::task_group top_sort_threads;
  for (auto interval : makeIntervals<PermutationIdx>(0, permutation_.size(), nthreads)) {
    top_sort_threads.run(
        [this, &order_entries, &permutation_views, top_n, executor, interval] {
          PermutationView pv(permutation_.data() + interval.begin, 0, interval.size());
          pv = initPermutationBuffer(pv, interval.begin, interval.end);
          const auto compare = createComparator(order_entries, pv, executor, true);
          permutation_views[interval.index] = topPermutation(pv, top_n, compare);
        });
  }
  top_sort_threads.wait();

  // The subranges may have gaps (empty entries were skipped), so pack the
  // per-thread results into one contiguous prefix of permutation_.
  auto end = permutation_.begin() + permutation_views.front().size();
  for (size_t i = 1; i < nthreads; ++i) {
    std::copy(permutation_views[i].begin(), permutation_views[i].end(), end);
    end += permutation_views[i].size();
  }

  // Top-sort the merged prefix as a whole.
  PermutationView pv(permutation_.data(), end - permutation_.begin());
  const auto compare = createComparator(order_entries, pv, executor, false);
  pv = topPermutation(pv, top_n, compare);
  permutation_.resize(pv.size());
  permutation_.shrink_to_fit();
}
std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
  size_t fixedup_entry_idx = entry_idx;
  auto entry_count = storage_->query_mem_desc_.getEntryCount();
  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
  if (fixedup_entry_idx < entry_count) {
    return {0, fixedup_entry_idx};
  }
  fixedup_entry_idx -= entry_count;
  for (size_t i = 0; i < appended_storage_.size(); ++i) {
    const auto& desc = appended_storage_[i]->query_mem_desc_;
    CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
    entry_count = desc.getEntryCount();
    if (fixedup_entry_idx < entry_count) {
      return {i + 1, fixedup_entry_idx};
    }
    fixedup_entry_idx -= entry_count;
  }
  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
                << query_mem_desc_.getEntryCount();
  return {};
}
ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
          fixedup_entry_idx,
          stg_idx};
}
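// Entry indices are global across the head storage and all appended storages.
// Illustrative example (entry counts assumed): with storages of 1000, 1000 and
// 500 entries, entry_idx 2200 resolves to storage index 2 (the second appended
// storage) and a fixed-up local index of 200, which findStorage() returns
// together with the matching storage pointer.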
template <typename BUFFER_ITERATOR_TYPE>
void ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
  for (const auto& order_entry : order_entries_) {
    if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
      count_distinct_materialized_buffers_.emplace_back(
          materializeCountDistinctColumn(order_entry));
    }
  }
}
template <typename BUFFER_ITERATOR_TYPE>
ResultSet::ApproxQuantileBuffers ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumns() const {
  ResultSet::ApproxQuantileBuffers approx_quantile_materialized_buffers;
  for (const auto& order_entry : order_entries_) {
    if (result_set_->targets_[order_entry.tle_no - 1].agg_kind == kAPPROX_QUANTILE) {
      approx_quantile_materialized_buffers.emplace_back(
          materializeApproxQuantileColumn(order_entry));
    }
  }
  return approx_quantile_materialized_buffers;
}
template <typename BUFFER_ITERATOR_TYPE>
std::vector<int64_t> ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::
    materializeCountDistinctColumn(const Analyzer::OrderEntry& order_entry) const {
  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
  const CountDistinctDescriptor count_distinct_descriptor =
      result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
  const size_t num_non_empty_entries = permutation_.size();

  const auto work = [&](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      const PermutationIdx permuted_idx = permutation_[i];
      const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
      const auto storage = storage_lookup_result.storage_ptr;
      const auto off = storage_lookup_result.fixedup_entry_idx;
      const auto value = buffer_itr_.getColumnInternal(
          storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
      count_distinct_materialized_buffer[permuted_idx] =
          count_distinct_set_size(value.i1, count_distinct_descriptor);
    }
  };
  if (single_threaded_) {
    work(0, num_non_empty_entries);
  } else {
    threading::task_group thread_pool;
    for (auto interval : makeIntervals<size_t>(0, num_non_empty_entries, cpu_threads())) {
      thread_pool.run([=] { work(interval.begin, interval.end); });
    }
    thread_pool.wait();
  }
  return count_distinct_materialized_buffer;
}
double ResultSet::calculateQuantile(quantile::TDigest* const t_digest) {
  CHECK(t_digest);
  t_digest->mergeBufferFinal();
  double const quantile = t_digest->quantile();
  return boost::math::isnan(quantile) ? NULL_DOUBLE : quantile;
}
template <typename BUFFER_ITERATOR_TYPE>
ResultSet::ApproxQuantileBuffers::value_type
ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumn(
    const Analyzer::OrderEntry& order_entry) const {
  ResultSet::ApproxQuantileBuffers::value_type materialized_buffer(
      result_set_->query_mem_desc_.getEntryCount());
  const size_t size = permutation_.size();
  const auto work = [&](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      const PermutationIdx permuted_idx = permutation_[i];
      const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
      const auto storage = storage_lookup_result.storage_ptr;
      const auto off = storage_lookup_result.fixedup_entry_idx;
      const auto value = buffer_itr_.getColumnInternal(
          storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
      materialized_buffer[permuted_idx] =
          value.i1 ? calculateQuantile(reinterpret_cast<quantile::TDigest*>(value.i1))
                   : NULL_DOUBLE;
    }
  };
  if (single_threaded_) {
    work(0, size);
  } else {
    threading::task_group thread_pool;
    for (auto interval : makeIntervals<size_t>(0, size, cpu_threads())) {
      thread_pool.run([=] { work(interval.begin, interval.end); });
    }
    thread_pool.wait();
  }
  return materialized_buffer;
}
template <typename BUFFER_ITERATOR_TYPE>
bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
    const PermutationIdx lhs,
    const PermutationIdx rhs) const {
  // The comparison must define a strict weak ordering, otherwise std::sort can
  // read out of bounds or corrupt memory.
  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
  size_t materialized_count_distinct_buffer_idx{0};
  size_t materialized_approx_quantile_buffer_idx{0};

  for (const auto& order_entry : order_entries_) {
    CHECK_GE(order_entry.tle_no, 1);
    const auto& lhs_agg_info = lhs_storage->targets_[order_entry.tle_no - 1];
    const auto& rhs_agg_info = rhs_storage->targets_[order_entry.tle_no - 1];
    const auto lhs_entry_ti = get_compact_type(lhs_agg_info);
    const auto rhs_entry_ti = get_compact_type(rhs_agg_info);
    // Determine whether a float value was stored as a 4-byte float or widened
    // to the (usually 8-byte) compact slot width.
    bool float_argument_input = takes_float_argument(lhs_agg_info);
    if (lhs_entry_ti.get_type() == kFLOAT) {
      const auto is_col_lazy =
          !result_set_->lazy_fetch_info_.empty() &&
          result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
      if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
          sizeof(float)) {
        float_argument_input =
            result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
      }
    }

    if (UNLIKELY(is_distinct_target(lhs_agg_info))) {
      CHECK_LT(materialized_count_distinct_buffer_idx,
               count_distinct_materialized_buffers_.size());
      const auto& count_distinct_materialized_buffer =
          count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
      const auto lhs_sz = count_distinct_materialized_buffer[lhs];
      const auto rhs_sz = count_distinct_materialized_buffer[rhs];
      ++materialized_count_distinct_buffer_idx;
      if (lhs_sz == rhs_sz) {
        continue;
      }
      return (lhs_sz < rhs_sz) != order_entry.is_desc;
    } else if (UNLIKELY(lhs_agg_info.agg_kind == kAPPROX_QUANTILE)) {
      CHECK_LT(materialized_approx_quantile_buffer_idx,
               approx_quantile_materialized_buffers_.size());
      const auto& approx_quantile_materialized_buffer =
          approx_quantile_materialized_buffers_[materialized_approx_quantile_buffer_idx];
      const auto lhs_value = approx_quantile_materialized_buffer[lhs];
      const auto rhs_value = approx_quantile_materialized_buffer[rhs];
      ++materialized_approx_quantile_buffer_idx;
      if (lhs_value == rhs_value) {
        continue;
      } else if (!lhs_entry_ti.get_notnull()) {
        if (lhs_value == NULL_DOUBLE) {
          return order_entry.nulls_first;
        } else if (rhs_value == NULL_DOUBLE) {
          return !order_entry.nulls_first;
        }
      }
      return (lhs_value < rhs_value) != order_entry.is_desc;
    }

    const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
                                                     fixedup_lhs,
                                                     order_entry.tle_no - 1,
                                                     lhs_storage_lookup_result);
    const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
                                                     fixedup_rhs,
                                                     order_entry.tle_no - 1,
                                                     rhs_storage_lookup_result);

    if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
                 isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
      continue;
    }
    if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
                 !isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
      return order_entry.nulls_first;
    }
    if (UNLIKELY(isNull(rhs_entry_ti, rhs_v, float_argument_input) &&
                 !isNull(lhs_entry_ti, lhs_v, float_argument_input))) {
      return !order_entry.nulls_first;
    }

    if (LIKELY(lhs_v.isInt())) {
      CHECK(rhs_v.isInt());
      if (UNLIKELY(lhs_entry_ti.is_string() &&
                   lhs_entry_ti.get_compression() == kENCODING_DICT)) {
        CHECK_EQ(4, lhs_entry_ti.get_logical_size());
        const auto lhs_string_dict_proxy = executor_->getStringDictionaryProxy(
            lhs_entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
        const auto rhs_string_dict_proxy = executor_->getStringDictionaryProxy(
            rhs_entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
        const auto lhs_str = lhs_string_dict_proxy->getString(lhs_v.i1);
        const auto rhs_str = rhs_string_dict_proxy->getString(rhs_v.i1);
        if (lhs_str == rhs_str) {
          continue;
        }
        return (lhs_str < rhs_str) != order_entry.is_desc;
      }
      if (lhs_v.i1 == rhs_v.i1) {
        continue;
      }
      if (lhs_entry_ti.is_fp()) {
        if (float_argument_input) {
          const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
          return (lhs_dval < rhs_dval) != order_entry.is_desc;
        } else {
          const auto lhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
          return (lhs_dval < rhs_dval) != order_entry.is_desc;
        }
      }
      return (lhs_v.i1 < rhs_v.i1) != order_entry.is_desc;
    } else {
      if (lhs_v.isPair()) {
        CHECK(rhs_v.isPair());
        const auto lhs =
            pair_to_double({lhs_v.i1, lhs_v.i2}, lhs_entry_ti, float_argument_input);
        const auto rhs =
            pair_to_double({rhs_v.i1, rhs_v.i2}, rhs_entry_ti, float_argument_input);
        if (lhs == rhs) {
          continue;
        }
        return (lhs < rhs) != order_entry.is_desc;
      } else {
        CHECK(lhs_v.isStr() && rhs_v.isStr());
        const auto lhs = lhs_v.strVal();
        const auto rhs = rhs_v.strVal();
        if (lhs == rhs) {
          continue;
        }
        return (lhs < rhs) != order_entry.is_desc;
      }
    }
  }
  return false;
}
// Partial-sort permutation into the top (least, by compare) n elements. If
// permutation.size() <= n, sort the whole view. Returns a PermutationView
// with size() = min(n, permutation.size()).
PermutationView ResultSet::topPermutation(PermutationView permutation,
                                          const size_t n,
                                          const Comparator& compare) {
  if (n < permutation.size()) {
    std::partial_sort(
        permutation.begin(), permutation.begin() + n, permutation.end(), compare);
    permutation.resize(n);
  } else {
    std::sort(permutation.begin(), permutation.end(), compare);
  }
  return permutation;
}
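// std::partial_sort only orders the n smallest entries under compare, so a
// top-n query avoids a full sort of every non-empty entry: for k candidate
// entries and n << k this costs roughly O(k log n) comparisons rather than
// O(k log k).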
void ResultSet::radixSortOnGpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  auto data_mgr = &catalog_->getDataMgr();
  const int device_id{0};
  auto allocator = std::make_unique<CudaAllocator>(
      data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
  std::vector<int64_t*> group_by_buffers(block_size_);
  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
  auto dev_group_by_buffers =
      create_dev_group_by_buffers(allocator.get(),
                                  group_by_buffers,
                                  query_mem_desc_,
                                  block_size_,
                                  grid_size_,
                                  device_id,
                                  ExecutorDispatchMode::KernelPerFragment,
                                  /*num_input_rows=*/-1,
                                  /*prepend_index_buffer=*/true,
                                  /*always_init_group_by_on_host=*/true,
                                  /*use_bump_allocator=*/false,
                                  /*has_varlen_output=*/false,
                                  /*insitu_allocator=*/nullptr);
  inplace_sort_gpu(
      order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
  copy_group_by_buffers_from_gpu(
      *allocator,
      group_by_buffers,
      query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
      dev_group_by_buffers.data,
      query_mem_desc_,
      block_size_,
      grid_size_,
      device_id,
      /*prepend_index_buffer=*/false,
      /*has_varlen_output=*/false);
}
void ResultSet::radixSortOnCpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  CHECK(!query_mem_desc_.hasKeylessHash());
  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
  CHECK_EQ(size_t(1), order_entries.size());
  auto buffer_ptr = storage_->getUnderlyingBuffer();
  for (const auto& order_entry : order_entries) {
    const auto target_idx = order_entry.tle_no - 1;
    const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
        buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
    const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
    sort_groups_cpu(sortkey_val_buff,
                    &idx_buff[0],
                    query_mem_desc_.getEntryCount(),
                    order_entry.is_desc,
                    chosen_bytes);
    apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
                          &idx_buff[0],
                          query_mem_desc_.getEntryCount(),
                          &tmp_buff[0],
                          sizeof(int64_t));
    for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
         ++target_idx) {
      if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
        continue;
      }
      const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
      const auto satellite_val_buff = reinterpret_cast<int64_t*>(
          buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
      apply_permutation_cpu(satellite_val_buff,
                            &idx_buff[0],
                            query_mem_desc_.getEntryCount(),
                            &tmp_buff[0],
                            chosen_bytes);
    }
  }
}
const std::vector<std::string> ResultSet::getStringDictionaryPayloadCopy(
    const int dict_id) const {
  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
      dict_id, /*with_generation=*/true, catalog_);
  CHECK(sdp);
  return sdp->getDictionary()->copyStrings();
}
const std::pair<std::vector<int32_t>, std::vector<std::string>>
ResultSet::getUniqueStringsForDictEncodedTargetCol(const size_t col_idx) const {
  const auto col_type_info = getColType(col_idx);
  CHECK(col_type_info.is_dict_encoded_string());
  std::unordered_set<int32_t> unique_string_ids_set;
  const size_t num_entries = entryCount();
  std::vector<bool> targets_to_skip(colCount(), true);
  targets_to_skip[col_idx] = false;
  const auto null_val = inline_fixed_encoding_null_val(col_type_info);

  for (size_t row_idx = 0; row_idx < num_entries; ++row_idx) {
    const auto result_row = getRowAtNoTranslations(row_idx, targets_to_skip);
    if (!result_row.empty()) {
      const auto scalar_col_val = boost::get<ScalarTargetValue>(result_row[col_idx]);
      const int32_t string_id =
          static_cast<int32_t>(boost::get<int64_t>(scalar_col_val));
      if (string_id != null_val) {
        unique_string_ids_set.emplace(string_id);
      }
    }
  }

  const size_t num_unique_strings = unique_string_ids_set.size();
  std::vector<int32_t> unique_string_ids(num_unique_strings);
  size_t string_idx{0};
  for (const auto unique_string_id : unique_string_ids_set) {
    unique_string_ids[string_idx++] = unique_string_id;
  }

  const int32_t dict_id = col_type_info.get_comp_param();
  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
      dict_id, /*with_generation=*/true, catalog_);
  CHECK(sdp);

  return std::make_pair(unique_string_ids, sdp->getStrings(unique_string_ids));
}
bool ResultSet::isDirectColumnarConversionPossible() const {
  if (!g_enable_direct_columnarization) {
    return false;
  } else if (query_mem_desc_.didOutputColumnar()) {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::Projection ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::TableFunction ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  } else {
    CHECK(!(query_mem_desc_.getQueryDescriptionType() ==
            QueryDescriptionType::TableFunction));
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  }
}
bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
  return query_mem_desc_.didOutputColumnar() &&
         (query_mem_desc_.getQueryDescriptionType() ==
              QueryDescriptionType::Projection ||
          query_mem_desc_.getQueryDescriptionType() ==
              QueryDescriptionType::TableFunction) &&
         appended_storage_.empty() && storage_ &&
         (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
}

const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
}
std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
  std::vector<bool> target_bitmap(targets_.size(), true);
  size_t num_single_slot_targets = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    const auto& sql_type = targets_[target_idx].sql_type;
    if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
      target_bitmap[target_idx] = false;
    } else if (sql_type.is_varlen()) {
      target_bitmap[target_idx] = false;
    } else {
      num_single_slot_targets++;
    }
  }
  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
}
std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
    const {
  CHECK(isDirectColumnarConversionPossible());
  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();

  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
    const auto& target = targets_[target_idx];
    if (single_slot_targets[target_idx] &&
        (is_distinct_target(target) || target.agg_kind == kAPPROX_QUANTILE ||
         (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
      single_slot_targets[target_idx] = false;
      num_single_slot_targets--;
    }
  }
  CHECK_GE(num_single_slot_targets, size_t(0));
  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
}
std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
  std::vector<size_t> slot_indices(targets_.size(), 0);
  size_t slot_index = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    slot_indices[target_idx] = slot_index;
    slot_index = advance_slot(slot_index, targets_[target_idx], false);
  }
  return slot_indices;
}
bool can_use_parallel_algorithms(const ResultSet& rows) {
  return !rows.isTruncated();
}
std::optional<size_t> first_dict_encoded_idx(std::vector<TargetInfo> const& targets) {
  auto const itr = std::find_if(targets.begin(), targets.end(), IsDictEncodedStr{});
  return itr == targets.end() ? std::nullopt
                              : std::make_optional<size_t>(itr - targets.begin());
}
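// Example: for targets describing "SELECT city, COUNT(*) ..." where only the
// `city` column is a dictionary-encoded TEXT target, first_dict_encoded_idx()
// returns std::optional<size_t>{0}; with no dictionary-encoded targets it
// returns std::nullopt.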