OmniSciDB  c1a53651b2
ColumnarResults Class Reference

#include <ColumnarResults.h>

Public Types

using ReadFunction = std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)>
 
using WriteFunction = std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)>
 

Public Member Functions

 ColumnarResults (const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const size_t executor_id, const size_t thread_idx, const bool is_parallel_execution_enforced=false)
 
 ColumnarResults (const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const int8_t *one_col_buffer, const size_t num_rows, const SQLTypeInfo &target_type, const size_t executor_id, const size_t thread_idx)
 
const std::vector< int8_t * > & getColumnBuffers () const
 
const size_t size () const
 
const SQLTypeInfo & getColumnType (const int col_id) const
 
bool isParallelConversion () const
 
bool isDirectColumnarConversionPossible () const
 

Static Public Member Functions

static std::unique_ptr< ColumnarResults > mergeResults (const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results)
 

Protected Attributes

std::vector< int8_t * > column_buffers_
 
size_t num_rows_
 

Private Member Functions

 ColumnarResults (const size_t num_rows, const std::vector< SQLTypeInfo > &target_types, const std::vector< size_t > &padded_target_sizes)
 
void writeBackCell (const TargetValue &col_val, const size_t row_idx, const size_t column_idx, std::mutex *write_mutex=nullptr)
 
void materializeAllColumnsDirectly (const ResultSet &rows, const size_t num_columns)
 
void materializeAllColumnsThroughIteration (const ResultSet &rows, const size_t num_columns)
 
void materializeAllColumnsGroupBy (const ResultSet &rows, const size_t num_columns)
 
void materializeAllColumnsProjection (const ResultSet &rows, const size_t num_columns)
 
void materializeAllColumnsTableFunction (const ResultSet &rows, const size_t num_columns)
 
void copyAllNonLazyColumns (const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
 
void materializeAllLazyColumns (const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
 
void locateAndCountEntries (const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const
 
void compactAndCopyEntries (const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
 
void compactAndCopyEntriesWithTargetSkipping (const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
 
void compactAndCopyEntriesWithoutTargetSkipping (const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
 
template<typename DATA_TYPE >
void writeBackCellDirect (const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function)
 
std::vector< WriteFunction > initWriteFunctions (const ResultSet &rows, const std::vector< bool > &targets_to_skip={})
 
template<QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
std::vector< ReadFunction > initReadFunctions (const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
 
std::tuple< std::vector< WriteFunction >, std::vector< ReadFunction > > initAllConversionFunctions (const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
 
template<>
void writeBackCellDirect (const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_from_function)
 
template<>
void writeBackCellDirect (const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_from_function)
 

Private Attributes

const std::vector< SQLTypeInfo > target_types_
 
bool parallel_conversion_
 
bool direct_columnar_conversion_
 
size_t thread_idx_
 
std::shared_ptr< Executor > executor_
 
std::vector< size_t > padded_target_sizes_
 

Detailed Description

Definition at line 61 of file ColumnarResults.h.

Member Typedef Documentation

using ColumnarResults::ReadFunction = std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)>

Definition at line 98 of file ColumnarResults.h.

using ColumnarResults::WriteFunction = std::function<void(const ResultSet&, const size_t, const size_t, const size_t, const size_t, const ReadFunction&)>

Definition at line 107 of file ColumnarResults.h.
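The pair works as a two-step pipeline: a ReadFunction decodes one cell (a particular target of a particular entry) out of the result set's storage, and a WriteFunction stores that value at a compacted position in the output column buffer, calling the supplied ReadFunction to obtain it. The following self-contained sketch mirrors that contract with stand-in types (Storage, ReadFn, WriteFn, and the buffer layout are all illustrative, not part of the library):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Stand-in for the ResultSet storage a read function decodes from.
struct Storage {
  std::vector<int64_t> slots;  // one value per entry (slot layout elided)
};

using ReadFn = std::function<int64_t(const Storage&, size_t /*entry*/, size_t /*slot*/)>;
using WriteFn = std::function<void(const Storage&, size_t /*in*/, size_t /*out*/,
                                   std::vector<int64_t>& /*column*/, const ReadFn&)>;

int main() {
  Storage storage{{10, 20, 30}};
  // Decode one cell from the input storage.
  ReadFn read = [](const Storage& s, size_t entry, size_t /*slot*/) {
    return s.slots[entry];
  };
  // Fetch through the read function, then store at the compacted position.
  WriteFn write = [](const Storage& s, size_t in, size_t out,
                     std::vector<int64_t>& column, const ReadFn& read_fn) {
    column[out] = read_fn(s, in, 0);
  };
  std::vector<int64_t> column(3);
  for (size_t i = 0; i < 3; ++i) {
    write(storage, i, i, column, read);
  }
  std::cout << column[2] << '\n';  // prints 30
}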

Constructor & Destructor Documentation

ColumnarResults::ColumnarResults ( const std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const ResultSet &  rows,
const size_t  num_columns,
const std::vector< SQLTypeInfo > &  target_types,
const size_t  executor_id,
const size_t  thread_idx,
const bool  is_parallel_execution_enforced = false 
)

Definition at line 149 of file ColumnarResults.cpp.

References CHECK, CHECK_EQ, column_buffers_, anonymous_namespace{ColumnarResults.cpp}::countNumberOfValues(), DEBUG_TIMER, executor_, Executor::getExecutor(), getVarlenArrayBufferSize(), initializeVarlenArray(), isDirectColumnarConversionPossible(), FlatBufferManager::isFlatBuffer(), kENCODING_NONE, materializeAllColumnsDirectly(), materializeAllColumnsThroughIteration(), num_rows_, padded_target_sizes_, and thread_idx_.

Referenced by mergeResults().

ColumnarResults::ColumnarResults(
    const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const ResultSet& rows,
    const size_t num_columns,
    const std::vector<SQLTypeInfo>& target_types,
    const size_t executor_id,
    const size_t thread_idx,
    const bool is_parallel_execution_enforced)
    : column_buffers_(num_columns)
    , num_rows_(result_set::use_parallel_algorithms(rows) ||
                        rows.isDirectColumnarConversionPossible()
                    ? rows.entryCount()
                    : rows.rowCount())
    , target_types_(target_types)
    , parallel_conversion_(is_parallel_execution_enforced
                               ? true
                               : result_set::use_parallel_algorithms(rows))
    , direct_columnar_conversion_(rows.isDirectColumnarConversionPossible())
    , thread_idx_(thread_idx)
    , padded_target_sizes_(get_padded_target_sizes(rows, target_types)) {
  auto timer = DEBUG_TIMER(__func__);
  column_buffers_.resize(num_columns);
  executor_ = Executor::getExecutor(executor_id);
  CHECK(executor_);
  CHECK_EQ(padded_target_sizes_.size(), target_types.size());
  for (size_t i = 0; i < num_columns; ++i) {
    const auto ti = target_types[i];
    if (ti.is_array()) {
      if (isDirectColumnarConversionPossible() &&
          rows.isZeroCopyColumnarConversionPossible(i)) {
        const int8_t* col_buf = rows.getColumnarBuffer(i);
        CHECK(FlatBufferManager::isFlatBuffer(col_buf));
        column_buffers_[i] = const_cast<int8_t*>(col_buf);
      } else {
        int64_t values_count = countNumberOfValues(rows, i);
        const int64_t flatbuffer_size =
            getVarlenArrayBufferSize(num_rows_, values_count, ti);
        column_buffers_[i] = row_set_mem_owner->allocate(flatbuffer_size, thread_idx_);
        FlatBufferManager m{column_buffers_[i]};
        initializeVarlenArray(m, num_rows_, values_count, ti);
      }
    } else {
      const bool is_varlen =
          (ti.is_string() && ti.get_compression() == kENCODING_NONE) || ti.is_geometry();
      if (is_varlen) {
        throw ColumnarConversionNotSupported();
      }
      if (!isDirectColumnarConversionPossible() ||
          !rows.isZeroCopyColumnarConversionPossible(i)) {
        column_buffers_[i] =
            row_set_mem_owner->allocate(num_rows_ * padded_target_sizes_[i], thread_idx_);
      }
    }
  }

  if (isDirectColumnarConversionPossible() && rows.entryCount() > 0) {
    materializeAllColumnsDirectly(rows, num_columns);
  } else {
    materializeAllColumnsThroughIteration(rows, num_columns);
  }
}


ColumnarResults::ColumnarResults ( const std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const int8_t *  one_col_buffer,
const size_t  num_rows,
const SQLTypeInfo &  target_type,
const size_t  executor_id,
const size_t  thread_idx 
)

Definition at line 209 of file ColumnarResults.cpp.

ColumnarResults::ColumnarResults(
    const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const int8_t* one_col_buffer,
    const size_t num_rows,
    const SQLTypeInfo& target_type,
    const size_t executor_id,
    const size_t thread_idx)
    : column_buffers_(1)
    , num_rows_(num_rows)
    , target_types_{target_type}
    , parallel_conversion_(false)
    , direct_columnar_conversion_(false)
    , thread_idx_(thread_idx) {
  auto timer = DEBUG_TIMER(__func__);
  const bool is_varlen =
      target_type.is_array() ||
      (target_type.is_string() && target_type.get_compression() == kENCODING_NONE) ||
      target_type.is_geometry();
  if (is_varlen) {
    throw ColumnarConversionNotSupported();
  }
  executor_ = Executor::getExecutor(executor_id);
  padded_target_sizes_.emplace_back(target_type.get_size());
  CHECK(executor_);
  const auto buf_size = num_rows * target_type.get_size();
  column_buffers_[0] =
      reinterpret_cast<int8_t*>(row_set_mem_owner->allocate(buf_size, thread_idx_));
  memcpy(((void*)column_buffers_[0]), one_col_buffer, buf_size);
}
ColumnarResults::ColumnarResults ( const size_t  num_rows,
const std::vector< SQLTypeInfo > &  target_types,
const std::vector< size_t > &  padded_target_sizes 
)
inlineprivate

Definition at line 114 of file ColumnarResults.h.

ColumnarResults::ColumnarResults(const size_t num_rows,
                                 const std::vector<SQLTypeInfo>& target_types,
                                 const std::vector<size_t>& padded_target_sizes)
    : num_rows_(num_rows)
    , target_types_(target_types)
    , padded_target_sizes_(padded_target_sizes) {}

Member Function Documentation

void ColumnarResults::compactAndCopyEntries ( const ResultSet &  rows,
const ColumnBitmap &  bitmap,
const std::vector< size_t > &  non_empty_per_thread,
const size_t  num_columns,
const size_t  entry_count,
const size_t  num_threads,
const size_t  size_per_thread 
)
private

This function goes through all non-empty elements marked in the bitmap data structure and stores them into the output column buffers. The output column buffers are compacted, with no holes in them.

TODO(Saman): if necessary, we can look into the distribution of non-empty entries and choose a different load-balanced strategy (assigning equal number of non-empties to each thread) as opposed to equal partitioning of the bitmap
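The exclusive scan mentioned above is what lets each thread write its non-empty entries into a disjoint slice of the output without synchronization. A minimal, self-contained illustration of that offset computation (the per-thread counts are made up):

#include <cassert>
#include <cstddef>
#include <iterator>
#include <numeric>
#include <vector>

int main() {
  // Per-thread counts of non-empty entries (illustrative values).
  const std::vector<size_t> non_empty_per_thread{3, 0, 5, 2};
  // Exclusive scan: global_offsets[t] is where thread t starts writing in
  // the compacted output; the last element is the total non-empty count.
  std::vector<size_t> global_offsets(non_empty_per_thread.size() + 1, 0);
  std::partial_sum(non_empty_per_thread.begin(),
                   non_empty_per_thread.end(),
                   std::next(global_offsets.begin()));
  assert(global_offsets[2] == 3);       // thread 2 starts after threads 0 and 1
  assert(global_offsets.back() == 10);  // total compacted row count
}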

Definition at line 791 of file ColumnarResults.cpp.

References CHECK, CHECK_EQ, compactAndCopyEntriesWithoutTargetSkipping(), compactAndCopyEntriesWithTargetSkipping(), GroupByBaselineHash, GroupByPerfectHash, isDirectColumnarConversionPossible(), and gpu_enabled::partial_sum().

Referenced by materializeAllColumnsGroupBy().

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
        rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash);
  CHECK_EQ(num_threads, non_empty_per_thread.size());

  // compute the exclusive scan over all non-empty totals
  std::vector<size_t> global_offsets(num_threads + 1, 0);
  std::partial_sum(non_empty_per_thread.begin(),
                   non_empty_per_thread.end(),
                   std::next(global_offsets.begin()));

  const auto slot_idx_per_target_idx = rows.getSlotIndicesForTargetIndices();
  const auto [single_slot_targets_to_skip, num_single_slot_targets] =
      rows.getSupportedSingleSlotTargetBitmap();

  // We skip multi-slot targets (e.g., AVG). These skipped targets are treated
  // differently and accessed through the result set's iterator
  if (num_single_slot_targets < num_columns) {
    compactAndCopyEntriesWithTargetSkipping(rows,
                                            bitmap,
                                            non_empty_per_thread,
                                            global_offsets,
                                            single_slot_targets_to_skip,
                                            slot_idx_per_target_idx,
                                            num_columns,
                                            entry_count,
                                            num_threads,
                                            size_per_thread);
  } else {
    compactAndCopyEntriesWithoutTargetSkipping(rows,
                                               bitmap,
                                               non_empty_per_thread,
                                               global_offsets,
                                               slot_idx_per_target_idx,
                                               num_columns,
                                               entry_count,
                                               num_threads,
                                               size_per_thread);
  }
}


void ColumnarResults::compactAndCopyEntriesWithoutTargetSkipping ( const ResultSet &  rows,
const ColumnBitmap &  bitmap,
const std::vector< size_t > &  non_empty_per_thread,
const std::vector< size_t > &  global_offsets,
const std::vector< size_t > &  slot_idx_per_target_idx,
const size_t  num_columns,
const size_t  entry_count,
const size_t  num_threads,
const size_t  size_per_thread 
)
private

This function takes a bitmap of non-empty entries within the result set's storage and compacts and copies those contents into the output column_buffers_. In this variation, all targets are assumed to be single-slot and thus can be directly columnarized.

Definition at line 966 of file ColumnarResults.cpp.

References threading_serial::async(), CHECK, CHECK_EQ, Executor::ERR_INTERRUPTED, executor_, g_enable_non_kernel_time_query_interrupt, QueryExecutionError::getErrorCode(), GroupByBaselineHash, GroupByPerfectHash, initAllConversionFunctions(), isDirectColumnarConversionPossible(), and UNLIKELY.

Referenced by compactAndCopyEntries().

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
        rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash);

  const auto [write_functions, read_functions] =
      initAllConversionFunctions(rows, slot_idx_per_target_idx);
  CHECK_EQ(write_functions.size(), num_columns);
  CHECK_EQ(read_functions.size(), num_columns);
  auto do_work = [&rows,
                  &bitmap,
                  &global_offsets,
                  &num_columns,
                  &slot_idx_per_target_idx,
                  &write_functions = write_functions,
                  &read_functions = read_functions](size_t& entry_idx,
                                                    size_t& non_empty_idx,
                                                    const size_t total_non_empty,
                                                    const size_t local_idx,
                                                    const size_t thread_idx,
                                                    const size_t end_idx) {
    if (non_empty_idx >= total_non_empty) {
      // all non-empty entries have been written back
      entry_idx = end_idx;
      return;
    }
    const size_t output_buffer_row_idx = global_offsets[thread_idx] + non_empty_idx;
    if (bitmap.get(local_idx, thread_idx)) {
      for (size_t column_idx = 0; column_idx < num_columns; column_idx++) {
        write_functions[column_idx](rows,
                                    entry_idx,
                                    output_buffer_row_idx,
                                    column_idx,
                                    slot_idx_per_target_idx[column_idx],
                                    read_functions[column_idx]);
      }
      non_empty_idx++;
    }
  };
  auto compact_buffer_func = [&non_empty_per_thread, &do_work, this](
                                 const size_t start_index,
                                 const size_t end_index,
                                 const size_t thread_idx) {
    const size_t total_non_empty = non_empty_per_thread[thread_idx];
    size_t non_empty_idx = 0;
    size_t local_idx = 0;
    if (g_enable_non_kernel_time_query_interrupt) {
      for (size_t entry_idx = start_index; entry_idx < end_index;
           entry_idx++, local_idx++) {
        if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
                     executor_->checkNonKernelTimeInterrupted())) {
          throw QueryExecutionError(Executor::ERR_INTERRUPTED);
        }
        do_work(
            entry_idx, non_empty_idx, total_non_empty, local_idx, thread_idx, end_index);
      }
    } else {
      for (size_t entry_idx = start_index; entry_idx < end_index;
           entry_idx++, local_idx++) {
        do_work(
            entry_idx, non_empty_idx, total_non_empty, local_idx, thread_idx, end_index);
      }
    }
  };

  std::vector<std::future<void>> compaction_threads;
  for (size_t thread_idx = 0; thread_idx < num_threads; thread_idx++) {
    const size_t start_entry = thread_idx * size_per_thread;
    const size_t end_entry = std::min(start_entry + size_per_thread, entry_count);
    compaction_threads.push_back(std::async(
        std::launch::async, compact_buffer_func, start_entry, end_entry, thread_idx));
  }

  try {
    for (auto& child : compaction_threads) {
      child.wait();
    }
  } catch (QueryExecutionError& e) {
    if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
      throw QueryExecutionError(Executor::ERR_INTERRUPTED);
    }
    throw e;
  } catch (...) {
    throw;
  }
}


void ColumnarResults::compactAndCopyEntriesWithTargetSkipping ( const ResultSet &  rows,
const ColumnBitmap &  bitmap,
const std::vector< size_t > &  non_empty_per_thread,
const std::vector< size_t > &  global_offsets,
const std::vector< bool > &  targets_to_skip,
const std::vector< size_t > &  slot_idx_per_target_idx,
const size_t  num_columns,
const size_t  entry_count,
const size_t  num_threads,
const size_t  size_per_thread 
)
private

This function takes a bitmap of non-empty entries within the result set's storage and compacts and copies those contents into the output column_buffers_. In this variation, multi-slot targets (e.g., AVG) are handled through the result set's existing iterators, while everything else is directly columnarized.
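Note the polarity of targets_to_skip: an entry of true means the target is single-slot and is skipped by the iterator path (it goes through the direct write functions instead), while false routes the target through writeBackCell on the materialized row. A small stand-alone illustration of the two-way split (the mask values are made up):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // true  = single-slot target: direct write-function path
  // false = multi-slot target (e.g., AVG): recovered through the row iterator
  const std::vector<bool> targets_to_skip{true, false, true};
  for (size_t column_idx = 0; column_idx < targets_to_skip.size(); ++column_idx) {
    if (!targets_to_skip.empty() && !targets_to_skip[column_idx]) {
      std::cout << "column " << column_idx << ": iterator path (writeBackCell)\n";
      continue;
    }
    std::cout << "column " << column_idx << ": direct write-function path\n";
  }
}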

Definition at line 846 of file ColumnarResults.cpp.

References threading_serial::async(), CHECK, CHECK_EQ, Executor::ERR_INTERRUPTED, executor_, g_enable_non_kernel_time_query_interrupt, ColumnBitmap::get(), QueryExecutionError::getErrorCode(), GroupByBaselineHash, GroupByPerfectHash, initAllConversionFunctions(), isDirectColumnarConversionPossible(), UNLIKELY, and writeBackCell().

Referenced by compactAndCopyEntries().

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
        rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash);

  const auto [write_functions, read_functions] =
      initAllConversionFunctions(rows, slot_idx_per_target_idx, targets_to_skip);
  CHECK_EQ(write_functions.size(), num_columns);
  CHECK_EQ(read_functions.size(), num_columns);
  std::mutex write_mutex;
  auto do_work = [this,
                  &bitmap,
                  &rows,
                  &slot_idx_per_target_idx,
                  &global_offsets,
                  &targets_to_skip,
                  &num_columns,
                  &write_mutex,
                  &write_functions = write_functions,
                  &read_functions = read_functions](size_t& non_empty_idx,
                                                    const size_t total_non_empty,
                                                    const size_t local_idx,
                                                    size_t& entry_idx,
                                                    const size_t thread_idx,
                                                    const size_t end_idx) {
    if (non_empty_idx >= total_non_empty) {
      // all non-empty entries have been written back
      entry_idx = end_idx;
    }
    const size_t output_buffer_row_idx = global_offsets[thread_idx] + non_empty_idx;
    if (bitmap.get(local_idx, thread_idx)) {
      // targets that are recovered from the result set iterators:
      const auto crt_row = rows.getRowAtNoTranslations(entry_idx, targets_to_skip);
      for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) {
        if (!targets_to_skip.empty() && !targets_to_skip[column_idx]) {
          writeBackCell(
              crt_row[column_idx], output_buffer_row_idx, column_idx, &write_mutex);
        }
      }
      // targets that are copied directly without any translation/decoding from
      // result set
      for (size_t column_idx = 0; column_idx < num_columns; column_idx++) {
        if (!targets_to_skip.empty() && !targets_to_skip[column_idx]) {
          continue;
        }
        write_functions[column_idx](rows,
                                    entry_idx,
                                    output_buffer_row_idx,
                                    column_idx,
                                    slot_idx_per_target_idx[column_idx],
                                    read_functions[column_idx]);
      }
      non_empty_idx++;
    }
  };

  auto compact_buffer_func = [&non_empty_per_thread, &do_work, this](
                                 const size_t start_index,
                                 const size_t end_index,
                                 const size_t thread_idx) {
    const size_t total_non_empty = non_empty_per_thread[thread_idx];
    size_t non_empty_idx = 0;
    size_t local_idx = 0;
    if (g_enable_non_kernel_time_query_interrupt) {
      for (size_t entry_idx = start_index; entry_idx < end_index;
           entry_idx++, local_idx++) {
        if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
                     executor_->checkNonKernelTimeInterrupted())) {
          throw QueryExecutionError(Executor::ERR_INTERRUPTED);
        }
        do_work(
            non_empty_idx, total_non_empty, local_idx, entry_idx, thread_idx, end_index);
      }
    } else {
      for (size_t entry_idx = start_index; entry_idx < end_index;
           entry_idx++, local_idx++) {
        do_work(
            non_empty_idx, total_non_empty, local_idx, entry_idx, thread_idx, end_index);
      }
    }
  };

  std::vector<std::future<void>> compaction_threads;
  for (size_t thread_idx = 0; thread_idx < num_threads; thread_idx++) {
    const size_t start_entry = thread_idx * size_per_thread;
    const size_t end_entry = std::min(start_entry + size_per_thread, entry_count);
    compaction_threads.push_back(std::async(
        std::launch::async, compact_buffer_func, start_entry, end_entry, thread_idx));
  }

  try {
    for (auto& child : compaction_threads) {
      child.wait();
    }
  } catch (QueryExecutionError& e) {
    if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
      throw QueryExecutionError(Executor::ERR_INTERRUPTED);
    }
    throw e;
  } catch (...) {
    throw;
  }
}


void ColumnarResults::copyAllNonLazyColumns ( const std::vector< ColumnLazyFetchInfo > &  lazy_fetch_info,
const ResultSet &  rows,
const size_t  num_columns 
)
private

Definition at line 545 of file ColumnarResults.cpp.

References threading_serial::async(), CHECK, column_buffers_, isDirectColumnarConversionPossible(), num_rows_, padded_target_sizes_, and TableFunction.

Referenced by materializeAllColumnsProjection(), and materializeAllColumnsTableFunction().

{
  CHECK(isDirectColumnarConversionPossible());
  const auto is_column_non_lazily_fetched = [&lazy_fetch_info](const size_t col_idx) {
    // Saman: make sure when this lazy_fetch_info is empty
    if (lazy_fetch_info.empty()) {
      return true;
    } else {
      return !lazy_fetch_info[col_idx].is_lazily_fetched;
    }
  };

  // parallelized by assigning each column to a thread
  std::vector<std::future<void>> direct_copy_threads;
  for (size_t col_idx = 0; col_idx < num_columns; col_idx++) {
    if (rows.isZeroCopyColumnarConversionPossible(col_idx)) {
      CHECK(!column_buffers_[col_idx]);
      // The name of the method implies a copy but this is not a copy!!
      column_buffers_[col_idx] = const_cast<int8_t*>(rows.getColumnarBuffer(col_idx));
    } else if (is_column_non_lazily_fetched(col_idx)) {
      CHECK(!(rows.query_mem_desc_.getQueryDescriptionType() ==
              QueryDescriptionType::TableFunction));
      direct_copy_threads.push_back(std::async(
          std::launch::async,
          [&rows, this](const size_t column_index) {
            const size_t column_size = num_rows_ * padded_target_sizes_[column_index];
            rows.copyColumnIntoBuffer(
                column_index, column_buffers_[column_index], column_size);
          },
          col_idx));
    }
  }

  for (auto& child : direct_copy_threads) {
    child.wait();
  }
}


const std::vector<int8_t*>& ColumnarResults::getColumnBuffers ( ) const
inline

Definition at line 82 of file ColumnarResults.h.

References column_buffers_.

Referenced by ColumnFetcher::transferColumnIfNeeded().

{ return column_buffers_; }


const SQLTypeInfo& ColumnarResults::getColumnType ( const int  col_id) const
inline

Definition at line 86 of file ColumnarResults.h.

References CHECK_GE, CHECK_LT, and target_types_.

Referenced by ColumnFetcher::transferColumnIfNeeded().

{
  CHECK_GE(col_id, 0);
  CHECK_LT(static_cast<size_t>(col_id), target_types_.size());
  return target_types_[col_id];
}


std::tuple< std::vector< ColumnarResults::WriteFunction >, std::vector< ColumnarResults::ReadFunction > > ColumnarResults::initAllConversionFunctions ( const ResultSet &  rows,
const std::vector< size_t > &  slot_idx_per_target_idx,
const std::vector< bool > &  targets_to_skip = {} 
)
private

This function goes through all target types in the output and chooses the appropriate write and read function for each target. These functions are then applied per row and per target: read functions fetch a cell's data content (a particular target in a row), and write functions store the cell's content at the proper position in the output column buffers.
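Because the read functions are templated on the query description type and on whether the output is columnar, the runtime flags are folded into template arguments once, up front, so the per-row loop runs on a fixed instantiation. A self-contained sketch of that dispatch pattern (QueryType, run_conversion, and dispatch are illustrative stand-ins):

#include <iostream>

enum class QueryType { GroupByPerfectHash, GroupByBaselineHash };

// Stand-in for initReadFunctions<QUERY_TYPE, COLUMNAR_OUTPUT>: the template
// parameters are compile-time constants inside the hot loop.
template <QueryType QUERY_TYPE, bool COLUMNAR_OUTPUT>
void run_conversion() {
  std::cout << "instantiation chosen: columnar_output=" << COLUMNAR_OUTPUT << '\n';
}

// Fold the two runtime flags into one of four template instantiations.
void dispatch(const QueryType type, const bool columnar_output) {
  if (type == QueryType::GroupByPerfectHash) {
    if (columnar_output) {
      run_conversion<QueryType::GroupByPerfectHash, true>();
    } else {
      run_conversion<QueryType::GroupByPerfectHash, false>();
    }
  } else {
    if (columnar_output) {
      run_conversion<QueryType::GroupByBaselineHash, true>();
    } else {
      run_conversion<QueryType::GroupByBaselineHash, false>();
    }
  }
}

int main() {
  dispatch(QueryType::GroupByBaselineHash, true);
}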

Definition at line 1353 of file ColumnarResults.cpp.

References CHECK, GroupByBaselineHash, GroupByPerfectHash, initWriteFunctions(), and isDirectColumnarConversionPossible().

Referenced by compactAndCopyEntriesWithoutTargetSkipping(), and compactAndCopyEntriesWithTargetSkipping().

{
  CHECK(isDirectColumnarConversionPossible() &&
        (rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
         rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash));

  const auto write_functions = initWriteFunctions(rows, targets_to_skip);
  if (rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash) {
    if (rows.didOutputColumnar()) {
      return std::make_tuple(
          std::move(write_functions),
          initReadFunctions<QueryDescriptionType::GroupByPerfectHash, true>(
              rows, slot_idx_per_target_idx, targets_to_skip));
    } else {
      return std::make_tuple(
          std::move(write_functions),
          initReadFunctions<QueryDescriptionType::GroupByPerfectHash, false>(
              rows, slot_idx_per_target_idx, targets_to_skip));
    }
  } else {
    if (rows.didOutputColumnar()) {
      return std::make_tuple(
          std::move(write_functions),
          initReadFunctions<QueryDescriptionType::GroupByBaselineHash, true>(
              rows, slot_idx_per_target_idx, targets_to_skip));
    } else {
      return std::make_tuple(
          std::move(write_functions),
          initReadFunctions<QueryDescriptionType::GroupByBaselineHash, false>(
              rows, slot_idx_per_target_idx, targets_to_skip));
    }
  }
}


template<QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
std::vector< ColumnarResults::ReadFunction > ColumnarResults::initReadFunctions ( const ResultSet &  rows,
const std::vector< size_t > &  slot_idx_per_target_idx,
const std::vector< bool > &  targets_to_skip = {} 
)
private

Initializes a set of read functions to properly access the contents of the result set's storage buffer. Each read function is chosen based on the data type and data size used to store that target in the result set's storage buffer. These functions are then used for each row in the result set.
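The selection boils down to a switch on the padded slot width (and floating-point vs. integer type). A self-contained sketch of width-based reader selection, assuming a plain contiguous buffer in place of the result set's storage (make_reader and read_fixed are illustrative):

#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <vector>

using ReadFn = std::function<int64_t(const int8_t*, size_t)>;

// Read a fixed-width slot and widen it to int64_t.
template <typename T>
int64_t read_fixed(const int8_t* buf, const size_t idx) {
  T v;
  std::memcpy(&v, buf + idx * sizeof(T), sizeof(T));
  return static_cast<int64_t>(v);
}

ReadFn make_reader(const size_t padded_width) {
  switch (padded_width) {
    case 8: return read_fixed<int64_t>;
    case 4: return read_fixed<int32_t>;
    case 2: return read_fixed<int16_t>;
    case 1: return read_fixed<int8_t>;
    default: return nullptr;  // the real code hits UNREACHABLE() here
  }
}

int main() {
  const std::vector<int32_t> slots{7, 8, 9};
  const auto read = make_reader(4);
  std::cout << read(reinterpret_cast<const int8_t*>(slots.data()), 1) << '\n';  // 8
}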

Definition at line 1255 of file ColumnarResults.cpp.

References CHECK, CHECK_EQ, GroupByBaselineHash, anonymous_namespace{ColumnarResults.cpp}::invalid_read_func(), isDirectColumnarConversionPossible(), kDOUBLE, kFLOAT, target_types_, and UNREACHABLE.

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(COLUMNAR_OUTPUT == rows.didOutputColumnar());
  CHECK(QUERY_TYPE == rows.getQueryDescriptionType());

  std::vector<ReadFunction> read_functions;
  read_functions.reserve(target_types_.size());

  for (size_t target_idx = 0; target_idx < target_types_.size(); target_idx++) {
    if (!targets_to_skip.empty() && !targets_to_skip[target_idx]) {
      // for targets that should be skipped, we use a placeholder function that should
      // never be called. The CHECKs inside it make sure that never happens.
      read_functions.emplace_back(invalid_read_func);
      continue;
    }

    if (QUERY_TYPE == QueryDescriptionType::GroupByBaselineHash) {
      if (rows.getPaddedSlotWidthBytes(slot_idx_per_target_idx[target_idx]) == 0) {
        // for key columns only
        CHECK(rows.query_mem_desc_.getTargetGroupbyIndex(target_idx) >= 0);
        if (target_types_[target_idx].is_fp()) {
          CHECK_EQ(size_t(8), rows.query_mem_desc_.getEffectiveKeyWidth());
          switch (target_types_[target_idx].get_type()) {
            case kFLOAT:
              read_functions.emplace_back(
                  read_float_key_baseline<QUERY_TYPE, COLUMNAR_OUTPUT>);
              break;
            case kDOUBLE:
              read_functions.emplace_back(read_double_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
              break;
            default:
              UNREACHABLE()
                  << "Invalid data type encountered (BaselineHash, floating point key).";
              break;
          }
        } else {
          switch (rows.query_mem_desc_.getEffectiveKeyWidth()) {
            case 8:
              read_functions.emplace_back(read_int64_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
              break;
            case 4:
              read_functions.emplace_back(read_int32_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
              break;
            default:
              UNREACHABLE()
                  << "Invalid data type encountered (BaselineHash, integer key).";
          }
        }
        continue;
      }
    }
    if (target_types_[target_idx].is_fp()) {
      switch (rows.getPaddedSlotWidthBytes(slot_idx_per_target_idx[target_idx])) {
        case 8:
          read_functions.emplace_back(read_double_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
          break;
        case 4:
          read_functions.emplace_back(read_float_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
          break;
        default:
          UNREACHABLE() << "Invalid data type encountered (floating point agg column).";
          break;
      }
    } else {
      switch (rows.getPaddedSlotWidthBytes(slot_idx_per_target_idx[target_idx])) {
        case 8:
          read_functions.emplace_back(read_int64_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
          break;
        case 4:
          read_functions.emplace_back(read_int32_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
          break;
        case 2:
          read_functions.emplace_back(read_int16_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
          break;
        case 1:
          read_functions.emplace_back(read_int8_func<QUERY_TYPE, COLUMNAR_OUTPUT>);
          break;
        default:
          UNREACHABLE() << "Invalid data type encountered (integer agg column).";
          break;
      }
    }
  }
  return read_functions;
}


std::vector< ColumnarResults::WriteFunction > ColumnarResults::initWriteFunctions ( const ResultSet &  rows,
const std::vector< bool > &  targets_to_skip = {} 
)
private

Initializes a set of write functions per target (i.e., column). Target types' logical sizes are used to choose the correct write function per target. These functions are then used for every row in the result set.
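The listing below binds writeBackCellDirect<DATA_TYPE> for one of five sizes. The same pattern, reduced to a self-contained example with a stand-in Writer type (all names here are illustrative, not the library's):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

struct Writer {
  std::vector<int8_t> buffer;

  // Stand-in for writeBackCellDirect<DATA_TYPE>: narrow to the column's
  // physical type before storing.
  template <typename DATA_TYPE>
  void write_direct(const size_t row_idx, const int64_t val) {
    reinterpret_cast<DATA_TYPE*>(buffer.data())[row_idx] =
        static_cast<DATA_TYPE>(val);
  }
};

int main() {
  using WriteFn = std::function<void(size_t, int64_t)>;
  Writer w{std::vector<int8_t>(4 * sizeof(int16_t))};
  WriteFn write;
  const size_t logical_size = 2;  // would come from target_types_[i].get_size()
  switch (logical_size) {
    case 2:
      write = std::bind(&Writer::write_direct<int16_t>,
                        &w,
                        std::placeholders::_1,
                        std::placeholders::_2);
      break;
    default:  // cases 8, 4, and 1 are analogous
      break;
  }
  write(3, 1234);
  std::cout << reinterpret_cast<int16_t*>(w.buffer.data())[3] << '\n';  // 1234
}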

Definition at line 1067 of file ColumnarResults.cpp.

References CHECK, GroupByBaselineHash, GroupByPerfectHash, isDirectColumnarConversionPossible(), run_benchmark_import::result, target_types_, and UNREACHABLE.

Referenced by initAllConversionFunctions().

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
        rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash);

  std::vector<WriteFunction> result;
  result.reserve(target_types_.size());

  for (size_t target_idx = 0; target_idx < target_types_.size(); target_idx++) {
    if (!targets_to_skip.empty() && !targets_to_skip[target_idx]) {
      result.emplace_back([](const ResultSet& rows,
                             const size_t input_buffer_entry_idx,
                             const size_t output_buffer_entry_idx,
                             const size_t target_idx,
                             const size_t slot_idx,
                             const ReadFunction& read_function) {
        UNREACHABLE() << "Invalid write back function used.";
      });
      continue;
    }

    if (target_types_[target_idx].is_fp()) {
      switch (target_types_[target_idx].get_size()) {
        case 8:
          result.emplace_back(std::bind(&ColumnarResults::writeBackCellDirect<double>,
                                        this,
                                        std::placeholders::_1,
                                        std::placeholders::_2,
                                        std::placeholders::_3,
                                        std::placeholders::_4,
                                        std::placeholders::_5,
                                        std::placeholders::_6));
          break;
        case 4:
          result.emplace_back(std::bind(&ColumnarResults::writeBackCellDirect<float>,
                                        this,
                                        std::placeholders::_1,
                                        std::placeholders::_2,
                                        std::placeholders::_3,
                                        std::placeholders::_4,
                                        std::placeholders::_5,
                                        std::placeholders::_6));
          break;
        default:
          UNREACHABLE() << "Invalid target type encountered.";
          break;
      }
    } else {
      switch (target_types_[target_idx].get_size()) {
        case 8:
          result.emplace_back(std::bind(&ColumnarResults::writeBackCellDirect<int64_t>,
                                        this,
                                        std::placeholders::_1,
                                        std::placeholders::_2,
                                        std::placeholders::_3,
                                        std::placeholders::_4,
                                        std::placeholders::_5,
                                        std::placeholders::_6));
          break;
        case 4:
          result.emplace_back(std::bind(&ColumnarResults::writeBackCellDirect<int32_t>,
                                        this,
                                        std::placeholders::_1,
                                        std::placeholders::_2,
                                        std::placeholders::_3,
                                        std::placeholders::_4,
                                        std::placeholders::_5,
                                        std::placeholders::_6));
          break;
        case 2:
          result.emplace_back(std::bind(&ColumnarResults::writeBackCellDirect<int16_t>,
                                        this,
                                        std::placeholders::_1,
                                        std::placeholders::_2,
                                        std::placeholders::_3,
                                        std::placeholders::_4,
                                        std::placeholders::_5,
                                        std::placeholders::_6));
          break;
        case 1:
          result.emplace_back(std::bind(&ColumnarResults::writeBackCellDirect<int8_t>,
                                        this,
                                        std::placeholders::_1,
                                        std::placeholders::_2,
                                        std::placeholders::_3,
                                        std::placeholders::_4,
                                        std::placeholders::_5,
                                        std::placeholders::_6));
          break;
        default:
          UNREACHABLE() << "Invalid target type encountered.";
          break;
      }
    }
  }
  return result;
}


bool ColumnarResults::isDirectColumnarConversionPossible ( ) const
inline

References direct_columnar_conversion_.

{ return direct_columnar_conversion_; }

bool ColumnarResults::isParallelConversion ( ) const
inline

Definition at line 92 of file ColumnarResults.h.

References parallel_conversion_.

Referenced by materializeAllColumnsGroupBy(), and materializeAllColumnsThroughIteration().

{ return parallel_conversion_; }


void ColumnarResults::locateAndCountEntries ( const ResultSet &  rows,
ColumnBitmap &  bitmap,
std::vector< size_t > &  non_empty_per_thread,
const size_t  entry_count,
const size_t  num_threads,
const size_t  size_per_thread 
) const
private

This function goes through all the keys in the result set, counting the total number of non-empty keys. It also stores the locations of the non-empty keys in a bitmap data structure for faster access later.
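In other words, the entry range is split into equal partitions, and each worker fills its own bank of the bitmap plus its own counter, so no synchronization is needed. A self-contained sketch of the scheme, using a vector<vector<bool>> in place of ColumnBitmap and a made-up emptiness test:

#include <algorithm>
#include <cstddef>
#include <future>
#include <vector>

int main() {
  const size_t entry_count = 1000, num_threads = 4;
  const size_t size_per_thread = (entry_count + num_threads - 1) / num_threads;
  const auto is_empty = [](const size_t entry_idx) { return entry_idx % 3 == 0; };

  // One bitmap bank and one counter per thread: no shared writes.
  std::vector<std::vector<bool>> bitmap(num_threads,
                                        std::vector<bool>(size_per_thread, false));
  std::vector<size_t> non_empty_per_thread(num_threads, 0);

  std::vector<std::future<void>> workers;
  for (size_t t = 0; t < num_threads; ++t) {
    workers.push_back(std::async(std::launch::async, [&, t] {
      const size_t start = t * size_per_thread;
      const size_t end = std::min(start + size_per_thread, entry_count);
      size_t total = 0, local_idx = 0;
      for (size_t entry_idx = start; entry_idx < end; ++entry_idx, ++local_idx) {
        if (!is_empty(entry_idx)) {
          ++total;
          bitmap[t][local_idx] = true;  // remember where the non-empty rows are
        }
      }
      non_empty_per_thread[t] = total;
    }));
  }
  for (auto& w : workers) {
    w.wait();
  }
}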

Definition at line 718 of file ColumnarResults.cpp.

References threading_serial::async(), CHECK, CHECK_EQ, Executor::ERR_INTERRUPTED, executor_, g_enable_non_kernel_time_query_interrupt, QueryExecutionError::getErrorCode(), GroupByBaselineHash, GroupByPerfectHash, isDirectColumnarConversionPossible(), ColumnBitmap::set(), and UNLIKELY.

Referenced by materializeAllColumnsGroupBy().

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
        rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash);
  CHECK_EQ(num_threads, non_empty_per_thread.size());
  auto do_work = [&rows, &bitmap](size_t& total_non_empty,
                                  const size_t local_idx,
                                  const size_t entry_idx,
                                  const size_t thread_idx) {
    if (!rows.isRowAtEmpty(entry_idx)) {
      total_non_empty++;
      bitmap.set(local_idx, thread_idx, true);
    }
  };
  auto locate_and_count_func =
      [&do_work, &non_empty_per_thread, this](
          size_t start_index, size_t end_index, size_t thread_idx) {
        size_t total_non_empty = 0;
        size_t local_idx = 0;
        if (g_enable_non_kernel_time_query_interrupt) {
          for (size_t entry_idx = start_index; entry_idx < end_index;
               entry_idx++, local_idx++) {
            if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
                         executor_->checkNonKernelTimeInterrupted())) {
              throw QueryExecutionError(Executor::ERR_INTERRUPTED);
            }
            do_work(total_non_empty, local_idx, entry_idx, thread_idx);
          }
        } else {
          for (size_t entry_idx = start_index; entry_idx < end_index;
               entry_idx++, local_idx++) {
            do_work(total_non_empty, local_idx, entry_idx, thread_idx);
          }
        }
        non_empty_per_thread[thread_idx] = total_non_empty;
      };

  std::vector<std::future<void>> conversion_threads;
  for (size_t thread_idx = 0; thread_idx < num_threads; thread_idx++) {
    const size_t start_entry = thread_idx * size_per_thread;
    const size_t end_entry = std::min(start_entry + size_per_thread, entry_count);
    conversion_threads.push_back(std::async(
        std::launch::async, locate_and_count_func, start_entry, end_entry, thread_idx));
  }

  try {
    for (auto& child : conversion_threads) {
      child.wait();
    }
  } catch (QueryExecutionError& e) {
    if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
      throw QueryExecutionError(Executor::ERR_INTERRUPTED);
    }
    throw e;
  } catch (...) {
    throw;
  }
}


void ColumnarResults::materializeAllColumnsDirectly ( const ResultSet &  rows,
const size_t  num_columns 
)
private

This function materializes all columns from the main storage and all appended storages, forming a single contiguous column for each output column. Columns are treated differently depending on whether they are lazily fetched.

NOTE: this function should only be used when the result set is columnar and completely compacted (e.g., in columnar projections).

Definition at line 477 of file ColumnarResults.cpp.

References CHECK, GroupByBaselineHash, GroupByPerfectHash, isDirectColumnarConversionPossible(), materializeAllColumnsGroupBy(), materializeAllColumnsProjection(), materializeAllColumnsTableFunction(), Projection, TableFunction, and UNREACHABLE.

Referenced by ColumnarResults().

{
  CHECK(isDirectColumnarConversionPossible());
  switch (rows.getQueryDescriptionType()) {
    case QueryDescriptionType::Projection: {
      materializeAllColumnsProjection(rows, num_columns);
      break;
    }
    case QueryDescriptionType::TableFunction: {
      materializeAllColumnsTableFunction(rows, num_columns);
      break;
    }
    case QueryDescriptionType::GroupByPerfectHash:
    case QueryDescriptionType::GroupByBaselineHash: {
      materializeAllColumnsGroupBy(rows, num_columns);
      break;
    }
    default:
      UNREACHABLE()
          << "Direct columnar conversion for this query type is not supported yet.";
  }
}


void ColumnarResults::materializeAllColumnsGroupBy ( const ResultSet &  rows,
const size_t  num_columns 
)
private

This function directly columnarizes a result set for group-by queries. Its main difference from the traditional alternative is that it reads non-empty entries directly from the result set and writes them into the output column buffers, rather than using the result set's iterators.
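Taken together, the two steps below (locateAndCountEntries, then compactAndCopyEntries) implement a classic count-scan-compact pattern. A self-contained sketch with made-up data, single-threaded per partition for brevity, shows the whole flow:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <numeric>
#include <vector>

int main() {
  const std::vector<int> entries{5, -1, 7, -1, 9, 11, -1, 13};  // -1 == empty
  const size_t num_threads = 2;
  const size_t size_per_thread = (entries.size() + num_threads - 1) / num_threads;

  // Step 1: per-partition bitmap of non-empty entries plus per-partition counts.
  std::vector<std::vector<bool>> bitmap(num_threads,
                                        std::vector<bool>(size_per_thread));
  std::vector<size_t> non_empty_per_thread(num_threads, 0);
  for (size_t t = 0; t < num_threads; ++t) {
    const size_t start = t * size_per_thread;
    const size_t end = std::min(start + size_per_thread, entries.size());
    for (size_t i = start; i < end; ++i) {
      if (entries[i] != -1) {
        bitmap[t][i - start] = true;
        ++non_empty_per_thread[t];
      }
    }
  }

  // Step 2: exclusive scan gives each partition its write offset; then each
  // partition copies its marked entries into a hole-free output.
  std::vector<size_t> global_offsets(num_threads + 1, 0);
  std::partial_sum(non_empty_per_thread.begin(), non_empty_per_thread.end(),
                   std::next(global_offsets.begin()));
  std::vector<int> compacted(global_offsets.back());
  for (size_t t = 0; t < num_threads; ++t) {
    size_t out = global_offsets[t];
    const size_t start = t * size_per_thread;
    const size_t end = std::min(start + size_per_thread, entries.size());
    for (size_t i = start; i < end; ++i) {
      if (bitmap[t][i - start]) {
        compacted[out++] = entries[i];
      }
    }
  }
  assert((compacted == std::vector<int>{5, 7, 9, 11, 13}));
}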

Definition at line 683 of file ColumnarResults.cpp.

References CHECK, compactAndCopyEntries(), cpu_threads(), GroupByBaselineHash, GroupByPerfectHash, isDirectColumnarConversionPossible(), isParallelConversion(), and locateAndCountEntries().

Referenced by materializeAllColumnsDirectly().

{
  CHECK(isDirectColumnarConversionPossible());
  CHECK(rows.getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash ||
        rows.getQueryDescriptionType() == QueryDescriptionType::GroupByBaselineHash);

  const size_t num_threads = isParallelConversion() ? cpu_threads() : 1;
  const size_t entry_count = rows.entryCount();
  const size_t size_per_thread = (entry_count + num_threads - 1) / num_threads;

  // step 1: compute total non-empty elements and store a bitmap per thread
  std::vector<size_t> non_empty_per_thread(num_threads,
                                           0);  // number of non-empty entries per thread

  ColumnBitmap bitmap(size_per_thread, num_threads);

  locateAndCountEntries(
      rows, bitmap, non_empty_per_thread, entry_count, num_threads, size_per_thread);

  // step 2: go through the generated bitmap and copy/decode corresponding entries
  // into the output buffer
  compactAndCopyEntries(rows,
                        bitmap,
                        non_empty_per_thread,
                        num_columns,
                        entry_count,
                        num_threads,
                        size_per_thread);
}


void ColumnarResults::materializeAllColumnsProjection ( const ResultSet &  rows,
const size_t  num_columns 
)
private

This function handles materialization for two types of columns in columnar projections:

  1. for all non-lazy columns, it directly copies the results from the result set's storage into the output column buffers
  2. for all lazy-fetched columns, it uses the result set's iterators to decode the proper values before storing them into the output column buffers

Definition at line 507 of file ColumnarResults.cpp.

References CHECK, copyAllNonLazyColumns(), isDirectColumnarConversionPossible(), materializeAllLazyColumns(), and Projection.

Referenced by materializeAllColumnsDirectly().

{
  CHECK(rows.query_mem_desc_.didOutputColumnar());
  CHECK(isDirectColumnarConversionPossible() &&
        (rows.query_mem_desc_.getQueryDescriptionType() ==
         QueryDescriptionType::Projection));

  const auto& lazy_fetch_info = rows.getLazyFetchInfo();

  // We can directly copy each non-lazy column's content
  copyAllNonLazyColumns(lazy_fetch_info, rows, num_columns);

  // Only lazy columns are iterated through first and then materialized
  materializeAllLazyColumns(lazy_fetch_info, rows, num_columns);
}


void ColumnarResults::materializeAllColumnsTableFunction ( const ResultSet &  rows,
const size_t  num_columns 
)
private

Definition at line 523 of file ColumnarResults.cpp.

References CHECK, copyAllNonLazyColumns(), isDirectColumnarConversionPossible(), and TableFunction.

Referenced by materializeAllColumnsDirectly().

{
  CHECK(rows.query_mem_desc_.didOutputColumnar());
  CHECK(isDirectColumnarConversionPossible() &&
        (rows.query_mem_desc_.getQueryDescriptionType() ==
         QueryDescriptionType::TableFunction));

  const auto& lazy_fetch_info = rows.getLazyFetchInfo();
  // Lazy fetching is not currently allowed for table function outputs
  for (const auto& col_lazy_fetch_info : lazy_fetch_info) {
    CHECK(!col_lazy_fetch_info.is_lazily_fetched);
  }
  // We can directly copy each non-lazy column's content
  copyAllNonLazyColumns(lazy_fetch_info, rows, num_columns);
}


void ColumnarResults::materializeAllColumnsThroughIteration ( const ResultSet &  rows,
const size_t  num_columns 
)
private

This function iterates through the result set (using the getRowAtNoTranslations and getNextRow family of functions) and writes the results back into the output column buffers.
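In the parallel variant shown below, output rows are compacted by letting each non-empty input row claim the next dense output index with an atomic fetch_add (so the relative output order is not deterministic). A self-contained sketch of that idiom with made-up data:

#include <atomic>
#include <cstddef>
#include <future>
#include <vector>

int main() {
  const std::vector<int> entries{1, -1, 2, -1, 3, 4};  // -1 == empty
  std::atomic<size_t> row_idx{0};
  std::vector<int> out(entries.size());

  auto worker = [&](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      if (entries[i] != -1) {
        const size_t cur = row_idx.fetch_add(1);  // claim the next dense slot
        out[cur] = entries[i];
      }
    }
  };
  auto f = std::async(std::launch::async, worker, size_t(0), entries.size() / 2);
  worker(entries.size() / 2, entries.size());
  f.wait();
  out.resize(row_idx.load());  // final row count, like the num_rows_ update below
}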

Definition at line 284 of file ColumnarResults.cpp.

References threading_serial::async(), cpu_threads(), Executor::ERR_INTERRUPTED, executor_, g_enable_non_kernel_time_query_interrupt, QueryExecutionError::getErrorCode(), isParallelConversion(), makeIntervals(), num_rows_, UNLIKELY, and writeBackCell().

Referenced by ColumnarResults().

{
  std::atomic<size_t> row_idx{0};
  if (isParallelConversion()) {
    const size_t worker_count = cpu_threads();
    std::vector<std::future<void>> conversion_threads;
    std::mutex write_mutex;
    const auto do_work =
        [num_columns, &rows, &row_idx, &write_mutex, this](const size_t i) {
          const auto crt_row = rows.getRowAtNoTranslations(i);
          if (!crt_row.empty()) {
            auto cur_row_idx = row_idx.fetch_add(1);
            for (size_t col_idx = 0; col_idx < num_columns; ++col_idx) {
              writeBackCell(crt_row[col_idx], cur_row_idx, col_idx, &write_mutex);
            }
          }
        };
    for (auto interval : makeIntervals(size_t(0), rows.entryCount(), worker_count)) {
      conversion_threads.push_back(std::async(
          std::launch::async,
          [&do_work, this](const size_t start, const size_t end) {
            if (g_enable_non_kernel_time_query_interrupt) {
              size_t local_idx = 0;
              for (size_t i = start; i < end; ++i, ++local_idx) {
                if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
                             executor_->checkNonKernelTimeInterrupted())) {
                  throw QueryExecutionError(Executor::ERR_INTERRUPTED);
                }
                do_work(i);
              }
            } else {
              for (size_t i = start; i < end; ++i) {
                do_work(i);
              }
            }
          },
          interval.begin,
          interval.end));
    }

    try {
      for (auto& child : conversion_threads) {
        child.wait();
      }
    } catch (QueryExecutionError& e) {
      if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
        throw QueryExecutionError(Executor::ERR_INTERRUPTED);
      }
      throw e;
    } catch (...) {
      throw;
    }

    num_rows_ = row_idx;
    rows.setCachedRowCount(num_rows_);
    return;
  }
  bool done = false;
  const auto do_work = [num_columns, &row_idx, &rows, &done, this]() {
    const auto crt_row = rows.getNextRow(false, false);
    if (crt_row.empty()) {
      done = true;
      return;
    }
    for (size_t i = 0; i < num_columns; ++i) {
      writeBackCell(crt_row[i], row_idx, i);
    }
    ++row_idx;
  };
  if (g_enable_non_kernel_time_query_interrupt) {
    while (!done) {
      if (UNLIKELY((row_idx & 0xFFFF) == 0 &&
                   executor_->checkNonKernelTimeInterrupted())) {
        throw QueryExecutionError(Executor::ERR_INTERRUPTED);
      }
      do_work();
    }
  } else {
    while (!done) {
      do_work();
    }
  }

  rows.moveToBegin();
}


void ColumnarResults::materializeAllLazyColumns ( const std::vector< ColumnLazyFetchInfo > &  lazy_fetch_info,
const ResultSet &  rows,
const size_t  num_columns 
)
private

For all lazy-fetched columns, we iterate through the column's contents and properly materialize them.

This function is parallelized by dividing the total rows among all available threads. Since there is no invalid element in the result set (e.g., in columnar projections), the output buffer has exactly as many rows as the result set, removing the need to atomically increment the output buffer position.
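The row range is carved into near-equal chunks via makeIntervals, one per worker. A self-contained sketch of such an interval helper (this make_intervals is illustrative, not the library's Intervals.h implementation):

#include <cstddef>
#include <iostream>
#include <vector>

struct Interval { size_t begin, end; };

// Split [begin, end) into at most n_workers near-equal, contiguous chunks.
std::vector<Interval> make_intervals(size_t begin, size_t end, size_t n_workers) {
  std::vector<Interval> out;
  const size_t total = end - begin;
  const size_t base = total / n_workers, extra = total % n_workers;
  size_t cur = begin;
  for (size_t w = 0; w < n_workers && cur < end; ++w) {
    const size_t len = base + (w < extra ? 1 : 0);  // spread the remainder
    out.push_back({cur, cur + len});
    cur += len;
  }
  return out;
}

int main() {
  for (auto iv : make_intervals(0, 10, 3)) {
    std::cout << '[' << iv.begin << ", " << iv.end << ")\n";  // [0,4) [4,7) [7,10)
  }
}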

Definition at line 594 of file ColumnarResults.cpp.

References threading_serial::async(), CHECK, CHECK_EQ, cpu_threads(), Executor::ERR_INTERRUPTED, executor_, g_enable_non_kernel_time_query_interrupt, QueryExecutionError::getErrorCode(), isDirectColumnarConversionPossible(), makeIntervals(), TableFunction, UNLIKELY, result_set::use_parallel_algorithms(), and writeBackCell().

Referenced by materializeAllColumnsProjection().

597  {
598    CHECK(isDirectColumnarConversionPossible());
599    CHECK(!(rows.query_mem_desc_.getQueryDescriptionType() ==
600            QueryDescriptionType::TableFunction));
601    std::mutex write_mutex;
602    const auto do_work_just_lazy_columns = [num_columns, &rows, &write_mutex, this](
603                                               const size_t row_idx,
604                                               const std::vector<bool>& targets_to_skip) {
605      const auto crt_row = rows.getRowAtNoTranslations(row_idx, targets_to_skip);
606      for (size_t i = 0; i < num_columns; ++i) {
607        if (!targets_to_skip.empty() && !targets_to_skip[i]) {
608          writeBackCell(crt_row[i], row_idx, i, &write_mutex);
609        }
610      }
611    };
612 
613    const auto contains_lazy_fetched_column =
614        [](const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info) {
615          for (auto& col_info : lazy_fetch_info) {
616            if (col_info.is_lazily_fetched) {
617              return true;
618            }
619          }
620          return false;
621        };
622 
623    // parallelized by assigning a chunk of rows to each thread
624    const bool skip_non_lazy_columns = rows.isPermutationBufferEmpty();
625    if (contains_lazy_fetched_column(lazy_fetch_info)) {
626      const size_t worker_count =
627          result_set::use_parallel_algorithms(rows) ? cpu_threads() : 1;
628      std::vector<std::future<void>> conversion_threads;
629      std::vector<bool> targets_to_skip;
630      if (skip_non_lazy_columns) {
631        CHECK_EQ(lazy_fetch_info.size(), size_t(num_columns));
632        targets_to_skip.reserve(num_columns);
633        for (size_t i = 0; i < num_columns; i++) {
634          // we process lazy columns (i.e., skip non-lazy columns)
635          targets_to_skip.push_back(!lazy_fetch_info[i].is_lazily_fetched);
636        }
637      }
638      for (auto interval : makeIntervals(size_t(0), rows.entryCount(), worker_count)) {
639        conversion_threads.push_back(std::async(
640            std::launch::async,
641            [&do_work_just_lazy_columns, &targets_to_skip, this](const size_t start,
642                                                                 const size_t end) {
643              if (g_enable_non_kernel_time_query_interrupt) {
644                size_t local_idx = 0;
645                for (size_t i = start; i < end; ++i, ++local_idx) {
646                  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
647                               executor_->checkNonKernelTimeInterrupted())) {
648                    throw QueryExecutionError(Executor::ERR_INTERRUPTED);
649                  }
650                  do_work_just_lazy_columns(i, targets_to_skip);
651                }
652              } else {
653                for (size_t i = start; i < end; ++i) {
654                  do_work_just_lazy_columns(i, targets_to_skip);
655                }
656              }
657            },
658            interval.begin,
659            interval.end));
660      }
661 
662      try {
663        for (auto& child : conversion_threads) {
664          child.wait();
665        }
666      } catch (QueryExecutionError& e) {
667        if (e.getErrorCode() == Executor::ERR_INTERRUPTED) {
668          throw QueryExecutionError(Executor::ERR_INTERRUPTED);
669        }
670        throw e;
671      } catch (...) {
672        throw;
673      }
674    }
675  }
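
The loop above follows the codebase's standard row-partitioning idiom: makeIntervals() splits [0, entryCount()) into near-equal chunks, one std::async task is launched per chunk, and the interrupt flag is polled only once every 65,536 rows ((local_idx & 0xFFFF) == 0) so the check stays off the hot path. Below is a minimal, self-contained sketch of that pattern; make_intervals here is a simplified stand-in for the project's makeIntervals() utility, not its actual implementation, and the worker body is a placeholder:

#include <cstddef>
#include <future>
#include <vector>

struct Interval {
  std::size_t begin;
  std::size_t end;
};

// Split [begin, end) into at most n_workers near-equal chunks.
std::vector<Interval> make_intervals(std::size_t begin,
                                     std::size_t end,
                                     std::size_t n_workers) {
  std::vector<Interval> out;
  const std::size_t total = end - begin;
  const std::size_t quot = total / n_workers;
  std::size_t rem = total % n_workers;
  for (std::size_t lo = begin; lo < end;) {
    // the first `rem` chunks get one extra row each
    const std::size_t hi = lo + quot + (rem > 0 ? (--rem, 1) : 0);
    out.push_back({lo, hi});
    lo = hi;
  }
  return out;
}

int main() {
  const std::size_t num_rows = 1000;
  std::vector<std::future<void>> workers;
  for (auto interval : make_intervals(0, num_rows, 4)) {
    workers.push_back(std::async(
        std::launch::async,
        [](const std::size_t start, const std::size_t end) {
          for (std::size_t i = start; i < end; ++i) {
            // per-row materialization work would go here
          }
        },
        interval.begin,
        interval.end));
  }
  for (auto& w : workers) {
    w.get();  // get() rethrows any exception raised inside a worker
  }
}

Because every input entry maps to a fixed output row, the workers write disjoint ranges and need no shared atomic counter, which is exactly the property the description above relies on.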

std::unique_ptr< ColumnarResults > ColumnarResults::mergeResults ( const std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const std::vector< std::unique_ptr< ColumnarResults >> &  sub_results 
)
static

Definition at line 238 of file ColumnarResults.cpp.

References gpu_enabled::accumulate(), CHECK_EQ, ColumnarResults(), logger::init(), padded_target_sizes_, run_benchmark_import::result, and target_types_.

Referenced by ColumnFetcher::getAllTableColumnFragments().

240  {
241  if (sub_results.empty()) {
242  return nullptr;
243  }
244  const auto total_row_count = std::accumulate(
245  sub_results.begin(),
246  sub_results.end(),
247  size_t(0),
248  [](const size_t init, const std::unique_ptr<ColumnarResults>& result) {
249  return init + result->size();
250  });
251  std::unique_ptr<ColumnarResults> merged_results(
252  new ColumnarResults(total_row_count,
253  sub_results[0]->target_types_,
254  sub_results[0]->padded_target_sizes_));
255  const auto col_count = sub_results[0]->column_buffers_.size();
256  const auto nonempty_it = std::find_if(
257  sub_results.begin(),
258  sub_results.end(),
259  [](const std::unique_ptr<ColumnarResults>& needle) { return needle->size(); });
260  if (nonempty_it == sub_results.end()) {
261  return nullptr;
262  }
263  for (size_t col_idx = 0; col_idx < col_count; ++col_idx) {
264  const auto byte_width = merged_results->padded_target_sizes_[col_idx];
265  auto write_ptr = row_set_mem_owner->allocate(byte_width * total_row_count);
266  merged_results->column_buffers_.push_back(write_ptr);
267  for (auto& rs : sub_results) {
268  CHECK_EQ(col_count, rs->column_buffers_.size());
269  if (!rs->size()) {
270  continue;
271  }
272  CHECK_EQ(byte_width, rs->padded_target_sizes_[col_idx]);
273  memcpy(write_ptr, rs->column_buffers_[col_idx], rs->size() * byte_width);
274  write_ptr += rs->size() * byte_width;
275  }
276  }
277  return merged_results;
278 }
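
mergeResults() concatenates the sub-results column by column: for each column it allocates total_row_count * byte_width bytes from the RowSetMemoryOwner, then appends each non-empty sub-result's buffer back-to-back, advancing write_ptr by rs->size() * byte_width. It returns nullptr both when sub_results is empty and when every sub-result has zero rows. A minimal sketch of the per-column concatenation, using std::vector in place of RowSetMemoryOwner allocations (the helper name is illustrative, not part of the API):

#include <cstdint>
#include <cstring>
#include <vector>

// Merge one column by appending each sub-result's buffer back-to-back,
// mirroring the write_ptr loop in mergeResults() above.
std::vector<int8_t> merge_one_column(
    const std::vector<std::vector<int8_t>>& sub_buffers) {
  std::size_t total_bytes = 0;
  for (const auto& buf : sub_buffers) {
    total_bytes += buf.size();
  }
  std::vector<int8_t> merged(total_bytes);
  int8_t* write_ptr = merged.data();
  for (const auto& buf : sub_buffers) {
    if (buf.empty()) {
      continue;  // mirrors the !rs->size() skip above
    }
    std::memcpy(write_ptr, buf.data(), buf.size());
    write_ptr += buf.size();
  }
  return merged;
}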

const size_t ColumnarResults::size ( ) const
inline

Definition at line 84 of file ColumnarResults.h.

References num_rows_.

Referenced by ColumnFetcher::transferColumnIfNeeded().

84 { return num_rows_; }


void ColumnarResults::writeBackCell ( const TargetValue col_val,
const size_t  row_idx,
const size_t  column_idx,
std::mutex *  write_mutex = nullptr 
)
inline private

Definition at line 379 of file ColumnarResults.cpp.

References CHECK, CHECK_EQ, column_buffers_, FlatBufferManager::isFlatBuffer(), target_types_, and anonymous_namespace{ColumnarResults.cpp}::toBuffer().

Referenced by compactAndCopyEntriesWithTargetSkipping(), materializeAllColumnsThroughIteration(), and materializeAllLazyColumns().

382  {
383    auto& type_info = target_types_[column_idx];
384    if (type_info.is_array()) {
385      CHECK(FlatBufferManager::isFlatBuffer(column_buffers_[column_idx]));
386      FlatBufferManager m{column_buffers_[column_idx]};
387      const auto arr_tv = boost::get<ArrayTargetValue>(&col_val);
388      CHECK(arr_tv);
389      if (arr_tv->is_initialized()) {
390        const auto& vec = arr_tv->get();
391        auto array_item_size = type_info.get_elem_type().get_size();
392        // setEmptyItemNoValidation reserves a buffer in the FlatBuffer
393        // instance corresponding to the varlen array at row index
394        // row_idx; the pointer to that buffer is stored in buf:
396        int8_t* buf = nullptr;
397        FlatBufferManager::Status status{};
398        {
399          auto lock_scope =
400              (write_mutex == nullptr ? std::unique_lock<std::mutex>()
401                                      : std::unique_lock<std::mutex>(*write_mutex));
402          status = m.setEmptyItemNoValidation(row_idx, vec.size() * array_item_size, &buf);
403        }
404        CHECK_EQ(status, FlatBufferManager::Status::Success);
405        CHECK(buf);
406        // toBuffer initializes the varlen array buffer buf from the
407        // result set row at row index row_idx:
408        toBuffer(col_val, type_info, buf);
409      } else {
410        auto lock_scope =
411            (write_mutex == nullptr ? std::unique_lock<std::mutex>()
412                                    : std::unique_lock<std::mutex>(*write_mutex));
413        m.setNullNoValidation(row_idx);
414      }
415 
416    } else {
417      int8_t* buf = column_buffers_[column_idx];
418      toBuffer(col_val, type_info, buf + type_info.get_size() * row_idx);
419    }
420  }
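
In the non-array branch, the cell at (row_idx, column_idx) is written at byte offset type_info.get_size() * row_idx inside that column's buffer, i.e., plain fixed-width columnar addressing. A sketch of that addressing with an illustrative helper (not part of the class):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Write one fixed-width value into a columnar buffer: element i of a
// column with elem_size-byte entries lives at buffer + elem_size * i.
void write_scalar_cell(int8_t* column_buffer,
                       std::size_t elem_size,
                       std::size_t row_idx,
                       const void* value) {
  std::memcpy(column_buffer + elem_size * row_idx, value, elem_size);
}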

template<typename DATA_TYPE >
void ColumnarResults::writeBackCellDirect ( const ResultSet rows,
const size_t  input_buffer_entry_idx,
const size_t  output_buffer_entry_idx,
const size_t  target_idx,
const size_t  slot_idx,
const ReadFunction read_from_function 
)
private

A set of write functions used to write directly into the final column_buffers_. The read_from_function is used to read from the input result set's storage. NOTE: currently only used for direct columnarization.

Definition at line 428 of file ColumnarResults.cpp.

References column_buffers_, anonymous_namespace{ColumnarResults.cpp}::fixed_encoding_nullable_val(), and target_types_.

433  {
434  const auto val = static_cast<DATA_TYPE>(fixed_encoding_nullable_val(
435  read_from_function(rows, input_buffer_entry_idx, target_idx, slot_idx),
436  target_types_[target_idx]));
437  reinterpret_cast<DATA_TYPE*>(column_buffers_[target_idx])[output_buffer_entry_idx] =
438  val;
439 }
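
The read_from_function indirection lets one templated writer serve every input layout: the caller supplies a ReadFunction that knows how to pull a raw int64_t out of the result set's storage, and the writer handles null encoding and the cast to the output type. A simplified, self-contained analogue of this split (FakeStorage and the helper names are stand-ins, not the real API):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

struct FakeStorage {
  std::vector<int64_t> slots;  // stand-in for result set storage
};

// Stand-in for ReadFunction: pulls a raw 64-bit value out of storage.
using ReadFn = std::function<int64_t(const FakeStorage&, std::size_t)>;

// Generic writer: delegates the read, then casts to the output type.
template <typename DATA_TYPE>
void write_back_direct(const FakeStorage& storage,
                       std::size_t entry_idx,
                       DATA_TYPE* out_column,
                       std::size_t out_idx,
                       const ReadFn& read_fn) {
  out_column[out_idx] = static_cast<DATA_TYPE>(read_fn(storage, entry_idx));
}

int main() {
  FakeStorage storage{{10, 20, 30}};
  std::vector<int32_t> column(3);
  const ReadFn read_slot = [](const FakeStorage& s, std::size_t i) {
    return s.slots[i];
  };
  for (std::size_t i = 0; i < column.size(); ++i) {
    write_back_direct<int32_t>(storage, i, column.data(), i, read_slot);
  }
}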

template<>
void ColumnarResults::writeBackCellDirect ( const ResultSet rows,
const size_t  input_buffer_entry_idx,
const size_t  output_buffer_entry_idx,
const size_t  target_idx,
const size_t  slot_idx,
const ReadFunction read_from_function 
)
private

Definition at line 442 of file ColumnarResults.cpp.

447  {
448  const int32_t ival =
449  read_from_function(rows, input_buffer_entry_idx, target_idx, slot_idx);
450  const float fval = *reinterpret_cast<const float*>(may_alias_ptr(&ival));
451  reinterpret_cast<float*>(column_buffers_[target_idx])[output_buffer_entry_idx] = fval;
452 }
template<>
void ColumnarResults::writeBackCellDirect ( const ResultSet rows,
const size_t  input_buffer_entry_idx,
const size_t  output_buffer_entry_idx,
const size_t  target_idx,
const size_t  slot_idx,
const ReadFunction read_from_function 
)
private

Definition at line 455 of file ColumnarResults.cpp.

461  {
462  const int64_t ival =
463  read_from_function(rows, input_buffer_entry_idx, target_idx, slot_idx);
464  const double dval = *reinterpret_cast<const double*>(may_alias_ptr(&ival));
465  reinterpret_cast<double*>(column_buffers_[target_idx])[output_buffer_entry_idx] = dval;
466 }
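
The float and double specializations above deliberately skip the numeric cast: the 32-/64-bit integer read from storage already holds the IEEE-754 bit pattern, so it is reinterpreted in place via may_alias_ptr. A portable standalone equivalent uses std::memcpy:

#include <cstdint>
#include <cstring>

// Reinterpret the bit pattern of an int32_t as a float (no value conversion).
float bits_to_float(int32_t ival) {
  float fval;
  static_assert(sizeof(fval) == sizeof(ival), "size mismatch");
  std::memcpy(&fval, &ival, sizeof(fval));
  return fval;
}

// Same trick for the 64-bit case.
double bits_to_double(int64_t ival) {
  double dval;
  static_assert(sizeof(dval) == sizeof(ival), "size mismatch");
  std::memcpy(&dval, &ival, sizeof(dval));
  return dval;
}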

Member Data Documentation

std::vector<int8_t*> ColumnarResults::column_buffers_
protected
bool ColumnarResults::direct_columnar_conversion_
private

Definition at line 204 of file ColumnarResults.h.

Referenced by isDirectColumnarConversionPossible().

size_t ColumnarResults::num_rows_
protected
std::vector<size_t> ColumnarResults::padded_target_sizes_
private

Definition at line 208 of file ColumnarResults.h.

Referenced by ColumnarResults(), copyAllNonLazyColumns(), and mergeResults().

bool ColumnarResults::parallel_conversion_
private

Definition at line 203 of file ColumnarResults.h.

Referenced by isParallelConversion().

const std::vector<SQLTypeInfo> ColumnarResults::target_types_
private
size_t ColumnarResults::thread_idx_
private

Definition at line 206 of file ColumnarResults.h.

Referenced by ColumnarResults().


The documentation for this class was generated from the following files:
ColumnarResults.h
ColumnarResults.cpp