OmniSciDB  c07336695a
ScanTable.cpp File Reference

Scan through each column of a table via Chunk iterators. More...

#include <boost/functional/hash.hpp>
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include "../Catalog/Catalog.h"
#include "../Chunk/Chunk.h"
#include "../DataMgr/DataMgr.h"
#include "../Fragmenter/Fragmenter.h"
#include "../Shared/measure.h"
#include "../Shared/sqltypes.h"
+ Include dependency graph for ScanTable.cpp:

Go to the source code of this file.

Functions

void scan_chunk (const ChunkMetadata &chunk_metadata, const Chunk &chunk, size_t &hash, bool use_iter)
 
vector< size_t > scan_table_return_hash (const string &table_name, const Catalog &cat)
 
vector< size_t > scan_table_return_hash_non_iter (const string &table_name, const Catalog &cat)
 

Detailed Description

Scan through each column of a table via Chunk iterators.

Author
Wei Hong (wei@map-d.com). Copyright (c) 2014 MapD Technologies, Inc. All rights reserved.

Definition in file ScanTable.cpp.

Function Documentation

◆ scan_chunk()

void scan_chunk ( const ChunkMetadata &  chunk_metadata,
const Chunk &  chunk,
size_t &  hash,
bool  use_iter 
)

Definition at line 48 of file ScanTable.cpp.

References Chunk_NS::Chunk::begin_iterator(), ChunkIter_get_next(), ChunkIter_get_nth(), ColumnDescriptor::columnType, Chunk_NS::Chunk::get_column_desc(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), anonymous_namespace{ProfileTest.cpp}::hash_combine(), kBIGINT, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_NONE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kVARCHAR, VarlenDatum::length, and VarlenDatum::pointer.

Referenced by scan_table_return_hash(), and scan_table_return_hash_non_iter().

51  {
52  ChunkIter cit = chunk.begin_iterator(chunk_metadata, 0, 1);
53  VarlenDatum vd;
54  bool is_end;
55  const ColumnDescriptor* cd = chunk.get_column_desc();
56  std::hash<std::string> string_hash;
57  int nth = 0;
58  while (true) {
59  if (use_iter) {
60  {
61  ChunkIter_get_next(&cit, true, &vd, &is_end);
62  }
63  } else {
64  { ChunkIter_get_nth(&cit, nth++, true, &vd, &is_end); }
65  }
66  if (is_end) {
67  {
68  break;
69  }
70  }
71  switch (cd->columnType.get_type()) {
72  case kSMALLINT:
73  boost::hash_combine(hash, *(int16_t*)vd.pointer);
74  break;
75  case kINT:
76  boost::hash_combine(hash, *(int32_t*)vd.pointer);
77  break;
78  case kBIGINT:
79  case kNUMERIC:
80  case kDECIMAL:
81  boost::hash_combine(hash, *(int64_t*)vd.pointer);
82  break;
83  case kFLOAT:
84  boost::hash_combine(hash, *(float*)vd.pointer);
85  break;
86  case kDOUBLE:
87  boost::hash_combine(hash, *(double*)vd.pointer);
88  break;
89  case kVARCHAR:
90  case kCHAR:
91  case kTEXT:
92  if (cd->columnType.get_compression() == kENCODING_NONE) {
93  // cout << "read string: " << string((char*)vd.pointer, vd.length) << endl;
94  boost::hash_combine(hash, string_hash(string((char*)vd.pointer, vd.length)));
95  }
96  break;
97  case kTIME:
98  case kDATE:
99  case kTIMESTAMP:
100  boost::hash_combine(hash, *(int64_t*)vd.pointer);
101  break;
102  default:
103  assert(false);
104  }
105  }
106 }
Definition: sqltypes.h:51
const ColumnDescriptor * get_column_desc() const
Definition: Chunk.h:52
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:319
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:327
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:181
int8_t * pointer
Definition: sqltypes.h:72
specifies the content in-memory of a row in the column metadata table
void hash_combine(std::size_t &seed, T const &v)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
ChunkIter begin_iterator(const ChunkMetadata &, int start_idx=0, int skip=1) const
Definition: Chunk.cpp:255
DEVICE void ChunkIter_get_next(ChunkIter *it, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:147
Definition: sqltypes.h:47
SQLTypeInfo columnType
size_t length
Definition: sqltypes.h:71
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ scan_table_return_hash()

vector<size_t> scan_table_return_hash ( const string &  table_name,
const Catalog &  cat 
)

Definition at line 108 of file ScanTable.cpp.

References Data_Namespace::CPU_LEVEL, Catalog_Namespace::DBMetadata::dbId, measure< TimeT >::execution(), TableDescriptor::fragmenter, Fragmenter_Namespace::TableInfo::fragments, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Fragmenter_Namespace::AbstractFragmenter::getFragmentsForQuery(), Catalog_Namespace::Catalog::getMetadataForTable(), Fragmenter_Namespace::TableInfo::getPhysicalNumTuples(), scan_chunk(), and TableDescriptor::tableId.

Referenced by anonymous_namespace{StorageTest.cpp}::storage_test(), and anonymous_namespace{StorageTest.cpp}::storage_test_parallel().

108  {
109  const TableDescriptor* td = cat.getMetadataForTable(table_name);
110  list<const ColumnDescriptor*> cds =
111  cat.getAllColumnMetadataForTable(td->tableId, false, true, true);
112  vector<size_t> col_hashs(cds.size());
113  int64_t elapsed_time = 0;
114  size_t total_bytes = 0;
115  auto query_info = td->fragmenter->getFragmentsForQuery();
116  for (auto frag : query_info.fragments) {
117  int i = 0;
118  for (auto cd : cds) {
119  auto chunk_meta_it = frag.getChunkMetadataMapPhysical().find(cd->columnId);
120  ChunkKey chunk_key{
121  cat.getCurrentDB().dbId, td->tableId, cd->columnId, frag.fragmentId};
122  total_bytes += chunk_meta_it->second.numBytes;
123  auto ms = measure<>::execution([&]() {
124  std::shared_ptr<Chunk> chunkp =
125  Chunk::getChunk(cd,
126  &cat.getDataMgr(),
127  chunk_key,
128  CPU_LEVEL,
129  frag.deviceIds[static_cast<int>(CPU_LEVEL)],
130  chunk_meta_it->second.numBytes,
131  chunk_meta_it->second.numElements);
132  scan_chunk(chunk_meta_it->second, *chunkp, col_hashs[i], true);
133  // call Chunk destructor here
134  });
135  elapsed_time += ms;
136  i++;
137  }
138  }
139  cout << "Scanned " << query_info.getPhysicalNumTuples() << " rows " << total_bytes
140  << " bytes in " << elapsed_time << " ms. at "
141  << (double)total_bytes / (elapsed_time / 1000.0) / 1e6 << " MB/sec." << std::endl;
142  return col_hashs;
143 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
virtual TableInfo getFragmentsForQuery()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
size_t getPhysicalNumTuples() const
Definition: Fragmenter.h:160
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1579
void scan_chunk(const ChunkMetadata &chunk_metadata, const Chunk &chunk, size_t &hash, bool use_iter)
Definition: ScanTable.cpp:48
std::vector< int > ChunkKey
Definition: types.h:35
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
specifies the content in-memory of a row in the table metadata table
Fragmenter_Namespace::AbstractFragmenter * fragmenter
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ scan_table_return_hash_non_iter()

vector<size_t> scan_table_return_hash_non_iter ( const string &  table_name,
const Catalog &  cat 
)

Definition at line 145 of file ScanTable.cpp.

References Data_Namespace::CPU_LEVEL, Catalog_Namespace::DBMetadata::dbId, measure< TimeT >::execution(), TableDescriptor::fragmenter, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Fragmenter_Namespace::AbstractFragmenter::getFragmentsForQuery(), Catalog_Namespace::Catalog::getMetadataForTable(), scan_chunk(), and TableDescriptor::tableId.

Referenced by anonymous_namespace{StorageTest.cpp}::storage_test(), and anonymous_namespace{StorageTest.cpp}::storage_test_parallel().

146  {
147  const TableDescriptor* td = cat.getMetadataForTable(table_name);
148  list<const ColumnDescriptor*> cds =
149  cat.getAllColumnMetadataForTable(td->tableId, false, true, true);
150  vector<size_t> col_hashs(cds.size());
151  auto query_info = td->fragmenter->getFragmentsForQuery();
152  int64_t elapsed_time = 0;
153  size_t total_bytes = 0;
154  for (auto frag : query_info.fragments) {
155  int i = 0;
156  for (auto cd : cds) {
157  auto chunk_meta_it = frag.getChunkMetadataMapPhysical().find(cd->columnId);
158  ChunkKey chunk_key{
159  cat.getCurrentDB().dbId, td->tableId, cd->columnId, frag.fragmentId};
160  total_bytes += chunk_meta_it->second.numBytes;
161  auto ms = measure<>::execution([&]() {
162  std::shared_ptr<Chunk> chunkp =
163  Chunk::getChunk(cd,
164  &cat.getDataMgr(),
165  chunk_key,
166  CPU_LEVEL,
167  frag.deviceIds[static_cast<int>(CPU_LEVEL)],
168  chunk_meta_it->second.numBytes,
169  chunk_meta_it->second.numElements);
170  scan_chunk(chunk_meta_it->second, *chunkp, col_hashs[i], false);
171  // call Chunk destructor here
172  });
173  elapsed_time += ms;
174  i++;
175  }
176  }
177  cout << "Scanned " << query_info.getPhysicalNumTuples() << " rows " << total_bytes
178  << " bytes in " << elapsed_time << " ms. at "
179  << (double)total_bytes / (elapsed_time / 1000.0) / 1e6 << " MB/sec." << std::endl;
180  return col_hashs;
181 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
virtual TableInfo getFragmentsForQuery()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1579
void scan_chunk(const ChunkMetadata &chunk_metadata, const Chunk &chunk, size_t &hash, bool use_iter)
Definition: ScanTable.cpp:48
std::vector< int > ChunkKey
Definition: types.h:35
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
specifies the content in-memory of a row in the table metadata table
Fragmenter_Namespace::AbstractFragmenter * fragmenter
+ Here is the call graph for this function:
+ Here is the caller graph for this function: