OmniSciDB  c07336695a
ScanTable.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <boost/functional/hash.hpp>
#include <cassert>
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include "../Catalog/Catalog.h"
#include "../Chunk/Chunk.h"
#include "../DataMgr/DataMgr.h"
#include "../Fragmenter/Fragmenter.h"
#include "../Shared/measure.h"
#include "../Shared/sqltypes.h"
41 
42 using namespace std;
43 using namespace Catalog_Namespace;
44 using namespace Fragmenter_Namespace;
45 using namespace Chunk_NS;
46 using namespace Data_Namespace;
47 
48 void scan_chunk(const ChunkMetadata& chunk_metadata,
49  const Chunk& chunk,
50  size_t& hash,
51  bool use_iter) {
52  ChunkIter cit = chunk.begin_iterator(chunk_metadata, 0, 1);
53  VarlenDatum vd;
54  bool is_end;
55  const ColumnDescriptor* cd = chunk.get_column_desc();
56  std::hash<std::string> string_hash;
57  int nth = 0;
58  while (true) {
59  if (use_iter) {
60  {
61  ChunkIter_get_next(&cit, true, &vd, &is_end);
62  }
63  } else {
64  { ChunkIter_get_nth(&cit, nth++, true, &vd, &is_end); }
65  }
66  if (is_end) {
67  {
68  break;
69  }
70  }
71  switch (cd->columnType.get_type()) {
72  case kSMALLINT:
73  boost::hash_combine(hash, *(int16_t*)vd.pointer);
74  break;
75  case kINT:
76  boost::hash_combine(hash, *(int32_t*)vd.pointer);
77  break;
78  case kBIGINT:
79  case kNUMERIC:
80  case kDECIMAL:
81  boost::hash_combine(hash, *(int64_t*)vd.pointer);
82  break;
83  case kFLOAT:
84  boost::hash_combine(hash, *(float*)vd.pointer);
85  break;
86  case kDOUBLE:
87  boost::hash_combine(hash, *(double*)vd.pointer);
88  break;
89  case kVARCHAR:
90  case kCHAR:
91  case kTEXT:
93  // cout << "read string: " << string((char*)vd.pointer, vd.length) << endl;
94  boost::hash_combine(hash, string_hash(string((char*)vd.pointer, vd.length)));
95  }
96  break;
97  case kTIME:
98  case kDATE:
99  case kTIMESTAMP:
100  boost::hash_combine(hash, *(int64_t*)vd.pointer);
101  break;
102  default:
103  assert(false);
104  }
105  }
106 }
107 
108 vector<size_t> scan_table_return_hash(const string& table_name, const Catalog& cat) {
109  const TableDescriptor* td = cat.getMetadataForTable(table_name);
110  list<const ColumnDescriptor*> cds =
111  cat.getAllColumnMetadataForTable(td->tableId, false, true, true);
112  vector<size_t> col_hashs(cds.size());
113  int64_t elapsed_time = 0;
114  size_t total_bytes = 0;
116  for (auto frag : query_info.fragments) {
117  int i = 0;
118  for (auto cd : cds) {
119  auto chunk_meta_it = frag.getChunkMetadataMapPhysical().find(cd->columnId);
120  ChunkKey chunk_key{
121  cat.getCurrentDB().dbId, td->tableId, cd->columnId, frag.fragmentId};
122  total_bytes += chunk_meta_it->second.numBytes;
123  auto ms = measure<>::execution([&]() {
124  std::shared_ptr<Chunk> chunkp =
125  Chunk::getChunk(cd,
126  &cat.getDataMgr(),
127  chunk_key,
128  CPU_LEVEL,
129  frag.deviceIds[static_cast<int>(CPU_LEVEL)],
130  chunk_meta_it->second.numBytes,
131  chunk_meta_it->second.numElements);
132  scan_chunk(chunk_meta_it->second, *chunkp, col_hashs[i], true);
133  // call Chunk destructor here
134  });
135  elapsed_time += ms;
136  i++;
137  }
138  }
139  cout << "Scanned " << query_info.getPhysicalNumTuples() << " rows " << total_bytes
140  << " bytes in " << elapsed_time << " ms. at "
141  << (double)total_bytes / (elapsed_time / 1000.0) / 1e6 << " MB/sec." << std::endl;
142  return col_hashs;
143 }
144 
145 vector<size_t> scan_table_return_hash_non_iter(const string& table_name,
146  const Catalog& cat) {
147  const TableDescriptor* td = cat.getMetadataForTable(table_name);
148  list<const ColumnDescriptor*> cds =
149  cat.getAllColumnMetadataForTable(td->tableId, false, true, true);
150  vector<size_t> col_hashs(cds.size());
152  int64_t elapsed_time = 0;
153  size_t total_bytes = 0;
154  for (auto frag : query_info.fragments) {
155  int i = 0;
156  for (auto cd : cds) {
157  auto chunk_meta_it = frag.getChunkMetadataMapPhysical().find(cd->columnId);
158  ChunkKey chunk_key{
159  cat.getCurrentDB().dbId, td->tableId, cd->columnId, frag.fragmentId};
160  total_bytes += chunk_meta_it->second.numBytes;
161  auto ms = measure<>::execution([&]() {
162  std::shared_ptr<Chunk> chunkp =
163  Chunk::getChunk(cd,
164  &cat.getDataMgr(),
165  chunk_key,
166  CPU_LEVEL,
167  frag.deviceIds[static_cast<int>(CPU_LEVEL)],
168  chunk_meta_it->second.numBytes,
169  chunk_meta_it->second.numElements);
170  scan_chunk(chunk_meta_it->second, *chunkp, col_hashs[i], false);
171  // call Chunk destructor here
172  });
173  elapsed_time += ms;
174  i++;
175  }
176  }
177  cout << "Scanned " << query_info.getPhysicalNumTuples() << " rows " << total_bytes
178  << " bytes in " << elapsed_time << " ms. at "
179  << (double)total_bytes / (elapsed_time / 1000.0) / 1e6 << " MB/sec." << std::endl;
180  return col_hashs;
181 }
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
Definition: sqltypes.h:51
const ColumnDescriptor * get_column_desc() const
Definition: Chunk.h:52
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
vector< size_t > scan_table_return_hash(const string &table_name, const Catalog &cat)
Definition: ScanTable.cpp:108
vector< size_t > scan_table_return_hash_non_iter(const string &table_name, const Catalog &cat)
Definition: ScanTable.cpp:145
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:319
std::deque< FragmentInfo > fragments
Definition: Fragmenter.h:167
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:327
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:181
int8_t * pointer
Definition: sqltypes.h:72
virtual TableInfo getFragmentsForQuery()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
specifies the content in-memory of a row in the column metadata table
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
void hash_combine(std::size_t &seed, T const &v)
size_t getPhysicalNumTuples() const
Definition: Fragmenter.h:160
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1579
Definition: sqltypes.h:43
ChunkIter begin_iterator(const ChunkMetadata &, int start_idx=0, int skip=1) const
Definition: Chunk.cpp:255
void scan_chunk(const ChunkMetadata &chunk_metadata, const Chunk &chunk, size_t &hash, bool use_iter)
Definition: ScanTable.cpp:48
DEVICE void ChunkIter_get_next(ChunkIter *it, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:147
std::vector< int > ChunkKey
Definition: types.h:35
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
Definition: sqltypes.h:47
SQLTypeInfo columnType
specifies the content in-memory of a row in the table metadata table
Fragmenter_Namespace::AbstractFragmenter * fragmenter
size_t length
Definition: sqltypes.h:71