OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CsvShared.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <map>
20 #include <vector>
21 
22 #include "Catalog/Catalog.h"
23 #include "Catalog/ForeignTable.h"
24 #include "DataMgr/Chunk/Chunk.h"
25 #include "ForeignDataWrapper.h"
26 #include "ImportExport/Importer.h"
27 
28 namespace foreign_storage {
33 struct FileRegion {
34  // Name of file containing region
35  std::string filename;
36  // Byte offset (within file) for the beginning of file region
37  size_t first_row_file_offset;
38  // Index of first row in file region relative to the first row/non-header line in the
39  // file
40  size_t first_row_index;
41  // Number of rows in file region
42  size_t row_count;
43  // Size of file region in bytes
44  size_t region_size;
45 
46  FileRegion(std::string name,
47  size_t first_row_offset,
48  size_t first_row_idx,
49  size_t row_cnt,
50  size_t region_sz)
51  : filename(name)
52  , first_row_file_offset(first_row_offset)
53  , first_row_index(first_row_idx)
54  , row_count(row_cnt)
55  , region_size(region_sz) {}
56 
57  FileRegion(size_t first_row_offset,
58  size_t first_row_idx,
59  size_t row_cnt,
60  size_t region_sz)
61  : first_row_file_offset(first_row_offset)
62  , first_row_index(first_row_idx)
63  , row_count(row_cnt)
64  , region_size(region_sz) {}
66  bool operator<(const FileRegion& other) const {
67  return first_row_file_offset < other.first_row_file_offset;
68  }
69 };
70 
71 using FileRegions = std::vector<FileRegion>;
72 
73 // Serialization functions for FileRegion
74 void set_value(rapidjson::Value& json_val,
75  const FileRegion& file_region,
76  rapidjson::Document::AllocatorType& allocator);
77 
78 void get_value(const rapidjson::Value& json_val, FileRegion& file_region);
79 
80 namespace Csv {
81 
82 // Validate CSV Specific options
83 void validate_options(const ForeignTable* foreign_table);
84 
85 import_export::CopyParams validate_and_get_copy_params(const ForeignTable* foreign_table);
86 
87 // Return true if this used s3 select to access underlying CSV
88 bool validate_and_get_is_s3_select(const ForeignTable* foreign_table);
89 
90 Chunk_NS::Chunk make_chunk_for_column(
91  const ChunkKey& chunk_key,
92  std::map<ChunkKey, std::shared_ptr<ChunkMetadata>>& chunk_metadata_map,
93  const std::map<ChunkKey, AbstractBuffer*>& buffers);
94 
95 // Construct default metadata for given column descriptor with num_elements
96 std::shared_ptr<ChunkMetadata> get_placeholder_metadata(const ColumnDescriptor* column,
97  size_t num_elements);
98 
99 } // namespace Csv
100 } // namespace foreign_storage
FileRegion(size_t first_row_offset, size_t first_row_idx, size_t row_cnt, size_t region_sz)
Definition: CsvShared.h:57
std::vector< int > ChunkKey
Definition: types.h:37
import_export::CopyParams validate_and_get_copy_params(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:126
bool operator<(const FileRegion &other) const
Definition: CsvShared.h:66
string name
Definition: setup.in.py:72
FileRegion(std::string name, size_t first_row_offset, size_t first_row_idx, size_t row_cnt, size_t region_sz)
Definition: CsvShared.h:46
void get_value(const rapidjson::Value &json_val, FileRegion &file_region)
Definition: CsvShared.cpp:44
std::shared_ptr< ChunkMetadata > get_placeholder_metadata(const ColumnDescriptor *column, size_t num_elements)
Definition: CsvShared.cpp:235
void validate_options(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:121
This file contains the class specification and related data structures for Catalog.
std::vector< FileRegion > FileRegions
Definition: CsvShared.h:71
specifies the content in-memory of a row in the column metadata table
bool validate_and_get_is_s3_select(const ForeignTable *foreign_table)
Definition: CsvShared.cpp:93
void set_value(rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
Definition: CsvShared.cpp:26
Chunk_NS::Chunk make_chunk_for_column(const ChunkKey &chunk_key, std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers)
Definition: CsvShared.cpp:183