OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CopyParams.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file CopyParams.h
19  * @brief CopyParams struct
20  *
21  */
22 
23 #pragma once
24 
25 #include <optional>
26 #include <string>
27 
29 #include "Shared/sqltypes.h"
30 
31 namespace import_export {
32 
33 // not too big (needs much memory) but not too small (many thread forks); 1 << 23 = 8,388,608 (8 MiB if the unit is bytes -- TODO confirm unit)
34 constexpr static size_t kImportFileBufferSize = (1 << 23);
35 
36 // import buffers may grow to this size if necessary; 1024 * 1024 * 1024 = 1,073,741,824 bytes (1 GiB)
37 constexpr static size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024;
38 
42 
// CopyParams: struct of public data members in namespace import_export, grouped by the
// section comments below (s3/parquet, kafka, geospatial, odbc, regex).
// NOTE(review): this listing omits some source lines (the embedded line numbers skip,
// e.g. 46, 51-52, 57, 59, 66, 108), so the initializer lists reference members such as
// array_delim, array_begin, source_type, trim_spaces and raster_point_compute_angle
// whose declarations are not shown here.
43 struct CopyParams {
44  char delimiter; // ',' in the first initializer list; `d` in the six-argument constructor
45  std::string null_str; // "\\N" (backslash followed by N) in the first initializer list; `n` in the six-argument constructor
47  bool quoted; // does the input have any quoted fields; both initializer lists below set this to true
48  char quote; // '"' in both initializer lists
49  char escape; // '"' in both initializer lists
50  char line_delim; // '\n' in the first initializer list; `l` in the six-argument constructor
53  char array_end; // '}' in both initializer lists
54  int threads; // 0 in both initializer lists
55  size_t
56  max_reject; // maximum number of records that can be rejected before copy is failed; 100000 in both initializer lists
58  bool plain_text = false;
60  // s3/parquet related params
61  std::string s3_access_key; // per-query credentials to override the
62  std::string s3_secret_key; // settings in ~/.aws/credentials or environment
63  std::string s3_session_token = "";
64  std::string s3_region;
65  std::string s3_endpoint;
// NOTE(review): the declarator for the initializer below is on source line 66, which is
// not shown here; the page's cross-reference index lists
// `int32_t s3_max_concurrent_downloads` at CopyParams.h:66.
67  8; // maximum number of concurrent file downloads from S3
68  // kafka related params
69  size_t retry_count; // 100 in the first initializer list; `retries` in the six-argument constructor
70  size_t retry_wait; // 5 in the first initializer list; `wait` in the six-argument constructor (unit not stated here)
71  size_t batch_size; // 1000 in the first initializer list; `b` in the six-argument constructor
72  size_t buffer_size; // NOTE(review): no initializer for this member is visible in this listing (lines 126 and 159 are omitted)
73  // geospatial params
74  bool lonlat; // true in both initializer lists
78  int32_t geo_coords_srid; // 4326 in both initializer lists
80  std::string geo_layer_name;
83  int32_t source_srid; // 0 in both initializer lists
84  std::optional<std::string> regex_path_filter;
85  std::optional<std::string> file_sort_order_by;
86  std::optional<std::string> file_sort_regex;
88  std::string raster_import_bands;
93  std::string add_metadata_columns;
94  // odbc parameters
95  std::string sql_select;
96  std::string sql_order_by;
97  // odbc user mapping parameters
98  std::string username;
99  std::string password;
100  std::string credential_string;
101  // odbc server parameters
102  std::string dsn;
103  std::string connection_string;
104  // regex parameters
105  std::string line_start_regex;
106  std::string line_regex;
107 
// First initializer list. Its constructor signature (source line 108) is not shown in
// this listing; presumably the default constructor -- TODO confirm.
109  : delimiter(',')
110  , null_str("\\N")
112  , quoted(true)
113  , quote('"')
114  , escape('"')
115  , line_delim('\n')
116  , array_delim(',')
117  , array_begin('{')
118  , array_end('}')
119  , threads(0)
120  , max_reject(100000)
121  , source_type(import_export::SourceType::kDelimitedFile)
122  , trim_spaces(true)
123  , retry_count(100)
124  , retry_wait(5)
125  , batch_size(1000)
127  , lonlat(true)
131  , geo_coords_srid(4326)
135  , source_srid(0)
139  , raster_point_compute_angle{false} {}
140 
// Constructor taking the delimiter (d), null string (n), line delimiter (l), batch size (b),
// retry count (retries) and retry wait (wait). Every other initializer visible here uses
// the same value as the first initializer list above.
141  CopyParams(char d, const std::string& n, char l, size_t b, size_t retries, size_t wait)
142  : delimiter(d)
143  , null_str(n)
145  , quoted(true)
146  , quote('"')
147  , escape('"')
148  , line_delim(l)
149  , array_delim(',')
150  , array_begin('{')
151  , array_end('}')
152  , threads(0)
153  , max_reject(100000)
154  , source_type(import_export::SourceType::kDelimitedFile)
155  , trim_spaces(true)
156  , retry_count(retries)
157  , retry_wait(wait)
158  , batch_size(b)
160  , lonlat(true)
164  , geo_coords_srid(4326)
168  , source_srid(0)
172  , raster_point_compute_angle{false} {}
173 };
174 
175 } // namespace import_export
std::string s3_secret_key
Definition: CopyParams.h:62
int32_t raster_scanlines_per_thread
Definition: CopyParams.h:89
SQLTypes
Definition: sqltypes.h:38
Constants for Builtin SQL Types supported by HEAVY.AI.
std::string connection_string
Definition: CopyParams.h:103
std::string raster_import_dimensions
Definition: CopyParams.h:92
std::string add_metadata_columns
Definition: CopyParams.h:93
ImportHeaderRow has_header
Definition: CopyParams.h:46
EncodingType
Definition: sqltypes.h:233
std::optional< std::string > regex_path_filter
Definition: CopyParams.h:84
RasterPointType raster_point_type
Definition: CopyParams.h:87
int32_t s3_max_concurrent_downloads
Definition: CopyParams.h:66
std::string sql_order_by
Definition: CopyParams.h:96
import_export::SourceType source_type
Definition: CopyParams.h:57
bool g_enable_smem_group_by true
std::string geo_layer_name
Definition: CopyParams.h:80
std::string line_start_regex
Definition: CopyParams.h:105
std::string s3_session_token
Definition: CopyParams.h:63
CopyParams(char d, const std::string &n, char l, size_t b, size_t retries, size_t wait)
Definition: CopyParams.h:141
std::string raster_import_bands
Definition: CopyParams.h:88
bool g_enable_watchdog false
Definition: Execute.cpp:79
static constexpr size_t max_import_buffer_resize_byte_size
Definition: CopyParams.h:37
static constexpr size_t kImportFileBufferSize
Definition: CopyParams.h:34
constexpr double n
Definition: Utm.h:38
std::string s3_access_key
Definition: CopyParams.h:61
RasterPointTransform raster_point_transform
Definition: CopyParams.h:90
std::optional< std::string > file_sort_order_by
Definition: CopyParams.h:85
Shared Enum.
std::string credential_string
Definition: CopyParams.h:100
std::optional< std::string > file_sort_regex
Definition: CopyParams.h:86
EncodingType geo_coords_encoding
Definition: CopyParams.h:75