OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PosixFileArchive.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ARCHIVE_POSIXFILEARCHIVE_H_
18 #define ARCHIVE_POSIXFILEARCHIVE_H_
19 
20 #include <cstdio>
21 
22 #include "Archive.h"
23 
24 // archive read buffer size in bytes (chars allocated for, and requested per fread from, plain-text files); configurable for unit test.
25 extern size_t g_archive_read_buf_size;
26 
27 // this is the archive class for files hosted locally or remotely with
28 // POSIX compliant file name. !! 7z files work only with this class !!
29 class PosixFileArchive : public Archive {
30  public:
31  PosixFileArchive(const std::string url, const bool plain_text)
32  : Archive(url, plain_text) {
33  // some well-known file.exts imply plain text
34  if (!this->plain_text) {
35  auto const ext = boost::filesystem::path(url_part(5)).extension();
36  this->plain_text = ext == ".csv" || ext == ".tsv" || ext == ".txt" || ext == "";
37  }
38 
39  if (this->plain_text) {
40  buf = new char[g_archive_read_buf_size];
41  }
42 
43  init_for_read();
44  }
45 
46  ~PosixFileArchive() override {
47  if (fp) {
48  fclose(fp);
49  }
50  if (buf) {
51  delete[] buf;
52  }
53  }
54 
55  void init_for_read() override {
56  auto file_path = url_part(5);
57  if (plain_text) {
58  if (nullptr == (fp = fopen(file_path.c_str(), "r"))) {
59  throw std::runtime_error(std::string("fopen(") + file_path +
60  "): " + strerror(errno));
61  }
62  } else {
63  if (ARCHIVE_OK != archive_read_open_filename(ar, file_path.c_str(), 1 << 16)) {
64  throw std::runtime_error(std::string("fopen(") + file_path +
65  "): " + strerror(errno));
66  }
67  }
68  }
69 
70  bool read_next_header() override {
71  if (plain_text) {
72  return !feof(fp);
73  } else {
75  }
76  }
77 
78  bool read_data_block(const void** buff, size_t* size, int64_t* offset) override {
79  if (plain_text) {
80  size_t nread;
81  if (0 >= (nread = fread(buf, 1, g_archive_read_buf_size, fp))) {
82  return false;
83  }
84  *buff = buf;
85  *size = nread;
86  *offset = ftell(fp);
87  return true;
88  } else {
89  // need original (compressed) offset for row estimation of compressed files
90  auto ret = Archive::read_data_block(buff, size, offset);
92  return ret;
93  }
94  }
95 
96  private:
97  char* buf = nullptr;
98  FILE* fp = nullptr;
99 };
100 
101 #endif /* ARCHIVE_POSIXFILEARCHIVE_H_ */
virtual bool read_data_block(const void **buff, size_t *size, int64_t *offset)
Definition: Archive.h:110
~PosixFileArchive() override
PosixFileArchive(const std::string url, const bool plain_text)
bool read_next_header() override
archive * ar
Definition: Archive.h:204
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
bool read_data_block(const void **buff, size_t *size, int64_t *offset) override
size_t g_archive_read_buf_size
Definition: Importer.cpp:109
virtual int64_t get_position_compressed() const
Definition: Archive.h:121
const std::string url_part(const int i)
Definition: Archive.h:193
void init_for_read() override
std::string url
Definition: Archive.h:202
virtual bool read_next_header()
Definition: Archive.h:99
bool plain_text
Definition: Archive.h:206