OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PosixFileArchive.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ARCHIVE_POSIXFILEARCHIVE_H_
18 #define ARCHIVE_POSIXFILEARCHIVE_H_
19 
20 #include <cstdio>
21 
22 #include "Archive.h"
23 
24 // Read buffer size in bytes: PosixFileArchive allocates its plain-text buffer with this size and reads files in chunks of it; kept as a global so unit tests can configure it.
25 extern size_t g_archive_read_buf_size;
26 
27 // this is the archive class for files hosted locally or remotely with
28 // POSIX compliant file name. !! 7z files work only with this class !!
29 class PosixFileArchive : public Archive {
30  public:
31  PosixFileArchive(const std::string url, const bool plain_text)
32  : Archive(url, plain_text) {
33  // some well-known file.exts imply plain text
34  if (!this->plain_text) {
35  this->plain_text = boost::filesystem::extension(url_part(5)) == ".csv" ||
36  boost::filesystem::extension(url_part(5)) == ".tsv" ||
37  boost::filesystem::extension(url_part(5)) == ".txt" ||
38  boost::filesystem::extension(url_part(5)) == "";
39  }
40 
41  if (this->plain_text) {
42  buf = new char[g_archive_read_buf_size];
43  }
44 
45  init_for_read();
46  }
47 
48  ~PosixFileArchive() override {
49  if (fp) {
50  fclose(fp);
51  }
52  if (buf) {
53  delete[] buf;
54  }
55  }
56 
57  void init_for_read() override {
58  auto file_path = url_part(5);
59  if (plain_text) {
60  if (nullptr == (fp = fopen(file_path.c_str(), "r"))) {
61  throw std::runtime_error(std::string("fopen(") + file_path +
62  "): " + strerror(errno));
63  }
64  } else {
65  if (ARCHIVE_OK != archive_read_open_filename(ar, file_path.c_str(), 1 << 16)) {
66  throw std::runtime_error(std::string("fopen(") + file_path +
67  "): " + strerror(errno));
68  }
69  }
70  }
71 
72  bool read_next_header() override {
73  if (plain_text) {
74  return !feof(fp);
75  } else {
77  }
78  }
79 
80  bool read_data_block(const void** buff, size_t* size, int64_t* offset) override {
81  if (plain_text) {
82  size_t nread;
83  if (0 >= (nread = fread(buf, 1, g_archive_read_buf_size, fp))) {
84  return false;
85  }
86  *buff = buf;
87  *size = nread;
88  *offset = ftell(fp);
89  return true;
90  } else {
91  // need original (compressed) offset for row estimation of compressed files
92  auto ret = Archive::read_data_block(buff, size, offset);
94  return ret;
95  }
96  }
97 
98  private:
99  char* buf = nullptr;
100  FILE* fp = nullptr;
101 };
102 
103 #endif /* ARCHIVE_POSIXFILEARCHIVE_H_ */
virtual bool read_data_block(const void **buff, size_t *size, int64_t *offset)
Definition: Archive.h:110
~PosixFileArchive() override
PosixFileArchive(const std::string url, const bool plain_text)
bool read_next_header() override
archive * ar
Definition: Archive.h:192
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
bool read_data_block(const void **buff, size_t *size, int64_t *offset) override
size_t g_archive_read_buf_size
Definition: Importer.cpp:110
virtual int64_t get_position_compressed() const
Definition: Archive.h:121
const std::string url_part(const int i)
Definition: Archive.h:185
void init_for_read() override
std::string url
Definition: Archive.h:190
virtual bool read_next_header()
Definition: Archive.h:99
bool plain_text
Definition: Archive.h:194