33 #include <arrow/filesystem/filesystem.h>
35 #include <boost/filesystem.hpp>
// Callable type used to compare two arrow::fs::FileInfo entries: it takes both
// entries by const reference and returns bool.
45 using ArrowFsComparator =
46 std::function<bool(const arrow::fs::FileInfo&, const arrow::fs::FileInfo&)>;
76 : std::runtime_error(error_message) {}
86 : std::runtime_error(error_message) {}
95 const std::optional<std::string>& sort_regex);
98 const std::string& file_path,
99 const std::optional<std::string>& filter_regex,
100 const std::optional<std::string>& sort_by,
101 const std::optional<std::string>& sort_regex,
102 const bool recurse =
true);
// Declaration only; the definition is not part of this listing.
// Takes a list of arrow::fs::FileInfo entries plus three optional string
// settings and returns a new vector of FileInfo.
// NOTE(review): judging by the name and parameters, this presumably filters
// the entries with filter_regex and orders them according to sort_by /
// sort_regex -- confirm against the definition.
//   file_paths    entries to process (passed by const reference)
//   filter_regex  optional pattern; presumably selects which entries are kept
//   sort_by       optional sort-order name
//   sort_regex    optional pattern; presumably used by the regex-based orders
105 std::vector<arrow::fs::FileInfo> arrow_fs_filter_sort_files(
106 const std::vector<arrow::fs::FileInfo>& file_paths,
107 const std::optional<std::string>& filter_regex,
108 const std::optional<std::string>& sort_by,
109 const std::optional<std::string>& sort_regex);
110 #endif // HAVE_AWS_S3
// String comparator with signature bool(const std::string&, const std::string&).
// NOTE(review): this listing skips original lines between the numbered ones
// (the variable name, the lambda header, the try openers and the catch bodies),
// so only the visible statements are described. The name is presumably
// common_regex_date_comp_ -- confirm against the full header.
112 const std::function<bool(const std::string&, const std::string&)>
// Parse the left operand with dateTimeParse<kDATE>; the result goes to lhs_t.
117 lhs_t = dateTimeParse<kDATE>(lhs, 0);
118 }
catch (
const std::exception& e) {
// Same parse for the right operand, stored in rhs_t. How a parse failure is
// handled is decided in the catch bodies, which are not shown here.
122 rhs_t = dateTimeParse<kDATE>(rhs, 0);
123 }
catch (
const std::exception& e) {
// Result: true when the parsed left value is strictly less than the right one.
126 return lhs_t < rhs_t;
// String comparator with signature bool(const std::string&, const std::string&),
// implemented as a capture-less lambda.
// NOTE(review): the variable name, the try openers and the catch bodies are on
// lines missing from this listing. The name is presumably
// common_regex_number_comp_ -- confirm against the full header.
128 const std::function<bool(const std::string&, const std::string&)>
130 [](
const std::string& lhs,
const std::string& rhs) ->
bool {
// Convert the left operand to an integer with stoll; the literal 0 is the
// optional position out-pointer, i.e. a null pointer (position not requested).
134 lhs_i = stoll(lhs, 0);
135 }
catch (
const std::exception& e) {
// Same conversion for the right operand. What happens when the conversion
// throws is decided in the catch bodies, which are not shown here.
139 rhs_i = stoll(rhs, 0);
140 }
catch (
const std::exception& e) {
// Result: true when the left integer is strictly less than the right one.
143 return lhs_i < rhs_i;
150 const std::optional<std::string>& sort_by)
// NOTE(review): fragment of a function body; the signature, the construction of
// regex_pattern and everything after the loop are missing from this listing.
// Presumably this is concatCaptureGroups(file_name) -- confirm.
// Receives the sub-matches produced by the search below.
155 boost::match_results<std::string::const_iterator> capture_groups;
// Only when regex_pattern matches somewhere in file_name are the groups used.
158 if (boost::regex_search(file_name, capture_groups, regex_pattern)) {
159 std::stringstream ss;
// Start at index 1: index 0 of a match_results is the whole match, so this
// streams only the parenthesised capture groups, in order, into ss.
160 for (
size_t i = 1; i < capture_groups.size(); i++) {
161 ss << capture_groups[i];
182 const std::optional<std::string>& sort_by)
188 return comparator_pair->second;
// NOTE(review): the entries below are lambda comparators taking two
// std::string arguments and returning bool. The container declaration and the
// keys they are registered under are on lines missing from this listing
// (presumably FileOrderLocal's comparator_map_ -- confirm).
// Plain std::string less-than comparison of the two arguments.
194 [](
const std::string& lhs,
const std::string& rhs) ->
bool {
return lhs < rhs; }},
// Passes each string to boost::filesystem::last_write_time and returns true
// when lhs was last written earlier than rhs.
196 [](
const std::string& lhs,
const std::string& rhs) ->
bool {
197 return boost::filesystem::last_write_time(lhs) <
198 boost::filesystem::last_write_time(rhs);
// The remaining three comparators capture `this`; their bodies are not part of
// this listing, so their behavior cannot be described from here.
201 [
this](
const std::string& lhs,
const std::string& rhs) ->
bool {
205 [
this](
const std::string& lhs,
const std::string& rhs) ->
bool {
210 [
this](
const std::string& lhs,
const std::string& rhs) ->
bool {
// File-ordering helper specialised for arrow::fs::FileInfo entries: it derives
// publicly from FileOrderBase instantiated with ArrowFsComparator.
// NOTE(review): this listing omits several original lines inside the class
// (access specifiers, closing braces and the map keys), so the comments below
// describe only what is visible.
218 class FileOrderArrow :
public FileOrderBase<ArrowFsComparator> {
// Forwards both optional settings unchanged to the base-class constructor.
220 FileOrderArrow(
const std::optional<std::string>& sort_regex,
221 const std::optional<std::string>& sort_by)
222 : FileOrderBase<ArrowFsComparator>(sort_regex, sort_by) {}
// Looks up the comparator registered under getSortBy() in comparator_map_ and
// returns it; CHECK guards the lookup, so an unregistered sort-order name trips
// the CHECK rather than dereferencing end().
224 inline ArrowFsComparator getFileComparator()
override {
225 auto comparator_pair = comparator_map_.find(getSortBy());
226 CHECK(comparator_pair != comparator_map_.end());
227 return comparator_pair->second;
// Maps a string key to the comparator used for it. The keys themselves are on
// lines missing from this listing (presumably the *_ORDER_TYPE constants --
// confirm).
231 const std::map<std::string, ArrowFsComparator> comparator_map_{
// Compares the two entries by their path() values.
233 [](
const arrow::fs::FileInfo& lhs,
const arrow::fs::FileInfo& rhs) ->
bool {
234 return lhs.path() < rhs.path();
// Compares the two entries by their mtime() values.
237 [](
const arrow::fs::FileInfo& lhs,
const arrow::fs::FileInfo& rhs) ->
bool {
238 return lhs.mtime() < rhs.mtime();
// Compares the strings returned by concatCaptureGroups() for each entry's path.
241 [
this](
const arrow::fs::FileInfo& lhs,
const arrow::fs::FileInfo& rhs) ->
bool {
242 auto lhs_name = lhs.path();
243 auto rhs_name = rhs.path();
244 return this->concatCaptureGroups(lhs_name) < this->concatCaptureGroups(rhs_name);
// NOTE(review): only the tail of this comparator's return expression is visible.
// It passes concatCaptureGroups(rhs.path()) as the last argument of a call whose
// start is missing (presumably one of the shared date/number string comparators
// -- confirm).
247 [
this](
const arrow::fs::FileInfo& lhs,
const arrow::fs::FileInfo& rhs) ->
bool {
249 this->concatCaptureGroups(rhs.path()));
// NOTE(review): same situation as the previous entry -- only the trailing
// argument of the call is visible here.
252 [
this](
const arrow::fs::FileInfo& lhs,
const arrow::fs::FileInfo& rhs) ->
bool {
254 this->concatCaptureGroups(rhs.path()));
258 #endif // HAVE_AWS_S3
const std::array< std::string, 2 > non_regex_sort_order_types
std::function< bool(const std::string &, const std::string &)> LocalFileComparator
const std::string REGEX_NUMBER_ORDER_TYPE
LocalFileComparator getFileComparator() override
void throw_no_filter_match(const std::string &pattern)
const std::string REGEX_ORDER_TYPE
const std::string FILE_SORT_REGEX_KEY
void validate_sort_options(const std::optional< std::string > &sort_by, const std::optional< std::string > &sort_regex)
virtual std::string getSortBy()
NoRegexFilterMatchException(const std::string &error_message)
const std::string REGEX_DATE_ORDER_TYPE
void throw_file_not_found(const std::string &file_path)
FileNotFoundException(const std::string &error_message)
const std::string PATHNAME_ORDER_TYPE
FileOrderBase(const std::optional< std::string > &sort_regex, const std::optional< std::string > &sort_by)
const std::string FILE_SORT_ORDER_BY_KEY
const std::map< std::string, LocalFileComparator > comparator_map_
bool file_or_glob_path_exists(const std::string &path)
std::vector< std::string > local_glob_filter_sort_files(const std::string &file_path, const std::optional< std::string > &filter_regex, const std::optional< std::string > &sort_by, const std::optional< std::string > &sort_regex, const bool recurse)
std::optional< std::string > sort_regex_
const std::array< std::string, 5 > supported_file_sort_order_types
const std::string DATE_MODIFIED_ORDER_TYPE
virtual T getFileComparator()=0
virtual std::string concatCaptureGroups(const std::string &file_name) const
const std::array< std::string, 3 > regex_sort_order_types
FileOrderLocal(const std::optional< std::string > &sort_regex, const std::optional< std::string > &sort_by)
std::optional< std::string > sort_by_
const std::function< bool(const std::string &, const std::string &)> common_regex_number_comp_
const std::function< bool(const std::string &, const std::string &)> common_regex_date_comp_