33 const std::optional<std::string>& sort_regex) {
38 " must be one of the following options: " +
43 sort_regex.has_value()) {
45 "\" must not be set for selected option \"" +
51 "\" must be set for selected option \"" +
60 std::vector<std::string> file_paths;
62 if (boost::filesystem::is_regular_file(file_path)) {
63 file_paths.emplace_back(file_path);
64 }
else if (recurse && boost::filesystem::is_directory(file_path)) {
65 for (boost::filesystem::recursive_directory_iterator
66 it(file_path, boost::filesystem::symlink_option::recurse),
70 if (!boost::filesystem::is_directory(it->path())) {
71 file_paths.emplace_back(it->path().string());
77 for (
const auto& path : glob_results) {
78 if (recurse && boost::filesystem::is_directory(path)) {
80 file_paths.insert(file_paths.end(), expanded_paths.begin(), expanded_paths.end());
82 file_paths.emplace_back(path);
85 if (file_paths.empty()) {
93 const std::vector<std::string>& file_paths) {
94 boost::regex regex_pattern(pattern);
95 std::vector<std::string> matched_file_paths;
96 for (
const auto& path : file_paths) {
97 if (boost::regex_match(path, regex_pattern)) {
98 matched_file_paths.emplace_back(path);
101 if (matched_file_paths.empty()) {
104 return matched_file_paths;
110 const std::string& file_path,
111 const std::optional<std::string>& filter_regex,
112 const std::optional<std::string>& sort_by,
113 const std::optional<std::string>& sort_regex,
114 const bool recurse) {
116 if (filter_regex.has_value()) {
122 auto lexi_comp = initial_file_order.getFileComparator();
123 std::stable_sort(result_files.begin(), result_files.end(), lexi_comp);
126 auto comp = file_order.getFileComparator();
127 std::stable_sort(result_files.begin(), result_files.end(), comp);
134 std::vector<arrow::fs::FileInfo> arrow_fs_regex_file_filter(
135 const std::string& pattern,
136 const std::vector<arrow::fs::FileInfo>& file_info_list) {
137 boost::regex regex_pattern(pattern);
138 std::vector<arrow::fs::FileInfo> matched_file_info_list;
139 for (
const auto& file_info : file_info_list) {
140 if (boost::regex_match(file_info.path(), regex_pattern)) {
141 matched_file_info_list.emplace_back(file_info);
144 if (matched_file_info_list.empty()) {
147 return matched_file_info_list;
152 std::vector<arrow::fs::FileInfo> arrow_fs_filter_sort_files(
153 const std::vector<arrow::fs::FileInfo>& file_paths,
154 const std::optional<std::string>& filter_regex,
155 const std::optional<std::string>& sort_by,
156 const std::optional<std::string>& sort_regex) {
157 auto result_files = filter_regex.has_value()
158 ? arrow_fs_regex_file_filter(filter_regex.value(), file_paths)
163 auto lexi_comp = initial_file_order.getFileComparator();
164 std::stable_sort(result_files.begin(), result_files.end(), lexi_comp);
166 auto file_order = FileOrderArrow(sort_regex, sort_by);
167 auto comp = file_order.getFileComparator();
168 std::stable_sort(result_files.begin(), result_files.end(), comp);
172 #endif // HAVE_AWS_S3
175 return boost::filesystem::exists(path) || !
heavyai::glob(path).empty();
bool contains(const T &container, const U &element)
const std::array< std::string, 2 > non_regex_sort_order_types
void throw_no_filter_match(const std::string &pattern)
const std::string FILE_SORT_REGEX_KEY
Shared utility for globbing files; paths can be specified as a single file, a directory, or a wildcard pattern.
void validate_sort_options(const std::optional< std::string > &sort_by, const std::optional< std::string > &sort_regex)
void throw_file_not_found(const std::string &file_path)
const std::string PATHNAME_ORDER_TYPE
std::vector< std::string > glob_local_recursive_files(const std::string &file_path, const bool recurse)
const std::string FILE_SORT_ORDER_BY_KEY
bool file_or_glob_path_exists(const std::string &path)
std::vector< std::string > local_glob_filter_sort_files(const std::string &file_path, const std::optional< std::string > &filter_regex, const std::optional< std::string > &sort_by, const std::optional< std::string > &sort_regex, const bool recurse)
const std::array< std::string, 5 > supported_file_sort_order_types
const std::array< std::string, 3 > regex_sort_order_types
std::vector< std::string > glob(const std::string &pattern)
std::vector< std::string > regex_file_filter(const std::string &pattern, const std::vector< std::string > &file_paths)