From 4faad07d211582198be6291faa400e98bd3d3548 Mon Sep 17 00:00:00 2001 From: Joaquin Anton Date: Tue, 16 Apr 2024 13:34:21 +0200 Subject: [PATCH] Undo autoformatting Signed-off-by: Joaquin Anton --- dali/operators/reader/loader/coco_loader.cc | 62 +++++----- dali/operators/reader/loader/coco_loader.h | 40 +++---- .../reader/loader/file_label_loader.cc | 5 +- .../reader/loader/file_label_loader.h | 108 +++++++++--------- dali/operators/reader/loader/file_loader.h | 7 +- dali/operators/reader/loader/filesystem.cc | 8 +- dali/operators/reader/loader/filesystem.h | 7 +- .../reader/loader/indexed_file_loader.h | 63 +++++----- .../loader/webdataset/tar_utils_test.cc | 68 +++++------ dali/util/cufile.h | 9 +- internal_tools/test_bundled_libs.py | 2 +- 11 files changed, 189 insertions(+), 190 deletions(-) diff --git a/dali/operators/reader/loader/coco_loader.cc b/dali/operators/reader/loader/coco_loader.cc index 7a5067c29e7..e5e18832cb1 100644 --- a/dali/operators/reader/loader/coco_loader.cc +++ b/dali/operators/reader/loader/coco_loader.cc @@ -57,8 +57,8 @@ struct Annotation { }; template -std::enable_if_t::value, void> Read(std::ifstream& file, T &data, - const char* filename) { +std::enable_if_t::value, void> +Read(std::ifstream& file, T& data, const char* filename) { int64_t bytes = sizeof(T); file.read(reinterpret_cast(&data), bytes); DALI_ENFORCE(file.gcount() == bytes, @@ -79,16 +79,16 @@ void Read(std::ifstream& file, span data, const char* filename) { } template -void Write(std::ofstream &file, T data, const char* filename) { - file.write(reinterpret_cast(&data), sizeof(T)); +void Write(std::ofstream& file, T data, const char* filename) { + file.write(reinterpret_cast(&data), sizeof(T)); DALI_ENFORCE(file.good(), make_string("Error reading from path: ", filename)); } template -void Write(std::ofstream &file, span data, const char* filename) { +void Write(std::ofstream& file, span data, const char* filename) { if (data.empty()) return; - file.write(reinterpret_cast(data.data()), sizeof(T) * data.size()); + file.write(reinterpret_cast(data.data()), sizeof(T) * data.size()); DALI_ENFORCE(file.good(), make_string("Error reading from path: ", filename)); } @@ -123,7 +123,7 @@ void SaveToFile(const std::vector &input, const std::string path) { } template -void SaveToFile(const std::vector> &input, const std::string path) { +void SaveToFile(const std::vector > &input, const std::string path) { if (input.empty()) return; std::ofstream file(path, std::ios_base::binary | std::ios_base::out); @@ -132,7 +132,7 @@ void SaveToFile(const std::vector> &input, const std::string path unsigned size = input.size(); Write(file, size, path.c_str()); - for (auto &v : input) { + for (auto& v : input) { size = v.size(); assert(size > 0); Write(file, size, path.c_str()); @@ -186,7 +186,7 @@ void LoadFromFile(std::vector &output, const std::string path) { } template -void LoadFromFile(std::vector> &output, const std::string path) { +void LoadFromFile(std::vector > &output, const std::string path) { std::ifstream file(path); output.clear(); if (!file.good()) @@ -212,7 +212,7 @@ void LoadFromFile(std::vector &entries, const std::s int id = 0; std::string filename; while (file >> filename) { - entries.push_back({std::move(filename), id}); + entries.emplace_back(std::move(filename), int{id}); ++id; } } @@ -236,7 +236,7 @@ void ParseImageInfo(LookaheadParser &parser, std::vector &image_infos } else if (0 == std::strcmp(internal_key, "file_name")) { image_info.filename_ = parser.GetString(); } else { - parser.SkipValue(); + parser.SkipValue(); } } image_infos.emplace_back(std::move(image_info)); @@ -271,7 +271,8 @@ void ParseCategories(LookaheadParser &parser, std::map &category_ids) } void ParseAnnotations(LookaheadParser &parser, std::vector &annotations, - float min_size_threshold, bool ltrb, bool parse_segmentation, bool parse_rle, + float min_size_threshold, bool ltrb, + bool parse_segmentation, bool parse_rle, bool include_iscrowd = true) { std::string rle_str; std::vector rle_uints; @@ -344,8 +345,8 @@ void ParseAnnotations(LookaheadParser &parser, std::vector &annotati } else if (parser.PeekType() == kArrayType) { annotation.tag_ = Annotation::POLYGON; int coord_offset = 0; - auto &segm_meta = annotation.poly_.segm_meta_; - auto &segm_coords = annotation.poly_.segm_coords_; + auto& segm_meta = annotation.poly_.segm_meta_; + auto& segm_coords = annotation.poly_.segm_coords_; parser.EnterArray(); while (parser.NextArrayValue()) { segm_meta.push_back(coord_offset); @@ -376,7 +377,8 @@ void ParseAnnotations(LookaheadParser &parser, std::vector &annotati } void ParseJsonFile(const OpSpec &spec, std::vector &image_infos, - std::vector &annotations, std::map &category_ids, + std::vector &annotations, + std::map &category_ids, bool parse_segmentation, bool parse_rle) { const auto annotations_file = spec.GetArgument("annotations_file"); @@ -405,8 +407,8 @@ void ParseJsonFile(const OpSpec &spec, std::vector &image_inf } else if (0 == std::strcmp(key, "categories")) { detail::ParseCategories(parser, category_ids); } else if (0 == std::strcmp(key, "annotations")) { - ParseAnnotations(parser, annotations, sz_threshold, ltrb, parse_segmentation, parse_rle, - include_iscrowd); + ParseAnnotations(parser, annotations, sz_threshold, ltrb, parse_segmentation, + parse_rle, include_iscrowd); } else { parser.SkipValue(); } @@ -416,7 +418,7 @@ void ParseJsonFile(const OpSpec &spec, std::vector &image_inf } // namespace detail void CocoLoader::SavePreprocessedAnnotations( - const std::string &path, const std::vector &entries) { + const std::string &path, const std::vector &entries) { using detail::SaveToFile; SaveToFile(offsets_, path + "/offsets.dat"); SaveToFile(boxes_, path + "/boxes.dat"); @@ -449,9 +451,9 @@ void CocoLoader::SavePreprocessedAnnotations( void CocoLoader::ParsePreprocessedAnnotations() { assert(HasPreprocessedAnnotations(spec_)); - const auto path = spec_.HasArgument("meta_files_path") ? - spec_.GetArgument("meta_files_path") : - spec_.GetArgument("preprocessed_annotations"); + const auto path = spec_.HasArgument("meta_files_path") + ? spec_.GetArgument("meta_files_path") + : spec_.GetArgument("preprocessed_annotations"); using detail::LoadFromFile; LoadFromFile(offsets_, path + "/offsets.dat"); LoadFromFile(boxes_, path + "/boxes.dat"); @@ -488,12 +490,13 @@ void CocoLoader::ParseJsonAnnotations() { std::map category_ids; bool parse_segmentation = output_polygon_masks_ || output_pixelwise_masks_; - detail::ParseJsonFile(spec_, image_infos, annotations, category_ids, parse_segmentation, - output_pixelwise_masks_); + detail::ParseJsonFile(spec_, image_infos, annotations, category_ids, + parse_segmentation, output_pixelwise_masks_); if (images_.empty()) { - std::sort(image_infos.begin(), image_infos.end(), - [&](auto &left, auto &right) { return left.original_id_ < right.original_id_; }); + std::sort(image_infos.begin(), image_infos.end(), [&](auto &left, auto &right) { + return left.original_id_ < right.original_id_; + }); for (auto &info : image_infos) { images_.push_back(info.filename_); } @@ -541,7 +544,7 @@ void CocoLoader::ParseJsonAnnotations() { int64_t sample_vertices_count = 0; int64_t mask_offset = masks_rles_.size(); int64_t mask_count = 0; - for (const auto *annotation_ptr : img_annotations_map[image_id]) { + for (const auto* annotation_ptr : img_annotations_map[image_id]) { const auto &annotation = *annotation_ptr; if (remap_classes) { labels_.push_back(category_ids[annotation.category_id_]); @@ -625,7 +628,7 @@ void CocoLoader::ParseJsonAnnotations() { } } - file_label_entries_.push_back({std::move(image_info.filename_), new_image_id}); + file_label_entries_.emplace_back(std::move(image_info.filename_), new_image_id); new_image_id++; } } @@ -634,8 +637,9 @@ void CocoLoader::ParseJsonAnnotations() { images_.clear(); if (spec_.GetArgument("save_preprocessed_annotations")) { - SavePreprocessedAnnotations(spec_.GetArgument("save_preprocessed_annotations_dir"), - file_label_entries_); + SavePreprocessedAnnotations( + spec_.GetArgument("save_preprocessed_annotations_dir"), + file_label_entries_); } } diff --git a/dali/operators/reader/loader/coco_loader.h b/dali/operators/reader/loader/coco_loader.h index c580294a0c2..bd72db7eeaa 100644 --- a/dali/operators/reader/loader/coco_loader.h +++ b/dali/operators/reader/loader/coco_loader.h @@ -18,15 +18,15 @@ #include #include #include +#include #include #include -#include +#include "dali/operators/reader/loader/file_label_loader.h" #include "dali/core/common.h" #include "dali/core/error_handling.h" #include "dali/core/geom/vec.h" #include "dali/core/unique_handle.h" -#include "dali/operators/reader/loader/file_label_loader.h" extern "C" { #include "third_party/cocoapi/common/maskApi.h" @@ -60,8 +60,7 @@ inline bool HasSavePreprocessedAnnotations(const OpSpec &spec) { inline bool HasSavePreprocessedAnnotationsDir(const OpSpec &spec) { return spec.HasArgument("save_preprocessed_annotations_dir") || - (spec.HasArgument("dump_meta_files_path") && - spec.GetArgument("dump_meta_files_path")); + (spec.HasArgument("dump_meta_files_path") && spec.GetArgument("dump_meta_files_path")); } struct RLEMask : public UniqueHandle { @@ -90,12 +89,8 @@ struct RLEMask : public UniqueHandle { rleFree(&handle); } - const RLE *operator->() const { - return &handle_; - } - RLE *operator->() { - return &handle_; - } + const RLE* operator->() const { return &handle_; } + RLE* operator->() { return &handle_; } }; using RLEMaskPtr = std::shared_ptr; @@ -103,13 +98,13 @@ using RLEMaskPtr = std::shared_ptr; class DLL_PUBLIC CocoLoader : public FileLabelLoaderBase { public: explicit inline CocoLoader(const OpSpec &spec) - : FileLabelLoaderBase(spec, spec.GetArgument("shuffle_after_epoch")), - spec_(spec) { + : FileLabelLoaderBase(spec, spec.GetArgument("shuffle_after_epoch")) + , spec_(spec) { has_preprocessed_annotations_ = HasPreprocessedAnnotations(spec); DALI_ENFORCE(has_preprocessed_annotations_ || spec.HasArgument("annotations_file"), - "Either ``annotations_file`` or ``preprocessed_annotations`` must be provided"); + "Either ``annotations_file`` or ``preprocessed_annotations`` must be provided"); if (has_preprocessed_annotations_) { - for (const char *arg_name : {"annotations_file", "skip_empty", "ratio", "ltrb", "images", + for (const char* arg_name : {"annotations_file", "skip_empty", "ratio", "ltrb", "images", "size_threshold", "dump_meta_files", "dump_meta_files_path"}) { if (spec.HasArgument(arg_name)) DALI_FAIL(make_string("When reading data from preprocessed annotation files, \"", @@ -126,9 +121,8 @@ class DLL_PUBLIC CocoLoader : public FileLabelLoaderBase { } if (HasSavePreprocessedAnnotations(spec_) != HasSavePreprocessedAnnotationsDir(spec_)) { - DALI_FAIL( - "``save_preprocessed_annotations`` and ``save_preprocessed_annotations_dir`` " - "should be provided together"); + DALI_FAIL("``save_preprocessed_annotations`` and ``save_preprocessed_annotations_dir`` " + "should be provided together"); } } @@ -159,9 +153,11 @@ class DLL_PUBLIC CocoLoader : public FileLabelLoaderBase { PixelwiseMasksInfo pixelwise_masks_info(int image_idx) const { assert(output_pixelwise_masks_); - return {{heights_[image_idx], widths_[image_idx], 1}, - {masks_rles_.data() + mask_offsets_[image_idx], mask_counts_[image_idx]}, - {masks_rles_idx_.data() + mask_offsets_[image_idx], mask_counts_[image_idx]}}; + return { + {heights_[image_idx], widths_[image_idx], 1}, + {masks_rles_.data() + mask_offsets_[image_idx], mask_counts_[image_idx]}, + {masks_rles_idx_.data() + mask_offsets_[image_idx], mask_counts_[image_idx]} + }; } span polygons(int image_idx) const { @@ -205,8 +201,8 @@ class DLL_PUBLIC CocoLoader : public FileLabelLoaderBase { void ParseJsonAnnotations(); - void SavePreprocessedAnnotations(const std::string &path, - const std::vector &entries); + void SavePreprocessedAnnotations( + const std::string &path, const std::vector &image_id_pairs); private: const OpSpec spec_; diff --git a/dali/operators/reader/loader/file_label_loader.cc b/dali/operators/reader/loader/file_label_loader.cc index c28ef0f40d8..14f3fea399c 100644 --- a/dali/operators/reader/loader/file_label_loader.cc +++ b/dali/operators/reader/loader/file_label_loader.cc @@ -30,7 +30,7 @@ void FileLabelLoaderBase::PrepareEmpty(ImageLabelWrappe template void FileLabelLoaderBase::ReadSample(ImageLabelWrapper &image_label) { - auto entry = file_label_entries_[current_index_++]; + auto image_pair = file_label_entries_[current_index_++]; // handle wrap-around MoveToNextShard(current_index_); @@ -50,11 +50,10 @@ void FileLabelLoaderBase::ReadSample(ImageLabelWrapper return; } - auto uri = filesystem::join_path(file_root_, entry.filename); auto current_image = FileStream::Open(uri, {read_ahead_, !copy_read_data_, false}, entry.size); Index image_size = current_image->Size(); - if (copy_read_data_ || !current_image->CanMemoryMap()) { + if (copy_read_data_) { if (image_label.image.shares_data()) { image_label.image.Reset(); } diff --git a/dali/operators/reader/loader/file_label_loader.h b/dali/operators/reader/loader/file_label_loader.h index 5fbf6a9f0fe..6385cd5077c 100755 --- a/dali/operators/reader/loader/file_label_loader.h +++ b/dali/operators/reader/loader/file_label_loader.h @@ -16,18 +16,19 @@ #define DALI_OPERATORS_READER_LOADER_FILE_LABEL_LOADER_H_ #include -#include #include -#include +#include + #include -#include #include #include #include #include +#include + #include "dali/core/common.h" -#include "dali/operators/reader/loader/filesystem.h" #include "dali/operators/reader/loader/loader.h" +#include "dali/operators/reader/loader/filesystem.h" #include "dali/util/file.h" namespace dali { @@ -37,17 +38,19 @@ struct ImageLabelWrapper { int label; }; -template -class DLL_PUBLIC FileLabelLoaderBase - : public Loader { +template +class DLL_PUBLIC FileLabelLoaderBase : public Loader { public: using Base = Loader; - explicit inline FileLabelLoaderBase(const OpSpec &spec, bool shuffle_after_epoch = false) - : Base(spec), - shuffle_after_epoch_(shuffle_after_epoch), - current_index_(0), - current_epoch_(0) { - traverse_opts_.read_label = true; + explicit inline FileLabelLoaderBase( + const OpSpec& spec, + bool shuffle_after_epoch = false) + : Base(spec), + shuffle_after_epoch_(shuffle_after_epoch), + current_index_(0), + current_epoch_(0) { + vector files; vector labels; @@ -55,29 +58,24 @@ class DLL_PUBLIC FileLabelLoaderBase has_labels_arg_ = spec.TryGetRepeatedArgument(labels, "labels"); has_file_list_arg_ = spec.TryGetArgument(file_list_, "file_list"); has_file_root_arg_ = spec.TryGetArgument(file_root_, "file_root"); - bool has_file_filters_arg = - spec.TryGetRepeatedArgument(traverse_opts_.file_filters, "file_filters"); - bool has_dir_filters_arg = - spec.TryGetRepeatedArgument(traverse_opts_.dir_filters, "dir_filters"); + bool has_file_filters_arg = spec.TryGetRepeatedArgument(filters_, "file_filters"); // TODO(ksztenderski): CocoLoader inherits after FileLabelLoader and it doesn't work with // GetArgument. spec.TryGetArgument(traverse_opts_.case_sensitive_filter, "case_sensitive_filter"); DALI_ENFORCE(has_file_root_arg_ || has_files_arg_ || has_file_list_arg_, - "``file_root`` argument is required when not using ``files`` or ``file_list``."); + "``file_root`` argument is required when not using ``files`` or ``file_list``."); DALI_ENFORCE(has_files_arg_ + has_file_list_arg_ <= 1, - "File paths can be provided through ``files`` or ``file_list`` but not both."); + "File paths can be provided through ``files`` or ``file_list`` but not both."); DALI_ENFORCE(has_files_arg_ || !has_labels_arg_, - "The argument ``labels`` is valid only when file paths " - "are provided as ``files`` argument."); + "The argument ``labels`` is valid only when file paths " + "are provided as ``files`` argument."); - DALI_ENFORCE(!has_file_filters_arg || traverse_opts_.file_filters.size() > 0, - "``file_filters`` list cannot be empty."); - DALI_ENFORCE(!has_dir_filters_arg || traverse_opts_.dir_filters.size() > 0, - "``dir_filters`` list cannot be empty."); + DALI_ENFORCE(!has_file_filters_arg || filters_.size() > 0, + "``file_filters`` list cannot be empty."); if (has_file_list_arg_) { DALI_ENFORCE(!file_list_.empty(), "``file_list`` argument cannot be empty"); @@ -92,28 +90,27 @@ class DLL_PUBLIC FileLabelLoaderBase if (has_files_arg_) { DALI_ENFORCE(files.size() > 0, "``files`` specified an empty list."); if (has_labels_arg_) { - DALI_ENFORCE( - files.size() == labels.size(), - make_string("Provided ", labels.size(), " labels for ", files.size(), " files.")); + DALI_ENFORCE(files.size() == labels.size(), make_string("Provided ", labels.size(), + " labels for ", files.size(), " files.")); for (int i = 0, n = files.size(); i < n; i++) - file_label_entries_.push_back({std::move(files[i]), labels[i]}); + file_label_entries_.emplace_back(std::move(files[i]), labels[i]); } else { - for (int i = 0, n = files.size(); i < n; i++) - file_label_entries_.push_back({std::move(files[i]), i}); + for (int i = 0, n = files.size(); i < n; i++) + file_label_entries_.emplace_back(std::move(files[i]), i); } } /* - * Those options are mutually exclusive as `shuffle_after_epoch` will make every shard looks - * differently after each epoch so coexistence with `stick_to_shard` doesn't make any sense - * Still when `shuffle_after_epoch` we will set `stick_to_shard` internally in the - * FileLabelLoader so all DALI instances will do shuffling after each epoch + * Those options are mutually exclusive as `shuffle_after_epoch` will make every shard looks differently + * after each epoch so coexistence with `stick_to_shard` doesn't make any sense + * Still when `shuffle_after_epoch` we will set `stick_to_shard` internally in the FileLabelLoader so all + * DALI instances will do shuffling after each epoch */ - DALI_ENFORCE(!(shuffle_after_epoch_ && stick_to_shard_), - "shuffle_after_epoch and stick_to_shard cannot be both true"); + DALI_ENFORCE(!(shuffle_after_epoch_ && stick_to_shard_), + "shuffle_after_epoch and stick_to_shard cannot be both true"); DALI_ENFORCE(!(shuffle_after_epoch_ && shuffle_), - "shuffle_after_epoch and random_shuffle cannot be both true"); + "shuffle_after_epoch and random_shuffle cannot be both true"); /* * Imply `stick_to_shard` from `shuffle_after_epoch` */ @@ -121,7 +118,8 @@ class DLL_PUBLIC FileLabelLoaderBase stick_to_shard_ = true; } if (!dont_use_mmap_) { - mmap_reserver_ = FileStream::MappingReserver(static_cast(initial_buffer_fill_)); + mmap_reserver_ = FileStream::MappingReserver( + static_cast(initial_buffer_fill_)); } copy_read_data_ = dont_use_mmap_ || !mmap_reserver_.CanShareMappedData(); } @@ -143,7 +141,7 @@ class DLL_PUBLIC FileLabelLoaderBase vector line_buf(16 << 10); // 16 kB should be more than enough for a line char *line = line_buf.data(); - for (int n = 1; s.getline(line, line_buf.size()); n++) { + for (int n = 1; s.getline(line, line_buf.size()); n++) { // parse the line backwards: // - skip trailing whitespace // - consume digits @@ -164,15 +162,15 @@ class DLL_PUBLIC FileLabelLoaderBase for (; i >= 0 && isspace(line[i]); i--) {} int name_end = i + 1; - DALI_ENFORCE( - name_end > 0 && name_end < label_start && label_start >= 2 && label_end > label_start, - make_string("Incorrect format of the list file \"", file_list_, "\":", n, - " expected file name followed by a label; got: ", line)); + DALI_ENFORCE(name_end > 0 && name_end < label_start && + label_start >= 2 && label_end > label_start, + make_string("Incorrect format of the list file \"", file_list_, "\":", n, + " expected file name followed by a label; got: ", line)); line[label_end] = 0; line[name_end] = 0; - file_label_entries_.push_back({std::string(line), std::atoi(line + label_start)}); + file_label_entries_.emplace_back(line, std::atoi(line + label_start)); } DALI_ENFORCE(s.eof(), "Wrong format of file_list: " + file_list_); @@ -225,23 +223,23 @@ class DLL_PUBLIC FileLabelLoaderBase current_epoch_ = state.current_epoch; } - using Base::copy_read_data_; + using Base::shard_id_; + using Base::virtual_shard_id_; + using Base::num_shards_; + using Base::stick_to_shard_; + using Base::shuffle_; using Base::dont_use_mmap_; using Base::initial_buffer_fill_; + using Base::copy_read_data_; + using Base::read_ahead_; using Base::IsCheckpointingEnabled; - using Base::MoveToNextShard; - using Base::num_shards_; using Base::PrepareEmptyTensor; - using Base::read_ahead_; - using Base::shard_id_; + using Base::MoveToNextShard; using Base::ShouldSkipImage; - using Base::shuffle_; - using Base::stick_to_shard_; - using Base::virtual_shard_id_; string file_root_, file_list_; - vector file_label_entries_; - vector backup_file_label_entries_; + vector> file_label_entries_; + vector> backup_file_label_entries_; filesystem::TraverseDirectoriesOptions traverse_opts_; bool has_files_arg_ = false; diff --git a/dali/operators/reader/loader/file_loader.h b/dali/operators/reader/loader/file_loader.h index 8ec907f95e0..36920fc8774 100755 --- a/dali/operators/reader/loader/file_loader.h +++ b/dali/operators/reader/loader/file_loader.h @@ -124,7 +124,7 @@ class FileLoader : public Loader { char *line = line_buf.data(); while (s.getline(line, line_buf.size())) { if (line[0]) // skip empty lines - file_entries_.push_back({std::string(line)}); + file_entries_.emplace_back(line); } DALI_ENFORCE(s.eof(), "Wrong format of file_list: " + file_list_); } @@ -157,10 +157,10 @@ class FileLoader : public Loader { // With checkpointing enabled dataset order must be easy to restore. // Shuffling is run with different seed every epoch, so this doesn't // reduce the randomness. - file_entries_ = backup_file_entries_; + files_ = backup_files_; } std::mt19937 g(kDaliDataloaderSeed + current_epoch_); - std::shuffle(file_entries_.begin(), file_entries_.end(), g); + std::shuffle(files_.begin(), files_.end(), g); } } @@ -185,7 +185,6 @@ class FileLoader : public Loader { string file_list_, file_root_, file_filter_; filesystem::TraverseDirectoriesOptions traverse_opts_; - vector file_entries_; vector backup_file_entries_; diff --git a/dali/operators/reader/loader/filesystem.cc b/dali/operators/reader/loader/filesystem.cc index 1055feeeee6..a442028d8f0 100644 --- a/dali/operators/reader/loader/filesystem.cc +++ b/dali/operators/reader/loader/filesystem.cc @@ -64,7 +64,9 @@ std::vector list_subdirectories(const std::string &parent_dir, // open the root DIR *dir = opendir(parent_dir.c_str()); DALI_ENFORCE(dir != nullptr, make_string("Failed to open ", parent_dir)); - auto cleanup = AtScopeExit([&dir] { closedir(dir); }); + auto cleanup = AtScopeExit([&dir] { + closedir(dir); + }); struct dirent *entry; std::vector subdirs; @@ -102,7 +104,9 @@ std::vector list_files(const std::string &parent_dir, bool case_sensitive_filter = true) { DIR *dir = opendir(parent_dir.c_str()); DALI_ENFORCE(dir != nullptr, make_string("Failed to open ", parent_dir)); - auto cleanup = AtScopeExit([&dir] { closedir(dir); }); + auto cleanup = AtScopeExit([&dir] { + closedir(dir); + }); dirent *entry; std::vector files; diff --git a/dali/operators/reader/loader/filesystem.h b/dali/operators/reader/loader/filesystem.h index 65f6ca9f2f1..e11a0933918 100644 --- a/dali/operators/reader/loader/filesystem.h +++ b/dali/operators/reader/loader/filesystem.h @@ -26,9 +26,10 @@ namespace filesystem { struct FileLabelEntry { std::string filename; - std::optional label = {}; // only if read_label==true - std::optional size = - {}; // only populated when size is known without opening (e.g. s3) + // only if read_label==true + std::optional label = {}; + // only populated when size is known without opening (e.g. s3) + std::optional size = {}; }; struct TraverseDirectoriesOptions { diff --git a/dali/operators/reader/loader/indexed_file_loader.h b/dali/operators/reader/loader/indexed_file_loader.h index 0275ae8a824..20197a769ac 100755 --- a/dali/operators/reader/loader/indexed_file_loader.h +++ b/dali/operators/reader/loader/indexed_file_loader.h @@ -15,14 +15,14 @@ #ifndef DALI_OPERATORS_READER_LOADER_INDEXED_FILE_LOADER_H_ #define DALI_OPERATORS_READER_LOADER_INDEXED_FILE_LOADER_H_ +#include +#include +#include #include #include -#include #include -#include -#include +#include #include -#include #include "dali/core/common.h" #include "dali/core/mm/memory.h" @@ -35,21 +35,19 @@ namespace dali { class IndexedFileLoader : public Loader, true> { public: explicit IndexedFileLoader(const OpSpec& spec) - : Loader(spec), - uris_(spec.GetRepeatedArgument("path")), - index_uris_(spec.GetRepeatedArgument("index_path")), - current_index_(0), - current_file_index_(0), - current_file_(nullptr), - use_o_direct_(spec.HasArgument("use_o_direct") && spec.GetArgument("use_o_direct")) { - DALI_ENFORCE(dont_use_mmap_ || !use_o_direct_, - make_string("Cannot use use_o_direct with ", "``dont_use_mmap=False``.")); - if (use_o_direct_) { - o_direct_chunk_size_ = ODirectFileStream::GetChunkSize(); - o_direct_alignm_ = ODirectFileStream::GetAlignment(); - o_direct_read_len_alignm_ = ODirectFileStream::GetLenAlignment(); + : Loader(spec), + uris_(spec.GetRepeatedArgument("path")), + index_uris_(spec.GetRepeatedArgument("index_path")), + current_index_(0), current_file_index_(0), current_file_(nullptr), + use_o_direct_(spec.HasArgument("use_o_direct") && spec.GetArgument("use_o_direct")) { + DALI_ENFORCE(dont_use_mmap_ || !use_o_direct_, make_string("Cannot use use_o_direct with ", + "``dont_use_mmap=False``.")); + if (use_o_direct_) { + o_direct_chunk_size_ = ODirectFileStream::GetChunkSize(); + o_direct_alignm_ = ODirectFileStream::GetAlignment(); + o_direct_read_len_alignm_ = ODirectFileStream::GetLenAlignment(); + } } - } void ReadSample(Tensor& tensor) override { MoveToNextShard(current_index_); @@ -73,8 +71,7 @@ class IndexedFileLoader : public Loader, true> { current_file_ = FileStream::Open(uris_[file_index], {read_ahead_, use_mmap, use_o_direct}); current_file_index_ = file_index; // invalidate the buffer - if (use_o_direct) - read_buffer_.reset(); + if (use_o_direct_) read_buffer_.reset(); } // if image is cached, skip loading @@ -117,8 +114,8 @@ class IndexedFileLoader : public Loader, true> { */ // read again if there is no buffer of the requested piece if outside of the it bool after_buffer_start = seek_pos >= static_cast(read_buffer_pos_); - bool before_buffer_end = - seek_pos + size < static_cast(read_buffer_pos_ + read_buffer_data_size_); + bool before_buffer_end = seek_pos + size < + static_cast(read_buffer_pos_ + read_buffer_data_size_); // buffer need to exists and the ata we look for needs to be inside it if (!read_buffer_ || !(after_buffer_start && before_buffer_end)) { // check how much we need to allocate to house the required sample, but no less than @@ -149,13 +146,13 @@ class IndexedFileLoader : public Loader, true> { auto read_start = block_start + read_off; // we should read either the chunk size or the reminder of the file auto min_read = std::min(o_direct_chunk_size_tmp, seek_pos + size - read_start); - auto work = [tmp_file_ptr, file, dst_ptr, o_direct_chunk_size_tmp, min_read, read_start, - file_name]() { + auto work = [tmp_file_ptr, file, dst_ptr, o_direct_chunk_size_tmp, min_read, + read_start, file_name]() { auto ret = file->ReadAt(dst_ptr, o_direct_chunk_size_tmp, read_start); DALI_ENFORCE(ret >= min_read && ret <= o_direct_chunk_size_tmp, - make_string("Failed to read file: ", file_name, ", read: ", ret, - " while it should be in range [", min_read, ", ", - o_direct_chunk_size_tmp, "]")); + make_string("Failed to read file: ", file_name, + ", read: ", ret, " while it should be in range [", min_read, + ", ", o_direct_chunk_size_tmp, "]")); }; // store the work lambda into queue so the prefetch thread can pick them up latter and // execute in multiple threads @@ -168,8 +165,8 @@ class IndexedFileLoader : public Loader, true> { } else { tensor.Resize({size}, DALI_UINT8); - int64 n_read = - current_file_->Read(reinterpret_cast(tensor.raw_mutable_data()), size); + int64 n_read = current_file_->Read(reinterpret_cast(tensor.raw_mutable_data()), + size); DALI_ENFORCE(n_read == size, "Error reading from a file " + uris_[current_file_index_]); } } @@ -188,7 +185,7 @@ class IndexedFileLoader : public Loader, true> { virtual void ReadIndexFile(const std::vector& index_uris) { DALI_ENFORCE(index_uris.size() == uris_.size(), - "Number of index files needs to match the number of data files"); + "Number of index files needs to match the number of data files"); for (size_t i = 0; i < index_uris.size(); ++i) { std::ifstream fin(index_uris[i]); DALI_ENFORCE(fin.good(), "Failed to open file " + index_uris[i]); @@ -207,7 +204,8 @@ class IndexedFileLoader : public Loader, true> { void PrepareMetadataImpl() override { if (!dont_use_mmap_) { - mmap_reserver_ = FileStream::MappingReserver(static_cast(initial_buffer_fill_)); + mmap_reserver_ = FileStream::MappingReserver( + static_cast(initial_buffer_fill_)); } copy_read_data_ = dont_use_mmap_ || !mmap_reserver_.CanShareMappedData(); @@ -233,8 +231,7 @@ class IndexedFileLoader : public Loader, true> { FileStream::Open(uris_[file_index], {read_ahead_, !copy_read_data_, use_o_direct_}); current_file_index_ = file_index; // invalidate the buffer - if (use_o_direct_) - read_buffer_.reset(); + if (use_o_direct_) read_buffer_.reset(); } current_file_->SeekRead(seek_pos); } diff --git a/dali/operators/reader/loader/webdataset/tar_utils_test.cc b/dali/operators/reader/loader/webdataset/tar_utils_test.cc index 5f6e7a31aeb..0583f86c28b 100644 --- a/dali/operators/reader/loader/webdataset/tar_utils_test.cc +++ b/dali/operators/reader/loader/webdataset/tar_utils_test.cc @@ -31,10 +31,10 @@ namespace dali { namespace detail { TEST(LibTarUtilsTestSimple, Interface) { - std::string filepath( - dali::filesystem::join_path(testing::dali_extra_path(), "db/webdataset/MNIST/devel-2.tar")); - std::string dummy_filepath( - dali::filesystem::join_path(testing::dali_extra_path(), "db/webdataset/MNIST/devel-1.tar")); + std::string filepath(dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/MNIST/devel-2.tar")); + std::string dummy_filepath(dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/MNIST/devel-1.tar")); TarArchive dummy_archive(FileStream::Open(dummy_filepath)); TarArchive intermediate_archive(std::move(dummy_archive)); @@ -67,8 +67,8 @@ TEST(LibTarUtilsTestSimple, Interface) { } TEST(LibTarUtilsTestSimple, LongNameIndexing) { - std::string filepath( - dali::filesystem::join_path(testing::dali_extra_path(), "db/webdataset/sample-tar/gnu.tar")); + std::string filepath(dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/sample-tar/gnu.tar")); TarArchive archive(FileStream::Open(filepath)); std::string name_prefix(128, '#'); for (int idx = 0; idx < 1000; idx++) { @@ -196,32 +196,34 @@ auto SimpleTarTestsValues() { vector values; SimpleTarTestsData filepaths[] = { - {dali::filesystem::join_path(testing::dali_extra_path(), "db/webdataset/MNIST/devel-0.tar"), - false, - false, - 2000, - 3000, - {".cls", ".jpg"}}, - {dali::filesystem::join_path(testing::dali_extra_path(), - "db/webdataset/sample-tar/empty.tar"), - false, - false, - 0, - 0, - {}}, - {dali::filesystem::join_path(testing::dali_extra_path(), "db/webdataset/sample-tar/v7.tar"), - false, - false, - 0, - 1000, - {""}}, - {dali::filesystem::join_path(testing::dali_extra_path(), - "db/webdataset/sample-tar/oldgnu.tar"), - false, - false, - 0, - 1000, - {""}}}; + { dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/MNIST/devel-0.tar"), + false, + false, + 2000, + 3000, + {".cls", ".jpg"} }, + { dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/sample-tar/empty.tar"), + false, + false, + 0, + 0, + {} }, + { dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/sample-tar/v7.tar"), + false, + false, + 0, + 1000, + {""} }, + { dali::filesystem::join_path(testing::dali_extra_path(), + "db/webdataset/sample-tar/oldgnu.tar"), + false, + false, + 0, + 1000, + {""} } }; for (auto& filepath : filepaths) { for (int read_ahead = 0; read_ahead <= 1; read_ahead++) { @@ -272,7 +274,7 @@ TEST_P(MultiTarTests, Index) { } INSTANTIATE_TEST_SUITE_P(LibTarUtilsTestMultithreaded, MultiTarTests, - ::testing::Values(false, true)); + ::testing::Values(false, true)); } // namespace detail } // namespace dali diff --git a/dali/util/cufile.h b/dali/util/cufile.h index abd0e12e810..422d51e0562 100644 --- a/dali/util/cufile.h +++ b/dali/util/cufile.h @@ -39,8 +39,7 @@ class DLL_PUBLIC CUFileStream : public FileStream { } }; - static std::unique_ptr Open(const std::string& uri, - FileStream::Options options = {}); + static std::unique_ptr Open(const std::string& uri, bool read_ahead, bool use_mmap); /** * @brief Reads `n_bytes` to the buffer at position `offset` * @@ -48,7 +47,7 @@ class DLL_PUBLIC CUFileStream : public FileStream { * The API is the effect how cufile works - it need to get the base address of the registered * buffer and the offset where it should put the data. */ - virtual size_t ReadGPU(void* buffer, size_t n_bytes, ptrdiff_t buffer_offset = 0) = 0; + virtual size_t ReadGPU(void *buffer, size_t n_bytes, ptrdiff_t buffer_offset = 0) = 0; /** * @brief Reads `n_bytes` to the buffer at position `offset` from a given position in the file. @@ -56,8 +55,8 @@ class DLL_PUBLIC CUFileStream : public FileStream { * The file_offset is absolute - the function neither depends on or affects the file pointer. * This function is thread-safe. */ - virtual size_t ReadAtGPU(void* buffer, size_t n_bytes, ptrdiff_t buffer_offset, - int64 file_offset) = 0; + virtual size_t ReadAtGPU(void *buffer, size_t n_bytes, + ptrdiff_t buffer_offset, int64 file_offset) = 0; protected: explicit CUFileStream(const std::string& path) : FileStream(path) {} diff --git a/internal_tools/test_bundled_libs.py b/internal_tools/test_bundled_libs.py index b06974a89ee..b3ad8811002 100644 --- a/internal_tools/test_bundled_libs.py +++ b/internal_tools/test_bundled_libs.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.