diff --git a/.gitattributes b/.gitattributes index 7c0b591b072c..18929de21e98 100644 --- a/.gitattributes +++ b/.gitattributes @@ -26,4 +26,3 @@ LICENSE text *.gif binary *.ttf binary *.pdf binary - diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100644 index 000000000000..523aa07a217d --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,62 @@ +name: Linter +on: pull_request +jobs: + Bandit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Run checks + run: | + pip install --user -r <(grep "^bandit" ./requirements.txt) + echo "Bandit version: "`bandit --version | head -1` + bandit -r ./ + isort: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Run checks + run: | + pip install --user -r <(grep "^isort" ./requirements.txt) + isort --check --diff . + Pylint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Run checks + run: | + pip install --user -r <(grep "^pylint" ./requirements.txt) + echo "Pylint version: "`pylint --version | head -1` + git ls-files -z '*.py' | xargs -0 pylint -r n + remark: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2 + with: + node-version: 12 + + - name: Run checks + run: | + npm ci + echo "remark version: "`npx remark --version` + npx remark --frail . 
+ whitespace: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Check whitespace + run: | + empty_tree="$(git hash-object -t tree --stdin < /dev/null)" + git diff --check "$empty_tree" diff --git a/.gitignore b/.gitignore index 78d001033601..2b2bf6ca9f71 100644 --- a/.gitignore +++ b/.gitignore @@ -57,4 +57,5 @@ cover/ docs/_build/ #Pycharm config files -.idea/ \ No newline at end of file +.idea/ +user_config.py \ No newline at end of file diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 000000000000..ef57375c8b91 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,6 @@ +[settings] +forced_separate = tests +from_first = True +include_trailing_comma = True +line_length = 80 +multi_line_output = 5 diff --git a/.pylintrc b/.pylintrc index 09864236e690..58acd05d34f8 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,8 +1,5 @@ [MASTER] -# Specify a configuration file. -#rcfile= - # Python code to execute, usually for sys.path manipulation such as # pygtk.require(). #init-hook= @@ -34,15 +31,6 @@ unsafe-load-any-extension=no # run arbitrary code extension-pkg-whitelist= -# Allow optimization of some AST trees. This will activate a peephole AST -# optimizer, which will apply various small optimizations. For instance, it can -# be used to obtain the result of joining multiple strings with the addition -# operator. Joining a lot of strings can lead to a maximum recursion error in -# Pylint and this flag can prevent that. It has one side effect, the resulting -# AST will be different than the one from reality. This option is deprecated -# and it will be removed in Pylint 2.0. -optimize-ast=no - [MESSAGES CONTROL] @@ -50,25 +38,6 @@ optimize-ast=no # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED confidence= -# Enable the message, report, category or checker with the given id(s). 
You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -disable=all -enable= E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0106,E0107,E0110, - E0113,E0114,E0115,E0116,E0117,E0108,E0202,E0203,E0211,E0236, - E0238,E0239,E0240,E0241,E0301,E0302,E0601,E0603,E0604,E0701, - E0702,E0703,E0704,E0710,E0711,E0712,E1003,E1102,E1111,E0112, - E1120,E1121,E1123,E1124,E1125,E1126,E1127,E1132,E1200,E1201, - E1205,E1206,E1300,E1301,E1302,E1303,E1304,E1305,E1306, - C0123,C0200,C0303,C1001, - W0101,W0102,W0104,W0105,W0106,W0107,W0108,W0109,W0110,W0120, - W0122,W0124,W0150,W0199,W0221,W0222,W0233,W0404,W0410,W0601, - W0602,W0604,W0611,W0612,W0622,W0623,W0702,W0705,W0711,W1300, - W1301,W1302,W1303,,W1305,W1306,W1307 - R0102,R0202,R0203 - - # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifiers separated by comma (,) or put this # option multiple times (only on the command line, not in the configuration @@ -78,7 +47,116 @@ enable= E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0106,E0107,E0110, # --enable=similarities". 
If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -#disable=old-octal-literal,basestring-builtin,no-absolute-import,old-division,coerce-method,long-suffix,reload-builtin,unichr-builtin,indexing-exception,raising-string,dict-iter-method,metaclass-assignment,filter-builtin-not-iterating,import-star-module-level,next-method-called,cmp-method,raw_input-builtin,old-raise-syntax,cmp-builtin,apply-builtin,getslice-method,input-builtin,backtick,coerce-builtin,range-builtin-not-iterating,xrange-builtin,using-cmp-argument,buffer-builtin,hex-method,execfile-builtin,unpacking-in-except,standarderror-builtin,round-builtin,nonzero-method,unicode-builtin,reduce-builtin,file-builtin,dict-view-method,old-ne-operator,print-statement,suppressed-message,oct-method,useless-suppression,delslice-method,long-builtin,setslice-method,zip-builtin-not-iterating,map-builtin-not-iterating,intern-builtin,parameter-unpacking +disable=all + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. 
+enable= + C0123, # unidiomatic-typecheck + C0200, # consider-using-enumerate + C0303, # trailing-whitespace + + E0001, # syntax-error + E0100, # init-is-generator + E0101, # return-in-init + E0102, # function-redefined + E0103, # not-in-loop + E0104, # return-outside-function + E0105, # yield-outside-function + E0107, # nonexistent-operator + E0108, # duplicate-argument-name + E0110, # abstract-class-instantiated + E0112, # too-many-star-expressions + E0113, # invalid-star-assignment-target + E0114, # star-needs-assignment-target + E0115, # nonlocal-and-global + E0116, # continue-in-finally + E0117, # nonlocal-without-binding + E0202, # method-hidden + E0203, # access-member-before-definition + E0211, # no-method-argument + E0236, # invalid-slots-object + E0238, # invalid-slots + E0239, # inherit-non-class + E0240, # inconsistent-mro + E0241, # duplicate-bases + E0301, # non-iterator-returned + E0302, # unexpected-special-method-signature + E0601, # used-before-assignment + E0603, # undefined-all-variable + E0604, # invalid-all-object + E0701, # bad-except-order + E0702, # raising-bad-type + E0703, # bad-exception-context + E0704, # misplaced-bare-raise + E0710, # raising-non-exception + E0711, # notimplemented-raised + E0712, # catching-non-exception + E1003, # bad-super-call + E1102, # not-callable + E1111, # assignment-from-no-return + E1120, # no-value-for-parameter + E1121, # too-many-function-args + E1123, # unexpected-keyword-arg + E1124, # redundant-keyword-arg + E1125, # missing-kwoa + E1126, # invalid-sequence-index + E1127, # invalid-slice-index + E1132, # repeated-keyword + E1200, # logging-unsupported-format + E1201, # logging-format-truncated + E1205, # logging-too-many-args + E1206, # logging-too-few-args + E1300, # bad-format-character + E1301, # truncated-format-string + E1302, # mixed-format-string + E1303, # format-needs-mapping + E1304, # missing-format-string-key + E1305, # too-many-format-args + E1306, # too-few-format-args + + R0202, # 
no-classmethod-decorator + R0203, # no-staticmethod-decorator + R1703, # simplifiable-if-statement + + W0101, # unreachable + W0102, # dangerous-default-value + W0104, # pointless-statement + W0105, # pointless-string-statement + W0106, # expression-not-assigned + W0107, # unnecessary-pass + W0108, # unnecessary-lambda + W0109, # duplicate-key + W0120, # useless-else-on-loop + W0122, # exec-used + W0124, # confusing-with-statement + W0150, # lost-exception + W0199, # assert-on-tuple + W0221, # arguments-differ + W0222, # signature-differs + W0233, # non-parent-init-called + W0404, # reimported + W0410, # misplaced-future + W0601, # global-variable-undefined + W0602, # global-variable-not-assigned + W0604, # global-at-module-level + W0611, # unused-import + W0612, # unused-variable + W0622, # redefined-builtin + W0702, # bare-except + W0705, # duplicate-except + W0711, # binary-op-exception + W1300, # bad-format-string-key + W1301, # unused-format-string-key + W1302, # bad-format-string + W1303, # missing-format-argument-key + W1305, # format-combined-specification + W1306, # missing-format-attribute + W1307, # invalid-format-index + W1401, # anomalous-backslash-in-string + W1402, # anomalous-unicode-escape-in-string [REPORTS] @@ -88,12 +166,6 @@ enable= E0001,E0100,E0101,E0102,E0103,E0104,E0105,E0106,E0107,E0110, # mypackage.mymodule.MyReporterClass. output-format=text -# Put messages in a separate file for each module / package specified on the -# command line instead of printing them on stdout. Reports (if any) will be -# written in a file name "pylint_global.[txt|html]". This option is deprecated -# and it will be removed in Pylint 2.0. 
-files-output=no - # Tells whether to display a full report or only the messages reports=yes @@ -112,7 +184,9 @@ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / stateme [BASIC] # Good variable names which should always be accepted, separated by a comma -good-names=i,j,k,ex,Run,_ +good-names=i,j,k,ex,Run,_,x,y,w,h,d,c,id,it + +allowed-redefined-builtins=id,format,dir # Bad variable names which should always be refused, separated by a comma bad-names=foo,bar,baz,toto,tutu,tata @@ -215,12 +289,6 @@ ignore-long-lines=^\s*(# )??$ # else. single-line-if-stmt=no -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma,dict-separator - # Maximum number of lines in a module max-module-lines=1000 @@ -300,7 +368,7 @@ ignored-classes=optparse.Values,thread._local,_thread._local # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. -generated-members= +generated-members=on_error # List of decorators that produce context managers, such as # contextlib.contextmanager. 
Add to this list to register other decorators that diff --git a/.remarkrc.js b/.remarkrc.js new file mode 100644 index 000000000000..ea15e6028d09 --- /dev/null +++ b/.remarkrc.js @@ -0,0 +1,18 @@ +exports.settings = { bullet: '*', paddedTable: false }; + +exports.plugins = [ + 'remark-frontmatter', + 'remark-gfm', + 'remark-preset-lint-recommended', + 'remark-preset-lint-consistent', + ['remark-lint-list-item-indent', 'space'], + ['remark-lint-no-dead-urls', false], // Does not work because of github protection system + ['remark-lint-maximum-line-length', 80], + ['remark-lint-maximum-heading-length', 120], + ['remark-lint-strong-marker', '*'], + ['remark-lint-emphasis-marker', '_'], + ['remark-lint-unordered-list-marker-style', '-'], + ['remark-lint-ordered-list-marker-style', '.'], + ['remark-lint-no-file-name-irregular-characters', false], + ['remark-lint-list-item-spacing', false], +]; diff --git a/CHANGELOG.md b/CHANGELOG.md index 94b8d7b49d64..e2beacdf0ae5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,43 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 14/07/2021 - Release v0.1.10 +### Added +- Support for import/export zip archives with images () +- Subformat importers for VOC and COCO () +- Support for KITTI dataset segmentation and detection format () +- Updated YOLO format user manual () +- `ItemTransform` class, which describes item-wise dataset `Transform`s () +- `keep-empty` export parameter in VOC format () +- A base class for dataset validation plugins () +- Partial support for the Open Images format; + only images and image-level labels can be read/written + (, + ). 
+- Support for Supervisely Point Cloud dataset format (, ) +- Support for KITTI Raw / Velodyne Points dataset format () +- Support for CIFAR-100 and documentation for CIFAR-10/100 () + +### Changed +- Tensorflow AVX check is made optional in API and disabled by default () +- Extensions for images in ImageNet_txt are now mandatory () +- Several dependencies now have lower bounds () + +### Deprecated +- TBD + +### Removed +- TBD + +### Fixed +- Incorrect image layout on saving and a problem with encoding on loading () +- An error when xpath filter is applied to the dataset or its subset () +- Tracking of `Dataset` changes done by transforms () +- Improved CLI startup time in several cases () + +### Security +- Known issue: loading CIFAR can result in arbitrary code execution () + ## 03/06/2021 - Release v0.1.9 ### Added - Support for escaping in attribute values in LabelMe format () @@ -20,51 +57,57 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for Validator configurable threshold () ### Changed -- LabelMe format saves dataset items with their relative paths by subsets without changing names () -- Allowed arbitrary subset count and names in classification and detection splitters () +- LabelMe format saves dataset items with their relative paths by subsets + without changing names () +- Allowed arbitrary subset count and names in classification and detection + splitters () - Annotation-less dataset elements are now participate in subset splitting () - Classification task in LFW dataset format () - Testing is now performed with pytest instead of unittest () ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed -- Added support for auto-merging (joining) of datasets with no labels and having labels () +- Added support for auto-merging (joining) of datasets with no labels and + having labels () - Allowed explicit label removal in `remap_labels` transform () - Image extension in CVAT format export () - Added a label "face" 
for bounding boxes in Wider Face () -- Allowed adding "difficult", "truncated", "occluded" attributes when converting to Pascal VOC if these attributes are not present () +- Allowed adding "difficult", "truncated", "occluded" attributes when + converting to Pascal VOC if these attributes are not present () - Empty lines in YOLO annotations are ignored () - Export in VOC format when no image info is available () - Fixed saving attribute in WiderFace extractor () ### Security -- +- TBD ## 31/03/2021 - Release v0.1.8 ### Added -- +- TBD ### Changed -- Added an option to allow undeclared annotation attributes in CVAT format export () -- COCO exports images in separate dirs by subsets. Added an option to control this () +- Added an option to allow undeclared annotation attributes in CVAT format + export () +- COCO exports images in separate dirs by subsets. Added an option to control + this () ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed - Instance masks of `background` class no more introduce an instance () - Added support for label attributes in Datumaro format () ### Security -- +- TBD ## 24/03/2021 - Release v0.1.7 ### Added @@ -78,10 +121,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Common ICDAR format is split into 3 sub-formats () ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed - The ability to work with file names containing Cyrillic and spaces () @@ -91,27 +134,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Saving of masks in VOC when masks are not requested () ### Security -- +- TBD ## 03/02/2021 - Release v0.1.6.1 (hotfix) ### Added -- +- TBD ### Changed -- +- TBD ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed - Images with no annotations are exported again in VOC formats () - Inference result for only one output layer in OpenVINO launcher () ### Security -- +- TBD ## 02/26/2021 - Release v0.1.6 ### Added @@ -119,26 +162,29 @@ and this project 
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Laziness, source caching, tracking of changes and partial updating for `Dataset` () - `Market-1501` dataset format () - `LFW` dataset format () -- Support of polygons' and masks' confusion matrices and mismathing classes in `diff` command () +- Support of polygons' and masks' confusion matrices and mismatching classes in + `diff` command () - Add near duplicate image removal plugin () -- Sampler Plugin that analyzes inference result from the given dataset and selects samples for annotation() +- Sampler Plugin that analyzes inference result from the given dataset and + selects samples for annotation () ### Changed - OpenVINO model launcher is updated for OpenVINO r2021.1 () ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed - High memory consumption and low performance of mask import/export, #53 () -- Masks, covered by class 0 (background), should be exported with holes inside () +- Masks, covered by class 0 (background), should be exported with holes inside +() - `diff` command invocation problem with missing class methods () ### Security -- +- TBD ## 01/23/2021 - Release v0.1.5 ### Added @@ -151,7 +197,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - `Dataset` class extended with new operations: `save`, `load`, `export`, `import_from`, `detect`, `run_model` () -- Allowed importing `Extractor`-only defined formats (in `Project.import_from`, `dataset.import_from` and CLI/`project import`) () +- Allowed importing `Extractor`-only defined formats + (in `Project.import_from`, `dataset.import_from` and CLI/`project import`) () - `datum project ...` commands replaced with `datum ...` commands () - Supported more image formats in `ImageNet` extractors () - Allowed adding `Importer`-defined formats as project sources (`source add`) () @@ -161,10 +208,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `datum project 
...` CLI context () ### Removed -- +- TBD ### Fixed -- Allow plugins inherited from `Extractor` (instead of only `SourceExtractor`) () +- Allow plugins inherited from `Extractor` (instead of only `SourceExtractor`) + () - Windows installation with `pip` for `pycocotools` () - `YOLO` extractor path matching on Windows () - Fixed inplace file copying when saving images () @@ -172,7 +220,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed model copying on addition in CLI () ### Security -- +- TBD ## 12/10/2020 - Release v0.1.4 ### Added @@ -185,29 +233,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Allow Pascal VOC to search in subdirectories () ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed -- +- TBD ### Security -- +- TBD ## 10/28/2020 - Release v0.1.3 ### Added - `ImageNet` and `ImageNetTxt` dataset formats () ### Changed -- +- TBD ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed - Default `label-map` parameter value for VOC converter () @@ -217,20 +265,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Supported empty attribute values in CVAT extractor () ### Security -- +- TBD ## 10/05/2020 - Release v0.1.2 ### Added -- `ByteImage` class to represent encoded images in memory and avoid recoding on save () +- `ByteImage` class to represent encoded images in memory and avoid recoding + on save () ### Changed - Implementation of format plugins simplified () -- `default` is now a default subset name, instead of `None`. The values are interchangeable. () +- `default` is now a default subset name, instead of `None`. The values are + interchangeable. 
() - Improved performance of transforms () ### Deprecated -- +- TBD ### Removed - `image/depth` value from VOC export () @@ -239,7 +289,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Zero division errors in dataset statistics () ### Security -- +- TBD ## 09/24/2020 - Release v0.1.1 @@ -249,19 +299,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - MOTS png mask format support () ### Changed -- +- TBD ### Deprecated -- +- TBD ### Removed -- +- TBD ### Fixed -- +- TBD ### Security -- +- TBD ## 09/10/2020 - Release v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d7787182011..087578a7e4de 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -259,4 +259,4 @@ def test_can_convert_polygons_to_mask(self): from polygon to mask. 4. Verify that resulting segmentation mask is equal to the expected mask. """ -``` \ No newline at end of file +``` diff --git a/README.md b/README.md index 725c671f1fa6..f6b652384bf2 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,8 @@ CVAT annotations ---> Publication, statistics etc. -- Convert PASCAL VOC dataset to COCO format, keep only images with `cat` class presented: +- Convert PASCAL VOC dataset to COCO format, keep only images with `cat` class + presented: ```bash # Download VOC dataset: # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar @@ -42,7 +43,8 @@ CVAT annotations ---> Publication, statistics etc. -- --reindex 1 # avoid annotation id conflicts ``` -- Convert only non-`occluded` annotations from a [CVAT](https://github.com/opencv/cvat) project to TFrecord: +- Convert only non-`occluded` annotations from a + [CVAT](https://github.com/opencv/cvat) project to TFrecord: ```bash # export Datumaro dataset in CVAT UI, extract somewhere, go to the project dir datum filter -e '/item/annotation[occluded="False"]' \ @@ -51,7 +53,8 @@ CVAT annotations ---> Publication, statistics etc. 
--format tf_detection_api -- --save-images ``` -- Annotate MS COCO dataset, extract image subset, re-annotate it in [CVAT](https://github.com/opencv/cvat), update old dataset: +- Annotate MS COCO dataset, extract image subset, re-annotate it in + [CVAT](https://github.com/opencv/cvat), update old dataset: ```bash # Download COCO dataset http://cocodataset.org/#download # Put images to coco/images/ and annotations to coco/annotations/ @@ -64,7 +67,8 @@ CVAT annotations ---> Publication, statistics etc. datum export --format coco ``` -- Annotate instance polygons in [CVAT](https://github.com/opencv/cvat), export as masks in COCO: +- Annotate instance polygons in [CVAT](https://github.com/opencv/cvat), export + as masks in COCO: ```bash datum convert --input-format cvat --input-path \ --output-format coco -- --segmentation-mode masks @@ -138,6 +142,8 @@ CVAT annotations ---> Publication, statistics etc. - [MNIST in CSV](https://pjreddie.com/projects/mnist-in-csv/) (`classification`) - [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) - [Cityscapes](https://www.cityscapes-dataset.com/) + - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`) + - [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format_navi) (`point cloud`) - [CVAT](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md) - [LabelMe](http://labelme.csail.mit.edu/Release3.0) - [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`) @@ -259,12 +265,15 @@ for item in dataset: dataset.export('dst/dir', 'coco') ``` -> Check our [developer guide](docs/developer_guide.md) for additional information. +> Check our [developer guide](docs/developer_guide.md) for additional + information. 
## Contributing [(Back to top)](#table-of-contents) -Feel free to [open an Issue](https://github.com/openvinotoolkit/datumaro/issues/new), if you -think something needs to be changed. You are welcome to participate in development, -instructions are available in our [contribution guide](CONTRIBUTING.md). +Feel free to +[open an Issue](https://github.com/openvinotoolkit/datumaro/issues/new), if you +think something needs to be changed. You are welcome to participate in +development, instructions are available in our +[contribution guide](CONTRIBUTING.md). diff --git a/datum.py b/datum.py index 12c150bd167e..d19ffe646aec 100755 --- a/datum.py +++ b/datum.py @@ -3,6 +3,5 @@ from datumaro.cli.__main__ import main - if __name__ == '__main__': sys.exit(main()) diff --git a/datumaro/__main__.py b/datumaro/__main__.py index be1cb092981b..062f6f0cd460 100644 --- a/datumaro/__main__.py +++ b/datumaro/__main__.py @@ -7,6 +7,5 @@ from datumaro.cli.__main__ import main - if __name__ == '__main__': sys.exit(main()) diff --git a/datumaro/cli/__main__.py b/datumaro/cli/__main__.py index 528c6d019e95..b2113b6f1ff3 100644 --- a/datumaro/cli/__main__.py +++ b/datumaro/cli/__main__.py @@ -8,10 +8,9 @@ import os.path as osp import sys -from . import contexts, commands -from .util import CliException, add_subparser from ..version import VERSION - +from . import commands, contexts +from .util import CliException, add_subparser _log_levels = { 'debug': log.DEBUG, diff --git a/datumaro/cli/commands/__init__.py b/datumaro/cli/commands/__init__.py index 9324f122523f..febb60775ab1 100644 --- a/datumaro/cli/commands/__init__.py +++ b/datumaro/cli/commands/__init__.py @@ -5,9 +5,6 @@ # pylint: disable=redefined-builtin from . 
import ( - create, add, remove, import_, - explain, - export, merge, convert, transform, filter, - diff, ediff, stats, - info, validate + add, convert, create, diff, ediff, explain, export, filter, import_, info, + merge, remove, stats, transform, validate, ) diff --git a/datumaro/cli/commands/add.py b/datumaro/cli/commands/add.py index c43936ec814a..4f12975acac8 100644 --- a/datumaro/cli/commands/add.py +++ b/datumaro/cli/commands/add.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import - from ..contexts.source import build_add_parser as build_parser + +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/convert.py b/datumaro/cli/commands/convert.py index 6d657d9b0bb5..bef04ef91234 100644 --- a/datumaro/cli/commands/convert.py +++ b/datumaro/cli/commands/convert.py @@ -7,8 +7,8 @@ import os import os.path as osp -from datumaro.components.project import Environment from datumaro.components.dataset import Dataset +from datumaro.components.project import Environment from ..contexts.project import FilterModes from ..util import CliException, MultilineFormatter, make_file_name diff --git a/datumaro/cli/commands/create.py b/datumaro/cli/commands/create.py index 1396d5f9ed69..bc3a6de7feeb 100644 --- a/datumaro/cli/commands/create.py +++ b/datumaro/cli/commands/create.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_create_parser as build_parser -from ..contexts.project import build_create_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/diff.py b/datumaro/cli/commands/diff.py index a50c8f0a4e6f..3c4ce327142a 100644 --- a/datumaro/cli/commands/diff.py +++ b/datumaro/cli/commands/diff.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_diff_parser as build_parser -from ..contexts.project import build_diff_parser 
as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/ediff.py b/datumaro/cli/commands/ediff.py index ac5ba8c4678f..e835d2e58f6a 100644 --- a/datumaro/cli/commands/ediff.py +++ b/datumaro/cli/commands/ediff.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_ediff_parser as build_parser -from ..contexts.project import build_ediff_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/explain.py b/datumaro/cli/commands/explain.py index dc4256194ced..fe5c1dd39fa5 100644 --- a/datumaro/cli/commands/explain.py +++ b/datumaro/cli/commands/explain.py @@ -8,9 +8,12 @@ import os.path as osp from datumaro.components.project import Project -from datumaro.util.command_targets import (TargetKinds, target_selector, - ProjectTarget, SourceTarget, ImageTarget, is_project_path) +from datumaro.util.command_targets import ( + ImageTarget, ProjectTarget, SourceTarget, TargetKinds, is_project_path, + target_selector, +) from datumaro.util.image import load_image, save_image + from ..util import MultilineFormatter from ..util.project import load_project @@ -108,8 +111,8 @@ def explain_command(args): args.project_dir = osp.dirname(osp.abspath(args.target[1])) - import cv2 from matplotlib import cm + import cv2 project = load_project(args.project_dir) diff --git a/datumaro/cli/commands/export.py b/datumaro/cli/commands/export.py index 1efb50645961..52bf5505e626 100644 --- a/datumaro/cli/commands/export.py +++ b/datumaro/cli/commands/export.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_export_parser as build_parser -from ..contexts.project import build_export_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/filter.py b/datumaro/cli/commands/filter.py 
index 0b0d28cb9fcc..13468665a680 100644 --- a/datumaro/cli/commands/filter.py +++ b/datumaro/cli/commands/filter.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_filter_parser as build_parser -from ..contexts.project import build_filter_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/import_.py b/datumaro/cli/commands/import_.py index 74c47ab3cca6..0d6ef829c031 100644 --- a/datumaro/cli/commands/import_.py +++ b/datumaro/cli/commands/import_.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_import_parser as build_parser -from ..contexts.project import build_import_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/info.py b/datumaro/cli/commands/info.py index fa2af5ddd984..e6c693bbf3bf 100644 --- a/datumaro/cli/commands/info.py +++ b/datumaro/cli/commands/info.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_info_parser as build_parser -from ..contexts.project import build_info_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/merge.py b/datumaro/cli/commands/merge.py index 2abb56e46515..fee44ff1a589 100644 --- a/datumaro/cli/commands/merge.py +++ b/datumaro/cli/commands/merge.py @@ -2,17 +2,17 @@ # # SPDX-License-Identifier: MIT +from collections import OrderedDict import argparse import json import logging as log import os.path as osp -from collections import OrderedDict -from datumaro.components.project import Project +from datumaro.components.errors import MergeError, QualityError from datumaro.components.operations import IntersectMerge -from datumaro.components.errors import QualityError, MergeError +from datumaro.components.project 
import Project -from ..util import at_least, MultilineFormatter, CliException +from ..util import CliException, MultilineFormatter, at_least from ..util.project import generate_next_file_name, load_project @@ -119,5 +119,5 @@ def save_merge_report(merger, path): ('All errors', all_errors), ]) - with open(path, 'w') as f: + with open(path, 'w', encoding='utf-8') as f: json.dump(errors, f, indent=4) \ No newline at end of file diff --git a/datumaro/cli/commands/remove.py b/datumaro/cli/commands/remove.py index 3ea0bbffa99f..003c36f80090 100644 --- a/datumaro/cli/commands/remove.py +++ b/datumaro/cli/commands/remove.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.source import build_remove_parser as build_parser -from ..contexts.source import build_remove_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/stats.py b/datumaro/cli/commands/stats.py index cb54eec394a4..fdbb75bbaef1 100644 --- a/datumaro/cli/commands/stats.py +++ b/datumaro/cli/commands/stats.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_stats_parser as build_parser -from ..contexts.project import build_stats_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/transform.py b/datumaro/cli/commands/transform.py index 474d6a2128d9..ff4d50948611 100644 --- a/datumaro/cli/commands/transform.py +++ b/datumaro/cli/commands/transform.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import +from ..contexts.project import build_transform_parser as build_parser -from ..contexts.project import build_transform_parser as build_parser \ No newline at end of file +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/commands/validate.py b/datumaro/cli/commands/validate.py index 13794187a787..94ed9c62324c 100644 --- 
a/datumaro/cli/commands/validate.py +++ b/datumaro/cli/commands/validate.py @@ -2,6 +2,8 @@ # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import - from ..contexts.project import build_validate_parser as build_parser + +__all__ = [ + 'build_parser', +] diff --git a/datumaro/cli/contexts/__init__.py b/datumaro/cli/contexts/__init__.py index b903435527e0..1598ff704c00 100644 --- a/datumaro/cli/contexts/__init__.py +++ b/datumaro/cli/contexts/__init__.py @@ -3,4 +3,4 @@ # # SPDX-License-Identifier: MIT -from . import project, source, model \ No newline at end of file +from . import model, project, source diff --git a/datumaro/cli/contexts/model.py b/datumaro/cli/contexts/model.py index dfb2dc5ba955..20f1bffee393 100644 --- a/datumaro/cli/contexts/model.py +++ b/datumaro/cli/contexts/model.py @@ -12,8 +12,9 @@ from datumaro.util import error_rollback from ..util import CliException, MultilineFormatter, add_subparser -from ..util.project import load_project, \ - generate_next_name, generate_next_file_name +from ..util.project import ( + generate_next_file_name, generate_next_name, load_project, +) def build_add_parser(parser_ctor=argparse.ArgumentParser): diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py index ff4dfb10bd69..0e03593c2dce 100644 --- a/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/cli/contexts/project/__init__.py @@ -2,27 +2,30 @@ # # SPDX-License-Identifier: MIT +from enum import Enum import argparse import json import logging as log import os import os.path as osp import shutil + import numpy as np -from enum import Enum from datumaro.components.dataset_filter import DatasetItemEncoder from datumaro.components.extractor import AnnotationType -from datumaro.components.operations import (DistanceComparator, - ExactComparator, compute_ann_statistics, compute_image_statistics) -from datumaro.components.project import \ - PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG +from 
datumaro.components.operations import ( + DistanceComparator, ExactComparator, compute_ann_statistics, + compute_image_statistics, +) +from datumaro.components.project import PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG from datumaro.components.project import Environment, Project -from datumaro.components.validator import validate_annotations, TaskType +from datumaro.components.validator import TaskType from datumaro.util import error_rollback -from ...util import (CliException, MultilineFormatter, add_subparser, - make_file_name) +from ...util import ( + CliException, MultilineFormatter, add_subparser, make_file_name, +) from ...util.project import generate_next_file_name, load_project from .diff import DatasetDiffVisualizer @@ -612,7 +615,7 @@ def ediff_command(args): output["matches"] = matches output_file = generate_next_file_name('diff', ext='.json') - with open(output_file, 'w') as f: + with open(output_file, 'w', encoding='utf-8') as f: json.dump(output, f, indent=4, sort_keys=True) print("Found:") @@ -716,7 +719,7 @@ def stats_command(args): dst_file = generate_next_file_name('statistics', ext='.json') log.info("Writing project statistics to '%s'" % dst_file) - with open(dst_file, 'w') as f: + with open(dst_file, 'w', encoding='utf-8') as f: json.dump(stats, f, indent=4, sort_keys=True) def build_info_parser(parser_ctor=argparse.ArgumentParser): @@ -794,6 +797,14 @@ def print_extractor_info(extractor, indent=''): return 0 def build_validate_parser(parser_ctor=argparse.ArgumentParser): + def _parse_task_type(s): + try: + return TaskType[s.lower()].name + except: + raise argparse.ArgumentTypeError("Unknown task type %s. 
Expected " + "one of: %s" % (s, ', '.join(t.name for t in TaskType))) + + parser = parser_ctor(help="Validate project", description=""" Validates project based on specified task type and stores @@ -801,11 +812,11 @@ def build_validate_parser(parser_ctor=argparse.ArgumentParser): """, formatter_class=MultilineFormatter) - parser.add_argument('task_type', - choices=[task_type.name for task_type in TaskType], - help="Task type for validation") + parser.add_argument('-t', '--task_type', type=_parse_task_type, + help="Task type for validation, one of %s" % \ + ', '.join(t.name for t in TaskType)) parser.add_argument('-s', '--subset', dest='subset_name', default=None, - help="Subset to validate (default: None)") + help="Subset to validate (default: whole dataset)") parser.add_argument('-p', '--project', dest='project_dir', default='.', help="Directory of the project to validate (default: current dir)") parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, @@ -816,19 +827,24 @@ def build_validate_parser(parser_ctor=argparse.ArgumentParser): def validate_command(args): project = load_project(args.project_dir) - task_type = args.task_type - subset_name = args.subset_name - dst_file_name = f'validation_results-{task_type}' + dst_file_name = f'report-{args.task_type}' dataset = project.make_dataset() - if subset_name is not None: - dataset = dataset.get_subset(subset_name) - dst_file_name += f'-{subset_name}' + if args.subset_name is not None: + dataset = dataset.get_subset(args.subset_name) + dst_file_name += f'-{args.subset_name}' + + try: + validator_type = project.env.validators[args.task_type] + except KeyError: + raise CliException("Validator type '%s' is not found" % args.task_type) extra_args = {} - from datumaro.components.validator import _Validator - extra_args = _Validator.parse_cmdline(args.extra_args) - validation_results = validate_annotations(dataset, task_type, **extra_args) + if hasattr(validator_type, 'parse_cmdline'): + extra_args = 
validator_type.parse_cmdline(args.extra_args) + + validator = validator_type(**extra_args) + report = validator.validate(dataset) def numpy_encoder(obj): if isinstance(obj, np.generic): @@ -843,12 +859,12 @@ def _make_serializable(d): if isinstance(val, dict): _make_serializable(val) - _make_serializable(validation_results) + _make_serializable(report) dst_file = generate_next_file_name(dst_file_name, ext='.json') log.info("Writing project validation results to '%s'" % dst_file) - with open(dst_file, 'w') as f: - json.dump(validation_results, f, indent=4, sort_keys=True, + with open(dst_file, 'w', encoding='utf-8') as f: + json.dump(report, f, indent=4, sort_keys=True, default=numpy_encoder) def build_parser(parser_ctor=argparse.ArgumentParser): diff --git a/datumaro/cli/contexts/project/diff.py b/datumaro/cli/contexts/project/diff.py index 52eeddc64572..cb919e156fab 100644 --- a/datumaro/cli/contexts/project/diff.py +++ b/datumaro/cli/contexts/project/diff.py @@ -4,29 +4,28 @@ # SPDX-License-Identifier: MIT from collections import Counter +from enum import Enum, auto from itertools import zip_longest -from enum import Enum import logging as log import os import os.path as osp +import warnings import cv2 import numpy as np -_formats = ['simple'] - -import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore") import tensorboardX as tb - _formats.append('tensorboard') from datumaro.components.dataset import IDataset from datumaro.components.extractor import AnnotationType, LabelCategories from datumaro.util.image import save_image -OutputFormat = Enum('Formats', _formats) +class OutputFormat(Enum): + simple = auto() + tensorboard = auto() class DatasetDiffVisualizer: OutputFormat = OutputFormat @@ -218,8 +217,9 @@ def draw_bbox(self, img, shape, label, color): def get_label_diff_file(self): if self.label_diff_writer is None: - self.label_diff_writer = \ - open(osp.join(self.save_dir, 'label_diff.txt'), 'w') + self.label_diff_writer = open( + 
osp.join(self.save_dir, 'label_diff.txt'), + 'w', encoding='utf-8') return self.label_diff_writer def save_item_label_diff(self, item_a, item_b, diff): diff --git a/datumaro/cli/contexts/source.py b/datumaro/cli/contexts/source.py index caea28446cd1..54863340feb3 100644 --- a/datumaro/cli/contexts/source.py +++ b/datumaro/cli/contexts/source.py @@ -9,7 +9,8 @@ import shutil from datumaro.components.project import Environment -from ..util import add_subparser, CliException, MultilineFormatter + +from ..util import CliException, MultilineFormatter, add_subparser from ..util.project import load_project diff --git a/datumaro/cli/util/__init__.py b/datumaro/cli/util/__init__.py index 0a4357f70003..7fdded9cf9c9 100644 --- a/datumaro/cli/util/__init__.py +++ b/datumaro/cli/util/__init__.py @@ -68,7 +68,8 @@ def make_file_name(s): Normalizes string, converts to lowercase, removes non-alpha characters, and converts spaces to hyphens. """ - import unicodedata, re + import re + import unicodedata s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore') s = s.decode() s = re.sub(r'[^\w\s-]', '', s).strip().lower() diff --git a/datumaro/components/__init__.py b/datumaro/components/__init__.py index 5a1ec10f3a99..6ea3037239e0 100644 --- a/datumaro/components/__init__.py +++ b/datumaro/components/__init__.py @@ -1,5 +1,4 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT - diff --git a/datumaro/components/algorithms/__init__.py b/datumaro/components/algorithms/__init__.py index 5a1ec10f3a99..6ea3037239e0 100644 --- a/datumaro/components/algorithms/__init__.py +++ b/datumaro/components/algorithms/__init__.py @@ -1,5 +1,4 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT - diff --git a/datumaro/components/algorithms/rise.py b/datumaro/components/algorithms/rise.py index 3fb9a895c106..f2ffd4604d2b 100644 --- 
a/datumaro/components/algorithms/rise.py +++ b/datumaro/components/algorithms/rise.py @@ -5,9 +5,10 @@ # pylint: disable=unused-variable -import numpy as np from math import ceil +import numpy as np + from datumaro.components.extractor import AnnotationType from datumaro.util.annotation_util import nms @@ -200,4 +201,4 @@ def apply(self, image, progressive=False): if progressive: yield self.normalize_hmaps(heatmaps.copy(), total_counts) - yield self.normalize_hmaps(heatmaps, total_counts) \ No newline at end of file + yield self.normalize_hmaps(heatmaps, total_counts) diff --git a/datumaro/components/cli_plugin.py b/datumaro/components/cli_plugin.py index 702158aa7077..76b54be19720 100644 --- a/datumaro/components/cli_plugin.py +++ b/datumaro/components/cli_plugin.py @@ -18,7 +18,18 @@ def _get_name(cls): @staticmethod def _get_doc(cls): - return getattr(cls, '__doc__', "") + doc = getattr(cls, '__doc__', "") + if doc: + from datumaro.components.converter import Converter + from datumaro.components.extractor import ( + Extractor, Importer, Transform, + ) + from datumaro.components.launcher import Launcher + base_classes = [Launcher, Extractor, Transform, Importer, Converter] + + if any(getattr(t, '__doc__', '') == doc for t in base_classes): + doc = '' + return doc @classmethod def build_cmdline_parser(cls, **kwargs): @@ -45,6 +56,6 @@ def parse_cmdline(cls, args=None): return args def remove_plugin_type(s): - for t in {'transform', 'extractor', 'converter', 'launcher', 'importer'}: + for t in {'transform', 'extractor', 'converter', 'launcher', 'importer', 'validator'}: s = s.replace('_' + t, '') return s diff --git a/datumaro/components/config.py b/datumaro/components/config.py index 72c461ae8f11..9f21e7fd99ba 100644 --- a/datumaro/components/config.py +++ b/datumaro/components/config.py @@ -212,7 +212,7 @@ def set(self, key, value): @staticmethod def parse(path, *args, **kwargs): if isinstance(path, str): - with open(path, 'r') as f: + with open(path, 'r', 
encoding='utf-8') as f: return Config(yaml.safe_load(f), *args, **kwargs) else: return Config(yaml.safe_load(path), *args, **kwargs) @@ -224,7 +224,7 @@ def yaml_representer(dumper, value): def dump(self, path): if isinstance(path, str): - with open(path, 'w') as f: + with open(path, 'w', encoding='utf-8') as f: yaml.dump(self, f) else: yaml.dump(self, path) diff --git a/datumaro/components/config_model.py b/datumaro/components/config_model.py index 49f85e91332d..8cc536b96934 100644 --- a/datumaro/components/config_model.py +++ b/datumaro/components/config_model.py @@ -3,10 +3,9 @@ # # SPDX-License-Identifier: MIT -from datumaro.components.config import Config, \ - DictConfig as _DictConfig, \ - SchemaBuilder as _SchemaBuilder - +from datumaro.components.config import Config +from datumaro.components.config import DictConfig as _DictConfig +from datumaro.components.config import SchemaBuilder as _SchemaBuilder SOURCE_SCHEMA = _SchemaBuilder() \ .add('url', str) \ diff --git a/datumaro/components/converter.py b/datumaro/components/converter.py index a63f7bd1f70d..238e4e79d0e1 100644 --- a/datumaro/components/converter.py +++ b/datumaro/components/converter.py @@ -1,15 +1,16 @@ - -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from typing import Union import logging as log import os import os.path as osp import shutil from datumaro.components.cli_plugin import CliPlugin -from datumaro.util.image import save_image, ByteImage +from datumaro.components.extractor import DatasetItem +from datumaro.util.image import Image class Converter(CliPlugin): @@ -50,24 +51,34 @@ def __init__(self, extractor, save_dir, save_images=False, self._extractor = extractor self._save_dir = save_dir - def _find_image_ext(self, item): + def _find_image_ext(self, item: Union[DatasetItem, Image]): src_ext = None - if item.has_image: + + if isinstance(item, DatasetItem) and item.has_image: src_ext = item.image.ext + elif 
isinstance(item, Image): + src_ext = item.ext return self._image_ext or src_ext or self._default_image_ext - def _make_image_filename(self, item, *, name=None, subdir=None): + def _make_item_filename(self, item, *, name=None, subdir=None): name = name or item.id subdir = subdir or '' - return osp.join(subdir, name + self._find_image_ext(item)) + return osp.join(subdir, name) + + def _make_image_filename(self, item, *, name=None, subdir=None): + return self._make_item_filename(item, name=name, subdir=subdir) + \ + self._find_image_ext(item) + + def _make_pcd_filename(self, item, *, name=None, subdir=None): + return self._make_item_filename(item, name=name, subdir=subdir) + '.pcd' def _save_image(self, item, path=None, *, name=None, subdir=None, basedir=None): assert not ((subdir or name or basedir) and path), \ "Can't use both subdir or name or basedir and path arguments" - if not item.image.has_data: + if not item.has_image or not item.image.has_data: log.warning("Item '%s' has no image", item.id) return @@ -76,15 +87,23 @@ def _save_image(self, item, path=None, *, self._make_image_filename(item, name=name, subdir=subdir)) path = osp.abspath(path) - src_ext = item.image.ext.lower() - dst_ext = osp.splitext(osp.basename(path))[1].lower() + item.image.save(path) + + def _save_point_cloud(self, item=None, path=None, *, + name=None, subdir=None, basedir=None): + assert not ((subdir or name or basedir) and path), \ + "Can't use both subdir or name or basedir and path arguments" + + if not item.point_cloud: + log.warning("Item '%s' has no pcd", item.id) + return + + basedir = basedir or self._save_dir + path = path or osp.join(basedir, + self._make_pcd_filename(item, name=name, subdir=subdir)) + path = osp.abspath(path) os.makedirs(osp.dirname(path), exist_ok=True) - if src_ext == dst_ext and osp.isfile(item.image.path): - if item.image.path != path: - shutil.copyfile(item.image.path, path) - elif src_ext == dst_ext and isinstance(item.image, ByteImage): - with 
open(path, 'wb') as f: - f.write(item.image.get_bytes()) - else: - save_image(path, item.image.data) + if item.point_cloud and osp.isfile(item.point_cloud): + if item.point_cloud != path: + shutil.copyfile(item.point_cloud, path) diff --git a/datumaro/components/dataset.py b/datumaro/components/dataset.py index 3cc16eb0e52d..7072150f2df6 100644 --- a/datumaro/components/dataset.py +++ b/datumaro/components/dataset.py @@ -2,27 +2,30 @@ # # SPDX-License-Identifier: MIT -#pylint: disable=redefined-builtin - from contextlib import contextmanager -from enum import Enum -from typing import Iterable, Iterator, Optional, Tuple, Union, Dict, List +from copy import copy +from enum import Enum, auto +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +import inspect import logging as log import os import os.path as osp import shutil -from datumaro.components.dataset_filter import \ - XPathDatasetFilter, XPathAnnotationsFilter -from datumaro.components.extractor import (CategoriesInfo, Extractor, - IExtractor, LabelCategories, AnnotationType, DatasetItem, - DEFAULT_SUBSET_NAME, Transform) +from datumaro.components.dataset_filter import ( + XPathAnnotationsFilter, XPathDatasetFilter, +) from datumaro.components.environment import Environment -from datumaro.components.errors import DatumaroError, RepeatedItemError -from datumaro.util import error_rollback +from datumaro.components.errors import ( + CategoriesRedefinedError, DatumaroError, RepeatedItemError, +) +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, CategoriesInfo, DatasetItem, Extractor, + IExtractor, ItemTransform, LabelCategories, Transform, +) +from datumaro.util import error_rollback, is_member_redefined from datumaro.util.log_utils import logging_disabled - DEFAULT_FORMAT = 'datumaro' IDataset = IExtractor @@ -39,36 +42,47 @@ def __iter__(self) -> Iterator[DatasetItem]: def __len__(self) -> int: return len(self._traversal_order) + def is_empty(self) -> 
bool: + # Subsets might contain removed items, so this may differ from __len__ + return all(len(s) == 0 for s in self.data.values()) + def put(self, item) -> bool: subset = self.data.setdefault(item.subset, {}) - is_new = subset.get(item.id) == None + is_new = subset.get(item.id) is None self._traversal_order[(item.id, item.subset)] = item subset[item.id] = item return is_new - def _get(self, id, subset=None, dummy=None): - id = str(id) - subset = subset or DEFAULT_SUBSET_NAME + def get(self, id: Union[str, DatasetItem], subset: Optional[str] = None, + dummy: Any = None) -> Optional[DatasetItem]: + if isinstance(id, DatasetItem): + id, subset = id.id, id.subset + else: + id = str(id) + subset = subset or DEFAULT_SUBSET_NAME return self.data.get(subset, {}).get(id, dummy) - def get(self, id, subset=None) -> Optional[DatasetItem]: - return self._get(id, subset) - - def remove(self, id, subset=None) -> bool: - id = str(id) - subset = subset or DEFAULT_SUBSET_NAME + def remove(self, id: Union[str, DatasetItem], + subset: Optional[str] = None) -> bool: + if isinstance(id, DatasetItem): + id, subset = id.id, id.subset + else: + id = str(id) + subset = subset or DEFAULT_SUBSET_NAME - subset_data = self.data.get(subset, {}) - is_removed = subset_data.pop(id, None) is not None + subset_data = self.data.setdefault(subset, {}) + is_removed = subset_data.get(id) is not None + subset_data[id] = None if is_removed: self._traversal_order.pop((id, subset)) return is_removed def __contains__(self, x: Union[DatasetItem, Tuple[str, str]]) -> bool: - if isinstance(x, DatasetItem): - x = (x.id, x.subset) - return self.get(*x) is not None + if not isinstance(x, tuple): + x = [x] + dummy = 0 + return self.get(*x, dummy=dummy) is not dummy def get_subset(self, name): return self.data.get(name, {}) @@ -76,9 +90,15 @@ def get_subset(self, name): def subsets(self): return self.data + def __copy__(self): + copied = DatasetItemStorage() + copied._traversal_order = copy(self._traversal_order) + 
copied.data = copy(self.data) + return copied + class DatasetItemStorageDatasetView(IDataset): class Subset(IDataset): - def __init__(self, parent, name): + def __init__(self, parent: 'DatasetItemStorageDatasetView', name: str): super().__init__() self.parent = parent self.name = name @@ -88,7 +108,9 @@ def _data(self): return self.parent._get_subset_data(self.name) def __iter__(self): - yield from self._data.values() + for item in self._data.values(): + if item: + yield item def __len__(self): return len(self._data) @@ -97,22 +119,22 @@ def put(self, item): return self._data.put(item) def get(self, id, subset=None): - assert subset or DEFAULT_SUBSET_NAME == \ - self.name or DEFAULT_SUBSET_NAME + assert (subset or DEFAULT_SUBSET_NAME) == \ + (self.name or DEFAULT_SUBSET_NAME) return self._data.get(id, subset) def remove(self, id, subset=None): - assert subset or DEFAULT_SUBSET_NAME == \ - self.name or DEFAULT_SUBSET_NAME + assert (subset or DEFAULT_SUBSET_NAME) == \ + (self.name or DEFAULT_SUBSET_NAME) return self._data.remove(id, subset) def get_subset(self, name): - assert name or DEFAULT_SUBSET_NAME == \ - self.name or DEFAULT_SUBSET_NAME + assert (name or DEFAULT_SUBSET_NAME) == \ + (self.name or DEFAULT_SUBSET_NAME) return self def subsets(self): - return { self.name or DEFAULT_SUBSET_NAME: self } + return { self.name or DEFAULT_SUBSET_NAME : self } def categories(self): return self.parent.categories() @@ -144,7 +166,10 @@ def get(self, id, subset=None): return self._parent.get(id, subset=subset) -ItemStatus = Enum('ItemStatus', ['added', 'modified', 'removed']) +class ItemStatus(Enum): + added = auto() + modified = auto() + removed = auto() class DatasetPatch: def __init__(self, data: DatasetItemStorage, @@ -159,12 +184,9 @@ def __init__(self, data: DatasetItemStorage, @property def updated_subsets(self) -> Dict[str, ItemStatus]: if self._updated_subsets is None: - subset_stats = set() + self._updated_subsets = {} for _, subset in self.updated_items: - 
subset_stats.add(subset) - self._updated_subsets = { - subset: ItemStatus.modified for subset in subset_stats - } + self._updated_subsets.setdefault(subset, ItemStatus.modified) return self._updated_subsets def as_dataset(self, parent: IDataset) -> IDataset: @@ -187,22 +209,24 @@ def put(self, item): return self.parent.put(item, subset=self.name) def get(self, id, subset=None): - assert subset or DEFAULT_SUBSET_NAME == \ - self.name or DEFAULT_SUBSET_NAME + assert (subset or DEFAULT_SUBSET_NAME) == \ + (self.name or DEFAULT_SUBSET_NAME) return self.parent.get(id, subset=self.name) def remove(self, id, subset=None): - assert subset or DEFAULT_SUBSET_NAME == \ - self.name or DEFAULT_SUBSET_NAME + assert (subset or DEFAULT_SUBSET_NAME) == \ + (self.name or DEFAULT_SUBSET_NAME) return self.parent.remove(id, subset=self.name) def get_subset(self, name): - assert name or DEFAULT_SUBSET_NAME == \ - self.name or DEFAULT_SUBSET_NAME + assert (name or DEFAULT_SUBSET_NAME) == \ + (self.name or DEFAULT_SUBSET_NAME) return self def subsets(self): - return { self.name or DEFAULT_SUBSET_NAME: self } + if (self.name or DEFAULT_SUBSET_NAME) == DEFAULT_SUBSET_NAME: + return self.parent.subsets() + return { self.name: self } def categories(self): return self.parent.categories() @@ -220,75 +244,194 @@ def __init__(self, source: IDataset = None, raise ValueError("Can't use both source and categories") self._categories = categories - # possible combinations - # 1. source + storage (patch) + # Possible combinations: + # 1. source + storage + # - Storage contains a patch to the Source data. # 2. 
no source + storage - # cache or just a dataset from scratch, or cached transform - # - In this case updated_items describes the patch + # - a dataset created from scratch + # - a dataset from a source or transform, which was cached self._source = source self._storage = DatasetItemStorage() # patch or cache + self._transforms = [] # A stack of postponed transforms + + # Describes changes in the dataset since initialization self._updated_items = {} # (id, subset) -> ItemStatus - self._transformed = False - self._length = None + self._flush_changes = False # Deferred flush indicator + + self._length = 0 if source is None else None def is_cache_initialized(self) -> bool: - return self._source is None + return self._source is None and not self._transforms @property def _is_unchanged_wrapper(self) -> bool: - return self._source is not None and not self._updated_items + return self._source is not None and self._storage.is_empty() and \ + not self._transforms def init_cache(self): if not self.is_cache_initialized(): for _ in self._iter_init_cache(): pass - self._length = len(self._storage) - def _iter_init_cache(self) -> Iterable[DatasetItem]: - # Merges the source and patch, caches the result and - # provides an iterator for the resulting item sequence. + # Merges the source, source transforms and patch, caches the result + # and provides an iterator for the resulting item sequence. # # If iterated in parallel, the result is undefined. # If storage is changed during iteration, the result is undefined. # # TODO: can potentially be optimized by sharing # the cache between parallel consumers and introducing some kind of lock + # + # Cases: + # 1. Has source and patch + # 2. Has source, transforms and patch + # a. Transforms affect only an item (i.e. they are local) + # b. Transforms affect whole dataset + # + # The patch is always applied on top of the source / transforms stack. 
+ + class _StackedTransform(Transform): + def __init__(self, source, transforms): + super().__init__(source) + + self.is_local = True + self.transforms = [] + for transform in transforms: + source = transform[0](source, *transform[1], **transform[2]) + self.transforms.append(source) + + if self.is_local and not isinstance(source, ItemTransform): + self.is_local = False + + def transform_item(self, item): + for t in self.transforms: + if item is None: + break + item = t.transform_item(item) + return item + + def __iter__(self): + yield from self.transforms[-1] + + def categories(self): + return self.transforms[-1].categories() + + def _update_status(item_id, new_status: ItemStatus): + current_status = self._updated_items.get(item_id) + + if current_status is None: + self._updated_items[item_id] = new_status + elif new_status == ItemStatus.removed: + if current_status == ItemStatus.added: + self._updated_items.pop(item_id) + else: + self._updated_items[item_id] = ItemStatus.removed + elif new_status == ItemStatus.modified: + if current_status != ItemStatus.added: + self._updated_items[item_id] = ItemStatus.modified + elif new_status == ItemStatus.added: + if current_status != ItemStatus.added: + self._updated_items[item_id] = ItemStatus.modified + else: + assert False, "Unknown status %s" % new_status patch = self._storage # must be empty after transforming cache = DatasetItemStorage() + source = self._source + transform = None + + if self._transforms: + transform = _StackedTransform(source, self._transforms) + if transform.is_local: + # An optimized way to find modified items: + # Transform items inplace and analyze transform outputs + pass + else: + # A generic way to find modified items: + # Collect all the dataset original ids and compare + # with transform outputs. + # TODO: introduce Extractor.items() / .ids() to avoid extra + # dataset traversals? 
+ old_ids = set((item.id, item.subset) for item in source) + source = transform i = -1 - for i, item in enumerate(self._source): - if item in cache: - raise RepeatedItemError((item.id, item.subset)) + for i, item in enumerate(source): + if transform and transform.is_local: + old_id = (item.id, item.subset) + item = transform.transform_item(item) + + item_id = (item.id, item.subset) if item else None + + if item_id in cache: + raise RepeatedItemError(item_id) + if item in patch: - item = patch.get(item.id, item.subset) - if self._updated_items.get((item.id, item.subset)) == \ - ItemStatus.removed: - item = None - if item: - cache.put(item) - yield item + # Apply changes from the patch + item = patch.get(*item_id) + elif transform and not self._flush_changes: + # Find changes made by transforms, if not overridden by patch + if transform.is_local: + if not item: + _update_status(old_id, ItemStatus.removed) + elif old_id != item_id: + _update_status(old_id, ItemStatus.removed) + _update_status(item_id, ItemStatus.added) + else: + # Consider all items modified without comparison, + # because such comparison would be very expensive + _update_status(old_id, ItemStatus.modified) + else: + if item: + if item_id not in old_ids: + _update_status(item_id, ItemStatus.added) + else: + _update_status(item_id, ItemStatus.modified) + + if not item: + continue + + cache.put(item) + yield item + if i == -1: cache = patch for item in patch: - self._updated_items[(item.id, item.subset)] = ItemStatus.added + if not self._flush_changes: + _update_status((item.id, item.subset), ItemStatus.added) yield item else: for item in patch: if item in cache: # already processed continue - self._updated_items[(item.id, item.subset)] = ItemStatus.added + if not self._flush_changes: + _update_status((item.id, item.subset), ItemStatus.added) cache.put(item) yield item + if not self._flush_changes and transform and not transform.is_local: + # Mark removed items that were not produced by transforms + for 
old_id in old_ids: + if old_id not in self._updated_items: + self._updated_items[old_id] = ItemStatus.removed + self._storage = cache - source_cat = self._source.categories() + self._length = len(cache) + + if transform: + source_cat = transform.categories() + else: + source_cat = source.categories() if source_cat is not None: self._categories = source_cat - self._length = len(cache) + self._source = None + self._transforms = [] + + if self._flush_changes: + self._flush_changes = False + self._updated_items = {} def __iter__(self) -> Iterable[DatasetItem]: if self._is_unchanged_wrapper: @@ -309,18 +452,29 @@ def __len__(self) -> int: return self._length def categories(self) -> CategoriesInfo: - if self._categories is not None: + if self.is_cache_initialized(): + return self._categories + elif self._categories is not None: + return self._categories + elif any(is_member_redefined('categories', Transform, t[0]) + for t in self._transforms): + self.init_cache() return self._categories else: return self._source.categories() + def define_categories(self, categories: CategoriesInfo): + if self._categories or self._source is not None: + raise CategoriesRedefinedError() + self._categories = categories + def put(self, item): is_new = self._storage.put(item) - if not self.is_cache_initialized() or not is_new: - self._updated_items[(item.id, item.subset)] = ItemStatus.modified - elif is_new: + if not self.is_cache_initialized() or is_new: self._updated_items[(item.id, item.subset)] = ItemStatus.added + else: + self._updated_items[(item.id, item.subset)] = ItemStatus.modified if is_new and not self.is_cache_initialized(): self._length = None @@ -360,40 +514,50 @@ def get_subset(self, name): return self._merged().get_subset(name) def subsets(self): - subsets = {} - if not self.is_cache_initialized(): - subsets.update(self._source.subsets()) - subsets.update(self._storage.subsets()) - return subsets - - def transform(self, method, *args, **kwargs): - self._source = 
method(self._merged(), *args, **kwargs) - self._storage = DatasetItemStorage() - # TODO: can be optimized by analyzing methods - self._categories = None + # TODO: check if this can be optimized in case of transforms + # and other cases + return self._merged().subsets() + + def transform(self, method: Transform, *args, **kwargs): + # Flush accumulated changes + if not self._storage.is_empty(): + source = self._merged() + self._storage = DatasetItemStorage() + else: + source = self._source + + if not self._transforms: + # The stack of transforms only needs a single source + self._source = source + self._transforms.append((method, args, kwargs)) + + if is_member_redefined('categories', Transform, method): + self._categories = None self._length = None - self._transformed = True - self._updated_items = {} def has_updated_items(self): - return self._transformed or self._updated_items + return self._transforms or self._updated_items def get_patch(self): # Patch includes only added or modified items. # To find removed items, one needs to consult updated_items list. 
- if self._transformed: + if self._transforms: self.init_cache() - # Consider all items modified after transforming - self._updated_items = { - (item.id, item.subset): ItemStatus.modified - for item in self._storage - } - return DatasetPatch(self._storage, self._categories, + + # The current patch (storage) can miss some removals done + # so we add them manually + patch = copy(self._storage) + for (item_id, subset), status in self._updated_items.items(): + if status is ItemStatus.removed: + patch.remove(item_id, subset) + + return DatasetPatch(patch, self._categories, self._updated_items) def flush_changes(self): self._updated_items = {} - self._transformed = False + if not (self.is_cache_initialized() or self._is_unchanged_wrapper): + self._flush_changes = True class Dataset(IDataset): @@ -454,8 +618,7 @@ def __init__(self, source: IDataset = None, self._source_path = None def define_categories(self, categories: Dict): - assert not self._data._categories and self._data._source is None - self._data._categories = categories + self._data.define_categories(categories) def init_cache(self): self._data.init_cache() @@ -513,8 +676,18 @@ def update(self, items: Iterable[DatasetItem]) -> 'Dataset': def transform(self, method: Union[str, Transform], *args, **kwargs) -> 'Dataset': + """ + Applies some function to dataset items. 
+ """ + if isinstance(method, str): - method = self.env.make_transform(method) + method = self.env.transforms[method] + + if inspect.isclass(method) and issubclass(method, Transform): + pass + else: + raise TypeError("Unexpected 'method' argument type: %s" % \ + type(method)) self._data.transform(method, *args, **kwargs) if self.is_eager: @@ -533,9 +706,11 @@ def run_model(self, model, batch_size=1) -> 'Dataset': raise TypeError('Unexpected model argument type: %s' % type(model)) def select(self, pred): - class _DatasetFilter(Transform): - def __iter__(self): - return filter(pred, iter(self._extractor)) + class _DatasetFilter(ItemTransform): + def transform_item(self, item): + if pred(item): + return item + return None return self.transform(_DatasetFilter) @@ -684,4 +859,4 @@ def eager_mode(new_mode=True, dataset: Dataset = None): Dataset._global_eager = new_mode yield finally: - Dataset._global_eager = old_mode \ No newline at end of file + Dataset._global_eager = old_mode diff --git a/datumaro/components/dataset_filter.py b/datumaro/components/dataset_filter.py index 2fe1443d5155..dd06919edd14 100644 --- a/datumaro/components/dataset_filter.py +++ b/datumaro/components/dataset_filter.py @@ -4,10 +4,12 @@ # SPDX-License-Identifier: MIT import logging as log -from lxml import etree as ET # lxml has proper XPath implementation -from datumaro.components.extractor import (Transform, - Annotation, AnnotationType, - Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, + +from lxml import etree as ET # lxml has proper XPath implementation + +from datumaro.components.extractor import ( + Annotation, AnnotationType, Bbox, Caption, ItemTransform, Label, Mask, + Points, Polygon, PolyLine, ) @@ -213,19 +215,28 @@ def encode_annotation(cls, o, categories=None): def to_string(encoded_item): return ET.tostring(encoded_item, encoding='unicode', pretty_print=True) -def XPathDatasetFilter(extractor, xpath=None): - if xpath is None: - return extractor - try: - xpath = 
ET.XPath(xpath) - except Exception: - log.error("Failed to create XPath from expression '%s'", xpath) - raise - f = lambda item: bool(xpath( - DatasetItemEncoder.encode(item, extractor.categories()))) - return extractor.select(f) - -class XPathAnnotationsFilter(Transform): +class XPathDatasetFilter(ItemTransform): + def __init__(self, extractor, xpath=None): + super().__init__(extractor) + + if xpath is not None: + try: + xpath = ET.XPath(xpath) + except Exception: + log.error("Failed to create XPath from expression '%s'", xpath) + raise + + self._f = lambda item: bool(xpath( + DatasetItemEncoder.encode(item, extractor.categories()))) + else: + self._f = None + + def transform_item(self, item): + if self._f and not self._f(item): + return None + return item + +class XPathAnnotationsFilter(ItemTransform): def __init__(self, extractor, xpath=None, remove_empty=False): super().__init__(extractor) @@ -239,12 +250,6 @@ def __init__(self, extractor, xpath=None, remove_empty=False): self._remove_empty = remove_empty - def __iter__(self): - for item in self._extractor: - item = self.transform_item(item) - if item is not None: - yield item - def transform_item(self, item): if self._filter is None: return item diff --git a/datumaro/components/environment.py b/datumaro/components/environment.py index c27131a84143..0170bf979dc5 100644 --- a/datumaro/components/environment.py +++ b/datumaro/components/environment.py @@ -4,32 +4,24 @@ from functools import partial from glob import glob -import git +from typing import Dict, Iterable import inspect import logging as log import os import os.path as osp +import git + from datumaro.components.config import Config from datumaro.components.config_model import Model, Source from datumaro.util.os_util import import_foreign_module class Registry: - def __init__(self, config=None, item_type=None): - self.item_type = item_type - + def __init__(self): self.items = {} - if config is not None: - self.load(config) - - def load(self, config): - 
pass - def register(self, name, value): - if self.item_type: - value = self.item_type(value) self.items[name] = value return value @@ -46,44 +38,34 @@ def __getitem__(self, key): def __contains__(self, key): return key in self.items + def __iter__(self): + return iter(self.items) class ModelRegistry(Registry): - def __init__(self, config=None): - super().__init__(config, item_type=Model) - - def load(self, config): - # TODO: list default dir, insert values - if 'models' in config: - for name, model in config.models.items(): - self.register(name, model) + def batch_register(self, items: Dict[str, Model]): + for name, model in items.items(): + self.register(name, model) class SourceRegistry(Registry): - def __init__(self, config=None): - super().__init__(config, item_type=Source) - - def load(self, config): - # TODO: list default dir, insert values - if 'sources' in config: - for name, source in config.sources.items(): - self.register(name, source) - + def batch_register(self, items: Dict[str, Source]): + for name, source in items.items(): + self.register(name, source) class PluginRegistry(Registry): - def __init__(self, config=None, builtin=None, local=None): - super().__init__(config) + def __init__(self, filter=None): #pylint: disable=redefined-builtin + super().__init__() + self.filter = filter + def batch_register(self, values: Iterable): from datumaro.components.cli_plugin import CliPlugin - if builtin is not None: - for v in builtin: - k = CliPlugin._get_name(v) - self.register(k, v) - if local is not None: - for v in local: - k = CliPlugin._get_name(v) - self.register(k, v) + for v in values: + if self.filter and not self.filter(v): + continue + name = CliPlugin._get_name(v) + self.register(name, v) class GitWrapper: def __init__(self, config=None): @@ -128,50 +110,64 @@ def remove_submodule(self, name, **kwargs): class Environment: _builtin_plugins = None - PROJECT_EXTRACTOR_NAME = 'datumaro_project' def __init__(self, config=None): from 
datumaro.components.project import ( - PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA, load_project_as_dataset) + PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA, + ) config = Config(config, fallback=PROJECT_DEFAULT_CONFIG, schema=PROJECT_SCHEMA) - self.models = ModelRegistry(config) - self.sources = SourceRegistry(config) + self.models = ModelRegistry() + self.sources = SourceRegistry() self.git = GitWrapper(config) - env_dir = osp.join(config.project_dir, config.env_dir) - builtin = self._load_builtin_plugins() - custom = self._load_plugins2(osp.join(env_dir, config.plugins_dir)) - select = lambda seq, t: [e for e in seq if issubclass(e, t)] - from datumaro.components.converter import Converter - from datumaro.components.extractor import (Importer, Extractor, - Transform) - from datumaro.components.launcher import Launcher - self.extractors = PluginRegistry( - builtin=select(builtin, Extractor), - local=select(custom, Extractor) - ) - self.extractors.register(self.PROJECT_EXTRACTOR_NAME, - load_project_as_dataset) + def _filter(accept, skip=None): + accept = (accept, ) if inspect.isclass(accept) else tuple(accept) + skip = {skip} if inspect.isclass(skip) else set(skip or []) + skip = tuple(skip | set(accept)) + return lambda t: issubclass(t, accept) and t not in skip - self.importers = PluginRegistry( - builtin=select(builtin, Importer), - local=select(custom, Importer) - ) - self.launchers = PluginRegistry( - builtin=select(builtin, Launcher), - local=select(custom, Launcher) - ) - self.converters = PluginRegistry( - builtin=select(builtin, Converter), - local=select(custom, Converter) - ) - self.transforms = PluginRegistry( - builtin=select(builtin, Transform), - local=select(custom, Transform) + from datumaro.components.converter import Converter + from datumaro.components.extractor import ( + Extractor, Importer, ItemTransform, SourceExtractor, Transform, ) + from datumaro.components.launcher import Launcher + self._extractors = PluginRegistry(_filter(Extractor, + 
skip=SourceExtractor)) + self._importers = PluginRegistry(_filter(Importer)) + self._launchers = PluginRegistry(_filter(Launcher)) + self._converters = PluginRegistry(_filter(Converter)) + self._transforms = PluginRegistry(_filter(Transform, + skip=ItemTransform)) + self._builtins_initialized = False + + def _get_plugin_registry(self, name): + if not self._builtins_initialized: + self._builtins_initialized = True + self._register_builtin_plugins() + return getattr(self, name) + + @property + def extractors(self) -> PluginRegistry: + return self._get_plugin_registry('_extractors') + + @property + def importers(self) -> PluginRegistry: + return self._get_plugin_registry('_importers') + + @property + def launchers(self) -> PluginRegistry: + return self._get_plugin_registry('_launchers') + + @property + def converters(self) -> PluginRegistry: + return self._get_plugin_registry('_converters') + + @property + def transforms(self) -> PluginRegistry: + return self._get_plugin_registry('_transforms') @staticmethod def _find_plugins(plugins_dir): @@ -211,7 +207,15 @@ def _import_module(cls, module_dir, module_name, types, package=None): return exports @classmethod - def _load_plugins(cls, plugins_dir, types): + def _load_plugins(cls, plugins_dir, types=None): + if not types: + from datumaro.components.converter import Converter + from datumaro.components.extractor import ( + Extractor, Importer, Transform, + ) + from datumaro.components.launcher import Launcher + types = [Extractor, Converter, Importer, Launcher, Transform] + types = tuple(types) plugins = cls._find_plugins(plugins_dir) @@ -247,24 +251,28 @@ def _load_plugins(cls, plugins_dir, types): @classmethod def _load_builtin_plugins(cls): - if not cls._builtin_plugins: + if cls._builtin_plugins is None: plugins_dir = osp.join( __file__[: __file__.rfind(osp.join('datumaro', 'components'))], osp.join('datumaro', 'plugins') ) assert osp.isdir(plugins_dir), plugins_dir - cls._builtin_plugins = 
cls._load_plugins2(plugins_dir) + cls._builtin_plugins = cls._load_plugins(plugins_dir) return cls._builtin_plugins - @classmethod - def _load_plugins2(cls, plugins_dir): - from datumaro.components.converter import Converter - from datumaro.components.extractor import (Extractor, Importer, - Transform) - from datumaro.components.launcher import Launcher - types = [Extractor, Converter, Importer, Launcher, Transform] + def load_plugins(self, plugins_dir): + plugins = self._load_plugins(plugins_dir) + self._register_plugins(plugins) + + def _register_builtin_plugins(self): + self._register_plugins(self._load_builtin_plugins()) - return cls._load_plugins(plugins_dir, types) + def _register_plugins(self, plugins): + self.extractors.batch_register(plugins) + self.importers.batch_register(plugins) + self.launchers.batch_register(plugins) + self.converters.batch_register(plugins) + self.transforms.batch_register(plugins) def make_extractor(self, name, *args, **kwargs): return self.extractors.get(name)(*args, **kwargs) diff --git a/datumaro/components/errors.py b/datumaro/components/errors.py index 717399b060d3..ac23e3a9b0ef 100644 --- a/datumaro/components/errors.py +++ b/datumaro/components/errors.py @@ -17,6 +17,10 @@ class RepeatedItemError(DatasetError): def __str__(self): return "Item %s is repeated in the source sequence." 
% (self.item_id, ) +class CategoriesRedefinedError(DatasetError): + def __str__(self): + return "Categories can only be set once for a dataset" + @attrs class MismatchingImageInfoError(DatasetError): a = attrib() diff --git a/datumaro/components/extractor.py b/datumaro/components/extractor.py index e8cc4f89c99b..17220d04afc4 100644 --- a/datumaro/components/extractor.py +++ b/datumaro/components/extractor.py @@ -1,32 +1,30 @@ - # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT -from enum import Enum +from enum import Enum, auto from glob import iglob -from typing import Iterable, List, Dict, Optional -import numpy as np +from typing import Callable, Dict, Iterable, List, Optional import os import os.path as osp +from attr import attrib, attrs import attr -from attr import attrs, attrib +import numpy as np +from datumaro.util.attrs_util import default_if_none, not_empty from datumaro.util.image import Image -from datumaro.util.attrs_util import not_empty, default_if_none -AnnotationType = Enum('AnnotationType', - [ - 'label', - 'mask', - 'points', - 'polygon', - 'polyline', - 'bbox', - 'caption', - ]) +class AnnotationType(Enum): + label = auto() + mask = auto() + points = auto() + polygon = auto() + polyline = auto() + bbox = auto() + caption = auto() + cuboid_3d = auto() _COORDINATE_ROUNDING_DIGITS = 2 @@ -361,6 +359,72 @@ def as_polygon(self): def get_area(self): return 0 + +@attrs +class Cuboid3d(Annotation): + _type = AnnotationType.cuboid_3d + _points = attrib(type=list, default=None) + label = attrib(converter=attr.converters.optional(int), + default=None, kw_only=True) + + @_points.validator + def _points_validator(self, attribute, points): + if points is None: + points = [0, 0, 0, 0, 0, 0, 1, 1, 1] + else: + assert len(points) == 3 + 3 + 3, points + points = [round(p, _COORDINATE_ROUNDING_DIGITS) for p in points] + self._points = points + + # will be overridden by attrs, then will be overridden again by us + # attrs' method will 
be renamed to __attrs_init__ + def __init__(self, position, rotation=None, scale=None, **kwargs): + assert len(position) == 3, position + if not rotation: + rotation = [0] * 3 + if not scale: + scale = [1] * 3 + kwargs.pop('points', None) + self.__attrs_init__(points=[*position, *rotation, *scale], **kwargs) + __actual_init__ = __init__ # save pointer + + @property + def position(self): + """[x, y, z]""" + return self._points[0:3] + + @position.setter + def _set_poistion(self, value): + # TODO: fix the issue with separate coordinate rounding: + # self.position[0] = 12.345676 + # - the number assigned won't be rounded. + self.position[:] = \ + [round(p, _COORDINATE_ROUNDING_DIGITS) for p in value] + + @property + def rotation(self): + """[rx, ry, rz]""" + return self._points[3:6] + + @rotation.setter + def _set_rotation(self, value): + self.rotation[:] = \ + [round(p, _COORDINATE_ROUNDING_DIGITS) for p in value] + + @property + def scale(self): + """[sx, sy, sz]""" + return self._points[6:9] + + @scale.setter + def _set_scale(self, value): + self.scale[:] = \ + [round(p, _COORDINATE_ROUNDING_DIGITS) for p in value] + +assert not hasattr(Cuboid3d, '__attrs_init__') # hopefully, it will be supported +setattr(Cuboid3d, '__attrs_init__', Cuboid3d.__init__) +setattr(Cuboid3d, '__init__', Cuboid3d.__actual_init__) + @attrs class Polygon(_Shape): _type = AnnotationType.polygon @@ -468,11 +532,11 @@ def add(self, label_id, labels=None, joints=None): @attrs class Points(_Shape): - Visibility = Enum('Visibility', [ - ('absent', 0), - ('hidden', 1), - ('visible', 2), - ]) + class Visibility(Enum): + absent = 0 + hidden = 1 + visible = 2 + _type = AnnotationType.points visibility = attrib(type=list, default=None) @@ -519,17 +583,38 @@ class DatasetItem: type=str, validator=not_empty) annotations = attrib(factory=list, validator=default_if_none(list)) subset = attrib(converter=lambda v: v or DEFAULT_SUBSET_NAME, default=None) + + # Currently unused path = attrib(factory=list, 
validator=default_if_none(list)) + # TODO: introduce "media" field with type info. Replace image and pcd. image = attrib(type=Image, default=None) - @image.validator - def _image_validator(self, attribute, image): + # TODO: introduce pcd type like Image + point_cloud = attrib(type=str, default=None) + related_images = attrib(type=List[Image], default=None) + + def __attrs_post_init__(self): + if (self.has_image and self.has_point_cloud): + raise ValueError("Can't set both image and point cloud info") + if self.related_images and not self.has_point_cloud: + raise ValueError("Related images require point cloud") + + def _image_converter(image): if callable(image) or isinstance(image, np.ndarray): image = Image(data=image) elif isinstance(image, str): image = Image(path=image) - assert image is None or isinstance(image, Image) - self.image = image + assert image is None or isinstance(image, Image), type(image) + return image + image.converter = _image_converter + + def _related_image_converter(images): + return list(map(__class__._image_converter, images or [])) + related_images.converter = _related_image_converter + + @point_cloud.validator + def _point_cloud_validator(self, attribute, pcd): + assert pcd is None or isinstance(pcd, str), type(pcd) attributes = attrib(factory=dict, validator=default_if_none(dict)) @@ -537,19 +622,26 @@ def _image_validator(self, attribute, image): def has_image(self): return self.image is not None + @property + def has_point_cloud(self): + return self.point_cloud is not None + def wrap(item, **kwargs): return attr.evolve(item, **kwargs) CategoriesInfo = Dict[AnnotationType, Categories] -class IExtractor: #pylint: disable=redefined-builtin +class IExtractor: def __iter__(self) -> Iterable[DatasetItem]: raise NotImplementedError() def __len__(self) -> int: raise NotImplementedError() + def __bool__(self): # avoid __len__ use for truth checking + return True + def subsets(self) -> Dict[str, 'IExtractor']: raise NotImplementedError() @@ 
-594,6 +686,9 @@ def get_subset(self, name): if self._subsets is None: self._init_cache() if name in self._subsets: + if len(self._subsets) == 1: + return self + return self.select(lambda item: item.subset == name) else: raise Exception("Unknown subset '%s', available subsets: %s" % \ @@ -616,7 +711,7 @@ def categories(_): def categories(self): return {} - def get(self, id, subset=None): #pylint: disable=redefined-builtin + def get(self, id, subset=None): subset = subset or DEFAULT_SUBSET_NAME for item in self: if item.id == id and item.subset == subset: @@ -640,7 +735,7 @@ def __iter__(self): def __len__(self): return len(self._items) - def get(self, id, subset=None): #pylint: disable=redefined-builtin + def get(self, id, subset=None): assert subset == self._subset, '%s != %s' % (subset, self._subset) return super().get(id, subset or self._subset) @@ -654,7 +749,7 @@ def find_sources(cls, path) -> List[Dict]: raise NotImplementedError() def __call__(self, path, **extra_params): - from datumaro.components.project import Project # cyclic import + from datumaro.components.project import Project # cyclic import project = Project() sources = self.find_sources(osp.normpath(path)) @@ -672,13 +767,39 @@ def __call__(self, path, **extra_params): return project @classmethod - def _find_sources_recursive(cls, path, ext, extractor_name, - filename='*', dirname='', file_filter=None, max_depth=3): + def _find_sources_recursive(cls, path: str, ext: Optional[str], + extractor_name: str, filename: str = '*', dirname: str = '', + file_filter: Optional[Callable[[str], bool]] = None, + max_depth: int = 3): + """ + Finds sources in the specified location, using the matching pattern + to filter file names and directories. + Supposed to be used, and to be the only call in subclasses. + + Paramters: + - path - a directory or file path, where sources need to be found. + - ext - file extension to match. To match directories, + set this parameter to None or ''. 
Comparison is case-independent, + a starting dot is not required. + - extractor_name - the name of the associated Extractor type + - filename - a glob pattern for file names + - dirname - a glob pattern for filename prefixes + - file_filter - a callable (abspath: str) -> bool, to filter paths found + - max_depth - the maximum depth for recursive search. + + Returns: a list of source configurations + (i.e. Extractor type names and c-tor parameters) + """ + + if ext: + if not ext.startswith('.'): + ext = '.' + ext + ext = ext.lower() - if (path.endswith(ext) and osp.isfile(path)) or \ - (not ext and osp.isdir(path) and dirname and \ - os.sep + osp.normpath(dirname) + os.sep in \ - osp.abspath(path) + os.sep): + if (ext and path.lower().endswith(ext) and osp.isfile(path)) or \ + (not ext and dirname and osp.isdir(path) and \ + os.sep + osp.normpath(dirname.lower()) + os.sep in \ + osp.abspath(path.lower()) + os.sep): sources = [{'url': path, 'format': extractor_name}] else: sources = [] @@ -691,7 +812,13 @@ def _find_sources_recursive(cls, path, ext, extractor_name, break return sources + class Transform(Extractor): + """ + A base class for dataset transformations that change dataset items + or their annotations. + """ + @staticmethod def wrap_item(item, **kwargs): return item.wrap(**kwargs) @@ -701,10 +828,6 @@ def __init__(self, extractor): self._extractor = extractor - def __iter__(self): - for item in self._extractor: - yield self.transform_item(item) - def categories(self): return self._extractor.categories() @@ -721,5 +844,19 @@ def __len__(self): self._length = len(self._extractor) return super().__len__() - def transform_item(self, item: DatasetItem) -> DatasetItem: +class ItemTransform(Transform): + def transform_item(self, item: DatasetItem) -> Optional[DatasetItem]: + """ + Returns a modified copy of the input item. + + Avoid changing and returning the input item, because it can lead to + unexpected problems. 
Use wrap_item() or item.wrap() to simplify copying. + """ + raise NotImplementedError() + + def __iter__(self): + for item in self._extractor: + item = self.transform_item(item) + if item is not None: + yield item diff --git a/datumaro/components/launcher.py b/datumaro/components/launcher.py index 8f05b36bee52..9e376b14baa0 100644 --- a/datumaro/components/launcher.py +++ b/datumaro/components/launcher.py @@ -5,8 +5,9 @@ import numpy as np -from datumaro.components.extractor import (Transform, LabelCategories, - AnnotationType) +from datumaro.components.extractor import ( + AnnotationType, LabelCategories, Transform, +) from datumaro.util import take_by diff --git a/datumaro/components/operations.py b/datumaro/components/operations.py index 002e76f62386..98c0aa289289 100644 --- a/datumaro/components/operations.py +++ b/datumaro/components/operations.py @@ -4,27 +4,32 @@ from collections import OrderedDict from copy import deepcopy +from unittest import TestCase import hashlib import logging as log +from attr import attrib, attrs import attr import cv2 import numpy as np -from attr import attrib, attrs -from unittest import TestCase from datumaro.components.cli_plugin import CliPlugin -from datumaro.util import find, filter_dict -from datumaro.components.extractor import (AnnotationType, Bbox, - CategoriesInfo, Label, - LabelCategories, PointsCategories, MaskCategories) -from datumaro.components.errors import (DatumaroError, FailedAttrVotingError, - FailedLabelVotingError, MismatchingImageInfoError, NoMatchingAnnError, - NoMatchingItemError, AnnotationsTooCloseError, WrongGroupError) from datumaro.components.dataset import Dataset, DatasetItemStorage -from datumaro.util.attrs_util import ensure_cls, default_if_none -from datumaro.util.annotation_util import (segment_iou, bbox_iou, - mean_bbox, OKS, find_instances, max_bbox, smooth_line) +from datumaro.components.errors import ( + AnnotationsTooCloseError, DatumaroError, FailedAttrVotingError, + 
FailedLabelVotingError, MismatchingImageInfoError, NoMatchingAnnError, + NoMatchingItemError, WrongGroupError, +) +from datumaro.components.extractor import ( + AnnotationType, Bbox, CategoriesInfo, Label, LabelCategories, + MaskCategories, PointsCategories, +) +from datumaro.util import filter_dict, find +from datumaro.util.annotation_util import ( + OKS, bbox_iou, find_instances, max_bbox, mean_bbox, segment_iou, + smooth_line, +) +from datumaro.util.attrs_util import default_if_none, ensure_cls def get_ann_type(anns, t): @@ -442,6 +447,8 @@ def _for_type(t, **kwargs): return _make(PointsMerger, **kwargs) elif t is AnnotationType.caption: return _make(CaptionsMerger, **kwargs) + elif t is AnnotationType.cuboid_3d: + return _make(Cuboid3dMerger, **kwargs) else: raise NotImplementedError("Type %s is not supported" % t) @@ -776,6 +783,11 @@ class CaptionsMatcher(AnnotationMatcher): def match_annotations(self, sources): raise NotImplementedError() +@attrs +class Cuboid3dMatcher(_ShapeMatcher): + @staticmethod + def distance(a, b): + raise NotImplementedError() @attrs(kw_only=True) class AnnotationMerger: @@ -818,19 +830,7 @@ class _ShapeMerger(AnnotationMerger, _ShapeMatcher): quorum = attrib(converter=int, default=0) def merge_clusters(self, clusters): - merged = [] - for cluster in clusters: - label, label_score = self.find_cluster_label(cluster) - shape, shape_score = self.merge_cluster_shape(cluster) - - shape.z_order = max(cluster, key=lambda a: a.z_order).z_order - shape.label = label - shape.attributes['score'] = label_score * shape_score \ - if label is not None else shape_score - - merged.append(shape) - - return merged + return list(map(self.merge_cluster, clusters)) def find_cluster_label(self, cluster): votes = {} @@ -861,6 +861,17 @@ def merge_cluster_shape(self, cluster): for s in cluster) / len(cluster) return shape, shape_score + def merge_cluster(self, cluster): + label, label_score = self.find_cluster_label(cluster) + shape, shape_score = 
self.merge_cluster_shape(cluster) + + shape.z_order = max(cluster, key=lambda a: a.z_order).z_order + shape.label = label + shape.attributes['score'] = label_score * shape_score \ + if label is not None else shape_score + + return shape + @attrs class BboxMerger(_ShapeMerger, BboxMatcher): pass @@ -885,6 +896,26 @@ class LineMerger(_ShapeMerger, LineMatcher): class CaptionsMerger(AnnotationMerger, CaptionsMatcher): pass +@attrs +class Cuboid3dMerger(_ShapeMerger, Cuboid3dMatcher): + @staticmethod + def _merge_cluster_shape_mean_box_nearest(cluster): + raise NotImplementedError() + # mbbox = Bbox(*mean_cuboid(cluster)) + # dist = (segment_iou(mbbox, s) for s in cluster) + # nearest_pos, _ = max(enumerate(dist), key=lambda e: e[1]) + # return cluster[nearest_pos] + + def merge_cluster(self, cluster): + label, label_score = self.find_cluster_label(cluster) + shape, shape_score = self.merge_cluster_shape(cluster) + + shape.label = label + shape.attributes['score'] = label_score * shape_score \ + if label is not None else shape_score + + return shape + def match_segments(a_segms, b_segms, distance=segment_iou, dist_thresh=1.0, label_matcher=lambda a, b: a.label == b.label): assert callable(distance), distance @@ -1292,7 +1323,8 @@ def _default_hash(item): log.warning("Item (%s, %s) has no image " "info, counted as unique", item.id, item.subset) return None - return hashlib.md5(item.image.data.tobytes()).hexdigest() + # ignore B303 (md5 check), because the hash is not used in a security context + return hashlib.md5(item.image.data.tobytes()).hexdigest() # nosec if item_hash is None: item_hash = _default_hash @@ -1508,4 +1540,4 @@ def compare_datasets(self, a, b): a_unmatched |= set(a_id for a_id, m in a_matches.items() if not m) b_unmatched |= set(b_id for b_id, m in b_matches.items() if not m) - return matched, unmatched, a_unmatched, b_unmatched, errors \ No newline at end of file + return matched, unmatched, a_unmatched, b_unmatched, errors diff --git 
a/datumaro/components/project.py b/datumaro/components/project.py index 829fde46bac8..8996ffce9281 100644 --- a/datumaro/components/project.py +++ b/datumaro/components/project.py @@ -9,11 +9,13 @@ import shutil from datumaro.components.config import Config -from datumaro.components.config_model import (Model, Source, - PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA) -from datumaro.components.dataset import (IDataset, Dataset, DEFAULT_FORMAT) -from datumaro.components.dataset_filter import (XPathAnnotationsFilter, - XPathDatasetFilter) +from datumaro.components.config_model import ( + PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA, Model, Source, +) +from datumaro.components.dataset import DEFAULT_FORMAT, Dataset, IDataset +from datumaro.components.dataset_filter import ( + XPathAnnotationsFilter, XPathDatasetFilter, +) from datumaro.components.environment import Environment from datumaro.components.errors import DatumaroError from datumaro.components.extractor import DEFAULT_SUBSET_NAME, Extractor @@ -38,7 +40,7 @@ def __len__(self): def categories(self): return self.parent.categories() - def get(self, id, subset=None): #pylint: disable=redefined-builtin + def get(self, id, subset=None): subset = subset or self.name assert subset == self.name, '%s != %s' % (subset, self.name) return super().get(id, subset) @@ -53,13 +55,17 @@ def __init__(self, project): sources = {} for s_name, source in config.sources.items(): - s_format = source.format or env.PROJECT_EXTRACTOR_NAME + s_format = source.format url = source.url if not source.url: url = osp.join(config.project_dir, config.sources_dir, s_name) - sources[s_name] = Dataset.import_from(url, - format=s_format, env=env, **source.options) + + if s_format: + sources[s_name] = Dataset.import_from(url, + format=s_format, env=env, **source.options) + else: + sources[s_name] = Project.load(url).make_dataset() self._sources = sources own_source = None @@ -91,8 +97,7 @@ def __init__(self, project): item = ExactMerge.merge_items(existing_item, 
item, path=path) else: s_config = config.sources[source_name] - if s_config and \ - s_config.format != env.PROJECT_EXTRACTOR_NAME: + if s_config and s_config.format: # NOTE: consider imported sources as our own dataset path = None else: @@ -137,22 +142,20 @@ def categories(self): def __len__(self): return sum(len(s) for s in self._subsets.values()) - def get(self, id, subset=None, \ - path=None): #pylint: disable=redefined-builtin + def get(self, id, subset=None, path=None): # pylint: disable=arguments-differ if path: source = path[0] return self._sources[source].get(id=id, subset=subset) return self._subsets.get(subset, {}).get(id) - def put(self, item, id=None, subset=None, \ - path=None): #pylint: disable=redefined-builtin + def put(self, item, id=None, subset=None, path=None): if path is None: path = item.path if path: source = path[0] # TODO: reverse remapping - self._sources[source].put(item, id=id, subset=subset) + self._sources[source].put(item, id=id, subset=subset, path=path[1:]) if id is None: id = item.id @@ -277,8 +280,7 @@ def categories(_): return self.transform(_DatasetFilter) - def export(self, save_dir: str, format, \ - **kwargs): #pylint: disable=redefined-builtin + def export(self, save_dir: str, format, **kwargs): dataset = Dataset.from_extractors(self, env=self.env) dataset.export(save_dir, format, **kwargs) @@ -415,6 +417,8 @@ def __init__(self, config=None, env=None): fallback=PROJECT_DEFAULT_CONFIG, schema=PROJECT_SCHEMA) if env is None: env = Environment(self.config) + env.models.batch_register(self.config.models) + env.sources.batch_register(self.config.sources) elif config is not None: raise ValueError("env can only be provided when no config provided") self.env = env @@ -485,6 +489,3 @@ def local_model_dir(self, model_name): def local_source_dir(self, source_name): return osp.join(self.config.sources_dir, source_name) - -def load_project_as_dataset(url): - return Project.load(url).make_dataset() diff --git 
a/datumaro/components/validator.py b/datumaro/components/validator.py index 1e910029f8a2..fa8bc4441a8d 100644 --- a/datumaro/components/validator.py +++ b/datumaro/components/validator.py @@ -2,1269 +2,72 @@ # # SPDX-License-Identifier: MIT -from copy import deepcopy -from enum import Enum -from typing import Union - -import numpy as np +from enum import Enum, auto +from typing import Dict, List from datumaro.components.dataset import IDataset -from datumaro.components.errors import (MissingLabelCategories, - MissingAnnotation, MultiLabelAnnotations, MissingAttribute, - UndefinedLabel, UndefinedAttribute, LabelDefinedButNotFound, - AttributeDefinedButNotFound, OnlyOneLabel, FewSamplesInLabel, - FewSamplesInAttribute, ImbalancedLabels, ImbalancedAttribute, - ImbalancedDistInLabel, ImbalancedDistInAttribute, - NegativeLength, InvalidValue, FarFromLabelMean, - FarFromAttrMean, OnlyOneAttributeValue) -from datumaro.components.extractor import AnnotationType, LabelCategories -from datumaro.components.cli_plugin import CliPlugin -from datumaro.util import parse_str_enum_value - - -Severity = Enum('Severity', ['warning', 'error']) - -TaskType = Enum('TaskType', ['classification', 'detection', 'segmentation']) - - -class _Validator(CliPlugin): - # statistics templates - numerical_stat_template = { - 'items_far_from_mean': {}, - 'mean': None, - 'stdev': None, - 'min': None, - 'max': None, - 'median': None, - 'histogram': { - 'bins': [], - 'counts': [], - }, - 'distribution': np.array([]) - } - - """ - A base class for task-specific validators. - - Attributes - ---------- - task_type : str or TaskType - task type (ie. classification, detection, segmentation) - - Methods - ------- - compute_statistics(dataset): - Computes various statistics of the dataset based on task type. - generate_reports(stats): - Abstract method that must be implemented in a subclass. 
- """ - - @classmethod - def build_cmdline_parser(cls, **kwargs): - parser = super().build_cmdline_parser(**kwargs) - parser.add_argument('-fs', '--few_samples_thr', default=1, type=int, - help="Threshold for giving a warning for minimum number of" - "samples per class") - parser.add_argument('-ir', '--imbalance_ratio_thr', default=50, type=int, - help="Threshold for giving data imbalance warning;" - "IR(imbalance ratio) = majority/minority") - parser.add_argument('-m', '--far_from_mean_thr', default=5.0, type=float, - help="Threshold for giving a warning that data is far from mean;" - "A constant used to define mean +/- k * standard deviation;") - parser.add_argument('-dr', '--dominance_ratio_thr', default=0.8, type=float, - help="Threshold for giving a warning for bounding box imbalance;" - "Dominace_ratio = ratio of Top-k bin to total in histogram;") - parser.add_argument('-k', '--topk_bins', default=0.1, type=float, - help="Ratio of bins with the highest number of data" - "to total bins in the histogram; [0, 1]; 0.1 = 10%;") - return parser - - def __init__(self, task_type, few_samples_thr=None, - imbalance_ratio_thr=None, far_from_mean_thr=None, - dominance_ratio_thr=None, topk_bins=None): - """ - Validator - - Parameters - --------------- - few_samples_thr: int - minimum number of samples per class - warn user when samples per class is less than threshold - imbalance_ratio_thr: int - ratio of majority attribute to minority attribute - warn user when annotations are unevenly distributed - far_from_mean_thr: float - constant used to define mean +/- m * stddev - warn user when there are too big or small values - dominance_ratio_thr: float - ratio of Top-k bin to total - warn user when dominance ratio is over threshold - topk_bins: float - ratio of selected bins with most item number to total bins - warn user when values are not evenly distributed - """ - self.task_type = parse_str_enum_value(task_type, TaskType, - default=TaskType.classification) - - if 
self.task_type == TaskType.classification: - self.ann_types = {AnnotationType.label} - self.str_ann_type = "label" - elif self.task_type == TaskType.detection: - self.ann_types = {AnnotationType.bbox} - self.str_ann_type = "bounding box" - elif self.task_type == TaskType.segmentation: - self.ann_types = {AnnotationType.mask, AnnotationType.polygon} - self.str_ann_type = "mask or polygon" - - self.few_samples_thr = few_samples_thr - self.imbalance_ratio_thr = imbalance_ratio_thr - self.far_from_mean_thr = far_from_mean_thr - self.dominance_thr = dominance_ratio_thr - self.topk_bins_ratio = topk_bins - - def _compute_common_statistics(self, dataset): - defined_attr_template = { - 'items_missing_attribute': [], - 'distribution': {} - } - undefined_attr_template = { - 'items_with_undefined_attr': [], - 'distribution': {} - } - undefined_label_template = { - 'count': 0, - 'items_with_undefined_label': [], - } - - stats = { - 'label_distribution': { - 'defined_labels': {}, - 'undefined_labels': {}, - }, - 'attribute_distribution': { - 'defined_attributes': {}, - 'undefined_attributes': {} - }, - } - stats['total_ann_count'] = 0 - stats['items_missing_annotation'] = [] - - label_dist = stats['label_distribution'] - attr_dist = stats['attribute_distribution'] - defined_label_dist = label_dist['defined_labels'] - defined_attr_dist = attr_dist['defined_attributes'] - undefined_label_dist = label_dist['undefined_labels'] - undefined_attr_dist = attr_dist['undefined_attributes'] - - label_categories = dataset.categories().get(AnnotationType.label, - LabelCategories()) - base_valid_attrs = label_categories.attributes - - for category in label_categories: - defined_label_dist[category.name] = 0 - - filtered_anns = [] - for item in dataset: - item_key = (item.id, item.subset) - annotations = [] - for ann in item.annotations: - if ann.type in self.ann_types: - annotations.append(ann) - ann_count = len(annotations) - filtered_anns.append((item_key, annotations)) - - if ann_count == 
0: - stats['items_missing_annotation'].append(item_key) - stats['total_ann_count'] += ann_count - - for ann in annotations: - if not 0 <= ann.label < len(label_categories): - label_name = ann.label - - label_stats = undefined_label_dist.setdefault( - ann.label, deepcopy(undefined_label_template)) - label_stats['items_with_undefined_label'].append( - item_key) - - label_stats['count'] += 1 - valid_attrs = set() - missing_attrs = set() - else: - label_name = label_categories[ann.label].name - defined_label_dist[label_name] += 1 - - defined_attr_stats = defined_attr_dist.setdefault( - label_name, {}) - - valid_attrs = base_valid_attrs.union( - label_categories[ann.label].attributes) - ann_attrs = getattr(ann, 'attributes', {}).keys() - missing_attrs = valid_attrs.difference(ann_attrs) - - for attr in valid_attrs: - defined_attr_stats.setdefault( - attr, deepcopy(defined_attr_template)) - - for attr in missing_attrs: - attr_dets = defined_attr_stats[attr] - attr_dets['items_missing_attribute'].append( - item_key) - - for attr, value in ann.attributes.items(): - if attr not in valid_attrs: - undefined_attr_stats = \ - undefined_attr_dist.setdefault( - label_name, {}) - attr_dets = undefined_attr_stats.setdefault( - attr, deepcopy(undefined_attr_template)) - attr_dets['items_with_undefined_attr'].append( - item_key) - else: - attr_dets = defined_attr_stats[attr] - - attr_dets['distribution'].setdefault(str(value), 0) - attr_dets['distribution'][str(value)] += 1 - - return stats, filtered_anns - - @staticmethod - def _update_prop_distributions(curr_prop_stats, target_stats): - for prop, val in curr_prop_stats.items(): - prop_stats = target_stats[prop] - prop_dist = prop_stats['distribution'] - prop_stats['distribution'] = np.append(prop_dist, val) - - @staticmethod - def _compute_prop_stats_from_dist(dist_by_label, dist_by_attr): - for label_name, stats in dist_by_label.items(): - prop_stats_list = list(stats.values()) - attr_label = dist_by_attr.get(label_name, {}) - for 
vals in attr_label.values(): - for val_stats in vals.values(): - prop_stats_list += list(val_stats.values()) - - for prop_stats in prop_stats_list: - prop_dist = prop_stats.pop('distribution', []) - if len(prop_dist) > 0: - prop_stats['mean'] = np.mean(prop_dist) - prop_stats['stdev'] = np.std(prop_dist) - prop_stats['min'] = np.min(prop_dist) - prop_stats['max'] = np.max(prop_dist) - prop_stats['median'] = np.median(prop_dist) - - counts, bins = np.histogram(prop_dist) - prop_stats['histogram']['bins'] = bins.tolist() - prop_stats['histogram']['counts'] = counts.tolist() - - def _compute_far_from_mean(self, prop_stats, val, item_key, ann): - def _far_from_mean(val, mean, stdev): - thr = self.far_from_mean_thr - return val > mean + (thr * stdev) or val < mean - (thr * stdev) - - mean = prop_stats['mean'] - stdev = prop_stats['stdev'] - - if _far_from_mean(val, mean, stdev): - items_far_from_mean = prop_stats['items_far_from_mean'] - far_from_mean = items_far_from_mean.setdefault( - item_key, {}) - far_from_mean[ann.id] = val - - def compute_statistics(self, dataset): - """ - Computes statistics of the dataset based on task type. - - Parameters - ---------- - dataset : IDataset object - - Returns - ------- - stats (dict): A dict object containing statistics of the dataset. 
- """ - return NotImplementedError - - def _check_missing_label_categories(self, stats): - validation_reports = [] - - if len(stats['label_distribution']['defined_labels']) == 0: - validation_reports += self._generate_validation_report( - MissingLabelCategories, Severity.error) - - return validation_reports - - def _check_missing_annotation(self, stats): - validation_reports = [] - - items_missing = stats['items_missing_annotation'] - for item_id, item_subset in items_missing: - validation_reports += self._generate_validation_report( - MissingAnnotation, Severity.warning, item_id, item_subset, - self.str_ann_type) - - return validation_reports - - def _check_missing_attribute(self, label_name, attr_name, attr_dets): - validation_reports = [] - - items_missing_attr = attr_dets['items_missing_attribute'] - for item_id, item_subset in items_missing_attr: - details = (item_subset, label_name, attr_name) - validation_reports += self._generate_validation_report( - MissingAttribute, Severity.warning, item_id, *details) - - return validation_reports - - def _check_undefined_label(self, label_name, label_stats): - validation_reports = [] - - items_with_undefined_label = label_stats['items_with_undefined_label'] - for item_id, item_subset in items_with_undefined_label: - details = (item_subset, label_name) - validation_reports += self._generate_validation_report( - UndefinedLabel, Severity.error, item_id, *details) - - return validation_reports - - def _check_undefined_attribute(self, label_name, attr_name, attr_dets): - validation_reports = [] - - items_with_undefined_attr = attr_dets['items_with_undefined_attr'] - for item_id, item_subset in items_with_undefined_attr: - details = (item_subset, label_name, attr_name) - validation_reports += self._generate_validation_report( - UndefinedAttribute, Severity.error, item_id, *details) - return validation_reports - def _check_label_defined_but_not_found(self, stats): - validation_reports = [] - count_by_defined_labels = 
stats['label_distribution']['defined_labels'] - labels_not_found = [label_name - for label_name, count in count_by_defined_labels.items() - if count == 0] +class Severity(Enum): + warning = auto() + error = auto() - for label_name in labels_not_found: - validation_reports += self._generate_validation_report( - LabelDefinedButNotFound, Severity.warning, label_name) - return validation_reports +class TaskType(Enum): + classification = auto() + detection = auto() + segmentation = auto() - def _check_attribute_defined_but_not_found(self, label_name, attr_stats): - validation_reports = [] - attrs_not_found = [attr_name - for attr_name, attr_dets in attr_stats.items() - if len(attr_dets['distribution']) == 0] - for attr_name in attrs_not_found: - details = (label_name, attr_name) - validation_reports += self._generate_validation_report( - AttributeDefinedButNotFound, Severity.warning, *details) - - return validation_reports - - def _check_only_one_label(self, stats): - validation_reports = [] - count_by_defined_labels = stats['label_distribution']['defined_labels'] - labels_found = [label_name - for label_name, count in count_by_defined_labels.items() - if count > 0] - - if len(labels_found) == 1: - validation_reports += self._generate_validation_report( - OnlyOneLabel, Severity.warning, labels_found[0]) - - return validation_reports - - def _check_only_one_attribute_value(self, label_name, attr_name, attr_dets): - validation_reports = [] - values = list(attr_dets['distribution'].keys()) - - if len(values) == 1: - details = (label_name, attr_name, values[0]) - validation_reports += self._generate_validation_report( - OnlyOneAttributeValue, Severity.warning, *details) - - return validation_reports - - def _check_few_samples_in_label(self, stats): - validation_reports = [] - thr = self.few_samples_thr - - defined_label_dist = stats['label_distribution']['defined_labels'] - labels_with_few_samples = [(label_name, count) - for label_name, count in defined_label_dist.items() 
- if 0 < count <= thr] - - for label_name, count in labels_with_few_samples: - validation_reports += self._generate_validation_report( - FewSamplesInLabel, Severity.warning, label_name, count) - - return validation_reports - - def _check_few_samples_in_attribute(self, label_name, - attr_name, attr_dets): - validation_reports = [] - thr = self.few_samples_thr - - attr_values_with_few_samples = [(attr_value, count) - for attr_value, count in attr_dets['distribution'].items() - if count <= thr] - - for attr_value, count in attr_values_with_few_samples: - details = (label_name, attr_name, attr_value, count) - validation_reports += self._generate_validation_report( - FewSamplesInAttribute, Severity.warning, *details) - - return validation_reports - - def _check_imbalanced_labels(self, stats): - validation_reports = [] - thr = self.imbalance_ratio_thr - - defined_label_dist = stats['label_distribution']['defined_labels'] - count_by_defined_labels = [count - for label, count in defined_label_dist.items()] - - if len(count_by_defined_labels) == 0: - return validation_reports - - count_max = np.max(count_by_defined_labels) - count_min = np.min(count_by_defined_labels) - balance = count_max / count_min if count_min > 0 else float('inf') - if balance >= thr: - validation_reports += self._generate_validation_report( - ImbalancedLabels, Severity.warning) - - return validation_reports - - def _check_imbalanced_attribute(self, label_name, attr_name, attr_dets): - validation_reports = [] - thr = self.imbalance_ratio_thr - - count_by_defined_attr = list(attr_dets['distribution'].values()) - if len(count_by_defined_attr) == 0: - return validation_reports - - count_max = np.max(count_by_defined_attr) - count_min = np.min(count_by_defined_attr) - balance = count_max / count_min if count_min > 0 else float('inf') - if balance >= thr: - validation_reports += self._generate_validation_report( - ImbalancedAttribute, Severity.warning, label_name, attr_name) - - return validation_reports - 
- def _check_imbalanced_dist_in_label(self, label_name, label_stats): - validation_reports = [] - thr = self.dominance_thr - topk_ratio = self.topk_bins_ratio - - for prop, prop_stats in label_stats.items(): - value_counts = prop_stats['histogram']['counts'] - n_bucket = len(value_counts) - if n_bucket < 2: - continue - topk = max(1, int(np.around(n_bucket * topk_ratio))) - - if topk > 0: - topk_values = np.sort(value_counts)[-topk:] - ratio = np.sum(topk_values) / np.sum(value_counts) - if ratio >= thr: - details = (label_name, f"{self.str_ann_type} {prop}") - validation_reports += self._generate_validation_report( - ImbalancedDistInLabel, Severity.warning, *details) - - return validation_reports - - def _check_imbalanced_dist_in_attr(self, label_name, attr_name, attr_stats): - validation_reports = [] - thr = self.dominance_thr - topk_ratio = self.topk_bins_ratio - - for attr_value, value_stats in attr_stats.items(): - for prop, prop_stats in value_stats.items(): - value_counts = prop_stats['histogram']['counts'] - n_bucket = len(value_counts) - if n_bucket < 2: - continue - topk = max(1, int(np.around(n_bucket * topk_ratio))) - - if topk > 0: - topk_values = np.sort(value_counts)[-topk:] - ratio = np.sum(topk_values) / np.sum(value_counts) - if ratio >= thr: - details = (label_name, attr_name, attr_value, - f"{self.str_ann_type} {prop}") - validation_reports += self._generate_validation_report( - ImbalancedDistInAttribute, - Severity.warning, - *details - ) - - return validation_reports - - def _check_invalid_value(self, stats): - validation_reports = [] - - items_w_invalid_val = stats['items_with_invalid_value'] - for item_dets, anns_w_invalid_val in items_w_invalid_val.items(): - item_id, item_subset = item_dets - for ann_id, props in anns_w_invalid_val.items(): - for prop in props: - details = (item_subset, ann_id, - f"{self.str_ann_type} {prop}") - validation_reports += self._generate_validation_report( - InvalidValue, Severity.error, item_id, *details) - - 
return validation_reports - - def _check_far_from_label_mean(self, label_name, label_stats): - validation_reports = [] - - for prop, prop_stats in label_stats.items(): - items_far_from_mean = prop_stats['items_far_from_mean'] - if prop_stats['mean'] is not None: - mean = round(prop_stats['mean'], 2) - - for item_dets, anns_far in items_far_from_mean.items(): - item_id, item_subset = item_dets - for ann_id, val in anns_far.items(): - val = round(val, 2) - details = (item_subset, label_name, ann_id, - f"{self.str_ann_type} {prop}", mean, val) - validation_reports += self._generate_validation_report( - FarFromLabelMean, Severity.warning, item_id, *details) - - return validation_reports - - def _check_far_from_attr_mean(self, label_name, attr_name, attr_stats): - validation_reports = [] - - for attr_value, value_stats in attr_stats.items(): - for prop, prop_stats in value_stats.items(): - items_far_from_mean = prop_stats['items_far_from_mean'] - if prop_stats['mean'] is not None: - mean = round(prop_stats['mean'], 2) - - for item_dets, anns_far in items_far_from_mean.items(): - item_id, item_subset = item_dets - for ann_id, val in anns_far.items(): - val = round(val, 2) - details = (item_subset, label_name, ann_id, attr_name, - attr_value, f"{self.str_ann_type} {prop}", - mean, val) - validation_reports += self._generate_validation_report( - FarFromAttrMean, - Severity.warning, - item_id, - *details - ) - - return validation_reports - - def generate_reports(self, stats): - raise NotImplementedError('Should be implemented in a subclass.') - - def _generate_validation_report(self, error, *args, **kwargs): - return [error(*args, **kwargs)] - - -class ClassificationValidator(_Validator): - """ - A validator class for classification tasks. 
- """ - - def __init__(self, few_samples_thr, imbalance_ratio_thr, - far_from_mean_thr, dominance_ratio_thr, topk_bins): - super().__init__(task_type=TaskType.classification, - few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) - - def _check_multi_label_annotations(self, stats): - validation_reports = [] - - items_with_multiple_labels = stats['items_with_multiple_labels'] - for item_id, item_subset in items_with_multiple_labels: - validation_reports += self._generate_validation_report( - MultiLabelAnnotations, Severity.error, item_id, item_subset) - - return validation_reports - - def compute_statistics(self, dataset): +class Validator: + def validate(self, dataset: IDataset) -> Dict: """ - Computes statistics of the dataset for the classification task. - - Parameters - ---------- - dataset : IDataset object - - Returns - ------- - stats (dict): A dict object containing statistics of the dataset. - """ - - stats, filtered_anns = self._compute_common_statistics(dataset) + Returns the validation results of a dataset based on task type. - stats['items_with_multiple_labels'] = [] + Args: + dataset (IDataset): Dataset to be validated - for item_key, anns in filtered_anns: - ann_count = len(anns) - if ann_count > 1: - stats['items_with_multiple_labels'].append(item_key) + Raises: + ValueError - return stats - - def generate_reports(self, stats): - """ - Validates the dataset for classification tasks based on its statistics. - - Parameters - ---------- - dataset : IDataset object - stats: Dict object - - Returns - ------- - reports (list): List of validation reports (DatasetValidationError). + Returns: + validation_results (dict): + Dict with validation statistics, reports and summary. 
""" - reports = [] - - reports += self._check_missing_label_categories(stats) - reports += self._check_missing_annotation(stats) - reports += self._check_multi_label_annotations(stats) - reports += self._check_label_defined_but_not_found(stats) - reports += self._check_only_one_label(stats) - reports += self._check_few_samples_in_label(stats) - reports += self._check_imbalanced_labels(stats) - - label_dist = stats['label_distribution'] - attr_dist = stats['attribute_distribution'] - defined_attr_dist = attr_dist['defined_attributes'] - undefined_label_dist = label_dist['undefined_labels'] - undefined_attr_dist = attr_dist['undefined_attributes'] - - defined_labels = defined_attr_dist.keys() - for label_name in defined_labels: - attr_stats = defined_attr_dist[label_name] - - reports += self._check_attribute_defined_but_not_found( - label_name, attr_stats) + validation_results = {} + if not isinstance(dataset, IDataset): + raise TypeError("Invalid dataset type '%s'" % type(dataset)) - for attr_name, attr_dets in attr_stats.items(): - reports += self._check_few_samples_in_attribute( - label_name, attr_name, attr_dets) - reports += self._check_imbalanced_attribute( - label_name, attr_name, attr_dets) - reports += self._check_only_one_attribute_value( - label_name, attr_name, attr_dets) - reports += self._check_missing_attribute( - label_name, attr_name, attr_dets) + # generate statistics + stats = self.compute_statistics(dataset) + validation_results['statistics'] = stats - for label_name, label_stats in undefined_label_dist.items(): - reports += self._check_undefined_label(label_name, label_stats) + # generate validation reports and summary + reports = self.generate_reports(stats) + reports = list(map(lambda r: r.to_dict(), reports)) - for label_name, attr_stats in undefined_attr_dist.items(): - for attr_name, attr_dets in attr_stats.items(): - reports += self._check_undefined_attribute( - label_name, attr_name, attr_dets) - - return reports - - -class 
DetectionValidator(_Validator): - """ - A validator class for detection tasks. - """ - def __init__(self, few_samples_thr, imbalance_ratio_thr, - far_from_mean_thr, dominance_ratio_thr, topk_bins): - super().__init__(task_type=TaskType.detection, - few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) - - def _check_negative_length(self, stats): - validation_reports = [] - - items_w_neg_len = stats['items_with_negative_length'] - for item_dets, anns_w_neg_len in items_w_neg_len.items(): - item_id, item_subset = item_dets - for ann_id, props in anns_w_neg_len.items(): - for prop, val in props.items(): - val = round(val, 2) - details = (item_subset, ann_id, - f"{self.str_ann_type} {prop}", val) - validation_reports += self._generate_validation_report( - NegativeLength, Severity.error, item_id, *details) - - return validation_reports - - def compute_statistics(self, dataset): - """ - Computes statistics of the dataset for the detection task. - - Parameters - ---------- - dataset : IDataset object - - Returns - ------- - stats (dict): A dict object containing statistics of the dataset. 
- """ - - stats, filtered_anns = self._compute_common_statistics(dataset) - - # detection-specific - bbox_template = { - 'width': deepcopy(self.numerical_stat_template), - 'height': deepcopy(self.numerical_stat_template), - 'area(wxh)': deepcopy(self.numerical_stat_template), - 'ratio(w/h)': deepcopy(self.numerical_stat_template), - 'short': deepcopy(self.numerical_stat_template), - 'long': deepcopy(self.numerical_stat_template) + summary = { + 'errors': sum(map(lambda r: r['severity'] == 'error', reports)), + 'warnings': sum(map(lambda r: r['severity'] == 'warning', reports)) } - stats['items_with_negative_length'] = {} - stats['items_with_invalid_value'] = {} - stats['bbox_distribution_in_label'] = {} - stats['bbox_distribution_in_attribute'] = {} - stats['bbox_distribution_in_dataset_item'] = {} - - dist_by_label = stats['bbox_distribution_in_label'] - dist_by_attr = stats['bbox_distribution_in_attribute'] - bbox_dist_in_item = stats['bbox_distribution_in_dataset_item'] - items_w_neg_len = stats['items_with_negative_length'] - items_w_invalid_val = stats['items_with_invalid_value'] - - def _generate_ann_bbox_info(_x, _y, _w, _h, area, - ratio, _short, _long): - return { - 'x': _x, - 'y': _y, - 'width': _w, - 'height': _h, - 'area(wxh)': area, - 'ratio(w/h)': ratio, - 'short': _short, - 'long': _long, - } - - def _update_bbox_stats_by_label(item_key, ann, bbox_label_stats): - bbox_has_error = False - - _x, _y, _w, _h = ann.get_bbox() - area = ann.get_area() - - if _h != 0 and _h != float('inf'): - ratio = _w / _h - else: - ratio = float('nan') - - _short = _w if _w < _h else _h - _long = _w if _w > _h else _h - - ann_bbox_info = _generate_ann_bbox_info( - _x, _y, _w, _h, area, ratio, _short, _long) - - for prop, val in ann_bbox_info.items(): - if val == float('inf') or np.isnan(val): - bbox_has_error = True - anns_w_invalid_val = items_w_invalid_val.setdefault( - item_key, {}) - invalid_props = anns_w_invalid_val.setdefault( - ann.id, []) - 
invalid_props.append(prop) - - for prop in ['width', 'height']: - val = ann_bbox_info[prop] - if val < 1: - bbox_has_error = True - anns_w_neg_len = items_w_neg_len.setdefault( - item_key, {}) - neg_props = anns_w_neg_len.setdefault(ann.id, {}) - neg_props[prop] = val - - if not bbox_has_error: - ann_bbox_info.pop('x') - ann_bbox_info.pop('y') - self._update_prop_distributions(ann_bbox_info, bbox_label_stats) - - return ann_bbox_info, bbox_has_error - - label_categories = dataset.categories().get(AnnotationType.label, - LabelCategories()) - base_valid_attrs = label_categories.attributes - - for item_key, annotations in filtered_anns: - ann_count = len(annotations) - - bbox_dist_in_item[item_key] = ann_count - - for ann in annotations: - if not 0 <= ann.label < len(label_categories): - label_name = ann.label - valid_attrs = set() - else: - label_name = label_categories[ann.label].name - valid_attrs = base_valid_attrs.union( - label_categories[ann.label].attributes) - - bbox_label_stats = dist_by_label.setdefault( - label_name, deepcopy(bbox_template)) - ann_bbox_info, bbox_has_error = \ - _update_bbox_stats_by_label( - item_key, ann, bbox_label_stats) - - for attr, value in ann.attributes.items(): - if attr in valid_attrs: - bbox_attr_label = dist_by_attr.setdefault( - label_name, {}) - bbox_attr_stats = bbox_attr_label.setdefault( - attr, {}) - bbox_val_stats = bbox_attr_stats.setdefault( - str(value), deepcopy(bbox_template)) - - if not bbox_has_error: - self._update_prop_distributions( - ann_bbox_info, bbox_val_stats) - - # Compute prop stats from distribution - self._compute_prop_stats_from_dist(dist_by_label, dist_by_attr) + validation_results['validation_reports'] = reports + validation_results['summary'] = summary - def _is_valid_ann(item_key, ann): - has_defined_label = 0 <= ann.label < len(label_categories) - if not has_defined_label: - return False + return validation_results - bbox_has_neg_len = ann.id in items_w_neg_len.get( - item_key, {}) - 
bbox_has_invalid_val = ann.id in items_w_invalid_val.get( - item_key, {}) - return not (bbox_has_neg_len or bbox_has_invalid_val) - - def _update_props_far_from_mean(item_key, ann): - valid_attrs = base_valid_attrs.union( - label_categories[ann.label].attributes) - label_name = label_categories[ann.label].name - bbox_label_stats = dist_by_label[label_name] - - _x, _y, _w, _h = ann.get_bbox() - area = ann.get_area() - ratio = _w / _h - _short = _w if _w < _h else _h - _long = _w if _w > _h else _h - - ann_bbox_info = _generate_ann_bbox_info( - _x, _y, _w, _h, area, ratio, _short, _long) - ann_bbox_info.pop('x') - ann_bbox_info.pop('y') - - for prop, val in ann_bbox_info.items(): - prop_stats = bbox_label_stats[prop] - self._compute_far_from_mean(prop_stats, val, item_key, ann) - - for attr, value in ann.attributes.items(): - if attr in valid_attrs: - bbox_attr_stats = dist_by_attr[label_name][attr] - bbox_val_stats = bbox_attr_stats[str(value)] - - for prop, val in ann_bbox_info.items(): - prop_stats = bbox_val_stats[prop] - self._compute_far_from_mean(prop_stats, val, - item_key, ann) - - for item_key, annotations in filtered_anns: - for ann in annotations: - if _is_valid_ann(item_key, ann): - _update_props_far_from_mean(item_key, ann) - - return stats - - def generate_reports(self, stats): - """ - Validates the dataset for detection tasks based on its statistics. - - Parameters - ---------- - dataset : IDataset object - stats : Dict object - - Returns - ------- - reports (list): List of validation reports (DatasetValidationError). 
- """ - - reports = [] - - reports += self._check_missing_label_categories(stats) - reports += self._check_missing_annotation(stats) - reports += self._check_label_defined_but_not_found(stats) - reports += self._check_only_one_label(stats) - reports += self._check_few_samples_in_label(stats) - reports += self._check_imbalanced_labels(stats) - reports += self._check_negative_length(stats) - reports += self._check_invalid_value(stats) - - label_dist = stats['label_distribution'] - attr_dist = stats['attribute_distribution'] - defined_attr_dist = attr_dist['defined_attributes'] - undefined_label_dist = label_dist['undefined_labels'] - undefined_attr_dist = attr_dist['undefined_attributes'] - - dist_by_label = stats['bbox_distribution_in_label'] - dist_by_attr = stats['bbox_distribution_in_attribute'] - - defined_labels = defined_attr_dist.keys() - for label_name in defined_labels: - attr_stats = defined_attr_dist[label_name] - - reports += self._check_attribute_defined_but_not_found( - label_name, attr_stats) - - for attr_name, attr_dets in attr_stats.items(): - reports += self._check_few_samples_in_attribute( - label_name, attr_name, attr_dets) - reports += self._check_imbalanced_attribute( - label_name, attr_name, attr_dets) - reports += self._check_only_one_attribute_value( - label_name, attr_name, attr_dets) - reports += self._check_missing_attribute( - label_name, attr_name, attr_dets) - - bbox_label_stats = dist_by_label[label_name] - bbox_attr_label = dist_by_attr.get(label_name, {}) - - reports += self._check_far_from_label_mean( - label_name, bbox_label_stats) - reports += self._check_imbalanced_dist_in_label( - label_name, bbox_label_stats) - - for attr_name, bbox_attr_stats in bbox_attr_label.items(): - reports += self._check_far_from_attr_mean( - label_name, attr_name, bbox_attr_stats) - reports += self._check_imbalanced_dist_in_attr( - label_name, attr_name, bbox_attr_stats) - - for label_name, label_stats in undefined_label_dist.items(): - reports += 
self._check_undefined_label(label_name, label_stats) - - for label_name, attr_stats in undefined_attr_dist.items(): - for attr_name, attr_dets in attr_stats.items(): - reports += self._check_undefined_attribute( - label_name, attr_name, attr_dets) - - return reports - - -class SegmentationValidator(_Validator): - """ - A validator class for (instance) segmentation tasks. - """ - - def __init__(self, few_samples_thr, imbalance_ratio_thr, - far_from_mean_thr, dominance_ratio_thr, topk_bins): - super().__init__(task_type=TaskType.segmentation, - few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) - - def compute_statistics(self, dataset): + def compute_statistics(self, dataset: IDataset) -> Dict: """ - Computes statistics of the dataset for the segmentation task. - - Parameters - ---------- - dataset : IDataset object - - Returns - ------- - stats (dict): A dict object containing statistics of the dataset. 
- """ - - stats, filtered_anns = self._compute_common_statistics(dataset) - - # segmentation-specific - mask_template = { - 'area': deepcopy(self.numerical_stat_template), - 'width': deepcopy(self.numerical_stat_template), - 'height': deepcopy(self.numerical_stat_template) - } - - stats['items_with_invalid_value'] = {} - stats['mask_distribution_in_label'] = {} - stats['mask_distribution_in_attribute'] = {} - stats['mask_distribution_in_dataset_item'] = {} - - dist_by_label = stats['mask_distribution_in_label'] - dist_by_attr = stats['mask_distribution_in_attribute'] - mask_dist_in_item = stats['mask_distribution_in_dataset_item'] - items_w_invalid_val = stats['items_with_invalid_value'] - - def _generate_ann_mask_info(area, _w, _h): - return { - 'area': area, - 'width': _w, - 'height': _h, - } - - def _update_mask_stats_by_label(item_key, ann, mask_label_stats): - mask_has_error = False - - _x, _y, _w, _h = ann.get_bbox() - - # Detete the following block when #226 is resolved - # https://github.com/openvinotoolkit/datumaro/issues/226 - if ann.type == AnnotationType.mask: - _w += 1 - _h += 1 - - area = ann.get_area() - - ann_mask_info = _generate_ann_mask_info(area, _w, _h) - - for prop, val in ann_mask_info.items(): - if val == float('inf') or np.isnan(val): - mask_has_error = True - anns_w_invalid_val = items_w_invalid_val.setdefault( - item_key, {}) - invalid_props = anns_w_invalid_val.setdefault( - ann.id, []) - invalid_props.append(prop) - - if not mask_has_error: - self._update_prop_distributions(ann_mask_info, mask_label_stats) - - return ann_mask_info, mask_has_error - - label_categories = dataset.categories().get(AnnotationType.label, - LabelCategories()) - base_valid_attrs = label_categories.attributes - - for item_key, annotations in filtered_anns: - ann_count = len(annotations) - mask_dist_in_item[item_key] = ann_count - - for ann in annotations: - if not 0 <= ann.label < len(label_categories): - label_name = ann.label - valid_attrs = set() - else: - 
label_name = label_categories[ann.label].name - valid_attrs = base_valid_attrs.union( - label_categories[ann.label].attributes) - - mask_label_stats = dist_by_label.setdefault( - label_name, deepcopy(mask_template)) - ann_mask_info, mask_has_error = \ - _update_mask_stats_by_label( - item_key, ann, mask_label_stats) - - for attr, value in ann.attributes.items(): - if attr in valid_attrs: - mask_attr_label = dist_by_attr.setdefault( - label_name, {}) - mask_attr_stats = mask_attr_label.setdefault( - attr, {}) - mask_val_stats = mask_attr_stats.setdefault( - str(value), deepcopy(mask_template)) - - if not mask_has_error: - self._update_prop_distributions( - ann_mask_info, mask_val_stats) - - # compute prop stats from dist. - self._compute_prop_stats_from_dist(dist_by_label, dist_by_attr) - - def _is_valid_ann(item_key, ann): - has_defined_label = 0 <= ann.label < len(label_categories) - if not has_defined_label: - return False - - mask_has_invalid_val = ann.id in items_w_invalid_val.get( - item_key, {}) - return not mask_has_invalid_val - - def _update_props_far_from_mean(item_key, ann): - valid_attrs = base_valid_attrs.union( - label_categories[ann.label].attributes) - label_name = label_categories[ann.label].name - mask_label_stats = dist_by_label[label_name] - - _x, _y, _w, _h = ann.get_bbox() - - # Detete the following block when #226 is resolved - # https://github.com/openvinotoolkit/datumaro/issues/226 - if ann.type == AnnotationType.mask: - _w += 1 - _h += 1 - area = ann.get_area() - - ann_mask_info = _generate_ann_mask_info(area, _w, _h) - - for prop, val in ann_mask_info.items(): - prop_stats = mask_label_stats[prop] - self._compute_far_from_mean(prop_stats, val, item_key, ann) - - for attr, value in ann.attributes.items(): - if attr in valid_attrs: - mask_attr_stats = dist_by_attr[label_name][attr] - mask_val_stats = mask_attr_stats[str(value)] - - for prop, val in ann_mask_info.items(): - prop_stats = mask_val_stats[prop] - 
self._compute_far_from_mean(prop_stats, val, - item_key, ann) - - for item_key, annotations in filtered_anns: - for ann in annotations: - if _is_valid_ann(item_key, ann): - _update_props_far_from_mean(item_key, ann) - - return stats - - def generate_reports(self, stats): - """ - Validates the dataset for segmentation tasks based on its statistics. + Computes statistics of the dataset based on task type. - Parameters - ---------- - dataset : IDataset object - stats : Dict object + Args: + dataset (IDataset): a dataset to be validated - Returns - ------- - reports (list): List of validation reports (DatasetValidationError). + Returns: + stats (dict): A dict object containing statistics of the dataset. """ + raise NotImplementedError("Must be implemented in a subclass") - reports = [] - - reports += self._check_missing_label_categories(stats) - reports += self._check_missing_annotation(stats) - reports += self._check_label_defined_but_not_found(stats) - reports += self._check_only_one_label(stats) - reports += self._check_few_samples_in_label(stats) - reports += self._check_imbalanced_labels(stats) - reports += self._check_invalid_value(stats) - - label_dist = stats['label_distribution'] - attr_dist = stats['attribute_distribution'] - defined_attr_dist = attr_dist['defined_attributes'] - undefined_label_dist = label_dist['undefined_labels'] - undefined_attr_dist = attr_dist['undefined_attributes'] - - dist_by_label = stats['mask_distribution_in_label'] - dist_by_attr = stats['mask_distribution_in_attribute'] - - defined_labels = defined_attr_dist.keys() - for label_name in defined_labels: - attr_stats = defined_attr_dist[label_name] - - reports += self._check_attribute_defined_but_not_found( - label_name, attr_stats) - - for attr_name, attr_dets in attr_stats.items(): - reports += self._check_few_samples_in_attribute( - label_name, attr_name, attr_dets) - reports += self._check_imbalanced_attribute( - label_name, attr_name, attr_dets) - reports += 
self._check_only_one_attribute_value( - label_name, attr_name, attr_dets) - reports += self._check_missing_attribute( - label_name, attr_name, attr_dets) - - mask_label_stats = dist_by_label[label_name] - mask_attr_label = dist_by_attr.get(label_name, {}) - - reports += self._check_far_from_label_mean( - label_name, mask_label_stats) - reports += self._check_imbalanced_dist_in_label( - label_name, mask_label_stats) - - for attr_name, mask_attr_stats in mask_attr_label.items(): - reports += self._check_far_from_attr_mean( - label_name, attr_name, mask_attr_stats) - reports += self._check_imbalanced_dist_in_attr( - label_name, attr_name, mask_attr_stats) - - for label_name, label_stats in undefined_label_dist.items(): - reports += self._check_undefined_label(label_name, label_stats) - - for label_name, attr_stats in undefined_attr_dist.items(): - for attr_name, attr_dets in attr_stats.items(): - reports += self._check_undefined_attribute( - label_name, attr_name, attr_dets) - - return reports - - -def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType], **extra_args): - """ - Returns the validation results of a dataset based on task type. - - Args: - dataset (IDataset): Dataset to be validated - task_type (str or TaskType): Type of the task - (classification, detection, segmentation) - - Raises: - ValueError - - Returns: - validation_results (dict): - Dict with validation statistics, reports and summary. 
- - """ - - few_samples_thr = extra_args['few_samples_thr'] - imbalance_ratio_thr = extra_args['imbalance_ratio_thr'] - far_from_mean_thr = extra_args['far_from_mean_thr'] - dominance_ratio_thr = extra_args['dominance_ratio_thr'] - topk_bins = extra_args['topk_bins'] - - validation_results = {} - - task_type = parse_str_enum_value(task_type, TaskType) - if task_type == TaskType.classification: - validator = ClassificationValidator(few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, - topk_bins=topk_bins) - elif task_type == TaskType.detection: - validator = DetectionValidator(few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, - topk_bins=topk_bins) - elif task_type == TaskType.segmentation: - validator = SegmentationValidator(few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, - topk_bins=topk_bins) - - if not isinstance(dataset, IDataset): - raise TypeError("Invalid dataset type '%s'" % type(dataset)) - - # generate statistics - stats = validator.compute_statistics(dataset) - validation_results['statistics'] = stats - - # generate validation reports and summary - reports = validator.generate_reports(stats) - reports = list(map(lambda r: r.to_dict(), reports)) - - summary = { - 'errors': sum(map(lambda r: r['severity'] == 'error', reports)), - 'warnings': sum(map(lambda r: r['severity'] == 'warning', reports)) - } - - validation_results['validation_reports'] = reports - validation_results['summary'] = summary - - return validation_results + def generate_reports(self, stats: Dict) -> List[Dict]: + raise NotImplementedError("Must be implemented in a subclass") diff --git a/datumaro/plugins/accuracy_checker_plugin/__init__.py 
b/datumaro/plugins/accuracy_checker_plugin/__init__.py index fdd6d29179ae..dc281deb652e 100644 --- a/datumaro/plugins/accuracy_checker_plugin/__init__.py +++ b/datumaro/plugins/accuracy_checker_plugin/__init__.py @@ -1,4 +1,3 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT - diff --git a/datumaro/plugins/accuracy_checker_plugin/details/ac.py b/datumaro/plugins/accuracy_checker_plugin/details/ac.py index b235e5784869..63ce0c5fdb7e 100644 --- a/datumaro/plugins/accuracy_checker_plugin/details/ac.py +++ b/datumaro/plugins/accuracy_checker_plugin/details/ac.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: MIT from datumaro.util.tf_util import import_tf + import_tf() # prevent TF loading and potential interpreter crash from itertools import groupby diff --git a/datumaro/plugins/accuracy_checker_plugin/details/representation.py b/datumaro/plugins/accuracy_checker_plugin/details/representation.py index 76da49ee0a56..94659e7aec5c 100644 --- a/datumaro/plugins/accuracy_checker_plugin/details/representation.py +++ b/datumaro/plugins/accuracy_checker_plugin/details/representation.py @@ -4,12 +4,14 @@ # SPDX-License-Identifier: MIT from datumaro.util.tf_util import import_tf + import_tf() # prevent TF loading and potential interpreter crash import accuracy_checker.representation as ac -import datumaro.components.extractor as dm from datumaro.util.annotation_util import softmax +import datumaro.components.extractor as dm + def import_predictions(predictions): # Convert Accuracy checker predictions to Datumaro annotations @@ -56,7 +58,3 @@ def import_prediction(pred): # - else: raise NotImplementedError("Can't convert %s" % type(pred)) - - - - diff --git a/datumaro/plugins/accuracy_checker_plugin/launcher.py b/datumaro/plugins/accuracy_checker_plugin/launcher.py index 152511083011..3ae367aeb854 100644 --- a/datumaro/plugins/accuracy_checker_plugin/launcher.py +++ 
b/datumaro/plugins/accuracy_checker_plugin/launcher.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: MIT import os.path as osp + import yaml from datumaro.components.cli_plugin import CliPlugin @@ -26,7 +27,7 @@ def build_cmdline_parser(cls, **kwargs): def __init__(self, config, model_dir=None): model_dir = model_dir or '' - with open(osp.join(model_dir, config), 'r') as f: + with open(osp.join(model_dir, config), 'r', encoding='utf-8') as f: config = yaml.safe_load(f) self._launcher = _GenericAcLauncher.from_config(config) diff --git a/datumaro/plugins/camvid_format.py b/datumaro/plugins/camvid_format.py index 0abca1751b5a..cbb483f9bded 100644 --- a/datumaro/plugins/camvid_format.py +++ b/datumaro/plugins/camvid_format.py @@ -1,20 +1,21 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from collections import OrderedDict +from enum import Enum, auto import logging as log import os import os.path as osp -from collections import OrderedDict -from enum import Enum import numpy as np from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, CompiledMask, - DatasetItem, Importer, LabelCategories, Mask, - MaskCategories, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, CompiledMask, DatasetItem, Importer, LabelCategories, Mask, + MaskCategories, SourceExtractor, +) from datumaro.util import find, str_to_bool from datumaro.util.annotation_util import make_label_id_mapping from datumaro.util.image import save_image @@ -67,7 +68,7 @@ def parse_label_map(path): return None label_map = OrderedDict() - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: for line in f: # skip empty and commented lines line = line.strip() @@ -91,7 +92,7 @@ def parse_label_map(path): return label_map def write_label_map(path, label_map): - with open(path, 'w') as f: + with open(path, 'w', encoding='utf-8') as f: for 
label_name, label_desc in label_map.items(): if label_desc: color_rgb = ' '.join(str(c) for c in label_desc) @@ -213,7 +214,9 @@ def find_sources(cls, path): file_filter=lambda p: osp.basename(p) != CamvidPath.LABELMAP_FILE) -LabelmapType = Enum('LabelmapType', ['camvid', 'source']) +class LabelmapType(Enum): + camvid = auto() + source = auto() class CamvidConverter(Converter): DEFAULT_IMAGE_EXT = CamvidPath.IMAGE_EXT diff --git a/datumaro/plugins/cifar_format.py b/datumaro/plugins/cifar_format.py index b6f3a01a2343..cb2ea030dd59 100644 --- a/datumaro/plugins/cifar_format.py +++ b/datumaro/plugins/cifar_format.py @@ -2,24 +2,29 @@ # # SPDX-License-Identifier: MIT +from collections import OrderedDict import os import os.path as osp -import pickle +import pickle # nosec - disable B403:import_pickle check import numpy as np + from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, DatasetItem, - Importer, Label, LabelCategories, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, Label, LabelCategories, + SourceExtractor, +) from datumaro.util import cast class CifarPath: BATCHES_META = 'batches.meta' + META = 'meta' TRAIN_ANNOTATION_FILE = 'data_batch_' - IMAGES_DIR = 'images' + USELESS_FILE = 'file.txt~' IMAGE_SIZE = 32 -CifarLabel = ['airplane', 'automobile', 'bird', 'cat', +Cifar10Label = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] # Support for Python version CIFAR-10/100 @@ -38,40 +43,61 @@ def __init__(self, path, subset=None): super().__init__(subset=subset) - batches_meta_file = osp.join(osp.dirname(path), CifarPath.BATCHES_META) - self._categories = self._load_categories(batches_meta_file) + self._categories = self._load_categories(osp.dirname(path)) self._items = list(self._load_items(path).values()) def _load_categories(self, path): label_cat = LabelCategories() - if osp.isfile(path): + meta_file = 
osp.join(path, CifarPath.BATCHES_META) + if not osp.isfile(meta_file): + meta_file = osp.join(path, CifarPath.META) + if osp.isfile(meta_file): + # CIFAR-10: # num_cases_per_batch: 1000 # label_names: ['airplane', 'automobile', 'bird', 'cat', 'deer', # 'dog', 'frog', 'horse', 'ship', 'truck'] # num_vis: 3072 - with open(path, 'rb') as labels_file: - data = pickle.load(labels_file) - for label in data['label_names']: - label_cat.add(label) + # CIFAR-100: + # fine_label_names: ['apple', 'aquarium_fish', 'baby', ...] + # coarse_label_names: ['aquatic_mammals', 'fish', 'flowers', ...] + with open(meta_file, 'rb') as labels_file: + data = pickle.load(labels_file) # nosec - disable B301:pickle check + labels = data.get('label_names') + if labels != None: + for label in labels: + label_cat.add(label) + else: + labels = data.get('fine_label_names') + self._coarse_labels = data.get('coarse_label_names', []) + if labels != None: + for label in labels: + label_cat.add(label) else: - for label in CifarLabel: + for label in Cifar10Label: label_cat.add(label) return { AnnotationType.label: label_cat } def _load_items(self, path): items = {} + label_cat = self._categories[AnnotationType.label] # 'batch_label': 'training batch 1 of 5' # 'data': ndarray # 'filenames': list - # 'labels': list + # CIFAR-10: 'labels': list + # CIFAR-100: 'fine_labels': list + # 'coarse_labels': list + with open(path, 'rb') as anno_file: - annotation_dict = pickle.load(anno_file) + annotation_dict = pickle.load(anno_file, encoding='latin1') # nosec - disable B301:pickle check labels = annotation_dict.get('labels', []) + coarse_labels = annotation_dict.get('coarse_labels', []) + if len(labels) == 0: + labels = annotation_dict.get('fine_labels', []) filenames = annotation_dict.get('filenames', []) images_data = annotation_dict.get('data') size = annotation_dict.get('image_sizes') @@ -89,16 +115,20 @@ def _load_items(self, path): annotations = [] if label != None: annotations.append(Label(label)) + if 0 < 
len(coarse_labels) and coarse_labels[i] != None and label_cat[label].parent == '': + label_cat[label].parent = self._coarse_labels[coarse_labels[i]] image = None if 0 < len(images_data): image = images_data[i] if size is not None and image is not None: - image = image.reshape(size[i][0], - size[i][1], 3).astype(np.uint8) + image = image.reshape(3, size[i][0], + size[i][1]).astype(np.uint8) + image = np.transpose(image, (1, 2, 0)) elif image is not None: - image = image.reshape(CifarPath.IMAGE_SIZE, - CifarPath.IMAGE_SIZE, 3).astype(np.uint8) + image = image.reshape(3, CifarPath.IMAGE_SIZE, + CifarPath.IMAGE_SIZE).astype(np.uint8) + image = np.transpose(image, (1, 2, 0)) items[item_id] = DatasetItem(id=item_id, subset=self._subset, image=image, annotations=annotations) @@ -111,7 +141,7 @@ class CifarImporter(Importer): def find_sources(cls, path): return cls._find_sources_recursive(path, '', 'cifar', file_filter=lambda p: osp.basename(p) not in - {CifarPath.BATCHES_META, CifarPath.IMAGES_DIR}) + {CifarPath.BATCHES_META, CifarPath.META, CifarPath.USELESS_FILE}) class CifarConverter(Converter): @@ -122,9 +152,20 @@ def apply(self): label_categories = self._extractor.categories()[AnnotationType.label] label_names = [] + coarse_label_names = [] for label in label_categories: label_names.append(label.name) - labels_dict = { 'label_names': label_names } + if label.parent != '' and label.parent not in coarse_label_names: + coarse_label_names.append(label.parent) + coarse_label_names.sort() + + if 0 < len(coarse_label_names): + labels_dict = { 'fine_label_names': label_names, + 'coarse_label_names': coarse_label_names } + coarse_label_names = OrderedDict({name: i for i, name in enumerate(coarse_label_names)}) + else: + labels_dict = { 'label_names': label_names } + batches_meta_file = osp.join(self._save_dir, CifarPath.BATCHES_META) with open(batches_meta_file, 'wb') as labels_file: pickle.dump(labels_dict, labels_file) @@ -132,17 +173,22 @@ def apply(self): for 
subset_name, subset in self._extractor.subsets().items(): filenames = [] labels = [] + coarse_labels = [] data = [] image_sizes = {} for item in subset: filenames.append(item.id + self._find_image_ext(item)) - anns = [a.label for a in item.annotations + anns = [a for a in item.annotations if a.type == AnnotationType.label] - label = None - if anns: - label = anns[0] - labels.append(label) + if 0 < len(anns): + labels.append(anns[0].label) + if 0 < len(coarse_label_names): + superclass = label_categories[anns[0].label].parent + coarse_labels.append(coarse_label_names[superclass]) + else: + labels.append(None) + coarse_labels.append(None) if item.has_image and self._save_images: image = item.image @@ -150,14 +196,19 @@ def apply(self): data.append(None) else: image = image.data - data.append(image.reshape(-1).astype(np.uint8)) + data.append(np.transpose(image, + (2, 0, 1)).reshape(-1).astype(np.uint8)) if image.shape[0] != CifarPath.IMAGE_SIZE or \ image.shape[1] != CifarPath.IMAGE_SIZE: image_sizes[len(data) - 1] = (image.shape[0], image.shape[1]) annotation_dict = {} annotation_dict['filenames'] = filenames - annotation_dict['labels'] = labels + if 0 < len(labels) and len(labels) == len(coarse_labels): + annotation_dict['fine_labels'] = labels + annotation_dict['coarse_labels'] = coarse_labels + else: + annotation_dict['labels'] = labels annotation_dict['data'] = np.array(data, dtype=object) if len(image_sizes): size = (CifarPath.IMAGE_SIZE, CifarPath.IMAGE_SIZE) @@ -173,8 +224,10 @@ def apply(self): num = subset_name.split('_')[1] filename = CifarPath.TRAIN_ANNOTATION_FILE + num batch_label = 'training batch %s of 5' % (num, ) - if subset_name == 'test': + elif subset_name == 'test': batch_label = 'testing batch 1 of 1' + elif subset_name == 'train': + filename = subset_name if batch_label: annotation_dict['batch_label'] = batch_label diff --git a/datumaro/plugins/cityscapes_format.py b/datumaro/plugins/cityscapes_format.py index 34aca8bd1cbf..dbee3fd17179 100644 
--- a/datumaro/plugins/cityscapes_format.py +++ b/datumaro/plugins/cityscapes_format.py @@ -1,27 +1,27 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from collections import OrderedDict +from enum import Enum, auto +from glob import iglob import logging as log import os import os.path as osp -from collections import OrderedDict -from enum import Enum -from glob import iglob import numpy as np from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, CompiledMask, - DatasetItem, Importer, LabelCategories, Mask, - MaskCategories, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, CompiledMask, DatasetItem, Importer, LabelCategories, Mask, + MaskCategories, SourceExtractor, +) from datumaro.util import str_to_bool from datumaro.util.annotation_util import make_label_id_mapping -from datumaro.util.image import save_image, load_image +from datumaro.util.image import load_image, save_image from datumaro.util.mask_tools import generate_colormap, paint_mask - CityscapesLabelMap = OrderedDict([ ('unlabeled', (0, 0, 0)), ('egovehicle', (0, 0, 0)), @@ -98,7 +98,7 @@ def parse_label_map(path): return None label_map = OrderedDict() - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: for line in f: # skip empty and commented lines line = line.strip() @@ -122,7 +122,7 @@ def parse_label_map(path): return label_map def write_label_map(path, label_map): - with open(path, 'w') as f: + with open(path, 'w', encoding='utf-8') as f: for label_name, label_desc in label_map.items(): if label_desc: color_rgb = ' '.join(str(c) for c in label_desc) @@ -200,7 +200,9 @@ def find_sources(cls, path): max_depth=1) -LabelmapType = Enum('LabelmapType', ['cityscapes', 'source']) +class LabelmapType(Enum): + cityscapes = auto() + source = auto() class CityscapesConverter(Converter): DEFAULT_IMAGE_EXT = '.png' diff --git 
a/datumaro/plugins/coco_format/converter.py b/datumaro/plugins/coco_format/converter.py index f7caa56b965d..4d444a216563 100644 --- a/datumaro/plugins/coco_format/converter.py +++ b/datumaro/plugins/coco_format/converter.py @@ -1,30 +1,35 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from enum import Enum, auto +from itertools import chain, groupby import json import logging as log -import numpy as np import os import os.path as osp -from enum import Enum -from itertools import chain, groupby +import numpy as np import pycocotools.mask as mask_utils -import datumaro.util.annotation_util as anno_tools -import datumaro.util.mask_tools as mask_tools from datumaro.components.converter import Converter -from datumaro.components.extractor import (DatasetItem, - _COORDINATE_ROUNDING_DIGITS, AnnotationType, Points) from datumaro.components.dataset import ItemStatus +from datumaro.components.extractor import ( + _COORDINATE_ROUNDING_DIGITS, AnnotationType, DatasetItem, Points, +) from datumaro.util import cast, find, str_to_bool from datumaro.util.image import save_image +import datumaro.util.annotation_util as anno_tools +import datumaro.util.mask_tools as mask_tools from .format import CocoPath, CocoTask -SegmentationMode = Enum('SegmentationMode', ['guess', 'polygons', 'mask']) + +class SegmentationMode(Enum): + guess = auto() + polygons = auto() + mask = auto() class _TaskConverter: def __init__(self, context): @@ -97,8 +102,8 @@ def write(self, path): ann['id'] = next_id next_id += 1 - with open(path, 'w') as outfile: - json.dump(self._data, outfile) + with open(path, 'w', encoding='utf-8') as outfile: + json.dump(self._data, outfile, ensure_ascii=False) @property def annotations(self): @@ -458,8 +463,8 @@ class _StuffConverter(_InstancesConverter): class _PanopticConverter(_TaskConverter): def write(self, path): - with open(path, 'w') as outfile: - json.dump(self._data, outfile) + with 
open(path, 'w', encoding='utf-8') as outfile: + json.dump(self._data, outfile, ensure_ascii=False) def save_categories(self, dataset): label_categories = dataset.categories().get(AnnotationType.label) diff --git a/datumaro/plugins/coco_format/extractor.py b/datumaro/plugins/coco_format/extractor.py index faecf79f8e28..303f0e13337a 100644 --- a/datumaro/plugins/coco_format/extractor.py +++ b/datumaro/plugins/coco_format/extractor.py @@ -11,15 +11,15 @@ from pycocotools.coco import COCO import pycocotools.mask as mask_utils -from datumaro.components.extractor import (CompiledMask, Mask, SourceExtractor, - DEFAULT_SUBSET_NAME, DatasetItem, - AnnotationType, Label, RleMask, Points, Polygon, Bbox, Caption, - LabelCategories, PointsCategories +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, Bbox, Caption, CompiledMask, + DatasetItem, Label, LabelCategories, Mask, Points, PointsCategories, + Polygon, RleMask, SourceExtractor, ) from datumaro.util.image import Image, lazy_image, load_image from datumaro.util.mask_tools import bgr2index -from .format import CocoTask, CocoPath +from .format import CocoPath, CocoTask class _CocoExtractor(SourceExtractor): @@ -61,7 +61,7 @@ def __init__(self, path, task, merge_instance_polygons=False, subset=None): def _make_subset_loader(path): # COCO API has an 'unclosed file' warning coco_api = COCO() - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: dataset = json.load(f) coco_api.dataset = dataset @@ -110,7 +110,7 @@ def _load_person_kp_categories(self, loader): @staticmethod def _load_panoptic_config(path): - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: return json.load(f) def _load_panoptic_categories(self, config): diff --git a/datumaro/plugins/coco_format/format.py b/datumaro/plugins/coco_format/format.py index 7a37bb709c63..57ae40881287 100644 --- a/datumaro/plugins/coco_format/format.py +++ b/datumaro/plugins/coco_format/format.py @@ -1,20 
+1,19 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT -from enum import Enum +from enum import Enum, auto -CocoTask = Enum('CocoTask', [ - 'instances', - 'person_keypoints', - 'captions', - 'labels', # extension, does not exist in the original COCO format - 'image_info', - 'panoptic', - 'stuff', -]) +class CocoTask(Enum): + instances = auto() + person_keypoints = auto() + captions = auto() + labels = auto() # extension, does not exist in the original COCO format + image_info = auto() + panoptic = auto() + stuff = auto() class CocoPath: IMAGES_DIR = 'images' diff --git a/datumaro/plugins/coco_format/importer.py b/datumaro/plugins/coco_format/importer.py index 8d41376ea89a..44a4c7362190 100644 --- a/datumaro/plugins/coco_format/importer.py +++ b/datumaro/plugins/coco_format/importer.py @@ -3,19 +3,19 @@ # # SPDX-License-Identifier: MIT -from collections import defaultdict from glob import glob import logging as log import os.path as osp from datumaro.components.extractor import Importer +from datumaro.util import parse_str_enum_value from datumaro.util.log_utils import logging_disabled from .format import CocoTask class CocoImporter(Importer): - _COCO_EXTRACTORS = { + _TASKS = { CocoTask.instances: 'coco_instances', CocoTask.person_keypoints: 'coco_person_keypoints', CocoTask.captions: 'coco_captions', @@ -31,7 +31,7 @@ def detect(cls, path): return len(cls.find_sources(path)) != 0 def __call__(self, path, **extra_params): - from datumaro.components.project import Project # cyclic import + from datumaro.components.project import Project # cyclic import project = Project() subsets = self.find_sources(path) @@ -65,34 +65,64 @@ def __call__(self, path, **extra_params): source_name = osp.splitext(osp.basename(ann_file))[0] project.add_source(source_name, { 'url': ann_file, - 'format': self._COCO_EXTRACTORS[ann_type], + 'format': self._TASKS[ann_type], 'options': dict(extra_params), }) return 
project - @staticmethod - def find_sources(path): - if path.endswith('.json') and osp.isfile(path): - subset_paths = [path] + @classmethod + def find_sources(cls, path): + if osp.isfile(path): + if len(cls._TASKS) == 1: + return {'': { next(iter(cls._TASKS)): path }} + + if path.endswith('.json'): + subset_paths = [path] else: subset_paths = glob(osp.join(path, '**', '*_*.json'), recursive=True) - subsets = defaultdict(dict) + subsets = {} for subset_path in subset_paths: name_parts = osp.splitext(osp.basename(subset_path))[0] \ .rsplit('_', maxsplit=1) - ann_type = name_parts[0] - try: - ann_type = CocoTask[ann_type] - except KeyError: - log.warning("Skipping '%s': unknown subset " - "type '%s', the only known are: %s" % \ - (subset_path, ann_type, - ', '.join(e.name for e in CocoTask))) + ann_type = parse_str_enum_value(name_parts[0], CocoTask, + default=None) + if ann_type not in cls._TASKS: continue + subset_name = name_parts[1] - subsets[subset_name][ann_type] = subset_path - return dict(subsets) + subsets.setdefault(subset_name, {})[ann_type] = subset_path + + return subsets + + +class CocoImageInfoImporter(CocoImporter): + _TASK = CocoTask.image_info + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } + +class CocoCaptionsImporter(CocoImporter): + _TASK = CocoTask.captions + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } + +class CocoInstancesImporter(CocoImporter): + _TASK = CocoTask.instances + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } + +class CocoPersonKeypointsImporter(CocoImporter): + _TASK = CocoTask.person_keypoints + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } + +class CocoLabelsImporter(CocoImporter): + _TASK = CocoTask.labels + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } + +class CocoPanopticImporter(CocoImporter): + _TASK = CocoTask.panoptic + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } + +class CocoStuffImporter(CocoImporter): + _TASK = CocoTask.stuff + _TASKS = { _TASK: CocoImporter._TASKS[_TASK] } diff --git 
a/datumaro/plugins/cvat_format/converter.py b/datumaro/plugins/cvat_format/converter.py index 4b8c07982bbc..0257ea3bf160 100644 --- a/datumaro/plugins/cvat_format/converter.py +++ b/datumaro/plugins/cvat_format/converter.py @@ -3,17 +3,18 @@ # # SPDX-License-Identifier: MIT -import logging as log -import os -import os.path as osp from collections import OrderedDict from itertools import chain from xml.sax.saxutils import XMLGenerator +import logging as log +import os +import os.path as osp from datumaro.components.converter import Converter from datumaro.components.dataset import ItemStatus -from datumaro.components.extractor import (AnnotationType, DatasetItem, - LabelCategories) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, LabelCategories, +) from datumaro.util import cast, pairs from .format import CvatPath @@ -370,7 +371,8 @@ def apply(self): os.makedirs(self._images_dir, exist_ok=True) for subset_name, subset in self._extractor.subsets().items(): - with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f: + with open(osp.join(self._save_dir, '%s.xml' % subset_name), + 'w', encoding='utf-8') as f: writer = _SubsetWriter(f, subset_name, subset, self) writer.write() diff --git a/datumaro/plugins/cvat_format/extractor.py b/datumaro/plugins/cvat_format/extractor.py index 466ab96a951a..cf17cbf995e1 100644 --- a/datumaro/plugins/cvat_format/extractor.py +++ b/datumaro/plugins/cvat_format/extractor.py @@ -5,11 +5,12 @@ from collections import OrderedDict import os.path as osp + from defusedxml import ElementTree -from datumaro.components.extractor import (SourceExtractor, DatasetItem, - AnnotationType, Points, Polygon, PolyLine, Bbox, Label, - LabelCategories, Importer +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Importer, Label, LabelCategories, Points, + Polygon, PolyLine, SourceExtractor, ) from datumaro.util.image import Image diff --git a/datumaro/plugins/datumaro_format/converter.py 
b/datumaro/plugins/datumaro_format/converter.py index 18d16c14286b..765ae6fd640f 100644 --- a/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/plugins/datumaro_format/converter.py @@ -1,24 +1,24 @@ - -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # pylint: disable=no-self-use import json -import numpy as np import os import os.path as osp +import numpy as np +import pycocotools.mask as mask_utils + from datumaro.components.converter import Converter from datumaro.components.dataset import ItemStatus from datumaro.components.extractor import ( - DEFAULT_SUBSET_NAME, Annotation, DatasetItem, _Shape, - Label, Mask, RleMask, Points, Polygon, PolyLine, Bbox, Caption, - LabelCategories, MaskCategories, PointsCategories + DEFAULT_SUBSET_NAME, Annotation, Bbox, Caption, Cuboid3d, DatasetItem, + Label, LabelCategories, Mask, MaskCategories, Points, PointsCategories, + Polygon, PolyLine, RleMask, _Shape, ) from datumaro.util import cast -import pycocotools.mask as mask_utils from .format import DatumaroPath @@ -66,6 +66,34 @@ def write_item(self, item): } if item.image.has_size: # avoid occasional loading item_desc['image']['size'] = item.image.size + if item.has_point_cloud: + path = item.point_cloud + if self._context._save_images: + path = self._context._make_pcd_filename(item) + self._context._save_point_cloud(item, + osp.join(self._context._pcd_dir, path)) + + item_desc['point_cloud'] = { + 'path': path + } + if item.related_images: + related_images = [{'path': img.path} for img in item.related_images] + if self._context._save_images: + related_images = [] + for img in item.related_images: + ri_desc = {} + ri_desc['path'] = osp.join(item.id, + osp.splitext(osp.basename(img.path))[0] + \ + self._context._find_image_ext(img)) + + if img.has_data: + img.save(osp.join(self._context._related_images_dir, + ri_desc['path'])) + if img.has_size: + ri_desc['size'] = img.size + 
related_images.append(ri_desc) + + item_desc['related_images'] = related_images self.items.append(item_desc) for ann in item.annotations: @@ -83,6 +111,8 @@ def write_item(self, item): converted_ann = self._convert_bbox_object(ann) elif isinstance(ann, Caption): converted_ann = self._convert_caption_object(ann) + elif isinstance(ann, Cuboid3d): + converted_ann = self._convert_cuboid_3d_object(ann) else: raise NotImplementedError() annotations.append(converted_ann) @@ -100,8 +130,9 @@ def write_categories(self, categories): self.categories[ann_type.name] = converted_desc def write(self, save_dir): - with open(osp.join(save_dir, '%s.json' % self._name), 'w') as f: - json.dump(self._data, f) + with open(osp.join(save_dir, '%s.json' % self._name), + 'w', encoding='utf-8') as f: + json.dump(self._data, f, ensure_ascii=False) def _convert_annotation(self, obj): assert isinstance(obj, Annotation) @@ -186,6 +217,16 @@ def _convert_caption_object(self, obj): }) return converted + def _convert_cuboid_3d_object(self, obj): + converted = self._convert_annotation(obj) + converted.update({ + 'label_id': cast(obj.label, int), + 'position': [float(p) for p in obj.position], + 'rotation': [float(p) for p in obj.rotation], + 'scale': [float(p) for p in obj.scale] + }) + return converted + def _convert_attribute_categories(self, attributes): return sorted(attributes) @@ -241,6 +282,10 @@ def apply(self): os.makedirs(annotations_dir, exist_ok=True) self._annotations_dir = annotations_dir + self._pcd_dir = osp.join(self._save_dir, DatumaroPath.PCD_DIR) + self._related_images_dir = osp.join(self._save_dir, + DatumaroPath.RELATED_IMAGES_DIR) + subsets = {s: _SubsetWriter(s, self) for s in self._extractor.subsets()} for subset, writer in subsets.items(): writer.write_categories(self._extractor.categories()) @@ -254,7 +299,7 @@ def apply(self): for subset, writer in subsets.items(): writer.write(annotations_dir) - def _save_image(self, item, path=None): + def _save_image(self, item, 
path=None): # pylint: disable=arguments-differ super()._save_image(item, osp.join(self._images_dir, self._make_image_filename(item))) @@ -290,4 +335,4 @@ def convert(cls, extractor, save_dir, **kwargs): DatumaroConverter.convert(extractor, save_dir=osp.join( project.config.project_dir, project.config.dataset_dir), - **kwargs) \ No newline at end of file + **kwargs) diff --git a/datumaro/plugins/datumaro_format/extractor.py b/datumaro/plugins/datumaro_format/extractor.py index 24179ef4e888..747b1620747d 100644 --- a/datumaro/plugins/datumaro_format/extractor.py +++ b/datumaro/plugins/datumaro_format/extractor.py @@ -1,14 +1,14 @@ - -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT import json import os.path as osp -from datumaro.components.extractor import (SourceExtractor, DatasetItem, - AnnotationType, Label, RleMask, Points, Polygon, PolyLine, Bbox, Caption, - LabelCategories, MaskCategories, PointsCategories, Importer +from datumaro.components.extractor import ( + AnnotationType, Bbox, Caption, Cuboid3d, DatasetItem, Importer, Label, + LabelCategories, MaskCategories, Points, PointsCategories, Polygon, + PolyLine, RleMask, SourceExtractor, ) from datumaro.util.image import Image @@ -18,17 +18,29 @@ class DatumaroExtractor(SourceExtractor): def __init__(self, path): assert osp.isfile(path), path + rootpath = '' if path.endswith(osp.join(DatumaroPath.ANNOTATIONS_DIR, osp.basename(path))): rootpath = path.rsplit(DatumaroPath.ANNOTATIONS_DIR, maxsplit=1)[0] + images_dir = '' if rootpath and osp.isdir(osp.join(rootpath, DatumaroPath.IMAGES_DIR)): images_dir = osp.join(rootpath, DatumaroPath.IMAGES_DIR) self._images_dir = images_dir + pcd_dir = '' + if rootpath and osp.isdir(osp.join(rootpath, DatumaroPath.PCD_DIR)): + pcd_dir = osp.join(rootpath, DatumaroPath.PCD_DIR) + self._pcd_dir = pcd_dir + + related_images_dir = '' + if rootpath and osp.isdir(osp.join(rootpath, 
DatumaroPath.RELATED_IMAGES_DIR)): + related_images_dir = osp.join(rootpath, DatumaroPath.RELATED_IMAGES_DIR) + self._related_images_dir = related_images_dir + super().__init__(subset=osp.splitext(osp.basename(path))[0]) - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: parsed_anns = json.load(f) self._categories = self._load_categories(parsed_anns) self._items = self._load_items(parsed_anns) @@ -81,11 +93,27 @@ def _load_items(self, parsed): image_path = osp.join(self._images_dir, image_path) image = Image(path=image_path, size=image_info.get('size')) + point_cloud = None + pcd_info = item_desc.get('point_cloud') + if pcd_info: + pcd_path = pcd_info.get('path') + point_cloud = osp.join(self._pcd_dir, pcd_path) + + related_images = None + ri_info = item_desc.get('related_images') + if ri_info: + related_images = [ + Image(size=ri.get('size'), + path=osp.join(self._related_images_dir, ri.get('path')) + ) + for ri in ri_info + ] + annotations = self._load_annotations(item_desc) item = DatasetItem(id=item_id, subset=self._subset, - annotations=annotations, image=image, - attributes=item_desc.get('attr')) + annotations=annotations, image=image, point_cloud=point_cloud, + related_images=related_images, attributes=item_desc.get('attr')) items.append(item) @@ -143,6 +171,11 @@ def _load_annotations(item): loaded.append(Caption(caption, id=ann_id, attributes=attributes, group=group)) + elif ann_type == AnnotationType.cuboid_3d: + loaded.append(Cuboid3d(ann.get('position'), + ann.get('rotation'), ann.get('scale'), label=label_id, + id=ann_id, attributes=attributes, group=group)) + else: raise NotImplementedError() diff --git a/datumaro/plugins/datumaro_format/format.py b/datumaro/plugins/datumaro_format/format.py index 501c100b0928..c24547726654 100644 --- a/datumaro/plugins/datumaro_format/format.py +++ b/datumaro/plugins/datumaro_format/format.py @@ -6,6 +6,8 @@ class DatumaroPath: IMAGES_DIR = 'images' ANNOTATIONS_DIR = 'annotations' + PCD_DIR = 
'point_clouds' + RELATED_IMAGES_DIR = 'related_images' MASKS_DIR = 'masks' IMAGE_EXT = '.jpg' diff --git a/datumaro/plugins/icdar_format/extractor.py b/datumaro/plugins/icdar_format/extractor.py index bc6f4e91c43a..7bc85d9f7f27 100644 --- a/datumaro/plugins/icdar_format/extractor.py +++ b/datumaro/plugins/icdar_format/extractor.py @@ -7,8 +7,10 @@ import numpy as np -from datumaro.components.extractor import (Bbox, Caption, DatasetItem, - Importer, Mask, MaskCategories, Polygon, SourceExtractor) +from datumaro.components.extractor import ( + Bbox, Caption, DatasetItem, Importer, Mask, MaskCategories, Polygon, + SourceExtractor, +) from datumaro.util.image import find_images from datumaro.util.mask_tools import lazy_mask diff --git a/datumaro/plugins/icdar_format/format.py b/datumaro/plugins/icdar_format/format.py index fb52a83eaf03..9f24a8275e2e 100644 --- a/datumaro/plugins/icdar_format/format.py +++ b/datumaro/plugins/icdar_format/format.py @@ -1,15 +1,14 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT -from enum import Enum +from enum import Enum, auto -IcdarTask = Enum('IcdarTask', [ - 'word_recognition', - 'text_localization', - 'text_segmentation', -]) +class IcdarTask(Enum): + word_recognition = auto() + text_localization = auto() + text_segmentation = auto() class IcdarPath: IMAGE_EXT = '.png' diff --git a/datumaro/plugins/image_dir_format.py b/datumaro/plugins/image_dir_format.py index 3cca401a43ae..c8d52f03e5ac 100644 --- a/datumaro/plugins/image_dir_format.py +++ b/datumaro/plugins/image_dir_format.py @@ -7,8 +7,8 @@ import os import os.path as osp -from datumaro.components.extractor import DatasetItem, SourceExtractor, Importer from datumaro.components.converter import Converter +from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor from datumaro.util.image import find_images diff --git a/datumaro/plugins/image_zip_format.py 
b/datumaro/plugins/image_zip_format.py new file mode 100644 index 000000000000..5b0bd5989863 --- /dev/null +++ b/datumaro/plugins/image_zip_format.py @@ -0,0 +1,114 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from enum import Enum +from zipfile import ZIP_BZIP2, ZIP_DEFLATED, ZIP_LZMA, ZIP_STORED, ZipFile +import logging as log +import os +import os.path as osp + +from datumaro.components.converter import Converter +from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor +from datumaro.util import parse_str_enum_value +from datumaro.util.image import IMAGE_EXTENSIONS, ByteImage, encode_image + + +class Compression(Enum): + ZIP_STORED = ZIP_STORED + ZIP_DEFLATED = ZIP_DEFLATED + ZIP_BZIP2 = ZIP_BZIP2 + ZIP_LZMA = ZIP_LZMA + +class ImageZipPath: + DEFAULT_ARCHIVE_NAME = 'default.zip' + DEFAULT_COMPRESSION = Compression.ZIP_STORED + +class ImageZipExtractor(SourceExtractor): + def __init__(self, url, subset=None): + super().__init__(subset=subset) + + assert url.endswith('.zip'), url + + with ZipFile(url, 'r') as zf: + for path in zf.filelist: + item_id, extension = osp.splitext(path.filename) + if extension.lower() not in IMAGE_EXTENSIONS: + continue + image = ByteImage(data=zf.read(path.filename)) + self._items.append(DatasetItem( + id=item_id, image=image, subset=self._subset + )) + +class ImageZipImporter(Importer): + @classmethod + def find_sources(cls, path): + return cls._find_sources_recursive(path, '.zip', 'image_zip') + +class ImageZipConverter(Converter): + DEFAULT_IMAGE_EXT = '.jpg' + + @staticmethod + def _get_compression_method(s): + try: + return Compression[s.upper()] + except KeyError: + import argparse + raise argparse.ArgumentTypeError() + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + + parser.add_argument('--name', type=str, + default=ImageZipPath.DEFAULT_ARCHIVE_NAME, + help="Name of output zipfile (default: %(default)s)" + 
) + + parser.add_argument('--compression', type=cls._get_compression_method, + default=ImageZipPath.DEFAULT_COMPRESSION.name, + help="Archive compression method.\nAvailable methods: {} " + "(default: %(default)s)" \ + .format(', '.join(e.name for e in Compression)) + ) + + return parser + + def __init__(self, extractor, save_dir, name=None, + compression=None, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + + if name is None: + name = ImageZipPath.DEFAULT_ARCHIVE_NAME + + compression = parse_str_enum_value(compression, Compression, + default=ImageZipPath.DEFAULT_COMPRESSION) + + self._archive_name = name + self._compression = compression.value + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + + archive_path = osp.join(self._save_dir, self._archive_name) + + if osp.exists(archive_path): + raise FileExistsError('Zip file: %s, already exist,' + 'specify archive name with --name extra argument' % archive_path) + + with ZipFile(archive_path, 'w', self._compression) as zf: + for item in self._extractor: + if item.has_image: + self._archive_image(zf, item) + else: + log.debug("Item '%s' has no image info", item.id) + + def _archive_image(self, zipfile, item): + image_name = self._make_image_filename(item) + if osp.isfile(item.image.path): + zipfile.write(item.image.path, arcname=image_name) + elif isinstance(item.image, ByteImage): + zipfile.writestr(image_name, item.image.get_bytes()) + elif item.image.has_data: + zipfile.writestr(image_name, + encode_image(item.image.data, osp.splitext(image_name)[1])) diff --git a/datumaro/plugins/imagenet_format.py b/datumaro/plugins/imagenet_format.py index 9254662d069c..b9223a33fa4f 100644 --- a/datumaro/plugins/imagenet_format.py +++ b/datumaro/plugins/imagenet_format.py @@ -6,10 +6,11 @@ import os import os.path as osp -from datumaro.components.extractor import (DatasetItem, Label, - LabelCategories, AnnotationType, SourceExtractor, Importer -) from datumaro.components.converter import Converter 
+from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, Label, LabelCategories, + SourceExtractor, +) from datumaro.util.image import find_images diff --git a/datumaro/plugins/imagenet_txt_format.py b/datumaro/plugins/imagenet_txt_format.py index 3a1578431de6..a66868d40895 100644 --- a/datumaro/plugins/imagenet_txt_format.py +++ b/datumaro/plugins/imagenet_txt_format.py @@ -6,11 +6,11 @@ import os import os.path as osp -from datumaro.components.extractor import (DatasetItem, Label, - LabelCategories, AnnotationType, SourceExtractor, Importer -) from datumaro.components.converter import Converter -from datumaro.util.image import find_images +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, Label, LabelCategories, + SourceExtractor, +) class ImagenetTxtPath: @@ -49,26 +49,22 @@ def _load_categories(self, labels): def _load_items(self, path): items = {} - image_dir = self.image_dir - if osp.isdir(image_dir): - images = { osp.splitext(osp.relpath(p, image_dir))[0]: p - for p in find_images(image_dir, recursive=True) } - else: - images = {} - with open(path, encoding='utf-8') as f: for line in f: item = line.split('\"') if 1 < len(item): if len(item) == 3: item_id = item[1] - label_ids = [int(id) for id in item[2].split()] + item = item[2].split() + image = item_id + item[0] + label_ids = [int(id) for id in item[1:]] else: raise Exception("Line %s: unexpected number " "of quotes in filename" % line) else: item = line.split() - item_id = item[0] + item_id = osp.splitext(item[0])[0] + image = item[0] label_ids = [int(id) for id in item[1:]] anno = [] @@ -79,7 +75,7 @@ def _load_items(self, path): anno.append(Label(label)) items[item_id] = DatasetItem(id=item_id, subset=self._subset, - image=images.get(item_id), annotations=anno) + image=osp.join(self.image_dir, image), annotations=anno) return items @@ -105,7 +101,11 @@ def apply(self): labels = {} for item in subset: - labels[item.id] = set(p.label for p 
in item.annotations + item_id = item.id + if 1 < len(item_id.split()): + item_id = '\"' + item_id + '\"' + item_id += self._find_image_ext(item) + labels[item_id] = set(p.label for p in item.annotations if p.type == AnnotationType.label) if self._save_images and item.has_image: @@ -113,10 +113,8 @@ def apply(self): annotation = '' for item_id, item_labels in labels.items(): - if 1 < len(item_id.split()): - item_id = '\"' + item_id + '\"' - annotation += '%s %s\n' % ( - item_id, ' '.join(str(l) for l in item_labels)) + annotation += '%s %s\n' % (item_id, + ' '.join(str(l) for l in item_labels)) with open(annotation_file, 'w', encoding='utf-8') as f: f.write(annotation) diff --git a/datumaro/plugins/kitti_format/__init__.py b/datumaro/plugins/kitti_format/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/datumaro/plugins/kitti_format/converter.py b/datumaro/plugins/kitti_format/converter.py new file mode 100644 index 000000000000..a11c4852118a --- /dev/null +++ b/datumaro/plugins/kitti_format/converter.py @@ -0,0 +1,239 @@ + +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +from enum import Enum, auto +import logging as log +import os +import os.path as osp + +import numpy as np + +from datumaro.components.converter import Converter +from datumaro.components.extractor import ( + AnnotationType, CompiledMask, LabelCategories, +) +from datumaro.util import cast, parse_str_enum_value, str_to_bool +from datumaro.util.annotation_util import make_label_id_mapping +from datumaro.util.image import save_image +from datumaro.util.mask_tools import paint_mask + +from .format import ( + KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, + make_kitti_detection_categories, parse_label_map, write_label_map, +) + + +class LabelmapType(Enum): + kitti = auto() + source = auto() + +class KittiConverter(Converter): + DEFAULT_IMAGE_EXT = KittiPath.IMAGE_EXT + + @staticmethod + def 
_split_tasks_string(s): + return [KittiTask[i.strip().lower()] for i in s.split(',')] + + @staticmethod + def _get_labelmap(s): + if osp.isfile(s): + return s + try: + return LabelmapType[s.lower()].name + except KeyError: + import argparse + raise argparse.ArgumentTypeError() + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + + parser.add_argument('--apply-colormap', type=str_to_bool, default=True, + help="Use colormap for class masks (default: %(default)s)") + parser.add_argument('--label-map', type=cls._get_labelmap, default=None, + help="Labelmap file path or one of %s" % \ + ', '.join(t.name for t in LabelmapType)) + parser.add_argument('--tasks', type=cls._split_tasks_string, + help="KITTI task filter, comma-separated list of {%s} " + "(default: all)" % ', '.join(t.name for t in KittiTask)) + return parser + + def __init__(self, extractor, save_dir, + tasks=None, apply_colormap=True, allow_attributes=True, + label_map=None, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + + assert tasks is None or isinstance(tasks, (KittiTask, list, set)) + if tasks is None: + tasks = set(KittiTask) + elif isinstance(tasks, KittiTask): + tasks = {tasks} + else: + tasks = set(parse_str_enum_value(t, KittiTask) for t in tasks) + self._tasks = tasks + + self._apply_colormap = apply_colormap + + if label_map is None: + label_map = LabelmapType.source.name + if KittiTask.segmentation in self._tasks: + self._load_categories(label_map) + elif KittiTask.detection in self._tasks: + self._load_detection_categories() + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + + for subset_name, subset in self._extractor.subsets().items(): + if KittiTask.segmentation in self._tasks: + os.makedirs(osp.join(self._save_dir, subset_name, + KittiPath.INSTANCES_DIR), exist_ok=True) + + for item in subset: + if self._save_images: + self._save_image(item, + subdir=osp.join(subset_name, KittiPath.IMAGES_DIR)) + + 
masks = [a for a in item.annotations + if a.type == AnnotationType.mask] + if masks and KittiTask.segmentation in self._tasks: + compiled_class_mask = CompiledMask.from_instance_masks(masks, + instance_labels=[self._label_id_mapping(m.label) + for m in masks]) + color_mask_path = osp.join(subset_name, + KittiPath.SEMANTIC_RGB_DIR, item.id + KittiPath.MASK_EXT) + self.save_mask(osp.join(self._save_dir, color_mask_path), + compiled_class_mask.class_mask) + + labelids_mask_path = osp.join(subset_name, + KittiPath.SEMANTIC_DIR, item.id + KittiPath.MASK_EXT) + self.save_mask(osp.join(self._save_dir, labelids_mask_path), + compiled_class_mask.class_mask, apply_colormap=False, + dtype=np.int32) + + # TODO: optimize second merging + compiled_instance_mask = CompiledMask.from_instance_masks(masks, + instance_labels=[(m.label << 8) + m.id for m in masks]) + inst_path = osp.join(subset_name, + KittiPath.INSTANCES_DIR, item.id + KittiPath.MASK_EXT) + self.save_mask(osp.join(self._save_dir, inst_path), + compiled_instance_mask.class_mask, apply_colormap=False, + dtype=np.int32) + + bboxes = [a for a in item.annotations + if a.type == AnnotationType.bbox] + if bboxes and KittiTask.detection in self._tasks: + labels_file = osp.join(self._save_dir, subset_name, + KittiPath.LABELS_DIR, '%s.txt' % item.id) + os.makedirs(osp.dirname(labels_file), exist_ok=True) + with open(labels_file, 'w', encoding='utf-8') as f: + for bbox in bboxes: + label_line = [-1] * 15 + label_line[0] = self.get_label(bbox.label) + label_line[1] = cast(bbox.attributes.get('truncated'), + float, KittiPath.DEFAULT_TRUNCATED) + label_line[2] = cast(bbox.attributes.get('occluded'), + int, KittiPath.DEFAULT_OCCLUDED) + x, y, h, w = bbox.get_bbox() + label_line[4:8] = x, y, x + h, y + w + + label_line = ' '.join(str(v) for v in label_line) + f.write('%s\n' % label_line) + + if KittiTask.segmentation in self._tasks: + self.save_label_map() + + def get_label(self, label_id): + return self._extractor. 
\ + categories()[AnnotationType.label].items[label_id].name + + def save_label_map(self): + path = osp.join(self._save_dir, KittiPath.LABELMAP_FILE) + write_label_map(path, self._label_map) + + def _load_categories(self, label_map_source): + if label_map_source == LabelmapType.kitti.name: + # use the default KITTI colormap + label_map = KittiLabelMap + + elif label_map_source == LabelmapType.source.name and \ + AnnotationType.mask not in self._extractor.categories(): + # generate colormap for input labels + labels = self._extractor.categories() \ + .get(AnnotationType.label, LabelCategories()) + label_map = OrderedDict((item.name, None) + for item in labels.items) + + elif label_map_source == LabelmapType.source.name and \ + AnnotationType.mask in self._extractor.categories(): + # use source colormap + labels = self._extractor.categories()[AnnotationType.label] + colors = self._extractor.categories()[AnnotationType.mask] + label_map = OrderedDict() + for idx, item in enumerate(labels.items): + color = colors.colormap.get(idx) + if color is not None: + label_map[item.name] = color + + elif isinstance(label_map_source, dict): + label_map = OrderedDict( + sorted(label_map_source.items(), key=lambda e: e[0])) + + elif isinstance(label_map_source, str) and osp.isfile(label_map_source): + label_map = parse_label_map(label_map_source) + + else: + raise Exception("Wrong labelmap specified, " + "expected one of %s or a file path" % \ + ', '.join(t.name for t in LabelmapType)) + + self._categories = make_kitti_categories(label_map) + self._label_map = label_map + self._label_id_mapping = self._make_label_id_map() + + def _load_detection_categories(self): + self._categories = make_kitti_detection_categories() + + def _make_label_id_map(self): + map_id, id_mapping, src_labels, dst_labels = make_label_id_mapping( + self._extractor.categories().get(AnnotationType.label), + self._categories[AnnotationType.label]) + + void_labels = [src_label for src_id, src_label in 
src_labels.items() + if src_label not in dst_labels] + if void_labels: + log.warning("The following labels are remapped to background: %s" % + ', '.join(void_labels)) + log.debug("Saving segmentations with the following label mapping: \n%s" % + '\n'.join(["#%s '%s' -> #%s '%s'" % + ( + src_id, src_label, id_mapping[src_id], + self._categories[AnnotationType.label] \ + .items[id_mapping[src_id]].name + ) + for src_id, src_label in src_labels.items() + ]) + ) + + return map_id + + def save_mask(self, path, mask, colormap=None, apply_colormap=True, + dtype=np.uint8): + if self._apply_colormap and apply_colormap: + if colormap is None: + colormap = self._categories[AnnotationType.mask].colormap + mask = paint_mask(mask, colormap) + save_image(path, mask, create_dir=True, dtype=dtype) + +class KittiSegmentationConverter(KittiConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = KittiTask.segmentation + super().__init__(*args, **kwargs) + +class KittiDetectionConverter(KittiConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = KittiTask.detection + super().__init__(*args, **kwargs) diff --git a/datumaro/plugins/kitti_format/extractor.py b/datumaro/plugins/kitti_format/extractor.py new file mode 100644 index 000000000000..747858767124 --- /dev/null +++ b/datumaro/plugins/kitti_format/extractor.py @@ -0,0 +1,115 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp + +import numpy as np + +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Mask, SourceExtractor, +) +from datumaro.util.image import find_images, load_image + +from .format import ( + KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, + make_kitti_detection_categories, parse_label_map, +) + + +class _KittiExtractor(SourceExtractor): + def __init__(self, path, task, subset=None): + assert osp.isdir(path), path + self._path = path + self._task = task + + if not subset: + subset = 
osp.splitext(osp.basename(path))[0] + self._subset = subset + super().__init__(subset=subset) + + self._categories = self._load_categories(osp.dirname(self._path)) + self._items = list(self._load_items().values()) + + def _load_categories(self, path): + if self._task == KittiTask.segmentation: + return self._load_categories_segmentation(path) + elif self._task == KittiTask.detection: + return make_kitti_detection_categories() + + def _load_categories_segmentation(self, path): + label_map = None + label_map_path = osp.join(path, KittiPath.LABELMAP_FILE) + if osp.isfile(label_map_path): + label_map = parse_label_map(label_map_path) + else: + label_map = KittiLabelMap + self._labels = [label for label in label_map] + return make_kitti_categories(label_map) + + def _load_items(self): + items = {} + + image_dir = osp.join(self._path, KittiPath.IMAGES_DIR) + for image_path in find_images(image_dir, recursive=True): + image_name = osp.relpath(image_path, image_dir) + sample_id = osp.splitext(image_name)[0] + anns = [] + + instances_path = osp.join(self._path, KittiPath.INSTANCES_DIR, + sample_id + KittiPath.MASK_EXT) + if self._task == KittiTask.segmentation and \ + osp.isfile(instances_path): + instances_mask = load_image(instances_path, dtype=np.int32) + segm_ids = np.unique(instances_mask) + for segm_id in segm_ids: + semantic_id = segm_id >> 8 + ann_id = int(segm_id % 256) + isCrowd = (ann_id == 0) + anns.append(Mask( + image=self._lazy_extract_mask(instances_mask, segm_id), + label=semantic_id, id=ann_id, + attributes={ 'is_crowd': isCrowd })) + + labels_path = osp.join(self._path, KittiPath.LABELS_DIR, + sample_id+'.txt') + if self._task == KittiTask.detection and osp.isfile(labels_path): + with open(labels_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + + for line_idx, line in enumerate(lines): + line = line.split() + assert len(line) == 15 + + x1, y1 = float(line[4]), float(line[5]) + x2, y2 = float(line[6]), float(line[7]) + + attributes = {} + 
attributes['truncated'] = float(line[1]) != 0 + attributes['occluded'] = int(line[2]) != 0 + + label_id = self.categories()[ + AnnotationType.label].find(line[0])[0] + if label_id is None: + raise Exception("Item %s: unknown label '%s'" % \ + (sample_id, line[0])) + + anns.append( + Bbox(x=x1, y=y1, w=x2-x1, h=y2-y1, id=line_idx, + attributes=attributes, label=label_id, + )) + items[sample_id] = DatasetItem(id=sample_id, subset=self._subset, + image=image_path, annotations=anns) + return items + + @staticmethod + def _lazy_extract_mask(mask, c): + return lambda: mask == c + +class KittiSegmentationExtractor(_KittiExtractor): + def __init__(self, path): + super().__init__(path, task=KittiTask.segmentation) + +class KittiDetectionExtractor(_KittiExtractor): + def __init__(self, path): + super().__init__(path, task=KittiTask.detection) \ No newline at end of file diff --git a/datumaro/plugins/kitti_format/format.py b/datumaro/plugins/kitti_format/format.py new file mode 100644 index 000000000000..5bcfdb79d8d3 --- /dev/null +++ b/datumaro/plugins/kitti_format/format.py @@ -0,0 +1,146 @@ + +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +from enum import Enum, auto + +from datumaro.components.extractor import ( + AnnotationType, LabelCategories, MaskCategories, +) +from datumaro.util.mask_tools import generate_colormap + + +class KittiTask(Enum): + segmentation = auto() + detection = auto() + +KittiLabelMap = OrderedDict([ + ('unlabeled', (0, 0, 0)), + ('egovehicle', (0, 0, 0)), + ('rectificationborder', (0, 0, 0)), + ('outofroi', (0, 0, 0)), + ('static', (0, 0, 0)), + ('dynamic', (111, 74, 0)), + ('ground', (81, 0, 81)), + ('road', (128, 64, 128)), + ('sidewalk', (244, 35, 232)), + ('parking', (250, 170, 160)), + ('railtrack', (230, 150, 140)), + ('building', (70, 70, 70)), + ('wall', (102, 102, 156)), + ('fence', (190, 153, 153)), + ('guardrail', (180, 165, 180)), + ('bridge', (150, 100, 100)), + 
('tunnel', (150, 120, 90)), + ('pole', (153, 153, 153)), + ('polegroup', (153, 153, 153)), + ('trafficlight', (250, 170, 30)), + ('trafficsign', (220, 220, 0)), + ('vegetation', (107, 142, 35)), + ('terrain', (152, 251, 152)), + ('sky', (70, 130, 180)), + ('person', (220, 20, 60)), + ('rider', (255, 0, 0)), + ('car', (0, 0, 142)), + ('truck', (0, 0, 70)), + ('bus', (0, 60, 100)), + ('caravan', (0, 0, 90)), + ('trailer', (0, 0, 110)), + ('train', (0, 80, 100)), + ('motorcycle', (0, 0, 230)), + ('bicycle', (119, 11, 32)), + ('licenseplate', (0, 0, 142)), +]) + +KittiDetectionLabel = [ + 'Car', + 'Van', + 'Truck', + 'Pedestrian', + 'Person_sitting', + 'Cyclist', + 'Tram', + 'Misc', + 'DontCare', +] + +class KittiPath: + IMAGES_DIR = 'image_2' + INSTANCES_DIR = 'instance' + LABELS_DIR = 'label_2' + SEMANTIC_RGB_DIR = 'semantic_rgb' + SEMANTIC_DIR = 'semantic' + IMAGE_EXT = '.png' + MASK_EXT = '.png' + + LABELMAP_FILE = 'label_colors.txt' + + DEFAULT_TRUNCATED = 0.0 # 0% truncated + DEFAULT_OCCLUDED = 0 # fully visible + + +def make_kitti_categories(label_map=None): + if label_map is None: + label_map = KittiLabelMap + + categories = {} + label_categories = LabelCategories() + for label in label_map: + label_categories.add(label) + categories[AnnotationType.label] = label_categories + + has_colors = any(v is not None for v in label_map.values()) + if not has_colors: # generate new colors + colormap = generate_colormap(len(label_map)) + else: # only copy defined colors + label_id = lambda label: label_categories.find(label)[0] + colormap = { label_id(name): (desc[0], desc[1], desc[2]) + for name, desc in label_map.items() } + mask_categories = MaskCategories(colormap) + mask_categories.inverse_colormap # pylint: disable=pointless-statement + categories[AnnotationType.mask] = mask_categories + return categories + +def make_kitti_detection_categories(): + categories = {} + label_categories = LabelCategories() + for label in KittiDetectionLabel: + 
label_categories.add(label) + categories[AnnotationType.label] = label_categories + return categories + +def parse_label_map(path): + label_map = OrderedDict() + with open(path, 'r', encoding='utf-8') as f: + for line in f: + # skip empty and commented lines + line = line.strip() + if not line or line and line[0] == '#': + continue + + # color, name + label_desc = line.strip().split() + + if 2 < len(label_desc): + name = label_desc[3] + color = tuple([int(c) for c in label_desc[:-1]]) + else: + name = label_desc[0] + color = None + + if name in label_map: + raise ValueError("Label '%s' is already defined" % name) + + label_map[name] = color + return label_map + +def write_label_map(path, label_map): + with open(path, 'w', encoding='utf-8') as f: + for label_name, label_desc in label_map.items(): + if label_desc: + color_rgb = ' '.join(str(c) for c in label_desc) + else: + color_rgb = '' + f.write('%s %s\n' % (color_rgb, label_name)) \ No newline at end of file diff --git a/datumaro/plugins/kitti_format/importer.py b/datumaro/plugins/kitti_format/importer.py new file mode 100644 index 000000000000..669896c98c6f --- /dev/null +++ b/datumaro/plugins/kitti_format/importer.py @@ -0,0 +1,85 @@ + +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import logging as log +import os.path as osp + +from datumaro.components.extractor import Importer +from datumaro.util.log_utils import logging_disabled + +from .format import KittiPath, KittiTask + + +class KittiImporter(Importer): + _TASKS = { + KittiTask.segmentation: ('kitti_segmentation', KittiPath.INSTANCES_DIR), + KittiTask.detection: ('kitti_detection', KittiPath.LABELS_DIR), + } + + @classmethod + def detect(cls, path): + with logging_disabled(log.WARN): + return len(cls.find_sources(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subsets = self.find_sources(path) + + if 
len(subsets) == 0: + raise Exception("Failed to find 'kitti' dataset at '%s'" % path) + + # TODO: should be removed when proper label merging is implemented + conflicting_types = {KittiTask.segmentation, KittiTask.detection} + ann_types = set(t for s in subsets.values() for t in s) \ + & conflicting_types + if 1 <= len(ann_types): + selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0] + if 1 < len(ann_types): + log.warning("Not implemented: " + "Found potentially conflicting source types with labels: %s. " + "Only one type will be used: %s" \ + % (", ".join(t.name for t in ann_types), selected_ann_type.name)) + + for ann_files in subsets.values(): + for ann_type, ann_file in ann_files.items(): + if ann_type in conflicting_types: + if ann_type is not selected_ann_type: + log.warning("Not implemented: " + "conflicting source '%s' is skipped." % ann_file) + continue + log.info("Found a dataset at '%s'" % ann_file) + + source_name = osp.splitext(osp.basename(ann_file))[0] + project.add_source(source_name, { + 'url': ann_file, + 'format': ann_type, + 'options': dict(extra_params), + }) + + return project + + @classmethod + def find_sources(cls, path): + subsets = {} + + for extractor_type, task_dir in cls._TASKS.values(): + subset_paths = glob(osp.join(path, '**', task_dir), recursive=True) + for subset_path in subset_paths: + path = osp.normpath(osp.join(subset_path, "..")) + subset_name = osp.splitext(osp.basename(path))[0] + subsets.setdefault(subset_name, {})[extractor_type] = path + + return subsets + +class KittiDetectionImporter(KittiImporter): + _TASK = KittiTask.detection + _TASKS = { _TASK: KittiImporter._TASKS[_TASK] } + +class KittiSegmentationImporter(KittiImporter): + _TASK = KittiTask.segmentation + _TASKS = { _TASK: KittiImporter._TASKS[_TASK] } \ No newline at end of file diff --git a/datumaro/plugins/kitti_raw_format/__init__.py b/datumaro/plugins/kitti_raw_format/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git 
a/datumaro/plugins/kitti_raw_format/converter.py b/datumaro/plugins/kitti_raw_format/converter.py new file mode 100644 index 000000000000..dce1561ee721 --- /dev/null +++ b/datumaro/plugins/kitti_raw_format/converter.py @@ -0,0 +1,472 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from copy import deepcopy +from xml.sax.saxutils import XMLGenerator +import logging as log +import os +import os.path as osp + +from datumaro.components.converter import Converter +from datumaro.components.dataset import ItemStatus +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, LabelCategories, +) +from datumaro.util import cast +from datumaro.util.image import find_images + +from .format import KittiRawPath, OcclusionStates, PoseStates, TruncationStates + + +class _XmlAnnotationWriter: + # Format constants + _tracking_level = 0 + + _tracklets_class_id = 0 + _tracklets_version = 0 + + _tracklet_class_id = 1 + _tracklet_version = 1 + + _poses_class_id = 2 + _poses_version = 0 + + _pose_class_id = 3 + _pose_version = 1 + + # XML headers + _header = """""" + _doctype = "" + + def __init__(self, file, tracklets): + self._file = file + self._tracklets = tracklets + + self._xmlgen = XMLGenerator(self._file, encoding='utf-8') + self._level = 0 + + # See reference for section headers here: + # https://www.boost.org/doc/libs/1_40_0/libs/serialization/doc/traits.html + # XML archives have regular structure, so we only include headers once + self._add_tracklet_header = True + self._add_poses_header = True + self._add_pose_header = True + + def _indent(self, newline=True): + if newline: + self._xmlgen.ignorableWhitespace("\n") + self._xmlgen.ignorableWhitespace(" " * self._level) + + def _add_headers(self): + self._file.write(self._header) + + self._indent(newline=True) + self._file.write(self._doctype) + + def _open_serialization(self): + self._indent(newline=True) + self._xmlgen.startElement("boost_serialization", { + "version": "9", 
"signature": "serialization::archive" + }) + + def _close_serialization(self): + self._indent(newline=True) + self._xmlgen.endElement("boost_serialization") + + def _add_count(self, count): + self._indent(newline=True) + self._xmlgen.startElement("count", {}) + self._xmlgen.characters(str(count)) + self._xmlgen.endElement("count") + + def _add_item_version(self, version): + self._indent(newline=True) + self._xmlgen.startElement("item_version", {}) + self._xmlgen.characters(str(version)) + self._xmlgen.endElement("item_version") + + def _open_tracklets(self, tracklets): + self._indent(newline=True) + self._xmlgen.startElement("tracklets", { + "version": str(self._tracklets_version), + "tracking_level": str(self._tracking_level), + "class_id": str(self._tracklets_class_id), + }) + self._level += 1 + self._add_count(len(tracklets)) + self._add_item_version(self._tracklet_version) + + def _close_tracklets(self): + self._level -= 1 + self._indent(newline=True) + self._xmlgen.endElement("tracklets") + + def _open_tracklet(self): + self._indent(newline=True) + if self._add_tracklet_header: + self._xmlgen.startElement("item", { + "version": str(self._tracklet_class_id), + "tracking_level": str(self._tracking_level), + "class_id": str(self._tracklet_class_id), + }) + self._add_tracklet_header = False + else: + self._xmlgen.startElement("item", {}) + self._level += 1 + + def _close_tracklet(self): + self._level -= 1 + self._indent(newline=True) + self._xmlgen.endElement("item") + + def _add_tracklet(self, tracklet): + self._open_tracklet() + + for key, value in tracklet.items(): + if key == "poses": + self._add_poses(value) + elif key == "attributes": + self._add_attributes(value) + else: + self._indent(newline=True) + self._xmlgen.startElement(key, {}) + self._xmlgen.characters(str(value)) + self._xmlgen.endElement(key) + + self._close_tracklet() + + def _open_poses(self, poses): + self._indent(newline=True) + if self._add_poses_header: + self._xmlgen.startElement("poses", 
{ + "version": str(self._poses_version), + "tracking_level": str(self._tracking_level), + "class_id": str(self._poses_class_id), + }) + self._add_poses_header = False + else: + self._xmlgen.startElement("poses", {}) + self._level += 1 + + self._add_count(len(poses)) + self._add_item_version(self._poses_version) + + def _close_poses(self): + self._level -= 1 + self._indent(newline=True) + self._xmlgen.endElement("poses") + + def _add_poses(self, poses): + self._open_poses(poses) + + for pose in poses: + self._add_pose(pose) + + self._close_poses() + + def _open_pose(self): + self._indent(newline=True) + if self._add_pose_header: + self._xmlgen.startElement("item", { + "version": str(self._pose_version), + "tracking_level": str(self._tracking_level), + "class_id": str(self._pose_class_id), + }) + self._add_pose_header = False + else: + self._xmlgen.startElement("item", {}) + self._level += 1 + + def _close_pose(self): + self._level -= 1 + self._indent(newline=True) + self._xmlgen.endElement("item") + + def _add_pose(self, pose): + self._open_pose() + + for key, value in pose.items(): + if key == 'attributes': + self._add_attributes(value) + elif key != 'frame_id': + self._indent(newline=True) + self._xmlgen.startElement(key, {}) + self._xmlgen.characters(str(value)) + self._xmlgen.endElement(key) + + self._close_pose() + + def _open_attributes(self): + self._indent(newline=True) + self._xmlgen.startElement("attributes", {}) + self._level += 1 + + def _close_attributes(self): + self._level -= 1 + self._indent(newline=True) + self._xmlgen.endElement("attributes") + + def _add_attributes(self, attributes): + self._open_attributes() + + for name, value in attributes.items(): + self._add_attribute(name, value) + + self._close_attributes() + + def _open_attribute(self): + self._indent(newline=True) + self._xmlgen.startElement("attribute", {}) + self._level += 1 + + def _close_attribute(self): + self._level -= 1 + self._indent(newline=True) + 
self._xmlgen.endElement("attribute") + + def _add_attribute(self, name, value): + self._open_attribute() + + self._indent(newline=True) + self._xmlgen.startElement("name", {}) + self._xmlgen.characters(name) + self._xmlgen.endElement("name") + + self._xmlgen.startElement("value", {}) + self._xmlgen.characters(str(value)) + self._xmlgen.endElement("value") + + self._close_attribute() + + def write(self): + self._add_headers() + self._open_serialization() + + self._open_tracklets(self._tracklets) + + for tracklet in self._tracklets: + self._add_tracklet(tracklet) + + self._close_tracklets() + + self._close_serialization() + + +class KittiRawConverter(Converter): + DEFAULT_IMAGE_EXT = ".jpg" + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--reindex', action='store_true', + help="Assign new indices to frames and tracks. " + "Allows annotations without 'track_id' (default: %(default)s)") + parser.add_argument('--allow-attrs', action='store_true', + help="Allow writing annotation attributes (default: %(default)s)") + return parser + + def __init__(self, extractor, save_dir, reindex=False, + allow_attrs=False, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + + self._reindex = reindex + self._builtin_attrs = \ + KittiRawPath.BUILTIN_ATTRS | KittiRawPath.SPECIAL_ATTRS + self._allow_attrs = allow_attrs + + def _create_tracklets(self, subset): + tracks = {} # track_id -> track + name_mapping = {} # frame_id -> name + + for frame_id, item in enumerate(subset): + frame_id = self._write_item(item, frame_id) + + if frame_id in name_mapping: + raise Exception( + "Item %s: frame id %s is repeated in the dataset" % \ + (item.id, frame_id)) + name_mapping[frame_id] = item.id + + for ann in item.annotations: + if ann.type != AnnotationType.cuboid_3d: + continue + + if ann.label is None: + log.warning("Item %s: skipping a %s%s with no label", + item.id, ann.type.name, + '(#%s) ' % ann.id 
if ann.id is not None else '') + continue + + label = self._get_label(ann.label).name + + track_id = cast(ann.attributes.get('track_id'), int, None) + if self._reindex and track_id is None: + # In this format, track id is not used for anything except + # annotation grouping. So we only need to pick a definitely + # unused id. A negative one, for example. + track_id = -(len(tracks) + 1) + if track_id is None: + raise Exception("Item %s: expected track annotations " + "having 'track_id' (integer) attribute. " + "Use --reindex to export single shapes." % item.id) + + track = tracks.get(track_id) + if not track: + track = { + "objectType": label, + "h": ann.scale[1], + "w": ann.scale[0], + "l": ann.scale[2], + "first_frame": frame_id, + "poses": [], + "finished": 1 # keep last + } + tracks[track_id] = track + else: + if [track['w'], track['h'], track['l']] != ann.scale: + # Tracks have fixed scale in the format + raise Exception("Item %s: mismatching track shapes, " \ + "track id %s" % (item.id, track_id)) + + if track['objectType'] != label: + raise Exception("Item %s: mismatching track labels, " \ + "track id %s: %s vs. 
%s" % \ + (item.id, track_id, track['objectType'], label)) + + # If there is a skip in track frames, add missing as outside + if frame_id != track['poses'][-1]['frame_id'] + 1: + last_key_pose = track['poses'][-1] + last_keyframe_id = last_key_pose['frame_id'] + last_key_pose['occlusion_kf'] = 1 + for i in range(last_keyframe_id + 1, frame_id): + pose = deepcopy(last_key_pose) + pose['occlusion'] = OcclusionStates.OCCLUSION_UNSET + pose['truncation'] = TruncationStates.OUT_IMAGE + pose['frame_id'] = i + track['poses'].append(pose) + + occlusion = OcclusionStates.VISIBLE + if 'occlusion' in ann.attributes: + occlusion = OcclusionStates( + ann.attributes['occlusion'].upper()) + elif 'occluded' in ann.attributes: + if ann.attributes['occluded']: + occlusion = OcclusionStates.PARTLY + + truncation = TruncationStates.IN_IMAGE + if 'truncation' in ann.attributes: + truncation = TruncationStates( + ann.attributes['truncation'].upper()) + + pose = { + "tx": ann.position[0], + "ty": ann.position[1], + "tz": ann.position[2], + "rx": ann.rotation[0], + "ry": ann.rotation[1], + "rz": ann.rotation[2], + "state": PoseStates.LABELED.value, + "occlusion": occlusion.value, + "occlusion_kf": \ + int(ann.attributes.get("keyframe", False) == True), + "truncation": truncation.value, + "amt_occlusion": -1, + "amt_border_l": -1, + "amt_border_r": -1, + "amt_occlusion_kf": -1, + "amt_border_kf": -1, + "frame_id": frame_id, + } + + if self._allow_attrs: + attributes = {} + for name, value in ann.attributes.items(): + if name in self._builtin_attrs: + continue + + if isinstance(value, bool): + value = 'true' if value else 'false' + attributes[name] = value + + pose["attributes"] = attributes + + track["poses"].append(pose) + + self._write_name_mapping(name_mapping) + + return [e[1] for e in sorted(tracks.items(), key=lambda e: e[0])] + + def _write_name_mapping(self, name_mapping): + with open(osp.join(self._save_dir, KittiRawPath.NAME_MAPPING_FILE), + 'w', encoding='utf-8') as f: + 
f.writelines('%s %s\n' % (frame_id, name) + for frame_id, name in name_mapping.items()) + + def _get_label(self, label_id): + if label_id is None: + return "" + label_cat = self._extractor.categories().get( + AnnotationType.label, LabelCategories()) + return label_cat.items[label_id] + + def _write_item(self, item, index): + if not self._reindex: + index = cast(item.attributes.get('frame'), int, index) + + if self._save_images: + if item.has_point_cloud: + self._save_point_cloud(item, subdir=KittiRawPath.PCD_DIR) + + images = sorted(item.related_images, key=lambda img: img.path) + for i, image in enumerate(images): + if image.has_data: + image.save(osp.join(self._save_dir, + KittiRawPath.IMG_DIR_PREFIX + ('%02d' % i), 'data', + item.id + self._find_image_ext(image))) + + else: + log.debug("Item '%s' has no image info", item.id) + + return index + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + + if 1 < len(self._extractor.subsets()): + log.warning("Kitti RAW format supports only a single" + "subset. 
Subset information will be ignored on export.") + + tracklets = self._create_tracklets(self._extractor) + with open(osp.join(self._save_dir, KittiRawPath.ANNO_FILE), + 'w', encoding='utf-8') as f: + writer = _XmlAnnotationWriter(f, tracklets) + writer.write() + + @classmethod + def patch(cls, dataset, patch, save_dir, **kwargs): + conv = cls(patch.as_dataset(dataset), save_dir=save_dir, **kwargs) + conv.apply() + + pcd_dir = osp.abspath(osp.join(save_dir, KittiRawPath.PCD_DIR)) + for (item_id, subset), status in patch.updated_items.items(): + if status != ItemStatus.removed: + item = patch.data.get(item_id, subset) + else: + item = DatasetItem(item_id, subset=subset) + + if not (status == ItemStatus.removed or not item.has_point_cloud): + continue + + pcd_path = osp.join(pcd_dir, conv._make_pcd_filename(item)) + if osp.isfile(pcd_path): + os.unlink(pcd_path) + + for d in os.listdir(save_dir): + image_dir = osp.join(save_dir, d, 'data', osp.dirname(item.id)) + if d.startswith(KittiRawPath.IMG_DIR_PREFIX) and \ + osp.isdir(image_dir): + for p in find_images(image_dir): + if osp.splitext(osp.basename(p))[0] == \ + osp.basename(item.id): + os.unlink(p) diff --git a/datumaro/plugins/kitti_raw_format/extractor.py b/datumaro/plugins/kitti_raw_format/extractor.py new file mode 100644 index 000000000000..179f83c0392d --- /dev/null +++ b/datumaro/plugins/kitti_raw_format/extractor.py @@ -0,0 +1,263 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os +import os.path as osp + +from defusedxml import ElementTree as ET + +from datumaro.components.extractor import ( + AnnotationType, Cuboid3d, DatasetItem, Importer, LabelCategories, + SourceExtractor, +) +from datumaro.util import cast +from datumaro.util.image import find_images + +from .format import KittiRawPath, OcclusionStates, TruncationStates + + +class KittiRawExtractor(SourceExtractor): + # http://www.cvlibs.net/datasets/kitti/raw_data.php + # 
https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_raw_data.zip + # Check cpp header implementation for field meaning + + def __init__(self, path, subset=None): + assert osp.isfile(path), path + self._rootdir = osp.dirname(path) + + super().__init__(subset=subset) + + items, categories = self._parse(path) + self._items = list(self._load_items(items).values()) + self._categories = categories + + @classmethod + def _parse(cls, path): + tracks = [] + track = None + shape = None + attr = None + labels = {} + point_tags = {'tx', 'ty', 'tz', 'rx', 'ry', 'rz'} + + # Can fail with "XML declaration not well-formed" on documents with + # + # ^^^^ + # (like the original Kitti dataset), while + # + # ^^^ + # works. + tree = ET.iterparse(path, events=("start", "end")) + for ev, elem in tree: + if ev == "start": + if elem.tag == 'item': + if track is None: + track = { + 'shapes': [], + 'scale': {}, + 'label': None, + 'attributes': {}, + 'start_frame': None, + 'length': None, + } + else: + shape = { + 'points': {}, + 'attributes': {}, + 'occluded': None, + 'occluded_kf': False, + 'truncated': None, + } + + elif elem.tag == 'attribute': + attr = {} + + elif ev == "end": + if elem.tag == 'item': + assert track is not None + + if shape: + track['shapes'].append(shape) + shape = None + else: + assert track['length'] == len(track['shapes']) + + if track['label']: + labels.setdefault(track['label'], set()) + + for a in track['attributes']: + labels[track['label']].add(a) + + for s in track['shapes']: + for a in s['attributes']: + labels[track['label']].add(a) + + tracks.append(track) + track = None + + # track tags + elif track and elem.tag == 'objectType': + track['label'] = elem.text + elif track and elem.tag in {'h', 'w', 'l'}: + track['scale'][elem.tag] = float(elem.text) + elif track and elem.tag == 'first_frame': + track['start_frame'] = int(elem.text) + elif track and elem.tag == 'count' and track: + track['length'] = int(elem.text) + + # pose tags + elif shape and elem.tag 
in point_tags: + shape['points'][elem.tag] = float(elem.text) + elif shape and elem.tag == 'occlusion': + shape['occluded'] = OcclusionStates(int(elem.text)) + elif shape and elem.tag == 'occlusion_kf': + shape['occluded_kf'] = elem.text == '1' + elif shape and elem.tag == 'truncation': + shape['truncated'] = TruncationStates(int(elem.text)) + + # common tags + elif attr is not None and elem.tag == 'name': + if not elem.text: + raise ValueError("Attribute name can't be empty") + attr['name'] = elem.text + elif attr is not None and elem.tag == 'value': + attr['value'] = elem.text or '' + elif attr is not None and elem.tag == 'attribute': + if shape: + shape['attributes'][attr['name']] = attr['value'] + else: + track['attributes'][attr['name']] = attr['value'] + attr = None + + if track is not None or shape is not None or attr is not None: + raise Exception("Failed to parse anotations from '%s'" % path) + + special_attrs = KittiRawPath.SPECIAL_ATTRS + common_attrs = ['occluded'] + label_cat = LabelCategories(attributes=common_attrs) + for label, attrs in sorted(labels.items(), key=lambda e: e[0]): + label_cat.add(label, attributes=set(attrs) - special_attrs) + + categories = {AnnotationType.label: label_cat} + + items = {} + for idx, track in enumerate(tracks): + track_id = idx + 1 + for i, ann in enumerate( + cls._parse_track(track_id, track, categories)): + frame_desc = items.setdefault(track['start_frame'] + i, + {'annotations': []}) + frame_desc['annotations'].append(ann) + + return items, categories + + @classmethod + def _parse_attr(cls, value): + if value == 'true': + return True + elif value == 'false': + return False + elif str(cast(value, int, 0)) == value: + return int(value) + elif str(cast(value, float, 0)) == value: + return float(value) + else: + return value + + @classmethod + def _parse_track(cls, track_id, track, categories): + common_attrs = { k: cls._parse_attr(v) + for k, v in track['attributes'].items() } + scale = [track['scale'][k] for k in 
['w', 'h', 'l']] + label = categories[AnnotationType.label].find(track['label'])[0] + + kf_occluded = False + for shape in track['shapes']: + occluded = shape['occluded'] in { + OcclusionStates.FULLY, OcclusionStates.PARTLY} + if shape['occluded_kf']: + kf_occluded = occluded + elif shape['occluded'] == OcclusionStates.OCCLUSION_UNSET: + occluded = kf_occluded + + if shape['truncated'] in {TruncationStates.OUT_IMAGE, + TruncationStates.BEHIND_IMAGE}: + # skip these frames + continue + + local_attrs = { k: cls._parse_attr(v) + for k, v in shape['attributes'].items() } + local_attrs['occluded'] = occluded + local_attrs['track_id'] = track_id + attrs = dict(common_attrs) + attrs.update(local_attrs) + + position = [shape['points'][k] for k in ['tx', 'ty', 'tz']] + rotation = [shape['points'][k] for k in ['rx', 'ry', 'rz']] + + yield Cuboid3d(position, rotation, scale, label=label, + attributes=attrs) + + @staticmethod + def _parse_name_mapping(path): + rootdir = osp.dirname(path) + + name_mapping = {} + if osp.isfile(path): + with open(path, encoding='utf-8') as f: + for line in f: + line = line.strip() + if not line or line.startswith('#'): + continue + + idx, path = line.split(maxsplit=1) + path = osp.abspath(osp.join(rootdir, path)) + assert path.startswith(rootdir), path + path = osp.relpath(path, rootdir) + name_mapping[int(idx)] = path + + return name_mapping + + def _load_items(self, parsed): + images = {} + for d in os.listdir(self._rootdir): + image_dir = osp.join(self._rootdir, d, 'data') + if not (d.lower().startswith(KittiRawPath.IMG_DIR_PREFIX) and \ + osp.isdir(image_dir)): + continue + + for p in find_images(image_dir, recursive=True): + image_name = osp.splitext(osp.relpath(p, image_dir))[0] + images.setdefault(image_name, []).append(p) + + name_mapping = self._parse_name_mapping( + osp.join(self._rootdir, KittiRawPath.NAME_MAPPING_FILE)) + + items = {} + for frame_id, item_desc in parsed.items(): + name = name_mapping.get(frame_id, '%010d' % 
int(frame_id)) + items[frame_id] = DatasetItem(id=name, subset=self._subset, + point_cloud=osp.join(self._rootdir, + KittiRawPath.PCD_DIR, name + '.pcd'), + related_images=sorted(images.get(name, [])), + annotations=item_desc.get('annotations'), + attributes={'frame': int(frame_id)}) + + for frame_id, name in name_mapping.items(): + if frame_id in items: + continue + + items[frame_id] = DatasetItem(id=name, subset=self._subset, + point_cloud=osp.join(self._rootdir, + KittiRawPath.PCD_DIR, name + '.pcd'), + related_images=sorted(images.get(name, [])), + attributes={'frame': int(frame_id)}) + + return items + + +class KittiRawImporter(Importer): + @classmethod + def find_sources(cls, path): + return cls._find_sources_recursive(path, '.xml', 'kitti_raw') diff --git a/datumaro/plugins/kitti_raw_format/format.py b/datumaro/plugins/kitti_raw_format/format.py new file mode 100644 index 000000000000..387e63e7ea5b --- /dev/null +++ b/datumaro/plugins/kitti_raw_format/format.py @@ -0,0 +1,33 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from enum import Enum +import os.path as osp + + +class KittiRawPath: + PCD_DIR = osp.join('velodyne_points', 'data') + IMG_DIR_PREFIX = 'image_' + BUILTIN_ATTRS = {'occluded', 'truncation', 'occlusion'} + SPECIAL_ATTRS = {'track_id', } + ANNO_FILE = 'tracklet_labels.xml' + NAME_MAPPING_FILE = 'frame_list.txt' + +class PoseStates(Enum): + UNSET = 0 + INTERP = 1 + LABELED = 2 + +class OcclusionStates(Enum): + OCCLUSION_UNSET = -1 + VISIBLE = 0 + PARTLY = 1 + FULLY = 2 + +class TruncationStates(Enum): + TRUNCATION_UNSET = -1 + IN_IMAGE = 0 + TRUNCATED = 1 + OUT_IMAGE = 2 + BEHIND_IMAGE = 99 diff --git a/datumaro/plugins/labelme_format.py b/datumaro/plugins/labelme_format.py index 80d9d0d44f2e..f95daf4aeae9 100644 --- a/datumaro/plugins/labelme_format.py +++ b/datumaro/plugins/labelme_format.py @@ -3,21 +3,24 @@ # SPDX-License-Identifier: MIT from collections import defaultdict -from defusedxml import 
ElementTree from functools import partial from glob import glob, iglob import logging as log -import numpy as np import os import os.path as osp -from datumaro.components.extractor import (Extractor, Importer, - DatasetItem, AnnotationType, Mask, Bbox, Polygon, LabelCategories) +from defusedxml import ElementTree +import numpy as np + from datumaro.components.converter import Converter +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Extractor, Importer, LabelCategories, + Mask, Polygon, +) from datumaro.util import cast, escape, unescape -from datumaro.util.os_util import split_path from datumaro.util.image import Image, save_image -from datumaro.util.mask_tools import load_mask, find_mask_bbox +from datumaro.util.mask_tools import find_mask_bbox, load_mask +from datumaro.util.os_util import split_path class LabelMePath: @@ -59,7 +62,7 @@ def _parse(self, dataset_root): subset = '' if 1 < len(path_parts): subset = path_parts[0] - item_path = osp.join(*path_parts[1:]) + item_path = osp.join(*path_parts[1:]) # pylint: disable=no-value-for-parameter root = ElementTree.parse(xml_path) @@ -408,8 +411,8 @@ def _save_item(self, item, subset_dir): cast(v, int) is not None and str(int(v)) == v: v = f'"{v}"' # add escaping for string values else: - v = self._escape(v) - attrs.append('%s=%s' % (self._escape(k), v)) + v = self._escape(v) # pylint: disable=redundant-keyword-arg due FP https://github.com/PyCQA/pylint/issues/2271 + attrs.append('%s=%s' % (self._escape(k), v)) # pylint: disable=redundant-keyword-arg due FP https://github.com/PyCQA/pylint/issues/2271 ET.SubElement(obj_elem, 'attributes').text = ', '.join(attrs) obj_id += 1 @@ -439,4 +442,4 @@ def _save_item(self, item, subset_dir): def _paint_mask(mask): # TODO: check if mask colors are random return np.array([[0, 0, 0, 0], [255, 203, 0, 153]], - dtype=np.uint8)[mask.astype(np.uint8)] \ No newline at end of file + dtype=np.uint8)[mask.astype(np.uint8)] diff --git 
a/datumaro/plugins/lfw_format.py b/datumaro/plugins/lfw_format.py index c4806647cbaf..59aef70ec340 100644 --- a/datumaro/plugins/lfw_format.py +++ b/datumaro/plugins/lfw_format.py @@ -7,8 +7,10 @@ import re from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, DatasetItem, - Importer, Label, LabelCategories, Points, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, Label, LabelCategories, Points, + SourceExtractor, +) from datumaro.util.image import find_images @@ -164,7 +166,8 @@ def get_image_name(person, image_id): class LfwImporter(Importer): @classmethod def find_sources(cls, path): - return cls._find_sources_recursive(path, LfwPath.PAIRS_FILE, 'lfw') + base, ext = osp.splitext(LfwPath.PAIRS_FILE) + return cls._find_sources_recursive(path, ext, 'lfw', filename=base) class LfwConverter(Converter): DEFAULT_IMAGE_EXT = LfwPath.IMAGE_EXT diff --git a/datumaro/plugins/market1501_format.py b/datumaro/plugins/market1501_format.py index 2493b495bc69..5f2cbfe138cc 100644 --- a/datumaro/plugins/market1501_format.py +++ b/datumaro/plugins/market1501_format.py @@ -2,15 +2,14 @@ # # SPDX-License-Identifier: MIT +from distutils.util import strtobool +from itertools import chain import os import os.path as osp import re -from distutils.util import strtobool -from itertools import chain from datumaro.components.converter import Converter -from datumaro.components.extractor import (DatasetItem, Importer, - SourceExtractor) +from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor from datumaro.util.image import find_images diff --git a/datumaro/plugins/mnist_csv_format.py b/datumaro/plugins/mnist_csv_format.py index ae0fa8bf8c80..17e257d263bc 100644 --- a/datumaro/plugins/mnist_csv_format.py +++ b/datumaro/plugins/mnist_csv_format.py @@ -6,9 +6,12 @@ import os.path as osp import numpy as np + from datumaro.components.converter import Converter 
-from datumaro.components.extractor import (AnnotationType, DatasetItem, - Importer, Label, LabelCategories, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, Label, LabelCategories, + SourceExtractor, +) class MnistCsvPath: diff --git a/datumaro/plugins/mnist_format.py b/datumaro/plugins/mnist_format.py index 0cd97b06dfde..2eb4c474bc97 100644 --- a/datumaro/plugins/mnist_format.py +++ b/datumaro/plugins/mnist_format.py @@ -7,9 +7,12 @@ import os.path as osp import numpy as np + from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, DatasetItem, - Importer, Label, LabelCategories, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, Label, LabelCategories, + SourceExtractor, +) class MnistPath: diff --git a/datumaro/plugins/mot_format.py b/datumaro/plugins/mot_format.py index 8008f25d2b96..abd1e95fc7a6 100644 --- a/datumaro/plugins/mot_format.py +++ b/datumaro/plugins/mot_format.py @@ -7,20 +7,20 @@ # Another description: https://motchallenge.net/instructions from collections import OrderedDict -import csv from enum import Enum +import csv import logging as log import os import os.path as osp -from datumaro.components.extractor import (SourceExtractor, Importer, - DatasetItem, AnnotationType, Bbox, LabelCategories -) from datumaro.components.converter import Converter +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Importer, LabelCategories, + SourceExtractor, +) from datumaro.util import cast from datumaro.util.image import Image, find_images - MotLabel = Enum('MotLabel', [ ('pedestrian', 1), ('person on vehicle', 2), diff --git a/datumaro/plugins/mots_format.py b/datumaro/plugins/mots_format.py index 522378d9056e..16a632ba06f8 100644 --- a/datumaro/plugins/mots_format.py +++ b/datumaro/plugins/mots_format.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 Intel Corporation +# 
Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT @@ -7,14 +7,16 @@ from enum import Enum from glob import iglob import logging as log -import numpy as np import os import os.path as osp -from datumaro.components.extractor import (SourceExtractor, Importer, - DatasetItem, AnnotationType, Mask, LabelCategories -) +import numpy as np + from datumaro.components.converter import Converter +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Importer, LabelCategories, Mask, + SourceExtractor, +) from datumaro.util.image import find_images, load_image, save_image from datumaro.util.mask_tools import merge_masks @@ -26,12 +28,11 @@ class MotsPath: LABELS_FILE = 'labels.txt' MAX_INSTANCES = 1000 -MotsLabels = Enum('MotsLabels', [ - ('background', 0), - ('car', 1), - ('pedestrian', 2), - ('ignored', 10), -]) +class MotsLabels(Enum): + background = 0 + car = 1 + pedestrian = 2 + ignored = 10 class MotsPngExtractor(SourceExtractor): @staticmethod @@ -51,8 +52,12 @@ def __init__(self, path, subset=None): def _parse_categories(self, path): if osp.isfile(path): - with open(path) as f: - labels = [l.strip() for l in f] + labels = [] + with open(path, encoding='utf-8') as f: + for label in f: + label = label.strip() + if label: + labels.append(label) else: labels = [l.name for l in MotsLabels] return { AnnotationType.label: LabelCategories.from_iterable(labels) } @@ -133,7 +138,8 @@ def apply(self): self._save_annotations(item, anno_dir) - with open(osp.join(anno_dir, MotsPath.LABELS_FILE), 'w') as f: + with open(osp.join(anno_dir, MotsPath.LABELS_FILE), + 'w', encoding='utf-8') as f: f.write('\n'.join(l.name for l in subset.categories()[AnnotationType.label].items)) diff --git a/datumaro/plugins/ndr.py b/datumaro/plugins/ndr.py index 060cd8047e6d..cd6f0112e52d 100644 --- a/datumaro/plugins/ndr.py +++ b/datumaro/plugins/ndr.py @@ -2,23 +2,29 @@ # # SPDX-License-Identifier: MIT -from enum import Enum +from enum import Enum, auto 
import logging as log +from scipy.linalg import orth import cv2 import numpy as np -from scipy.linalg import orth -from datumaro.components.extractor import Transform, DEFAULT_SUBSET_NAME from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.extractor import DEFAULT_SUBSET_NAME, Transform from datumaro.util import parse_str_enum_value -Algorithm = Enum("Algorithm", ["gradient"]) # other algorithms will be added +class Algorithm(Enum): + gradient = auto() + # other algorithms will be added -OverSamplingMethod = Enum("OverSamplingMethod", ["random", "similarity"]) +class OverSamplingMethod(Enum): + random = auto() + similarity = auto() -UnderSamplingMethod = Enum("UnderSamplingMethod", ["uniform", "inverse"]) +class UnderSamplingMethod(Enum): + uniform = auto() + inverse = auto() class NDR(Transform, CliPlugin): """ diff --git a/datumaro/plugins/open_images_format.py b/datumaro/plugins/open_images_format.py new file mode 100644 index 000000000000..466a5571d2bd --- /dev/null +++ b/datumaro/plugins/open_images_format.py @@ -0,0 +1,378 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import contextlib +import csv +import fnmatch +import glob +import itertools +import json +import logging as log +import os +import os.path as osp +import re + +from attr import attrs + +from datumaro.components.converter import Converter +from datumaro.components.errors import ( + DatasetError, RepeatedItemError, UndefinedLabel, +) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Extractor, Importer, Label, LabelCategories, +) +from datumaro.components.validator import Severity +from datumaro.util.image import find_images +from datumaro.util.os_util import split_path + +# A regex to check whether a subset name can be used as a "normal" path +# component. +# Accepting a subset name that doesn't match this regex could lead +# to accessing data outside of the expected directory, so it's best +# to reject them. 
+_RE_INVALID_SUBSET = re.compile(r''' + # empty + | \.\.? # special path component + | .*[/\\\0].* # contains special characters +''', re.VERBOSE) + +@attrs(auto_attribs=True) +class UnsupportedSubsetNameError(DatasetError): + subset: str + + def __str__(self): + return "Item %s has an unsupported subset name %r." % (self.item_id, self.subset) + +class OpenImagesPath: + ANNOTATIONS_DIR = 'annotations' + IMAGES_DIR = 'images' + + FULL_IMAGE_DESCRIPTION_FILE_NAME = 'image_ids_and_rotation.csv' + SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS = ( + '*-images-with-rotation.csv', + '*-images-with-labels-with-rotation.csv', + ) + V5_CLASS_DESCRIPTION_FILE_NAME = 'class-descriptions.csv' + HIERARCHY_FILE_NAME = 'bbox_labels_600_hierarchy.json' + + IMAGE_DESCRIPTION_FIELDS = ( + 'ImageID', + 'Subset', + 'OriginalURL', + 'OriginalLandingURL', + 'License', + 'AuthorProfileURL', + 'Author', + 'Title', + 'OriginalSize', + 'OriginalMD5', + 'Thumbnail300KURL', + 'Rotation', + ) + + LABEL_DESCRIPTION_FIELDS = ( + 'ImageID', + 'Source', + 'LabelName', + 'Confidence', + ) + + +class OpenImagesExtractor(Extractor): + def __init__(self, path): + if not osp.isdir(path): + raise FileNotFoundError("Can't read dataset directory '%s'" % path) + + super().__init__() + + self._dataset_dir = path + + self._annotation_files = os.listdir( + osp.join(path, OpenImagesPath.ANNOTATIONS_DIR)) + + self._categories = {} + self._items = [] + + self._load_categories() + self._load_items() + + def __iter__(self): + return iter(self._items) + + def categories(self): + return self._categories + + @contextlib.contextmanager + def _open_csv_annotation(self, file_name): + absolute_path = osp.join(self._dataset_dir, OpenImagesPath.ANNOTATIONS_DIR, file_name) + + with open(absolute_path, 'r', encoding='utf-8', newline='') as f: + yield csv.DictReader(f) + + def _glob_annotations(self, pattern): + for annotation_file in self._annotation_files: + if fnmatch.fnmatch(annotation_file, pattern): + yield annotation_file + + 
def _load_categories(self): + label_categories = LabelCategories() + + # In OID v6, the class description file is prefixed with `oidv6-`, whereas + # in the previous versions, it isn't. We try to find it regardless. + # We use a wildcard so that if, say, OID v7 is released in the future with + # a similar layout as v6, it's automatically supported. + # If the file doesn't exist with either name, we'll fail trying to open + # `class-descriptions.csv`. + + annotation_name = [ + *self._glob_annotations('oidv*-class-descriptions.csv'), + OpenImagesPath.V5_CLASS_DESCRIPTION_FILE_NAME, + ][0] + + with self._open_csv_annotation(annotation_name) as class_description_reader: + # Prior to OID v6, this file didn't contain a header row. + if annotation_name == OpenImagesPath.V5_CLASS_DESCRIPTION_FILE_NAME: + class_description_reader.fieldnames = ('LabelName', 'DisplayName') + + for class_description in class_description_reader: + label_name = class_description['LabelName'] + label_categories.add(label_name) + + self._categories[AnnotationType.label] = label_categories + + self._load_label_category_parents() + + def _load_label_category_parents(self): + label_categories = self._categories[AnnotationType.label] + + hierarchy_path = osp.join( + self._dataset_dir, OpenImagesPath.ANNOTATIONS_DIR, OpenImagesPath.HIERARCHY_FILE_NAME) + + try: + with open(hierarchy_path, 'rb') as hierarchy_file: + root_node = json.load(hierarchy_file) + except FileNotFoundError: + return + + def set_parents_from_node(node, category): + for child_node in node.get('Subcategory', []): + _, child_category = label_categories.find(child_node['LabelName']) + + if category is not None and child_category is not None: + child_category.parent = category.name + + set_parents_from_node(child_node, child_category) + + _, root_category = label_categories.find(root_node['LabelName']) + set_parents_from_node(root_node, root_category) + + def _load_items(self): + images_dir = osp.join(self._dataset_dir, 
OpenImagesPath.IMAGES_DIR) + + image_paths_by_id = { + # the first component of `path_parts` is the subset name + '/'.join(path_parts[1:]): path + for path in find_images(images_dir, recursive=True) + for path_parts in [split_path( + osp.splitext(osp.relpath(path, images_dir))[0], + )] + if 1 < len(path_parts) + } + + items_by_id = {} + + def load_from(annotation_name): + with self._open_csv_annotation(annotation_name) as image_reader: + for image_description in image_reader: + image_id = image_description['ImageID'] + if image_id in items_by_id: + raise RepeatedItemError(item_id=image_id) + + subset = image_description['Subset'] + + if _RE_INVALID_SUBSET.fullmatch(subset): + raise UnsupportedSubsetNameError(item_id=image_id, subset=subset) + + items_by_id[image_id] = DatasetItem( + id=image_id, + image=image_paths_by_id.get(image_id), + subset=subset, + ) + + # It's preferable to load the combined image description file, + # because it contains descriptions for training images without human-annotated labels + # (the file specific to the training set doesn't). + # However, if it's missing, we'll try loading subset-specific files instead, so that + # this extractor can be used on individual subsets of the dataset. 
+ try: + load_from(OpenImagesPath.FULL_IMAGE_DESCRIPTION_FILE_NAME) + except FileNotFoundError: + for pattern in OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS: + for path in self._glob_annotations(pattern): + load_from(path) + + self._items.extend(items_by_id.values()) + + self._load_labels(items_by_id) + + def _load_labels(self, items_by_id): + label_categories = self._categories[AnnotationType.label] + + # TODO: implement reading of machine-annotated labels + + for label_path in self._glob_annotations('*-human-imagelabels.csv'): + with self._open_csv_annotation(label_path) as label_reader: + for label_description in label_reader: + image_id = label_description['ImageID'] + item = items_by_id[image_id] + + confidence = float(label_description['Confidence']) + + label_name = label_description['LabelName'] + label_index, _ = label_categories.find(label_name) + if label_index is None: + raise UndefinedLabel( + item_id=item.id, subset=item.subset, + label_name=label_name, severity=Severity.error) + item.annotations.append(Label( + label=label_index, attributes={'score': confidence})) + + +class OpenImagesImporter(Importer): + @classmethod + def find_sources(cls, path): + for pattern in [ + OpenImagesPath.FULL_IMAGE_DESCRIPTION_FILE_NAME, + *OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS, + ]: + if glob.glob(osp.join(glob.escape(path), OpenImagesPath.ANNOTATIONS_DIR, pattern)): + return [{'url': path, 'format': 'open_images'}] + + return [] + +class OpenImagesConverter(Converter): + DEFAULT_IMAGE_EXT = '.jpg' + + @contextlib.contextmanager + def _open_csv_annotation(self, file_name, field_names): + absolute_path = osp.join(self._save_dir, OpenImagesPath.ANNOTATIONS_DIR, file_name) + + with open(absolute_path, 'w', encoding='utf-8', newline='') as f: + yield csv.DictWriter(f, field_names) + + def apply(self): + annotations_dir = osp.join(self._save_dir, OpenImagesPath.ANNOTATIONS_DIR) + + os.makedirs(annotations_dir, exist_ok=True) + + 
self._save_categories() + self._save_label_category_parents() + self._save_subsets() + + def _save_categories(self): + with self._open_csv_annotation( + OpenImagesPath.V5_CLASS_DESCRIPTION_FILE_NAME, ['LabelName', 'DisplayName'], + ) as class_description_writer: + # no .writeheader() here, since we're saving it in the V5 format + + for category in self._extractor.categories()[AnnotationType.label]: + class_description_writer.writerow({ + 'LabelName': category.name, + 'DisplayName': category.name, + }) + + def _save_label_category_parents(self): + all_label_names = set() + hierarchy_nodes = {} + orphan_nodes = [] + + def get_node(name): + return hierarchy_nodes.setdefault(name, {'LabelName': name}) + + for category in self._extractor.categories()[AnnotationType.label]: + all_label_names.add(category.name) + + child_node = get_node(category.name) + + if category.parent: + parent_node = get_node(category.parent) + parent_node.setdefault('Subcategory', []).append(child_node) + else: + orphan_nodes.append(child_node) + + # The hierarchy has to be rooted in a single node. However, there's + # no guarantee that there exists only one orphan (label without a parent). + # Therefore, we create a fake root node and make it the parent of every + # orphan label. + # This is not a violation of the format, because the original OID does + # the same thing. + root_node = { + # Create an OID-like label name that isn't already used by a real label + 'LabelName': next(root_name + for i in itertools.count() + for root_name in [f'/m/{i}'] + if root_name not in all_label_names + ), + # If an orphan has no children, then it makes no semantic difference + # whether it's listed in the hierarchy file or not. So strip such nodes + # to avoid recording meaningless data. 
+ 'Subcategory': [node for node in orphan_nodes if 'Subcategory' in node], + } + + hierarchy_path = osp.join( + self._save_dir, OpenImagesPath.ANNOTATIONS_DIR, OpenImagesPath.HIERARCHY_FILE_NAME) + + with open(hierarchy_path, 'w', encoding='utf-8') as hierarchy_file: + json.dump(root_node, hierarchy_file, indent=4, ensure_ascii=False) + hierarchy_file.write('\n') + + def _save_subsets(self): + label_categories = self._extractor.categories().get( + AnnotationType.label, LabelCategories()) + + for subset_name, subset in self._extractor.subsets().items(): + if _RE_INVALID_SUBSET.fullmatch(subset_name): + raise UnsupportedSubsetNameError(item_id=next(iter(subset)).id, subset=subset) + + image_description_name = f'{subset_name}-images-with-rotation.csv' + label_description_name = f'{subset_name}-annotations-human-imagelabels.csv' + + with \ + self._open_csv_annotation( + image_description_name, OpenImagesPath.IMAGE_DESCRIPTION_FIELDS, + ) as image_description_writer, \ + contextlib.ExitStack() as annotation_writers \ + : + image_description_writer.writeheader() + + # The label description writer is created lazily, + # so that we don't create the label description file if there are no labels. 
+ label_description_writer = None + + for item in subset: + image_description_writer.writerow({ + 'ImageID': item.id, 'Subset': subset_name, + }) + + if self._save_images: + if item.has_image: + self._save_image(item, subdir=osp.join( + OpenImagesPath.IMAGES_DIR, subset_name)) + else: + log.debug("Item '%s' has no image", item.id) + + for annotation in item.annotations: + if annotation.type is AnnotationType.label: + if label_description_writer is None: + label_description_writer = annotation_writers.enter_context( + self._open_csv_annotation( + label_description_name, + OpenImagesPath.LABEL_DESCRIPTION_FIELDS)) + label_description_writer.writeheader() + + label_description_writer.writerow({ + 'ImageID': item.id, + 'LabelName': label_categories[annotation.label].name, + 'Confidence': str(annotation.attributes.get('score', 1)), + }) diff --git a/datumaro/plugins/openvino_plugin/README.md b/datumaro/plugins/openvino_plugin/README.md index fc0d904eb9ef..5de36ba4317b 100644 --- a/datumaro/plugins/openvino_plugin/README.md +++ b/datumaro/plugins/openvino_plugin/README.md @@ -1,17 +1,17 @@ -# OpenVINO™ Inference Interpreter +# OpenVINO™ Inference Interpreter Interpreter samples to parse OpenVINO™ inference outputs. ## Models supported from interpreter samples -There are detection and image classification examples. - +There are detection and image classification examples. 
+ - Detection (SSD-based) - - Intel Pre-trained Models > Object Detection + - Intel Pre-trained Models > Object Detection - [face-detection-0200](https://docs.openvinotoolkit.org/latest/omz_models_intel_face_detection_0200_description_face_detection_0200.html) - [face-detection-0202](https://docs.openvinotoolkit.org/latest/omz_models_intel_face_detection_0202_description_face_detection_0202.html) - [face-detection-0204](https://docs.openvinotoolkit.org/latest/omz_models_intel_face_detection_0204_description_face_detection_0204.html) - [person-detection-0200](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_0200_description_person_detection_0200.html) - [person-detection-0201](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_0201_description_person_detection_0201.html) - - [person-detection-0202](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_0202_description_person_detection_0202.html) + - [person-detection-0202](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_0202_description_person_detection_0202.html) - [person-vehicle-bike-detection-2000](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_vehicle_bike_detection_2000_description_person_vehicle_bike_detection_2000.html) - [person-vehicle-bike-detection-2001](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_vehicle_bike_detection_2001_description_person_vehicle_bike_detection_2001.html) - [person-vehicle-bike-detection-2002](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_vehicle_bike_detection_2002_description_person_vehicle_bike_detection_2002.html) @@ -27,17 +27,21 @@ There are detection and image classification examples. 
- Public Pre-Trained Models(OMZ) > Classification - [mobilenet-v2-pytorch](https://docs.openvinotoolkit.org/latest/omz_models_public_mobilenet_v2_pytorch_mobilenet_v2_pytorch.html) -You can find more OpenVINO™ Trained Models [here](https://docs.openvinotoolkit.org/latest/omz_models_intel_index.html) -To run the inference with OpenVINO™, the model format should be Intermediate Representation(IR). +You can find more OpenVINO™ Trained Models +[here](https://docs.openvinotoolkit.org/latest/omz_models_intel_index.html) +To run the inference with OpenVINO™, the model format should be Intermediate +Representation(IR). For the Caffe/TensorFlow/MXNet/Kaldi/ONNX models, please see the [Model Conversion Instruction](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model.html) -You need to implement your own interpreter samples to support the other OpenVINO™ Trained Models. +You need to implement your own interpreter samples to support the other +OpenVINO™ Trained Models. 
## Model download - Prerequisites - - OpenVINO™ (To install OpenVINO™, please see the [OpenVINO™ Installation Instruction](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_linux.html)) - - OpenVINO™ models (To download OpenVINO™ models, please see the [Model Downloader Instruction](https://docs.openvinotoolkit.org/latest/omz_tools_downloader_README.html)) - - PASCAL VOC 2012 dataset (To download VOC 2012 dataset, please go [VOC2012 download](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#devkit)) + - OpenVINO™ (To install OpenVINO™, please see the + [OpenVINO™ Installation Instruction](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_linux.html)) + - OpenVINO™ models (To download OpenVINO™ models, please see the [Model Downloader Instruction](https://docs.openvinotoolkit.org/latest/omz_tools_downloader_README.html)) + - PASCAL VOC 2012 dataset (To download VOC 2012 dataset, please go [VOC2012 download](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#devkit)) ```bash # cd /deployment_tools/open_model_zoo/tools/downloader @@ -49,13 +53,15 @@ You need to implement your own interpreter samples to support the other OpenVINO ``` ## Model inference -- Prerequisites: - - OpenVINO™ (To install OpenVINO™, please see the [OpenVINO™ Installation Instruction](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_linux.html)) - - Datumaro (To install Datumaro, please see the [User Manual](docs/user_manual.md)) - - OpenVINO™ models (To download OpenVINO™ models, please see the [Model Downloader Instruction](https://docs.openvinotoolkit.org/latest/omz_tools_downloader_README.html)) - - PASCAL VOC 2012 dataset (To download VOC 2012 dataset, please go [VOC2012 download](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#devkit)) - -- To run the inference with OpenVINO™ models and the interpreter samples, please follow the instructions below. 
+- Prerequisites: + - OpenVINO™ (To install OpenVINO™, please see the + [OpenVINO™ Installation Instruction](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_linux.html)) + - Datumaro (To install Datumaro, please see the [User Manual](docs/user_manual.md)) + - OpenVINO™ models (To download OpenVINO™ models, please see the [Model Downloader Instruction](https://docs.openvinotoolkit.org/latest/omz_tools_downloader_README.html)) + - PASCAL VOC 2012 dataset (To download VOC 2012 dataset, please go [VOC2012 download](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/#devkit)) + +- To run the inference with OpenVINO™ models and the interpreter samples, + please follow the instructions below. ```bash # source /bin/setupvars.sh @@ -76,7 +82,7 @@ You need to implement your own interpreter samples to support the other OpenVINO -i samples/ssd_mobilenet_coco_detection_interp.py datum add path -p proj_ssd_mobilenet_v2_coco_detection -f voc VOCdevkit/ datum model run -p proj_ssd_mobilenet_v2_coco_detection -m model-0 - + # Classification> mobilenet-v2-pytorch source /opt/intel/openvino/bin/setupvars.sh cd datumaro/plugins/openvino_plugin @@ -87,4 +93,4 @@ You need to implement your own interpreter samples to support the other OpenVINO -i samples/mobilenet_v2_pytorch_interp.py datum add path -p proj_mobilenet_v2_classification -f voc VOCdevkit/ datum model run -p proj_mobilenet_v2_classification -m model-0 - ``` \ No newline at end of file + ``` diff --git a/datumaro/plugins/openvino_plugin/launcher.py b/datumaro/plugins/openvino_plugin/launcher.py index 7c64d6fa4499..a5767afbf4fd 100644 --- a/datumaro/plugins/openvino_plugin/launcher.py +++ b/datumaro/plugins/openvino_plugin/launcher.py @@ -1,17 +1,17 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # pylint: disable=exec-used -import cv2 import logging as log -import numpy as np import os.path as osp import shutil 
from openvino.inference_engine import IECore +import cv2 +import numpy as np from datumaro.components.cli_plugin import CliPlugin from datumaro.components.launcher import Launcher @@ -54,7 +54,7 @@ def copy_model(model_dir, model): class InterpreterScript: def __init__(self, path): - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: script = f.read() context = {} @@ -195,4 +195,3 @@ def categories(self): def process_outputs(self, inputs, outputs): return self._interpreter.process_outputs(inputs, outputs) - diff --git a/datumaro/plugins/openvino_plugin/samples/mobilenet_v2_pytorch_interp.py b/datumaro/plugins/openvino_plugin/samples/mobilenet_v2_pytorch_interp.py index 43ce43fd217d..a765dd42b14c 100644 --- a/datumaro/plugins/openvino_plugin/samples/mobilenet_v2_pytorch_interp.py +++ b/datumaro/plugins/openvino_plugin/samples/mobilenet_v2_pytorch_interp.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: MIT -from datumaro.components.extractor import Label, LabelCategories, AnnotationType +from datumaro.components.extractor import AnnotationType, Label, LabelCategories from datumaro.util.annotation_util import softmax @@ -14,7 +14,7 @@ def process_outputs(inputs, outputs): # results = conversion result; [[ Annotation, ... ], ... 
] results = [] - for input, output in zip(inputs, outputs): + for input_, output in zip(inputs, outputs): # pylint: disable=unused-variable image_results = [] output = softmax(output).tolist() label = output.index(max(output)) @@ -30,7 +30,7 @@ def get_categories(): label_categories = LabelCategories() - with open("samples/imagenet.class", "r") as file: + with open("samples/imagenet.class", "r", encoding='utf-8') as file: for line in file.readlines(): label = line.strip() label_categories.add(label) diff --git a/datumaro/plugins/openvino_plugin/samples/ssd_face_detection_interp.py b/datumaro/plugins/openvino_plugin/samples/ssd_face_detection_interp.py index abb4604f8d93..eea9160135e9 100644 --- a/datumaro/plugins/openvino_plugin/samples/ssd_face_detection_interp.py +++ b/datumaro/plugins/openvino_plugin/samples/ssd_face_detection_interp.py @@ -41,9 +41,9 @@ def process_outputs(inputs, outputs): # results = conversion result; [[ Annotation, ... ], ... ] results = [] - for input, detections in zip(inputs, outputs["detection_out"]): + for input_, detections in zip(inputs, outputs["detection_out"]): - input_height, input_width = input.shape[:2] + input_height, input_width = input_.shape[:2] confs = outputs["Softmax_189/Softmax_"] detections = detections[0] @@ -52,7 +52,7 @@ def process_outputs(inputs, outputs): image_results = [] for i, det in enumerate(detections): - image_id = int(det[0]) + image_id = int(det[0]) # pylint: disable=unused-variable label = int(det[1]) conf = float(det[2]) det_confs = confs[conf_ids[i]] @@ -65,10 +65,10 @@ def process_outputs(inputs, outputs): w = min(int(det[5] * input_width - x), input_width) h = min(int(det[6] * input_height - y), input_height) - image_results.append(Bbox(x, y, w, h, label=label, + image_results.append(Bbox(x, y, w, h, label=label, attributes={ 'score': conf, 'scores': list(map(float, det_confs)) } )) - + results.append(image_results) return results diff --git 
a/datumaro/plugins/openvino_plugin/samples/ssd_mobilenet_coco_detection_interp.py b/datumaro/plugins/openvino_plugin/samples/ssd_mobilenet_coco_detection_interp.py index 3b3e5de252c0..4893a0218470 100644 --- a/datumaro/plugins/openvino_plugin/samples/ssd_mobilenet_coco_detection_interp.py +++ b/datumaro/plugins/openvino_plugin/samples/ssd_mobilenet_coco_detection_interp.py @@ -42,11 +42,11 @@ def process_outputs(inputs, outputs): # results = conversion result; [[ Annotation, ... ], ... ] results = [] - for input, confs, detections in zip( + for input_, confs, detections in zip( inputs, outputs["do_ExpandDims_conf/sigmoid"], outputs["DetectionOutput"] ): - input_height, input_width = input.shape[:2] + input_height, input_width = input_.shape[:2] confs = confs[0].reshape(-1, model_class_num) detections = detections[0] @@ -55,7 +55,7 @@ def process_outputs(inputs, outputs): image_results = [] for i, det in enumerate(detections): - image_id = int(det[0]) + image_id = int(det[0]) # pylint: disable=unused-variable label = int(det[1]) conf = float(det[2]) det_confs = confs[conf_ids[i]] @@ -68,10 +68,10 @@ def process_outputs(inputs, outputs): w = min(int(det[5] * input_width - x), input_width) h = min(int(det[6] * input_height - y), input_height) - image_results.append(Bbox(x, y, w, h, label=label, + image_results.append(Bbox(x, y, w, h, label=label, attributes={ 'score': conf, 'scores': list(map(float, det_confs)) } )) - + results.append(image_results) return results @@ -82,7 +82,7 @@ def get_categories(): label_categories = LabelCategories() - with open("samples/coco.class", "r") as file: + with open("samples/coco.class", "r", encoding='utf-8') as file: for line in file.readlines(): label = line.strip() label_categories.add(label) diff --git a/datumaro/plugins/openvino_plugin/samples/ssd_person_detection_interp.py b/datumaro/plugins/openvino_plugin/samples/ssd_person_detection_interp.py index 3888944df473..daf487ade98a 100644 --- 
a/datumaro/plugins/openvino_plugin/samples/ssd_person_detection_interp.py +++ b/datumaro/plugins/openvino_plugin/samples/ssd_person_detection_interp.py @@ -41,9 +41,9 @@ def process_outputs(inputs, outputs): # results = conversion result; [[ Annotation, ... ], ... ] results = [] - for input, detections in zip(inputs, outputs["detection_out"]): + for input_, detections in zip(inputs, outputs["detection_out"]): - input_height, input_width = input.shape[:2] + input_height, input_width = input_.shape[:2] confs = outputs["Softmax_189/Softmax_"] detections = detections[0] @@ -52,7 +52,7 @@ def process_outputs(inputs, outputs): image_results = [] for i, det in enumerate(detections): - image_id = int(det[0]) + image_id = int(det[0]) # pylint: disable=unused-variable label = int(det[1]) conf = float(det[2]) det_confs = confs[conf_ids[i]] @@ -65,10 +65,10 @@ def process_outputs(inputs, outputs): w = min(int(det[5] * input_width - x), input_width) h = min(int(det[6] * input_height - y), input_height) - image_results.append(Bbox(x, y, w, h, label=label, + image_results.append(Bbox(x, y, w, h, label=label, attributes={ 'score': conf, 'scores': list(map(float, det_confs)) } )) - + results.append(image_results) return results diff --git a/datumaro/plugins/openvino_plugin/samples/ssd_person_vehicle_bike_detection_interp.py b/datumaro/plugins/openvino_plugin/samples/ssd_person_vehicle_bike_detection_interp.py index a2de43dd6d1f..79db2443c034 100644 --- a/datumaro/plugins/openvino_plugin/samples/ssd_person_vehicle_bike_detection_interp.py +++ b/datumaro/plugins/openvino_plugin/samples/ssd_person_vehicle_bike_detection_interp.py @@ -41,9 +41,9 @@ def process_outputs(inputs, outputs): # results = conversion result; [[ Annotation, ... ], ... 
] results = [] - for input, detections in zip(inputs, outputs["detection_out"]): + for input_, detections in zip(inputs, outputs["detection_out"]): - input_height, input_width = input.shape[:2] + input_height, input_width = input_.shape[:2] confs = outputs["Softmax_189/Softmax_"] detections = detections[0] @@ -52,7 +52,7 @@ def process_outputs(inputs, outputs): image_results = [] for i, det in enumerate(detections): - image_id = int(det[0]) + image_id = int(det[0]) # pylint: disable=unused-variable label = int(det[1]) conf = float(det[2]) det_confs = confs[conf_ids[i]] @@ -65,10 +65,10 @@ def process_outputs(inputs, outputs): w = min(int(det[5] * input_width - x), input_width) h = min(int(det[6] * input_height - y), input_height) - image_results.append(Bbox(x, y, w, h, label=label, + image_results.append(Bbox(x, y, w, h, label=label, attributes={ 'score': conf, 'scores': list(map(float, det_confs)) } )) - + results.append(image_results) return results diff --git a/datumaro/plugins/openvino_plugin/samples/ssd_vehicle_detection_interp.py b/datumaro/plugins/openvino_plugin/samples/ssd_vehicle_detection_interp.py index 2866133542df..7ebeb73cbbed 100644 --- a/datumaro/plugins/openvino_plugin/samples/ssd_vehicle_detection_interp.py +++ b/datumaro/plugins/openvino_plugin/samples/ssd_vehicle_detection_interp.py @@ -41,9 +41,9 @@ def process_outputs(inputs, outputs): # results = conversion result; [[ Annotation, ... ], ... 
] results = [] - for input, detections in zip(inputs, outputs["detection_out"]): + for input_, detections in zip(inputs, outputs["detection_out"]): - input_height, input_width = input.shape[:2] + input_height, input_width = input_.shape[:2] confs = outputs["Softmax_189/Softmax_"] detections = detections[0] @@ -52,7 +52,7 @@ def process_outputs(inputs, outputs): image_results = [] for i, det in enumerate(detections): - image_id = int(det[0]) + image_id = int(det[0]) # pylint: disable=unused-variable label = int(det[1]) conf = float(det[2]) det_confs = confs[conf_ids[i]] @@ -65,10 +65,10 @@ def process_outputs(inputs, outputs): w = min(int(det[5] * input_width - x), input_width) h = min(int(det[6] * input_height - y), input_height) - image_results.append(Bbox(x, y, w, h, label=label, + image_results.append(Bbox(x, y, w, h, label=label, attributes={ 'score': conf, 'scores': list(map(float, det_confs)) } )) - + results.append(image_results) return results diff --git a/datumaro/plugins/sampler/algorithm/algorithm.py b/datumaro/plugins/sampler/algorithm/algorithm.py index 5dd562f062eb..aa2ee77037a3 100644 --- a/datumaro/plugins/sampler/algorithm/algorithm.py +++ b/datumaro/plugins/sampler/algorithm/algorithm.py @@ -2,13 +2,18 @@ # # SPDX-License-Identifier: MIT -from enum import Enum +from enum import Enum, auto -SamplingMethod = Enum("SamplingMethod", - ["topk", "lowk", "randk", "mixk", "randtopk"]) +class SamplingMethod(Enum): + topk = auto() + lowk = auto() + randk = auto() + mixk = auto() + randtopk = auto() -Algorithm = Enum("Algorithm", ["entropy"]) +class Algorithm(Enum): + entropy = auto() class InferenceResultAnalyzer: """ diff --git a/datumaro/plugins/sampler/sampler.py b/datumaro/plugins/sampler/sampler.py index 2fefa0e1dc31..9672cc7a575b 100644 --- a/datumaro/plugins/sampler/sampler.py +++ b/datumaro/plugins/sampler/sampler.py @@ -6,14 +6,14 @@ import pandas as pd -from datumaro.components.extractor import Transform from datumaro.components.cli_plugin import 
CliPlugin +from datumaro.components.extractor import Transform -from .algorithm.algorithm import SamplingMethod, Algorithm +from .algorithm.algorithm import Algorithm, SamplingMethod class Sampler(Transform, CliPlugin): - """ + r""" Sampler that analyzes model inference results on the dataset |n and picks the best sample for training.|n |n diff --git a/datumaro/plugins/sly_pointcloud_format/__init__.py b/datumaro/plugins/sly_pointcloud_format/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/datumaro/plugins/sly_pointcloud_format/converter.py b/datumaro/plugins/sly_pointcloud_format/converter.py new file mode 100644 index 000000000000..f5641be9f8df --- /dev/null +++ b/datumaro/plugins/sly_pointcloud_format/converter.py @@ -0,0 +1,428 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# The format is described here: +# https://docs.supervise.ly/data-organization/00_ann_format_navi + +from datetime import datetime +import json +import logging as log +import os +import os.path as osp +import shutil +import uuid + +from datumaro.components.converter import Converter +from datumaro.components.dataset import ItemStatus +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, IExtractor, LabelCategories, +) +from datumaro.util import cast + +from .format import PointCloudPath + + +class _SuperviselyPointCloudDumper: + def __init__(self, extractor: IExtractor, + context: 'SuperviselyPointCloudConverter'): + self._extractor = extractor + self._context = context + + timestamp = str(datetime.now()) + self._default_user_info = { + 'labelerLogin': '', + 'createdAt': timestamp, + 'updatedAt': timestamp, + } + + self._key_id_data = { + 'tags': {}, + 'objects': {}, + 'figures': {}, + 'videos': {} + } + + self._meta_data = { + 'classes': [], + 'tags': [], + 'projectType': 'point_clouds' + } + + # Meta info contents + self._tag_meta = {} # name -> descriptor + + # Registries of item annotations + 
self._objects = {} # id -> key + + self._label_cat = extractor.categories().get( + AnnotationType.label, LabelCategories()) + + def _write_related_images(self, item): + img_dir = self._related_images_dir + + for img in item.related_images: + name = osp.splitext(osp.basename(img.path))[0] + img_path = osp.join(img_dir, item.id + '_pcd', + name + self._find_image_ext(img)) + if img.has_data: + img.save(img_path) + + img_data = { + 'name': osp.basename(img_path), + 'meta': { + 'sensorsData': { + 'extrinsicMatrix': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + 'intrinsicMatrix': [0, 0, 0, 0, 0, 0, 0, 0, 0] + } + } + } + + with open(osp.join(img_dir, img_path + '.json'), + 'w', encoding='utf-8') as f: + json.dump(img_data, f, ensure_ascii=False, indent=4) + + def _write_pcd(self, item): + self._context._save_point_cloud(item, basedir=self._point_cloud_dir) + + def _write_meta(self): + for tag in self._tag_meta.values(): + if tag['value_type'] is None: + tag['value_type'] = 'any_string' + tag['classes'] = list(tag['classes']) + self._meta_data['tags'] = list(self._tag_meta.values()) + + with open(osp.join(self._save_dir, PointCloudPath.META_FILE), + 'w', encoding='utf-8') as f: + json.dump(self._meta_data, f, ensure_ascii=False, indent=4) + + def _write_key_id(self): + objects = self._objects + key_id_data = self._key_id_data + + key_id_data['objects'] = { v: k for k, v in objects.items() } + + with open(osp.join(self._save_dir, PointCloudPath.KEY_ID_FILE), + 'w', encoding='utf-8') as f: + json.dump(key_id_data, f, ensure_ascii=False, indent=4) + + def _write_item_annotations(self, item): + key_id_data = self._key_id_data + + item_id = cast(item.attributes.get('frame'), int) + if item_id is None or self._context._reindex: + item_id = len(key_id_data['videos']) + 1 + + item_key = str(uuid.uuid4()) + key_id_data['videos'][item_key] = item_id + + item_user_info = {k: item.attributes.get(k, default_v) + for k, default_v in self._default_user_info.items()} + + item_ann_data = { + 
'description': item.attributes.get('description', ''), + 'key': item_key, + 'tags': [], + 'objects': [], + 'figures': [], + } + self._export_item_attributes(item, item_ann_data, item_user_info) + self._export_item_annotations(item, item_ann_data, item_user_info) + + ann_path = osp.join(self._ann_dir, item.id + '.pcd.json') + os.makedirs(osp.dirname(ann_path), exist_ok=True) + with open(ann_path,'w', encoding='utf-8') as f: + json.dump(item_ann_data, f, ensure_ascii=False, indent=4) + + def _export_item_attributes(self, item, item_ann_data, item_user_info): + for attr_name, attr_value in item.attributes.items(): + if attr_name in PointCloudPath.SPECIAL_ATTRS: + continue + + attr_value = self._encode_attr_value(attr_value) + + tag = self._register_tag(attr_name, value=attr_value, + applicable_type='imagesOnly') + + if tag['applicable_type'] != 'imagesOnly': + tag['applicable_type'] = 'all' + + value_type = self._define_attr_type(attr_value) + if tag['value_type'] is None: + tag['value_type'] = value_type + elif tag['value_type'] != value_type: + raise Exception("Item %s: mismatching " + "value types for tag %s: %s vs %s" % \ + (item.id, attr_name, tag['value_type'], value_type)) + + tag_key = str(uuid.uuid4()) + item_ann_data['tags'].append({ + 'key': tag_key, + 'name': attr_name, + 'value': attr_value, + **item_user_info, + }) + + # only item attributes are listed in the key_id file + # meta tag ids have no relation to key_id tag ids! 
+ tag_id = len(self._key_id_data['tags']) + 1 + self._key_id_data['tags'][tag_key] = tag_id + + def _export_item_annotations(self, item, item_ann_data, item_user_info): + objects = self._objects + tags = self._tag_meta + label_cat = self._label_cat + key_id_data = self._key_id_data + + image_objects = set() + for ann in item.annotations: + if not ann.type == AnnotationType.cuboid_3d: + continue + + obj_id = cast(ann.attributes.get('track_id', ann.id), int) + if obj_id is None: + # should not be affected by reindex + # because it is used to match figures, + # including different frames + obj_id = len(self._objects) + 1 + + object_key = objects.setdefault(obj_id, str(uuid.uuid4())) + object_label = label_cat[ann.label].name + if obj_id not in image_objects: + ann_user_info = {k: ann.attributes.get(k, default_v) + for k, default_v in item_user_info.items()} + + obj_ann_data = { + 'key': object_key, + 'classTitle': object_label, + 'tags': [], + 'objects': [], + 'figures': [], + **ann_user_info, + } + + for attr_name, attr_value in ann.attributes.items(): + if attr_name in PointCloudPath.SPECIAL_ATTRS: + continue + + attr_value = self._encode_attr_value(attr_value) + + tag = tags.get(attr_name) + if tag is None: + if self._context._allow_undeclared_attrs: + tag = self._register_tag(attr_name, + applicable_type='objectsOnly') + tags[attr_name] = tag + else: + log.warning("Item %s: skipping undeclared " + "attribute '%s' for label '%s' " + "(allow with --allow-undeclared-attrs option)", + item.id, attr_name, object_label) + continue + + if tag['applicable_type'] == 'imagesOnly': + tag['applicable_type'] = 'all' + elif tag['applicable_type'] == 'objectsOnly' and \ + tag['classes']: + tag['classes'].add(object_label) + + value_type = self._define_attr_type(attr_value) + if tag['value_type'] is None: + tag['value_type'] = value_type + elif tag['value_type'] != value_type: + raise Exception("Item %s: mismatching " + "value types for tag %s: %s vs %s" % \ + (item.id, 
attr_name, tag['value_type'], value_type)) + + tag_key = str(uuid.uuid4()) + obj_ann_data['tags'].append({ + 'key': tag_key, + 'name': attr_name, + 'value': attr_value, + **ann_user_info, + }) + + item_ann_data['objects'].append(obj_ann_data) + + image_objects.add(obj_id) + + figure_key = str(uuid.uuid4()) + item_ann_data['figures'].append({ + 'key': figure_key, + 'objectKey': object_key, + 'geometryType': 'cuboid_3d', + 'geometry': { + 'position': { + 'x': float(ann.position[0]), + 'y': float(ann.position[1]), + 'z': float(ann.position[2]), + }, + 'rotation': { + 'x': float(ann.rotation[0]), + 'y': float(ann.rotation[1]), + 'z': float(ann.rotation[2]), + }, + 'dimensions': { + 'x': float(ann.scale[0]), + 'y': float(ann.scale[1]), + 'z': float(ann.scale[2]), + } + }, + **ann_user_info, + }) + figure_id = ann.id + if self._context._reindex or figure_id is None: + figure_id = len(key_id_data['figures']) + 1 + key_id_data['figures'][figure_key] = figure_id + + @staticmethod + def _encode_attr_value(v): + if v is True or v is False: # use is to check the type too + v = str(v).lower() + return v + + @staticmethod + def _define_attr_type(v): + if isinstance(v, (int, float)): + t = 'any_number' + else: + t = 'any_string' + return t + + def _register_tag(self, name, **kwargs): + tag = { + 'name': name, + 'value_type': None, + 'color': '', + 'id': len(self._tag_meta) + 1, + 'hotkey': '', + 'applicable_type': 'all', + 'classes': set() + } + tag.update(kwargs) + return self._tag_meta.setdefault(name, tag) + + def _make_dirs(self): + save_dir = self._context._save_dir + os.makedirs(save_dir, exist_ok=True) + self._save_dir = save_dir + + base_dir = osp.join(self._save_dir, PointCloudPath.BASE_DIR) + os.makedirs(base_dir, exist_ok=True) + + ann_dir = osp.join(base_dir, PointCloudPath.ANNNOTATION_DIR) + os.makedirs(ann_dir, exist_ok=True) + self._ann_dir = ann_dir + + point_cloud_dir = osp.join(base_dir, PointCloudPath.POINT_CLOUD_DIR) + os.makedirs(point_cloud_dir, 
exist_ok=True) + self._point_cloud_dir = point_cloud_dir + + related_images_dir = osp.join(base_dir, PointCloudPath.RELATED_IMAGES_DIR) + os.makedirs(related_images_dir, exist_ok=True) + self._related_images_dir = related_images_dir + + def _init_meta(self): + for attr in self._label_cat.attributes: + self._register_tag(attr, applicable_type='objectsOnly') + + for idx, label in enumerate(self._label_cat): + self._meta_data['classes'].append({ + 'id': idx, + 'title': label.name, + 'color': '', + 'shape': 'cuboid_3d', + 'geometry_config': {} + }) + + for attr in label.attributes: + tag = self._register_tag(attr, applicable_type='objectsOnly') + tag['classes'].add(label.name) + + def _find_image_ext(self, image): + src_ext = image.ext + return self._context._image_ext or src_ext or \ + self._context._default_image_ext + + def dump(self): + self._make_dirs() + + self._init_meta() + + for item in self._context._extractor: + if self._context._save_images: + if item.has_point_cloud: + self._write_pcd(item) + else: + log.debug("Item '%s' has no point cloud info", item.id) + + if item.related_images: + self._write_related_images(item) + else: + log.debug("Item '%s' has no related images info", item.id) + + self._write_item_annotations(item) + + self._write_meta() + self._write_key_id() + + +class SuperviselyPointCloudConverter(Converter): + NAME = 'sly_pointcloud' + DEFAULT_IMAGE_EXT = PointCloudPath.DEFAULT_IMAGE_EXT + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--reindex', action='store_true', + help="Assign new indices to frames (default: %(default)s)") + parser.add_argument('--allow-undeclared-attrs', action='store_true', + help="Write annotation attributes even if they are not present in " + "the input dataset metainfo (default: %(default)s)") + return parser + + def __init__(self, extractor, save_dir, reindex=False, + allow_undeclared_attrs=False, **kwargs): + 
super().__init__(extractor, save_dir, **kwargs) + + self._reindex = reindex + self._allow_undeclared_attrs = allow_undeclared_attrs + + def apply(self): + if 1 < len(self._extractor.subsets()): + log.warning("Supervisely pointcloud format supports only a single" + "subset. Subset information will be ignored on export.") + + _SuperviselyPointCloudDumper(self._extractor, self).dump() + + @classmethod + def patch(cls, dataset, patch, save_dir, **kwargs): + conv = cls(patch.as_dataset(dataset), save_dir=save_dir, **kwargs) + conv.apply() + + for (item_id, subset), status in patch.updated_items.items(): + if status != ItemStatus.removed: + item = patch.data.get(item_id, subset) + else: + item = DatasetItem(item_id, subset=subset) + + if not (status == ItemStatus.removed or not item.has_point_cloud): + continue + + pcd_name = conv._make_pcd_filename(item) + + ann_path = osp.join(save_dir, PointCloudPath.BASE_DIR, + PointCloudPath.ANNNOTATION_DIR, pcd_name + '.json') + if osp.isfile(ann_path): + os.remove(ann_path) + + pcd_path = osp.join(save_dir, PointCloudPath.BASE_DIR, + PointCloudPath.POINT_CLOUD_DIR, pcd_name) + if osp.isfile(pcd_path): + os.remove(pcd_path) + + images_dir = osp.join(save_dir, PointCloudPath.BASE_DIR, + PointCloudPath.RELATED_IMAGES_DIR, + osp.splitext(pcd_name)[0] + '_pcd') + if osp.isdir(images_dir): + shutil.rmtree(images_dir) \ No newline at end of file diff --git a/datumaro/plugins/sly_pointcloud_format/extractor.py b/datumaro/plugins/sly_pointcloud_format/extractor.py new file mode 100644 index 000000000000..ef493d0c3a69 --- /dev/null +++ b/datumaro/plugins/sly_pointcloud_format/extractor.py @@ -0,0 +1,185 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import iglob +import json +import os.path as osp + +from datumaro.components.extractor import ( + AnnotationType, Cuboid3d, DatasetItem, Importer, LabelCategories, + SourceExtractor, +) +from datumaro.util.image import find_images + +from .format 
import PointCloudPath + + +class SuperviselyPointCloudExtractor(SourceExtractor): + NAME = 'sly_pointcloud' + _SUPPORTED_SHAPES = 'cuboid' + + def __init__(self, path, subset=None): + if not osp.isfile(path): + raise FileNotFoundError("Expected a path to 'meta.json', " + "got '%s'" % path) + + rootdir = osp.abspath(osp.dirname(path)) + self._rootdir = rootdir + + super().__init__(subset=subset) + + items, categories = self._parse(rootdir) + self._items = list(self._load_items(items).values()) + self._categories = categories + + @classmethod + def _parse(cls, rootpath): + with open(osp.join(rootpath, PointCloudPath.KEY_ID_FILE), + encoding='utf-8') as f: + mapping = json.load(f) + + with open(osp.join(rootpath, PointCloudPath.META_FILE), + encoding='utf-8') as f: + meta = json.load(f) + + label_cat = LabelCategories() + for label in meta.get('classes', []): + label_cat.add(label['title']) + + tags = {} + for tag in meta.get('tags', []): + # See reference at: + # https://github.com/supervisely/supervisely/blob/047e52ebe407cfee61464c1bd0beb9c906892253/supervisely_lib/annotation/tag_meta.py#L139 + tags[tag['name']] = tag + + applicable_to = tag.get('applicable_type', 'all') + if applicable_to == 'imagesOnly': + continue # an image attribute + elif applicable_to not in {'all', 'objectsOnly'}: + raise Exception("Unexpected tag 'applicable_type' value '%s'" % \ + applicable_to) + + applicable_classes = tag.get('classes', []) + if not applicable_classes: + label_cat.attributes.add(tag['name']) + else: + for label_name in applicable_classes: + _, label = label_cat.find(label_name) + if label is None: + raise Exception("Unknown class for tag '%s'" % \ + label_name) + + label.attributes.add(tag['name']) + + categories = {AnnotationType.label: label_cat} + + def _get_label_attrs(label_id): + attrs = set(label_cat.attributes) + attrs.update(label_cat[label_id].attributes) + return attrs + + def _parse_tag(tag): + if tag['value'] == 'true': + value = True + elif tag['value'] == 
'false': + value = False + else: + value = tag['value'] + return value + + ann_dir = osp.join(rootpath, + PointCloudPath.BASE_DIR, PointCloudPath.ANNNOTATION_DIR) + items = {} + for ann_file in iglob(osp.join(ann_dir, '**', '*.json'), recursive=True): + with open(ann_file, encoding='utf-8') as f: + ann_data = json.load(f) + + objects = {} + for obj in ann_data['objects']: + obj['id'] = mapping['objects'][obj['key']] + objects[obj['key']] = obj + + frame_attributes = {'description': ann_data.get('description', '')} + for tag in ann_data['tags']: + frame_attributes[tag['name']] = _parse_tag(tag) + + frame = mapping['videos'][ann_data['key']] + frame_desc = items.setdefault(frame, { + 'name': osp.splitext(osp.relpath(ann_file, ann_dir))[0], + 'annotations': [], + 'attributes': frame_attributes, + }) + + for figure in ann_data['figures']: + geometry = { + dst_field: [float(figure['geometry'][src_field][axis]) + for axis in ['x', 'y', 'z'] + ] + for src_field, dst_field in { + 'position': 'position', + 'rotation': 'rotation', + 'dimensions': 'scale' + }.items() + } + + ann_id = mapping['figures'][figure['key']] + + obj = objects[figure['objectKey']] + label = categories[AnnotationType.label].find( + obj['classTitle'])[0] + + attributes = {} + attributes['track_id'] = obj['id'] + for tag in obj.get('tags', []): + attributes[tag['name']] = _parse_tag(tag) + for attr in _get_label_attrs(label): + if attr in attributes: + continue + if tags[attr]['value_type'] == 'any_string': + value = '' + elif tags[attr]['value_type'] == 'oneof_string': + value = (tags[attr]['values'] or [''])[0] + elif tags[attr]['value_type'] == 'any_number': + value = 0 + else: + value = None + attributes[attr] = value + + shape = Cuboid3d(**geometry, label=label, + id=ann_id, attributes=attributes) + + frame_desc['annotations'].append(shape) + + return items, categories + + def _load_items(self, parsed): + for frame_id, frame_desc in parsed.items(): + pcd_name = frame_desc['name'] + name = 
osp.splitext(pcd_name)[0] + pcd_path = osp.join(self._rootdir, PointCloudPath.BASE_DIR, + PointCloudPath.POINT_CLOUD_DIR, pcd_name) + assert pcd_path.endswith('.pcd'), pcd_path + + related_images_dir = osp.join(self._rootdir, + PointCloudPath.BASE_DIR, + PointCloudPath.RELATED_IMAGES_DIR, name + '_pcd') + related_images = None + if osp.isdir(related_images_dir): + related_images = find_images(related_images_dir) + + parsed[frame_id] = DatasetItem(id=name, subset=self._subset, + point_cloud=pcd_path, related_images=related_images, + annotations=frame_desc.get('annotations'), + attributes={'frame': int(frame_id), **frame_desc['attributes']}) + + return parsed + + +class SuperviselyPointCloudImporter(Importer): + NAME = 'sly_pointcloud' + + @classmethod + def find_sources(cls, path): + return cls._find_sources_recursive(path, '.json', 'sly_pointcloud', + filename='meta') diff --git a/datumaro/plugins/sly_pointcloud_format/format.py b/datumaro/plugins/sly_pointcloud_format/format.py new file mode 100644 index 000000000000..03dfc8ecf08d --- /dev/null +++ b/datumaro/plugins/sly_pointcloud_format/format.py @@ -0,0 +1,18 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +class PointCloudPath: + BASE_DIR = 'ds0' + ANNNOTATION_DIR = 'ann' + + DEFAULT_IMAGE_EXT = '.jpg' + + POINT_CLOUD_DIR = 'pointcloud' + RELATED_IMAGES_DIR = 'related_images' + + KEY_ID_FILE = 'key_id_map.json' + META_FILE = 'meta.json' + + SPECIAL_ATTRS = {'description', 'track_id', + 'labelerLogin', 'createdAt', 'updatedAt', 'frame'} \ No newline at end of file diff --git a/datumaro/plugins/splitter.py b/datumaro/plugins/splitter.py index 5d414e1333d7..1a5a1b4d8afc 100644 --- a/datumaro/plugins/splitter.py +++ b/datumaro/plugins/splitter.py @@ -2,22 +2,26 @@ # # SPDX-License-Identifier: MIT +from enum import Enum, auto +from math import gcd +import copy import logging as log + import numpy as np -import copy -from math import gcd -from enum import Enum -from 
datumaro.components.extractor import (Transform, AnnotationType, - DEFAULT_SUBSET_NAME) from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, Transform, +) from datumaro.util import cast NEAR_ZERO = 1e-7 -SplitTask = Enum( - "split", ["classification", "detection", "segmentation", "reid"] -) +class SplitTask(Enum): + classification = auto() + detection = auto() + segmentation = auto() + reid = auto() class Split(Transform, CliPlugin): diff --git a/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/plugins/tf_detection_api_format/converter.py index 2b0ca88b8776..c913004179c7 100644 --- a/datumaro/plugins/tf_detection_api_format/converter.py +++ b/datumaro/plugins/tf_detection_api_format/converter.py @@ -3,25 +3,25 @@ # # SPDX-License-Identifier: MIT -import codecs from collections import OrderedDict +import codecs import hashlib import logging as log import os import os.path as osp import string -from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME, - LabelCategories -) from datumaro.components.converter import Converter -from datumaro.util.image import encode_image, ByteImage -from datumaro.util.annotation_util import (max_bbox, - find_group_leader, find_instances) +from datumaro.components.extractor import AnnotationType, LabelCategories +from datumaro.util.annotation_util import ( + find_group_leader, find_instances, max_bbox, +) +from datumaro.util.image import ByteImage, encode_image from datumaro.util.mask_tools import merge_masks from datumaro.util.tf_util import import_tf as _import_tf from .format import DetectionApiPath + tf = _import_tf() @@ -196,7 +196,7 @@ def _make_tf_example(self, item): return tf_example - def _save_image(self, item, path=None): + def _save_image(self, item, path=None): # pylint: disable=arguments-differ src_ext = item.image.ext.lower() dst_ext = osp.splitext(osp.basename(path))[1].lower() fmt = 
DetectionApiPath.IMAGE_EXT_FORMAT.get(dst_ext, '') diff --git a/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/plugins/tf_detection_api_format/extractor.py index 9001a61cbf7f..02d973b4c208 100644 --- a/datumaro/plugins/tf_detection_api_format/extractor.py +++ b/datumaro/plugins/tf_detection_api_format/extractor.py @@ -4,17 +4,20 @@ # SPDX-License-Identifier: MIT from collections import OrderedDict -import numpy as np import os.path as osp import re -from datumaro.components.extractor import (SourceExtractor, DatasetItem, - AnnotationType, Bbox, Mask, LabelCategories, Importer +import numpy as np + +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Importer, LabelCategories, Mask, + SourceExtractor, ) from datumaro.util.image import ByteImage, decode_image, lazy_image from datumaro.util.tf_util import import_tf as _import_tf from .format import DetectionApiPath + tf = _import_tf() @@ -186,4 +189,4 @@ def _parse_tfrecord_file(cls, filepath, subset, images_dir): class TfDetectionApiImporter(Importer): @classmethod def find_sources(cls, path): - return cls._find_sources_recursive(path, '.tfrecord', 'tf_detection_api') \ No newline at end of file + return cls._find_sources_recursive(path, '.tfrecord', 'tf_detection_api') diff --git a/datumaro/plugins/transforms.py b/datumaro/plugins/transforms.py index dfecb25a990c..f1e6e362ea16 100644 --- a/datumaro/plugins/transforms.py +++ b/datumaro/plugins/transforms.py @@ -1,9 +1,9 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT from collections import Counter -from enum import Enum +from enum import Enum, auto from itertools import chain import logging as log import os.path as osp @@ -12,17 +12,18 @@ import pycocotools.mask as mask_utils -from datumaro.components.extractor import (Transform, AnnotationType, - RleMask, Polygon, Bbox, Label, DEFAULT_SUBSET_NAME, - LabelCategories, MaskCategories, 
PointsCategories -) from datumaro.components.cli_plugin import CliPlugin -import datumaro.util.mask_tools as mask_tools -from datumaro.util import parse_str_enum_value, NOTSET +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, Bbox, ItemTransform, Label, + LabelCategories, MaskCategories, PointsCategories, Polygon, RleMask, + Transform, +) +from datumaro.util import NOTSET, parse_str_enum_value from datumaro.util.annotation_util import find_group_leader, find_instances +import datumaro.util.mask_tools as mask_tools -class CropCoveredSegments(Transform, CliPlugin): +class CropCoveredSegments(ItemTransform, CliPlugin): def transform_item(self, item): annotations = [] segments = [] @@ -85,7 +86,7 @@ def _make_group_id(anns, ann_id): max_gid = max(anns, default=0, key=lambda x: x.group) return max_gid + 1 -class MergeInstanceSegments(Transform, CliPlugin): +class MergeInstanceSegments(ItemTransform, CliPlugin): """ Replaces instance masks and, optionally, polygons with a single mask. 
""" @@ -169,7 +170,7 @@ def find_instances(annotations): return find_instances(a for a in annotations if a.type in {AnnotationType.polygon, AnnotationType.mask}) -class PolygonsToMasks(Transform, CliPlugin): +class PolygonsToMasks(ItemTransform, CliPlugin): def transform_item(self, item): annotations = [] for ann in item.annotations: @@ -190,7 +191,7 @@ def convert_polygon(polygon, img_h, img_w): return RleMask(rle=rle, label=polygon.label, z_order=polygon.z_order, id=polygon.id, attributes=polygon.attributes, group=polygon.group) -class BoxesToMasks(Transform, CliPlugin): +class BoxesToMasks(ItemTransform, CliPlugin): def transform_item(self, item): annotations = [] for ann in item.annotations: @@ -211,7 +212,7 @@ def convert_bbox(bbox, img_h, img_w): return RleMask(rle=rle, label=bbox.label, z_order=bbox.z_order, id=bbox.id, attributes=bbox.attributes, group=bbox.group) -class MasksToPolygons(Transform, CliPlugin): +class MasksToPolygons(ItemTransform, CliPlugin): def transform_item(self, item): annotations = [] for ann in item.annotations: @@ -238,7 +239,7 @@ def convert_mask(mask): for p in polygons ] -class ShapesToBoxes(Transform, CliPlugin): +class ShapesToBoxes(ItemTransform, CliPlugin): def transform_item(self, item): annotations = [] for ann in item.annotations: @@ -274,7 +275,7 @@ def __iter__(self): for i, item in enumerate(self._extractor): yield self.wrap_item(item, id=i + self._start) -class MapSubsets(Transform, CliPlugin): +class MapSubsets(ItemTransform, CliPlugin): @staticmethod def _mapping_arg(s): parts = s.split(':') @@ -387,7 +388,7 @@ def __iter__(self): for i, item in enumerate(self._extractor): yield self.wrap_item(item, subset=self._find_split(i)) -class IdFromImageName(Transform, CliPlugin): +class IdFromImageName(ItemTransform, CliPlugin): def transform_item(self, item): if item.has_image and item.image.path: name = osp.splitext(osp.basename(item.image.path))[0] @@ -397,8 +398,8 @@ def transform_item(self, item): "item has no image 
info" % item.id) return item -class Rename(Transform, CliPlugin): - """ +class Rename(ItemTransform, CliPlugin): + r""" Renames items in the dataset. Supports regular expressions. The first character in the expression is a delimiter for the pattern and replacement parts. Replacement part can also @@ -408,7 +409,7 @@ class Rename(Transform, CliPlugin): - Replace 'pattern' with 'replacement':|n |s|srename -e '|pattern|replacement|'|n - Remove 'frame_' from item ids:|n - |s|srename -e '|frame_(\d+)|\\1|' + |s|srename -e '|frame_(\d+)|\1|' """ @classmethod @@ -431,7 +432,7 @@ def transform_item(self, item): return self.wrap_item(item, id=self._re.sub(self._sub, item.id) \ .format(item=item)) -class RemapLabels(Transform, CliPlugin): +class RemapLabels(ItemTransform, CliPlugin): """ Changes labels in the dataset.|n |n @@ -454,7 +455,9 @@ class RemapLabels(Transform, CliPlugin): |s|sremap_labels -l person:car -l bus:bus -l cat:dog --default delete """ - DefaultAction = Enum('DefaultAction', ['keep', 'delete']) + class DefaultAction(Enum): + keep = auto() + delete = auto() @staticmethod def _split_arg(s): @@ -559,7 +562,7 @@ def transform_item(self, item): annotations.append(ann.wrap()) return item.wrap(annotations=annotations) -class AnnsToLabels(Transform, CliPlugin): +class AnnsToLabels(ItemTransform, CliPlugin): """ Collects all labels from annotations (of all types) and transforms them into a set of annotations of type Label diff --git a/datumaro/plugins/validators.py b/datumaro/plugins/validators.py new file mode 100644 index 000000000000..c994724519ae --- /dev/null +++ b/datumaro/plugins/validators.py @@ -0,0 +1,1172 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from copy import deepcopy + +import numpy as np + +from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.errors import ( + AttributeDefinedButNotFound, FarFromAttrMean, FarFromLabelMean, + FewSamplesInAttribute, FewSamplesInLabel, 
ImbalancedAttribute, + ImbalancedDistInAttribute, ImbalancedDistInLabel, ImbalancedLabels, + InvalidValue, LabelDefinedButNotFound, MissingAnnotation, MissingAttribute, + MissingLabelCategories, MultiLabelAnnotations, NegativeLength, + OnlyOneAttributeValue, OnlyOneLabel, UndefinedAttribute, UndefinedLabel, +) +from datumaro.components.extractor import AnnotationType, LabelCategories +from datumaro.components.validator import Severity, TaskType, Validator +from datumaro.util import parse_str_enum_value + + +class _TaskValidator(Validator, CliPlugin): + # statistics templates + numerical_stat_template = { + 'items_far_from_mean': {}, + 'mean': None, + 'stdev': None, + 'min': None, + 'max': None, + 'median': None, + 'histogram': { + 'bins': [], + 'counts': [], + }, + 'distribution': np.array([]) + } + + """ + A base class for task-specific validators. + + Attributes + ---------- + task_type : str or TaskType + task type (ie. classification, detection, segmentation) + """ + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-fs', '--few_samples_thr', default=1, type=int, + help="Threshold for giving a warning for minimum number of" + "samples per class") + parser.add_argument('-ir', '--imbalance_ratio_thr', default=50, type=int, + help="Threshold for giving data imbalance warning;" + "IR(imbalance ratio) = majority/minority") + parser.add_argument('-m', '--far_from_mean_thr', default=5.0, type=float, + help="Threshold for giving a warning that data is far from mean;" + "A constant used to define mean +/- k * standard deviation;") + parser.add_argument('-dr', '--dominance_ratio_thr', default=0.8, type=float, + help="Threshold for giving a warning for bounding box imbalance;" + "Dominace_ratio = ratio of Top-k bin to total in histogram;") + parser.add_argument('-k', '--topk_bins', default=0.1, type=float, + help="Ratio of bins with the highest number of data" + "to total bins in the 
histogram; [0, 1]; 0.1 = 10%;") + return parser + + def __init__(self, task_type, few_samples_thr=None, + imbalance_ratio_thr=None, far_from_mean_thr=None, + dominance_ratio_thr=None, topk_bins=None): + """ + Validator + + Parameters + --------------- + few_samples_thr: int + minimum number of samples per class + warn user when samples per class is less than threshold + imbalance_ratio_thr: int + ratio of majority attribute to minority attribute + warn user when annotations are unevenly distributed + far_from_mean_thr: float + constant used to define mean +/- m * stddev + warn user when there are too big or small values + dominance_ratio_thr: float + ratio of Top-k bin to total + warn user when dominance ratio is over threshold + topk_bins: float + ratio of selected bins with most item number to total bins + warn user when values are not evenly distributed + """ + self.task_type = parse_str_enum_value(task_type, TaskType, + default=TaskType.classification) + + if self.task_type == TaskType.classification: + self.ann_types = {AnnotationType.label} + self.str_ann_type = "label" + elif self.task_type == TaskType.detection: + self.ann_types = {AnnotationType.bbox} + self.str_ann_type = "bounding box" + elif self.task_type == TaskType.segmentation: + self.ann_types = {AnnotationType.mask, AnnotationType.polygon} + self.str_ann_type = "mask or polygon" + + self.few_samples_thr = few_samples_thr + self.imbalance_ratio_thr = imbalance_ratio_thr + self.far_from_mean_thr = far_from_mean_thr + self.dominance_thr = dominance_ratio_thr + self.topk_bins_ratio = topk_bins + + def _compute_common_statistics(self, dataset): + defined_attr_template = { + 'items_missing_attribute': [], + 'distribution': {} + } + undefined_attr_template = { + 'items_with_undefined_attr': [], + 'distribution': {} + } + undefined_label_template = { + 'count': 0, + 'items_with_undefined_label': [], + } + + stats = { + 'label_distribution': { + 'defined_labels': {}, + 'undefined_labels': {}, + }, + 
'attribute_distribution': { + 'defined_attributes': {}, + 'undefined_attributes': {} + }, + } + stats['total_ann_count'] = 0 + stats['items_missing_annotation'] = [] + + label_dist = stats['label_distribution'] + attr_dist = stats['attribute_distribution'] + defined_label_dist = label_dist['defined_labels'] + defined_attr_dist = attr_dist['defined_attributes'] + undefined_label_dist = label_dist['undefined_labels'] + undefined_attr_dist = attr_dist['undefined_attributes'] + + label_categories = dataset.categories().get(AnnotationType.label, + LabelCategories()) + base_valid_attrs = label_categories.attributes + + for category in label_categories: + defined_label_dist[category.name] = 0 + + filtered_anns = [] + for item in dataset: + item_key = (item.id, item.subset) + annotations = [] + for ann in item.annotations: + if ann.type in self.ann_types: + annotations.append(ann) + ann_count = len(annotations) + filtered_anns.append((item_key, annotations)) + + if ann_count == 0: + stats['items_missing_annotation'].append(item_key) + stats['total_ann_count'] += ann_count + + for ann in annotations: + if not 0 <= ann.label < len(label_categories): + label_name = ann.label + + label_stats = undefined_label_dist.setdefault( + ann.label, deepcopy(undefined_label_template)) + label_stats['items_with_undefined_label'].append( + item_key) + + label_stats['count'] += 1 + valid_attrs = set() + missing_attrs = set() + else: + label_name = label_categories[ann.label].name + defined_label_dist[label_name] += 1 + + defined_attr_stats = defined_attr_dist.setdefault( + label_name, {}) + + valid_attrs = base_valid_attrs.union( + label_categories[ann.label].attributes) + ann_attrs = getattr(ann, 'attributes', {}).keys() + missing_attrs = valid_attrs.difference(ann_attrs) + + for attr in valid_attrs: + defined_attr_stats.setdefault( + attr, deepcopy(defined_attr_template)) + + for attr in missing_attrs: + attr_dets = defined_attr_stats[attr] + attr_dets['items_missing_attribute'].append( + 
item_key) + + for attr, value in ann.attributes.items(): + if attr not in valid_attrs: + undefined_attr_stats = \ + undefined_attr_dist.setdefault( + label_name, {}) + attr_dets = undefined_attr_stats.setdefault( + attr, deepcopy(undefined_attr_template)) + attr_dets['items_with_undefined_attr'].append( + item_key) + else: + attr_dets = defined_attr_stats[attr] + + attr_dets['distribution'].setdefault(str(value), 0) + attr_dets['distribution'][str(value)] += 1 + + return stats, filtered_anns + + @staticmethod + def _update_prop_distributions(curr_prop_stats, target_stats): + for prop, val in curr_prop_stats.items(): + prop_stats = target_stats[prop] + prop_dist = prop_stats['distribution'] + prop_stats['distribution'] = np.append(prop_dist, val) + + @staticmethod + def _compute_prop_stats_from_dist(dist_by_label, dist_by_attr): + for label_name, stats in dist_by_label.items(): + prop_stats_list = list(stats.values()) + attr_label = dist_by_attr.get(label_name, {}) + for vals in attr_label.values(): + for val_stats in vals.values(): + prop_stats_list += list(val_stats.values()) + + for prop_stats in prop_stats_list: + prop_dist = prop_stats.pop('distribution', []) + if len(prop_dist) > 0: + prop_stats['mean'] = np.mean(prop_dist) + prop_stats['stdev'] = np.std(prop_dist) + prop_stats['min'] = np.min(prop_dist) + prop_stats['max'] = np.max(prop_dist) + prop_stats['median'] = np.median(prop_dist) + + counts, bins = np.histogram(prop_dist) + prop_stats['histogram']['bins'] = bins.tolist() + prop_stats['histogram']['counts'] = counts.tolist() + + def _compute_far_from_mean(self, prop_stats, val, item_key, ann): + def _far_from_mean(val, mean, stdev): + thr = self.far_from_mean_thr + return val > mean + (thr * stdev) or val < mean - (thr * stdev) + + mean = prop_stats['mean'] + stdev = prop_stats['stdev'] + + if _far_from_mean(val, mean, stdev): + items_far_from_mean = prop_stats['items_far_from_mean'] + far_from_mean = items_far_from_mean.setdefault( + item_key, {}) + 
far_from_mean[ann.id] = val + + def _check_missing_label_categories(self, stats): + validation_reports = [] + + if len(stats['label_distribution']['defined_labels']) == 0: + validation_reports += self._generate_validation_report( + MissingLabelCategories, Severity.error) + + return validation_reports + + def _check_missing_annotation(self, stats): + validation_reports = [] + + items_missing = stats['items_missing_annotation'] + for item_id, item_subset in items_missing: + validation_reports += self._generate_validation_report( + MissingAnnotation, Severity.warning, item_id, item_subset, + self.str_ann_type) + + return validation_reports + + def _check_missing_attribute(self, label_name, attr_name, attr_dets): + validation_reports = [] + + items_missing_attr = attr_dets['items_missing_attribute'] + for item_id, item_subset in items_missing_attr: + details = (item_subset, label_name, attr_name) + validation_reports += self._generate_validation_report( + MissingAttribute, Severity.warning, item_id, *details) + + return validation_reports + + def _check_undefined_label(self, label_name, label_stats): + validation_reports = [] + + items_with_undefined_label = label_stats['items_with_undefined_label'] + for item_id, item_subset in items_with_undefined_label: + details = (item_subset, label_name) + validation_reports += self._generate_validation_report( + UndefinedLabel, Severity.error, item_id, *details) + + return validation_reports + + def _check_undefined_attribute(self, label_name, attr_name, attr_dets): + validation_reports = [] + + items_with_undefined_attr = attr_dets['items_with_undefined_attr'] + for item_id, item_subset in items_with_undefined_attr: + details = (item_subset, label_name, attr_name) + validation_reports += self._generate_validation_report( + UndefinedAttribute, Severity.error, item_id, *details) + + return validation_reports + + def _check_label_defined_but_not_found(self, stats): + validation_reports = [] + count_by_defined_labels = 
stats['label_distribution']['defined_labels'] + labels_not_found = [label_name + for label_name, count in count_by_defined_labels.items() + if count == 0] + + for label_name in labels_not_found: + validation_reports += self._generate_validation_report( + LabelDefinedButNotFound, Severity.warning, label_name) + + return validation_reports + + def _check_attribute_defined_but_not_found(self, label_name, attr_stats): + validation_reports = [] + attrs_not_found = [attr_name + for attr_name, attr_dets in attr_stats.items() + if len(attr_dets['distribution']) == 0] + + for attr_name in attrs_not_found: + details = (label_name, attr_name) + validation_reports += self._generate_validation_report( + AttributeDefinedButNotFound, Severity.warning, *details) + + return validation_reports + + def _check_only_one_label(self, stats): + validation_reports = [] + count_by_defined_labels = stats['label_distribution']['defined_labels'] + labels_found = [label_name + for label_name, count in count_by_defined_labels.items() + if count > 0] + + if len(labels_found) == 1: + validation_reports += self._generate_validation_report( + OnlyOneLabel, Severity.warning, labels_found[0]) + + return validation_reports + + def _check_only_one_attribute_value(self, label_name, attr_name, attr_dets): + validation_reports = [] + values = list(attr_dets['distribution'].keys()) + + if len(values) == 1: + details = (label_name, attr_name, values[0]) + validation_reports += self._generate_validation_report( + OnlyOneAttributeValue, Severity.warning, *details) + + return validation_reports + + def _check_few_samples_in_label(self, stats): + validation_reports = [] + thr = self.few_samples_thr + + defined_label_dist = stats['label_distribution']['defined_labels'] + labels_with_few_samples = [(label_name, count) + for label_name, count in defined_label_dist.items() + if 0 < count <= thr] + + for label_name, count in labels_with_few_samples: + validation_reports += self._generate_validation_report( + 
FewSamplesInLabel, Severity.warning, label_name, count) + + return validation_reports + + def _check_few_samples_in_attribute(self, label_name, + attr_name, attr_dets): + validation_reports = [] + thr = self.few_samples_thr + + attr_values_with_few_samples = [(attr_value, count) + for attr_value, count in attr_dets['distribution'].items() + if count <= thr] + + for attr_value, count in attr_values_with_few_samples: + details = (label_name, attr_name, attr_value, count) + validation_reports += self._generate_validation_report( + FewSamplesInAttribute, Severity.warning, *details) + + return validation_reports + + def _check_imbalanced_labels(self, stats): + validation_reports = [] + thr = self.imbalance_ratio_thr + + defined_label_dist = stats['label_distribution']['defined_labels'] + count_by_defined_labels = [count + for label, count in defined_label_dist.items()] + + if len(count_by_defined_labels) == 0: + return validation_reports + + count_max = np.max(count_by_defined_labels) + count_min = np.min(count_by_defined_labels) + balance = count_max / count_min if count_min > 0 else float('inf') + if balance >= thr: + validation_reports += self._generate_validation_report( + ImbalancedLabels, Severity.warning) + + return validation_reports + + def _check_imbalanced_attribute(self, label_name, attr_name, attr_dets): + validation_reports = [] + thr = self.imbalance_ratio_thr + + count_by_defined_attr = list(attr_dets['distribution'].values()) + if len(count_by_defined_attr) == 0: + return validation_reports + + count_max = np.max(count_by_defined_attr) + count_min = np.min(count_by_defined_attr) + balance = count_max / count_min if count_min > 0 else float('inf') + if balance >= thr: + validation_reports += self._generate_validation_report( + ImbalancedAttribute, Severity.warning, label_name, attr_name) + + return validation_reports + + def _check_imbalanced_dist_in_label(self, label_name, label_stats): + validation_reports = [] + thr = self.dominance_thr + topk_ratio = 
self.topk_bins_ratio + + for prop, prop_stats in label_stats.items(): + value_counts = prop_stats['histogram']['counts'] + n_bucket = len(value_counts) + if n_bucket < 2: + continue + topk = max(1, int(np.around(n_bucket * topk_ratio))) + + if topk > 0: + topk_values = np.sort(value_counts)[-topk:] + ratio = np.sum(topk_values) / np.sum(value_counts) + if ratio >= thr: + details = (label_name, f"{self.str_ann_type} {prop}") + validation_reports += self._generate_validation_report( + ImbalancedDistInLabel, Severity.warning, *details) + + return validation_reports + + def _check_imbalanced_dist_in_attr(self, label_name, attr_name, attr_stats): + validation_reports = [] + thr = self.dominance_thr + topk_ratio = self.topk_bins_ratio + + for attr_value, value_stats in attr_stats.items(): + for prop, prop_stats in value_stats.items(): + value_counts = prop_stats['histogram']['counts'] + n_bucket = len(value_counts) + if n_bucket < 2: + continue + topk = max(1, int(np.around(n_bucket * topk_ratio))) + + if topk > 0: + topk_values = np.sort(value_counts)[-topk:] + ratio = np.sum(topk_values) / np.sum(value_counts) + if ratio >= thr: + details = (label_name, attr_name, attr_value, + f"{self.str_ann_type} {prop}") + validation_reports += self._generate_validation_report( + ImbalancedDistInAttribute, + Severity.warning, + *details + ) + + return validation_reports + + def _check_invalid_value(self, stats): + validation_reports = [] + + items_w_invalid_val = stats['items_with_invalid_value'] + for item_dets, anns_w_invalid_val in items_w_invalid_val.items(): + item_id, item_subset = item_dets + for ann_id, props in anns_w_invalid_val.items(): + for prop in props: + details = (item_subset, ann_id, + f"{self.str_ann_type} {prop}") + validation_reports += self._generate_validation_report( + InvalidValue, Severity.error, item_id, *details) + + return validation_reports + + def _check_far_from_label_mean(self, label_name, label_stats): + validation_reports = [] + + for prop, 
prop_stats in label_stats.items(): + items_far_from_mean = prop_stats['items_far_from_mean'] + if prop_stats['mean'] is not None: + mean = round(prop_stats['mean'], 2) + + for item_dets, anns_far in items_far_from_mean.items(): + item_id, item_subset = item_dets + for ann_id, val in anns_far.items(): + val = round(val, 2) + details = (item_subset, label_name, ann_id, + f"{self.str_ann_type} {prop}", mean, val) + validation_reports += self._generate_validation_report( + FarFromLabelMean, Severity.warning, item_id, *details) + + return validation_reports + + def _check_far_from_attr_mean(self, label_name, attr_name, attr_stats): + validation_reports = [] + + for attr_value, value_stats in attr_stats.items(): + for prop, prop_stats in value_stats.items(): + items_far_from_mean = prop_stats['items_far_from_mean'] + if prop_stats['mean'] is not None: + mean = round(prop_stats['mean'], 2) + + for item_dets, anns_far in items_far_from_mean.items(): + item_id, item_subset = item_dets + for ann_id, val in anns_far.items(): + val = round(val, 2) + details = (item_subset, label_name, ann_id, attr_name, + attr_value, f"{self.str_ann_type} {prop}", + mean, val) + validation_reports += self._generate_validation_report( + FarFromAttrMean, + Severity.warning, + item_id, + *details + ) + + return validation_reports + + def _generate_validation_report(self, error, *args, **kwargs): + return [error(*args, **kwargs)] + + +class ClassificationValidator(_TaskValidator): + """ + A specific validator class for classification task. 
+ """ + + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.classification, + few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + + def _check_multi_label_annotations(self, stats): + validation_reports = [] + + items_with_multiple_labels = stats['items_with_multiple_labels'] + for item_id, item_subset in items_with_multiple_labels: + validation_reports += self._generate_validation_report( + MultiLabelAnnotations, Severity.error, item_id, item_subset) + + return validation_reports + + def compute_statistics(self, dataset): + """ + Computes statistics of the dataset for the classification task. + + Parameters + ---------- + dataset : IDataset object + + Returns + ------- + stats (dict): A dict object containing statistics of the dataset. + """ + + stats, filtered_anns = self._compute_common_statistics(dataset) + + stats['items_with_multiple_labels'] = [] + + for item_key, anns in filtered_anns: + ann_count = len(anns) + if ann_count > 1: + stats['items_with_multiple_labels'].append(item_key) + + return stats + + def generate_reports(self, stats): + """ + Validates the dataset for classification tasks based on its statistics. + + Parameters + ---------- + dataset : IDataset object + stats: Dict object + + Returns + ------- + reports (list): List of validation reports (DatasetValidationError). 
+ """ + + reports = [] + + reports += self._check_missing_label_categories(stats) + reports += self._check_missing_annotation(stats) + reports += self._check_multi_label_annotations(stats) + reports += self._check_label_defined_but_not_found(stats) + reports += self._check_only_one_label(stats) + reports += self._check_few_samples_in_label(stats) + reports += self._check_imbalanced_labels(stats) + + label_dist = stats['label_distribution'] + attr_dist = stats['attribute_distribution'] + defined_attr_dist = attr_dist['defined_attributes'] + undefined_label_dist = label_dist['undefined_labels'] + undefined_attr_dist = attr_dist['undefined_attributes'] + + defined_labels = defined_attr_dist.keys() + for label_name in defined_labels: + attr_stats = defined_attr_dist[label_name] + + reports += self._check_attribute_defined_but_not_found( + label_name, attr_stats) + + for attr_name, attr_dets in attr_stats.items(): + reports += self._check_few_samples_in_attribute( + label_name, attr_name, attr_dets) + reports += self._check_imbalanced_attribute( + label_name, attr_name, attr_dets) + reports += self._check_only_one_attribute_value( + label_name, attr_name, attr_dets) + reports += self._check_missing_attribute( + label_name, attr_name, attr_dets) + + for label_name, label_stats in undefined_label_dist.items(): + reports += self._check_undefined_label(label_name, label_stats) + + for label_name, attr_stats in undefined_attr_dist.items(): + for attr_name, attr_dets in attr_stats.items(): + reports += self._check_undefined_attribute( + label_name, attr_name, attr_dets) + + return reports + + +class DetectionValidator(_TaskValidator): + """ + A specific validator class for detection task. 
+ """ + + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.detection, + few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + + def _check_negative_length(self, stats): + validation_reports = [] + + items_w_neg_len = stats['items_with_negative_length'] + for item_dets, anns_w_neg_len in items_w_neg_len.items(): + item_id, item_subset = item_dets + for ann_id, props in anns_w_neg_len.items(): + for prop, val in props.items(): + val = round(val, 2) + details = (item_subset, ann_id, + f"{self.str_ann_type} {prop}", val) + validation_reports += self._generate_validation_report( + NegativeLength, Severity.error, item_id, *details) + + return validation_reports + + def compute_statistics(self, dataset): + """ + Computes statistics of the dataset for the detection task. + + Parameters + ---------- + dataset : IDataset object + + Returns + ------- + stats (dict): A dict object containing statistics of the dataset. 
+ """ + + stats, filtered_anns = self._compute_common_statistics(dataset) + + # detection-specific + bbox_template = { + 'width': deepcopy(self.numerical_stat_template), + 'height': deepcopy(self.numerical_stat_template), + 'area(wxh)': deepcopy(self.numerical_stat_template), + 'ratio(w/h)': deepcopy(self.numerical_stat_template), + 'short': deepcopy(self.numerical_stat_template), + 'long': deepcopy(self.numerical_stat_template) + } + + stats['items_with_negative_length'] = {} + stats['items_with_invalid_value'] = {} + stats['bbox_distribution_in_label'] = {} + stats['bbox_distribution_in_attribute'] = {} + stats['bbox_distribution_in_dataset_item'] = {} + + dist_by_label = stats['bbox_distribution_in_label'] + dist_by_attr = stats['bbox_distribution_in_attribute'] + bbox_dist_in_item = stats['bbox_distribution_in_dataset_item'] + items_w_neg_len = stats['items_with_negative_length'] + items_w_invalid_val = stats['items_with_invalid_value'] + + def _generate_ann_bbox_info(_x, _y, _w, _h, area, + ratio, _short, _long): + return { + 'x': _x, + 'y': _y, + 'width': _w, + 'height': _h, + 'area(wxh)': area, + 'ratio(w/h)': ratio, + 'short': _short, + 'long': _long, + } + + def _update_bbox_stats_by_label(item_key, ann, bbox_label_stats): + bbox_has_error = False + + _x, _y, _w, _h = ann.get_bbox() + area = ann.get_area() + + if _h != 0 and _h != float('inf'): + ratio = _w / _h + else: + ratio = float('nan') + + _short = _w if _w < _h else _h + _long = _w if _w > _h else _h + + ann_bbox_info = _generate_ann_bbox_info( + _x, _y, _w, _h, area, ratio, _short, _long) + + for prop, val in ann_bbox_info.items(): + if val == float('inf') or np.isnan(val): + bbox_has_error = True + anns_w_invalid_val = items_w_invalid_val.setdefault( + item_key, {}) + invalid_props = anns_w_invalid_val.setdefault( + ann.id, []) + invalid_props.append(prop) + + for prop in ['width', 'height']: + val = ann_bbox_info[prop] + if val < 1: + bbox_has_error = True + anns_w_neg_len = 
items_w_neg_len.setdefault( + item_key, {}) + neg_props = anns_w_neg_len.setdefault(ann.id, {}) + neg_props[prop] = val + + if not bbox_has_error: + ann_bbox_info.pop('x') + ann_bbox_info.pop('y') + self._update_prop_distributions(ann_bbox_info, bbox_label_stats) + + return ann_bbox_info, bbox_has_error + + label_categories = dataset.categories().get(AnnotationType.label, + LabelCategories()) + base_valid_attrs = label_categories.attributes + + for item_key, annotations in filtered_anns: + ann_count = len(annotations) + + bbox_dist_in_item[item_key] = ann_count + + for ann in annotations: + if not 0 <= ann.label < len(label_categories): + label_name = ann.label + valid_attrs = set() + else: + label_name = label_categories[ann.label].name + valid_attrs = base_valid_attrs.union( + label_categories[ann.label].attributes) + + bbox_label_stats = dist_by_label.setdefault( + label_name, deepcopy(bbox_template)) + ann_bbox_info, bbox_has_error = \ + _update_bbox_stats_by_label( + item_key, ann, bbox_label_stats) + + for attr, value in ann.attributes.items(): + if attr in valid_attrs: + bbox_attr_label = dist_by_attr.setdefault( + label_name, {}) + bbox_attr_stats = bbox_attr_label.setdefault( + attr, {}) + bbox_val_stats = bbox_attr_stats.setdefault( + str(value), deepcopy(bbox_template)) + + if not bbox_has_error: + self._update_prop_distributions( + ann_bbox_info, bbox_val_stats) + + # Compute prop stats from distribution + self._compute_prop_stats_from_dist(dist_by_label, dist_by_attr) + + def _is_valid_ann(item_key, ann): + has_defined_label = 0 <= ann.label < len(label_categories) + if not has_defined_label: + return False + + bbox_has_neg_len = ann.id in items_w_neg_len.get( + item_key, {}) + bbox_has_invalid_val = ann.id in items_w_invalid_val.get( + item_key, {}) + return not (bbox_has_neg_len or bbox_has_invalid_val) + + def _update_props_far_from_mean(item_key, ann): + valid_attrs = base_valid_attrs.union( + label_categories[ann.label].attributes) + label_name = 
label_categories[ann.label].name + bbox_label_stats = dist_by_label[label_name] + + _x, _y, _w, _h = ann.get_bbox() + area = ann.get_area() + ratio = _w / _h + _short = _w if _w < _h else _h + _long = _w if _w > _h else _h + + ann_bbox_info = _generate_ann_bbox_info( + _x, _y, _w, _h, area, ratio, _short, _long) + ann_bbox_info.pop('x') + ann_bbox_info.pop('y') + + for prop, val in ann_bbox_info.items(): + prop_stats = bbox_label_stats[prop] + self._compute_far_from_mean(prop_stats, val, item_key, ann) + + for attr, value in ann.attributes.items(): + if attr in valid_attrs: + bbox_attr_stats = dist_by_attr[label_name][attr] + bbox_val_stats = bbox_attr_stats[str(value)] + + for prop, val in ann_bbox_info.items(): + prop_stats = bbox_val_stats[prop] + self._compute_far_from_mean(prop_stats, val, + item_key, ann) + + for item_key, annotations in filtered_anns: + for ann in annotations: + if _is_valid_ann(item_key, ann): + _update_props_far_from_mean(item_key, ann) + + return stats + + def generate_reports(self, stats): + """ + Validates the dataset for detection tasks based on its statistics. + + Parameters + ---------- + dataset : IDataset object + stats : Dict object + + Returns + ------- + reports (list): List of validation reports (DatasetValidationError). 
+ """ + + reports = [] + + reports += self._check_missing_label_categories(stats) + reports += self._check_missing_annotation(stats) + reports += self._check_label_defined_but_not_found(stats) + reports += self._check_only_one_label(stats) + reports += self._check_few_samples_in_label(stats) + reports += self._check_imbalanced_labels(stats) + reports += self._check_negative_length(stats) + reports += self._check_invalid_value(stats) + + label_dist = stats['label_distribution'] + attr_dist = stats['attribute_distribution'] + defined_attr_dist = attr_dist['defined_attributes'] + undefined_label_dist = label_dist['undefined_labels'] + undefined_attr_dist = attr_dist['undefined_attributes'] + + dist_by_label = stats['bbox_distribution_in_label'] + dist_by_attr = stats['bbox_distribution_in_attribute'] + + defined_labels = defined_attr_dist.keys() + for label_name in defined_labels: + attr_stats = defined_attr_dist[label_name] + + reports += self._check_attribute_defined_but_not_found( + label_name, attr_stats) + + for attr_name, attr_dets in attr_stats.items(): + reports += self._check_few_samples_in_attribute( + label_name, attr_name, attr_dets) + reports += self._check_imbalanced_attribute( + label_name, attr_name, attr_dets) + reports += self._check_only_one_attribute_value( + label_name, attr_name, attr_dets) + reports += self._check_missing_attribute( + label_name, attr_name, attr_dets) + + bbox_label_stats = dist_by_label[label_name] + bbox_attr_label = dist_by_attr.get(label_name, {}) + + reports += self._check_far_from_label_mean( + label_name, bbox_label_stats) + reports += self._check_imbalanced_dist_in_label( + label_name, bbox_label_stats) + + for attr_name, bbox_attr_stats in bbox_attr_label.items(): + reports += self._check_far_from_attr_mean( + label_name, attr_name, bbox_attr_stats) + reports += self._check_imbalanced_dist_in_attr( + label_name, attr_name, bbox_attr_stats) + + for label_name, label_stats in undefined_label_dist.items(): + reports += 
self._check_undefined_label(label_name, label_stats) + + for label_name, attr_stats in undefined_attr_dist.items(): + for attr_name, attr_dets in attr_stats.items(): + reports += self._check_undefined_attribute( + label_name, attr_name, attr_dets) + + return reports + + +class SegmentationValidator(_TaskValidator): + """ + A specific validator class for (instance) segmentation task. + """ + + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.segmentation, + few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + + def compute_statistics(self, dataset): + """ + Computes statistics of the dataset for the segmentation task. + + Parameters + ---------- + dataset : IDataset object + + Returns + ------- + stats (dict): A dict object containing statistics of the dataset. + """ + + stats, filtered_anns = self._compute_common_statistics(dataset) + + # segmentation-specific + mask_template = { + 'area': deepcopy(self.numerical_stat_template), + 'width': deepcopy(self.numerical_stat_template), + 'height': deepcopy(self.numerical_stat_template) + } + + stats['items_with_invalid_value'] = {} + stats['mask_distribution_in_label'] = {} + stats['mask_distribution_in_attribute'] = {} + stats['mask_distribution_in_dataset_item'] = {} + + dist_by_label = stats['mask_distribution_in_label'] + dist_by_attr = stats['mask_distribution_in_attribute'] + mask_dist_in_item = stats['mask_distribution_in_dataset_item'] + items_w_invalid_val = stats['items_with_invalid_value'] + + def _generate_ann_mask_info(area, _w, _h): + return { + 'area': area, + 'width': _w, + 'height': _h, + } + + def _update_mask_stats_by_label(item_key, ann, mask_label_stats): + mask_has_error = False + + _x, _y, _w, _h = ann.get_bbox() + + # Delete the following block when #226 is resolved + # 
https://github.com/openvinotoolkit/datumaro/issues/226 + if ann.type == AnnotationType.mask: + _w += 1 + _h += 1 + + area = ann.get_area() + + ann_mask_info = _generate_ann_mask_info(area, _w, _h) + + for prop, val in ann_mask_info.items(): + if val == float('inf') or np.isnan(val): + mask_has_error = True + anns_w_invalid_val = items_w_invalid_val.setdefault( + item_key, {}) + invalid_props = anns_w_invalid_val.setdefault( + ann.id, []) + invalid_props.append(prop) + + if not mask_has_error: + self._update_prop_distributions(ann_mask_info, mask_label_stats) + + return ann_mask_info, mask_has_error + + label_categories = dataset.categories().get(AnnotationType.label, + LabelCategories()) + base_valid_attrs = label_categories.attributes + + for item_key, annotations in filtered_anns: + ann_count = len(annotations) + mask_dist_in_item[item_key] = ann_count + + for ann in annotations: + if not 0 <= ann.label < len(label_categories): + label_name = ann.label + valid_attrs = set() + else: + label_name = label_categories[ann.label].name + valid_attrs = base_valid_attrs.union( + label_categories[ann.label].attributes) + + mask_label_stats = dist_by_label.setdefault( + label_name, deepcopy(mask_template)) + ann_mask_info, mask_has_error = \ + _update_mask_stats_by_label( + item_key, ann, mask_label_stats) + + for attr, value in ann.attributes.items(): + if attr in valid_attrs: + mask_attr_label = dist_by_attr.setdefault( + label_name, {}) + mask_attr_stats = mask_attr_label.setdefault( + attr, {}) + mask_val_stats = mask_attr_stats.setdefault( + str(value), deepcopy(mask_template)) + + if not mask_has_error: + self._update_prop_distributions( + ann_mask_info, mask_val_stats) + + # compute prop stats from dist. 
+ self._compute_prop_stats_from_dist(dist_by_label, dist_by_attr) + + def _is_valid_ann(item_key, ann): + has_defined_label = 0 <= ann.label < len(label_categories) + if not has_defined_label: + return False + + mask_has_invalid_val = ann.id in items_w_invalid_val.get( + item_key, {}) + return not mask_has_invalid_val + + def _update_props_far_from_mean(item_key, ann): + valid_attrs = base_valid_attrs.union( + label_categories[ann.label].attributes) + label_name = label_categories[ann.label].name + mask_label_stats = dist_by_label[label_name] + + _x, _y, _w, _h = ann.get_bbox() + + # Delete the following block when #226 is resolved + # https://github.com/openvinotoolkit/datumaro/issues/226 + if ann.type == AnnotationType.mask: + _w += 1 + _h += 1 + area = ann.get_area() + + ann_mask_info = _generate_ann_mask_info(area, _w, _h) + + for prop, val in ann_mask_info.items(): + prop_stats = mask_label_stats[prop] + self._compute_far_from_mean(prop_stats, val, item_key, ann) + + for attr, value in ann.attributes.items(): + if attr in valid_attrs: + mask_attr_stats = dist_by_attr[label_name][attr] + mask_val_stats = mask_attr_stats[str(value)] + + for prop, val in ann_mask_info.items(): + prop_stats = mask_val_stats[prop] + self._compute_far_from_mean(prop_stats, val, + item_key, ann) + + for item_key, annotations in filtered_anns: + for ann in annotations: + if _is_valid_ann(item_key, ann): + _update_props_far_from_mean(item_key, ann) + + return stats + + def generate_reports(self, stats): + """ + Validates the dataset for segmentation tasks based on its statistics. + + Parameters + ---------- + dataset : IDataset object + stats : Dict object + + Returns + ------- + reports (list): List of validation reports (DatasetValidationError). 
+ """ + + reports = [] + + reports += self._check_missing_label_categories(stats) + reports += self._check_missing_annotation(stats) + reports += self._check_label_defined_but_not_found(stats) + reports += self._check_only_one_label(stats) + reports += self._check_few_samples_in_label(stats) + reports += self._check_imbalanced_labels(stats) + reports += self._check_invalid_value(stats) + + label_dist = stats['label_distribution'] + attr_dist = stats['attribute_distribution'] + defined_attr_dist = attr_dist['defined_attributes'] + undefined_label_dist = label_dist['undefined_labels'] + undefined_attr_dist = attr_dist['undefined_attributes'] + + dist_by_label = stats['mask_distribution_in_label'] + dist_by_attr = stats['mask_distribution_in_attribute'] + + defined_labels = defined_attr_dist.keys() + for label_name in defined_labels: + attr_stats = defined_attr_dist[label_name] + + reports += self._check_attribute_defined_but_not_found( + label_name, attr_stats) + + for attr_name, attr_dets in attr_stats.items(): + reports += self._check_few_samples_in_attribute( + label_name, attr_name, attr_dets) + reports += self._check_imbalanced_attribute( + label_name, attr_name, attr_dets) + reports += self._check_only_one_attribute_value( + label_name, attr_name, attr_dets) + reports += self._check_missing_attribute( + label_name, attr_name, attr_dets) + + mask_label_stats = dist_by_label[label_name] + mask_attr_label = dist_by_attr.get(label_name, {}) + + reports += self._check_far_from_label_mean( + label_name, mask_label_stats) + reports += self._check_imbalanced_dist_in_label( + label_name, mask_label_stats) + + for attr_name, mask_attr_stats in mask_attr_label.items(): + reports += self._check_far_from_attr_mean( + label_name, attr_name, mask_attr_stats) + reports += self._check_imbalanced_dist_in_attr( + label_name, attr_name, mask_attr_stats) + + for label_name, label_stats in undefined_label_dist.items(): + reports += self._check_undefined_label(label_name, 
label_stats) + + for label_name, attr_stats in undefined_attr_dist.items(): + for attr_name, attr_dets in attr_stats.items(): + reports += self._check_undefined_attribute( + label_name, attr_name, attr_dets) + + return reports diff --git a/datumaro/plugins/vgg_face2_format.py b/datumaro/plugins/vgg_face2_format.py index 33b41021602d..d0cd5b5792bf 100644 --- a/datumaro/plugins/vgg_face2_format.py +++ b/datumaro/plugins/vgg_face2_format.py @@ -7,8 +7,10 @@ import os.path as osp from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem, - Importer, Label, LabelCategories, Points, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Extractor, Importer, Label, + LabelCategories, Points, +) from datumaro.util.image import find_images @@ -20,21 +22,44 @@ class VggFace2Path: LABELS_FILE = 'labels.txt' IMAGES_DIR_NO_LABEL = 'no_label' -class VggFace2Extractor(SourceExtractor): - def __init__(self, path, subset=None): - if not osp.isfile(path): - raise Exception("Can't read .csv annotation file '%s'" % path) - self._path = path - self._dataset_dir = osp.dirname(osp.dirname(path)) +class VggFace2Extractor(Extractor): + def __init__(self, path): + subset = None + if osp.isdir(path): + self._path = path + elif osp.isfile(path): + subset = osp.splitext(osp.basename(path).split('_')[2])[0] + self._path = osp.dirname(path) + else: + raise Exception("Can't read annotations from '%s'" % path) + + annotation_files = [p for p in os.listdir(self._path) + if (osp.basename(p).startswith(VggFace2Path.BBOXES_FILE) or \ + osp.basename(p).startswith(VggFace2Path.LANDMARKS_FILE)) and \ + p.endswith('.csv')] + + if len(annotation_files) < 1: + raise Exception("Can't find annotations in the directory '%s'" % path) + + super().__init__() + + self._dataset_dir = osp.dirname(self._path) + self._subsets = {subset} if subset else set( + osp.splitext(f.split('_')[2])[0] for f in 
annotation_files + ) - if not subset: - subset = osp.splitext(osp.basename(path))[0] - if subset.startswith(VggFace2Path.LANDMARKS_FILE): - subset = subset.split('_')[2] - super().__init__(subset=subset) + self._categories = {} + self._items = [] - self._categories = self._load_categories() - self._items = list(self._load_items(path).values()) + self._load_categories() + for subset in self._subsets: + self._items.extend(list(self._load_items(subset).values())) + + def __iter__(self): + return iter(self._items) + + def categories(self): + return self._categories def _load_categories(self): label_cat = LabelCategories() @@ -50,58 +75,63 @@ def _load_categories(self): class_name = objects[1] label_cat.add(label, parent=class_name) else: - subset_path = osp.join(self._dataset_dir, self._subset) - if osp.isdir(subset_path): - for images_dir in sorted(os.listdir(subset_path)): - if osp.isdir(osp.join(subset_path, images_dir)) and \ - images_dir != VggFace2Path.IMAGES_DIR_NO_LABEL: - label_cat.add(images_dir) - return { AnnotationType.label: label_cat } - - def _load_items(self, path): - def _split_item_path(path): + for subset in self._subsets: + subset_path = osp.join(self._dataset_dir, subset) + if osp.isdir(subset_path): + for images_dir in sorted(os.listdir(subset_path)): + if osp.isdir(osp.join(subset_path, images_dir)) and \ + images_dir != VggFace2Path.IMAGES_DIR_NO_LABEL: + label_cat.add(images_dir) + self._categories[AnnotationType.label] = label_cat + + def _load_items(self, subset): + def _get_label(path): label_name = path.split('/')[0] label = None if label_name != VggFace2Path.IMAGES_DIR_NO_LABEL: label = \ self._categories[AnnotationType.label].find(label_name)[0] - item_id = path[len(label_name) + 1:] - return item_id, label + return label items = {} - image_dir = osp.join(self._dataset_dir, self._subset) + image_dir = osp.join(self._dataset_dir, subset) if osp.isdir(image_dir): images = { osp.splitext(osp.relpath(p, image_dir))[0]: p for p in 
find_images(image_dir, recursive=True) } else: images = {} - with open(path, encoding='utf-8') as content: - landmarks_table = list(csv.DictReader(content)) - for row in landmarks_table: - item_id = row['NAME_ID'] - label = None - if '/' in item_id: - item_id, label = _split_item_path(item_id) + landmarks_path = osp.join(self._dataset_dir, VggFace2Path.ANNOTATION_DIR, + VggFace2Path.LANDMARKS_FILE + subset + '.csv') + if osp.isfile(landmarks_path): + with open(landmarks_path, encoding='utf-8') as content: + landmarks_table = list(csv.DictReader(content)) + for row in landmarks_table: + item_id = row['NAME_ID'] + label = None + if '/' in item_id: + label = _get_label(item_id) - if item_id not in items: - items[item_id] = DatasetItem(id=item_id, subset=self._subset, - image=images.get(row['NAME_ID'])) + if item_id not in items: + items[item_id] = DatasetItem(id=item_id, subset=subset, + image=images.get(row['NAME_ID'])) - annotations = items[item_id].annotations - if [a for a in annotations if a.type == AnnotationType.points]: - raise Exception("Item %s: an image can have only one " - "set of landmarks" % item_id) + annotations = items[item_id].annotations + if [a for a in annotations if a.type == AnnotationType.points]: + raise Exception("Item %s: an image can have only one " + "set of landmarks" % item_id) - if len([p for p in row if row[p] == '']) == 0 and len(row) == 11: - annotations.append(Points( - [float(row[p]) for p in row if p != 'NAME_ID'], label=label)) - elif label is not None: - annotations.append(Label(label=label)) + if len([p for p in row if row[p] == '']) == 0 and len(row) == 11: + annotations.append(Points( + [float(row[p]) for p in row if p != 'NAME_ID'], + label=label) + ) + elif label is not None: + annotations.append(Label(label=label)) bboxes_path = osp.join(self._dataset_dir, VggFace2Path.ANNOTATION_DIR, - VggFace2Path.BBOXES_FILE + self._subset + '.csv') + VggFace2Path.BBOXES_FILE + subset + '.csv') if osp.isfile(bboxes_path): with 
open(bboxes_path, encoding='utf-8') as content: bboxes_table = list(csv.DictReader(content)) @@ -109,10 +139,10 @@ def _split_item_path(path): item_id = row['NAME_ID'] label = None if '/' in item_id: - item_id, label = _split_item_path(item_id) + label = _get_label(item_id) if item_id not in items: - items[item_id] = DatasetItem(id=item_id, subset=self._subset, + items[item_id] = DatasetItem(id=item_id, subset=subset, image=images.get(row['NAME_ID'])) annotations = items[item_id].annotations @@ -128,15 +158,27 @@ def _split_item_path(path): class VggFace2Importer(Importer): @classmethod def find_sources(cls, path): - return cls._find_sources_recursive(path, '.csv', 'vgg_face2', - dirname=VggFace2Path.ANNOTATION_DIR, - file_filter=lambda p: \ - not osp.basename(p).startswith(VggFace2Path.BBOXES_FILE)) + if osp.isdir(path): + annotation_dir = osp.join(path, VggFace2Path.ANNOTATION_DIR) + if osp.isdir(annotation_dir): + return [{'url': annotation_dir, 'format': 'vgg_face2'}] + elif osp.isfile(path): + if (osp.basename(path).startswith(VggFace2Path.LANDMARKS_FILE) or \ + osp.basename(path).startswith(VggFace2Path.BBOXES_FILE)) and \ + path.endswith('.csv'): + return [{'url': path, 'format': 'vgg_face2'}] + return [] class VggFace2Converter(Converter): DEFAULT_IMAGE_EXT = VggFace2Path.IMAGE_EXT def apply(self): + def _get_name_id(item_parts, label_name): + if 1 < len(item_parts) and item_parts[0] == label_name: + return '/'.join([label_name, *item_parts[1:]]) + else: + return '/'.join([label_name, *item_parts]) + save_dir = self._save_dir os.makedirs(save_dir, exist_ok=True) @@ -156,16 +198,23 @@ def apply(self): bboxes_table = [] landmarks_table = [] for item in subset: + item_parts = item.id.split('/') if item.has_image and self._save_images: labels = set(p.label for p in item.annotations if getattr(p, 'label') != None) if labels: for label in labels: + image_dir = label_categories[label].name + if 1 < len(item_parts) and image_dir == item_parts[0]: + image_dir = '' 
self._save_image(item, subdir=osp.join(subset_name, - label_categories[label].name)) + image_dir)) else: + image_dir = VggFace2Path.IMAGES_DIR_NO_LABEL + if 1 < len(item_parts) and image_dir == item_parts[0]: + image_dir = '' self._save_image(item, subdir=osp.join(subset_name, - VggFace2Path.IMAGES_DIR_NO_LABEL)) + image_dir)) landmarks = [a for a in item.annotations if a.type == AnnotationType.points] @@ -175,11 +224,11 @@ def apply(self): if landmarks: if landmarks[0].label is not None and \ label_categories[landmarks[0].label].name: - name_id = label_categories[landmarks[0].label].name \ - + '/' + item.id + name_id = _get_name_id(item_parts, + label_categories[landmarks[0].label].name) else: - name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \ - + '/' + item.id + name_id = _get_name_id(item_parts, + VggFace2Path.IMAGES_DIR_NO_LABEL) points = landmarks[0].points if len(points) != 10: landmarks_table.append({'NAME_ID': name_id}) @@ -199,11 +248,11 @@ def apply(self): if bboxes: if bboxes[0].label is not None and \ label_categories[bboxes[0].label].name: - name_id = label_categories[bboxes[0].label].name \ - + '/' + item.id + name_id = _get_name_id(item_parts, + label_categories[bboxes[0].label].name) else: - name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \ - + '/' + item.id + name_id = _get_name_id(item_parts, + VggFace2Path.IMAGES_DIR_NO_LABEL) bboxes_table.append({'NAME_ID': name_id, 'X': bboxes[0].x, 'Y': bboxes[0].y, 'W': bboxes[0].w, 'H': bboxes[0].h}) @@ -212,16 +261,16 @@ def apply(self): for label in labels: if label.label is not None and \ label_categories[label.label].name: - name_id = label_categories[label.label].name \ - + '/' + item.id + name_id = _get_name_id(item_parts, + label_categories[labels[0].label].name) else: - name_id = VggFace2Path.IMAGES_DIR_NO_LABEL \ - + '/' + item.id + name_id = _get_name_id(item_parts, + VggFace2Path.IMAGES_DIR_NO_LABEL) landmarks_table.append({'NAME_ID': name_id}) if not landmarks and not bboxes and not labels: - 
landmarks_table.append({'NAME_ID': - VggFace2Path.IMAGES_DIR_NO_LABEL + '/' + item.id}) + landmarks_table.append({'NAME_ID': _get_name_id(item_parts, + VggFace2Path.IMAGES_DIR_NO_LABEL)}) landmarks_path = osp.join(save_dir, VggFace2Path.ANNOTATION_DIR, VggFace2Path.LANDMARKS_FILE + subset_name + '.csv') diff --git a/datumaro/plugins/voc_format/converter.py b/datumaro/plugins/voc_format/converter.py index 54be318b0eca..eef5962ba005 100644 --- a/datumaro/plugins/voc_format/converter.py +++ b/datumaro/plugins/voc_format/converter.py @@ -1,32 +1,34 @@ -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from collections import OrderedDict, defaultdict +from enum import Enum, auto +from itertools import chain import logging as log import os import os.path as osp -from collections import OrderedDict, defaultdict -from enum import Enum -from itertools import chain from lxml import etree as ET from datumaro.components.converter import Converter from datumaro.components.dataset import ItemStatus -from datumaro.components.extractor import (AnnotationType, - CompiledMask, DatasetItem, LabelCategories) +from datumaro.components.extractor import ( + AnnotationType, CompiledMask, DatasetItem, LabelCategories, +) from datumaro.util import find, str_to_bool from datumaro.util.annotation_util import make_label_id_mapping from datumaro.util.image import save_image from datumaro.util.mask_tools import paint_mask, remap_mask -from .format import (VocTask, VocPath, VocInstColormap, - parse_label_map, make_voc_label_map, make_voc_categories, write_label_map +from .format import ( + VocInstColormap, VocPath, VocTask, make_voc_categories, make_voc_label_map, + parse_label_map, write_label_map, ) -def _convert_attr(name, attributes, type_conv, default=None, warn=True): +def _convert_attr(name, attributes, type_conv, default=None): d = object() value = attributes.get(name, d) if value is d: @@ -49,7 +51,9 @@ def 
_write_xml_bbox(bbox, parent_elem): return bbox_elem -LabelmapType = Enum('LabelmapType', ['voc', 'source']) +class LabelmapType(Enum): + voc = auto() + source = auto() class VocConverter(Converter): DEFAULT_IMAGE_EXT = VocPath.IMAGE_EXT @@ -82,6 +86,10 @@ def build_cmdline_parser(cls, **kwargs): parser.add_argument('--allow-attributes', type=str_to_bool, default=True, help="Allow export of attributes (default: %(default)s)") + parser.add_argument('--keep-empty', + type=str_to_bool, default=False, + help="Write subset lists even if they are empty " + "(default: %(default)s)") parser.add_argument('--tasks', type=cls._split_tasks_string, help="VOC task filter, comma-separated list of {%s} " "(default: all)" % ', '.join(t.name for t in VocTask)) @@ -90,7 +98,7 @@ def build_cmdline_parser(cls, **kwargs): def __init__(self, extractor, save_dir, tasks=None, apply_colormap=True, label_map=None, - allow_attributes=True, **kwargs): + allow_attributes=True, keep_empty=False, **kwargs): super().__init__(extractor, save_dir, **kwargs) assert tasks is None or isinstance(tasks, (VocTask, list, set)) @@ -104,6 +112,7 @@ def __init__(self, extractor, save_dir, self._apply_colormap = apply_colormap self._allow_attributes = allow_attributes + self._keep_empty = keep_empty if label_map is None: label_map = LabelmapType.source.name @@ -294,8 +303,12 @@ def save_subsets(self): encoding='unicode', pretty_print=True)) clsdet_list[item.id] = True - layout_list[item.id] = objects_with_parts - action_list[item.id] = objects_with_actions + + if objects_with_parts: + layout_list[item.id] = objects_with_parts + + if objects_with_actions: + action_list[item.id] = objects_with_actions for label_ann in labels: label = self.get_label(label_ann.label) @@ -360,11 +373,15 @@ def save_action_lists(self, subset_name, action_list): items = {k: True for k in action_list} if self._patch and osp.isfile(ann_file): self._get_filtered_lines(ann_file, self._patch, subset_name, items) - with open(ann_file, 'w', 
encoding='utf-8') as f: - for item in items: - f.write('%s\n' % item) - if not items and not self._patch: + if items or self._keep_empty: + with open(ann_file, 'w', encoding='utf-8') as f: + for item in items: + f.write('%s\n' % item) + elif osp.isfile(ann_file): + os.remove(ann_file) + + if not items and not self._patch and not self._keep_empty: return def _write_item(f, item, objs, action): @@ -381,6 +398,11 @@ def _write_item(f, item, objs, action): for act in chain(*(self._get_actions(l) for l in self._label_map)) } for action, ann_file in all_actions.items(): + if not items and not self._keep_empty: + if osp.isfile(ann_file): + os.remove(ann_file) + continue + lines = {} if self._patch and osp.isfile(ann_file): lines = self._get_filtered_lines(ann_file, None, subset_name) @@ -411,6 +433,11 @@ def _write_item(f, item, item_labels): lines = self._get_filtered_lines(ann_file, self._patch, subset_name, items) + if not items and not self._keep_empty: + if osp.isfile(ann_file): + os.remove(ann_file) + continue + with open(ann_file, 'w', encoding='utf-8') as f: for item in items: if item in class_lists: @@ -426,9 +453,12 @@ def save_clsdet_lists(self, subset_name, clsdet_list): if self._patch and osp.isfile(ann_file): self._get_filtered_lines(ann_file, self._patch, subset_name, items) - with open(ann_file, 'w', encoding='utf-8') as f: - for item in items: - f.write('%s\n' % item) + if items or self._keep_empty: + with open(ann_file, 'w', encoding='utf-8') as f: + for item in items: + f.write('%s\n' % item) + elif osp.isfile(ann_file): + os.remove(ann_file) def save_segm_lists(self, subset_name, segm_list): os.makedirs(self._segm_subsets_dir, exist_ok=True) @@ -438,9 +468,12 @@ def save_segm_lists(self, subset_name, segm_list): if self._patch and osp.isfile(ann_file): self._get_filtered_lines(ann_file, self._patch, subset_name, items) - with open(ann_file, 'w', encoding='utf-8') as f: - for item in items: - f.write('%s\n' % item) + if items or self._keep_empty: + with 
open(ann_file, 'w', encoding='utf-8') as f: + for item in items: + f.write('%s\n' % item) + elif osp.isfile(ann_file): + os.remove(ann_file) def save_layout_lists(self, subset_name, layout_list): def _write_item(f, item, item_layouts): @@ -460,6 +493,11 @@ def _write_item(f, item, item_layouts): if self._patch and osp.isfile(ann_file): self._get_filtered_lines(ann_file, self._patch, subset_name, items) + if not items and not self._keep_empty: + if osp.isfile(ann_file): + os.remove(ann_file) + return + with open(ann_file, 'w', encoding='utf-8') as f: for item in items: if item in layout_list: @@ -588,8 +626,13 @@ def patch(cls, dataset, patch, save_dir, **kwargs): conv._patch = patch conv.apply() - conv = cls(dataset, save_dir=save_dir, **kwargs) - images_dir = osp.join(save_dir, VocPath.IMAGES_DIR) + # Find images that needs to be removed + # images from different subsets are stored in the common directory + # Avoid situations like: + # (a, test): added + # (a, train): removed + # where the second line removes images from the first. 
+ ids_to_remove = {} for (item_id, subset), status in patch.updated_items.items(): if status != ItemStatus.removed: item = patch.data.get(item_id, subset) @@ -597,9 +640,22 @@ def patch(cls, dataset, patch, save_dir, **kwargs): item = DatasetItem(item_id, subset=subset) if not (status == ItemStatus.removed or not item.has_image): + ids_to_remove[item_id] = (item, False) + else: + ids_to_remove.setdefault(item_id, (item, True)) + + for item, to_remove in ids_to_remove.values(): + if not to_remove: continue - image_path = osp.join(images_dir, conv._make_image_filename(item)) + if conv._tasks & {VocTask.detection, + VocTask.action_classification, VocTask.person_layout}: + ann_path = osp.join(conv._ann_dir, item.id + '.xml') + if osp.isfile(ann_path): + os.remove(ann_path) + + image_path = osp.join(conv._images_dir, + conv._make_image_filename(item)) if osp.isfile(image_path): os.unlink(image_path) diff --git a/datumaro/plugins/voc_format/extractor.py b/datumaro/plugins/voc_format/extractor.py index 9df7cc066d16..a2febd390038 100644 --- a/datumaro/plugins/voc_format/extractor.py +++ b/datumaro/plugins/voc_format/extractor.py @@ -5,22 +5,23 @@ from collections import defaultdict import logging as log -import numpy as np import os.path as osp + from defusedxml import ElementTree +import numpy as np -from datumaro.components.extractor import (SourceExtractor, DatasetItem, - AnnotationType, Label, Mask, Bbox, CompiledMask +from datumaro.components.extractor import ( + AnnotationType, Bbox, CompiledMask, DatasetItem, Label, Mask, + SourceExtractor, ) -from datumaro.util.os_util import dir_items from datumaro.util.image import Image, find_images -from datumaro.util.mask_tools import lazy_mask, invert_colormap +from datumaro.util.mask_tools import invert_colormap, lazy_mask +from datumaro.util.os_util import dir_items from .format import ( - VocTask, VocPath, VocInstColormap, parse_label_map, make_voc_categories + VocInstColormap, VocPath, VocTask, make_voc_categories, 
parse_label_map, ) - _inverse_inst_colormap = invert_colormap(VocInstColormap) class _VocExtractor(SourceExtractor): diff --git a/datumaro/plugins/voc_format/format.py b/datumaro/plugins/voc_format/format.py index a03446d511aa..6e9025c69f48 100644 --- a/datumaro/plugins/voc_format/format.py +++ b/datumaro/plugins/voc_format/format.py @@ -1,78 +1,74 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT from collections import OrderedDict -from enum import Enum +from enum import Enum, auto from itertools import chain + import numpy as np -from datumaro.components.extractor import (AnnotationType, - LabelCategories, MaskCategories +from datumaro.components.extractor import ( + AnnotationType, LabelCategories, MaskCategories, ) -VocTask = Enum('VocTask', [ - 'classification', - 'detection', - 'segmentation', - 'action_classification', - 'person_layout', -]) - -VocLabel = Enum('VocLabel', [ - ('background', 0), - ('aeroplane', 1), - ('bicycle', 2), - ('bird', 3), - ('boat', 4), - ('bottle', 5), - ('bus', 6), - ('car', 7), - ('cat', 8), - ('chair', 9), - ('cow', 10), - ('diningtable', 11), - ('dog', 12), - ('horse', 13), - ('motorbike', 14), - ('person', 15), - ('pottedplant', 16), - ('sheep', 17), - ('sofa', 18), - ('train', 19), - ('tvmonitor', 20), - ('ignored', 255), -]) - -VocPose = Enum('VocPose', [ - 'Unspecified', - 'Left', - 'Right', - 'Frontal', - 'Rear', -]) - -VocBodyPart = Enum('VocBodyPart', [ - 'head', - 'hand', - 'foot', -]) - -VocAction = Enum('VocAction', [ - 'other', - 'jumping', - 'phoning', - 'playinginstrument', - 'reading', - 'ridingbike', - 'ridinghorse', - 'running', - 'takingphoto', - 'usingcomputer', - 'walking', -]) +class VocTask(Enum): + classification = auto() + detection = auto() + segmentation = auto() + action_classification = auto() + person_layout = auto() + +class VocLabel(Enum): + background = 0 + aeroplane = 1 + bicycle = 2 + bird = 3 + boat = 4 + bottle = 5 + 
bus = 6 + car = 7 + cat = 8 + chair = 9 + cow = 10 + diningtable = 11 + dog = 12 + horse = 13 + motorbike = 14 + person = 15 + pottedplant = 16 + sheep = 17 + sofa = 18 + train = 19 + tvmonitor = 20 + ignored = 255 + +class VocPose(Enum): + Unspecified = auto() + Left = auto() + Right = auto() + Frontal = auto() + Rear = auto() + +class VocBodyPart(Enum): + head = auto() + hand = auto() + foot = auto() + +class VocAction(Enum): + other = auto() + jumping = auto() + phoning = auto() + playinginstrument = auto() + reading = auto() + ridingbike = auto() + ridinghorse = auto() + running = auto() + takingphoto = auto() + usingcomputer = auto() + walking = auto() def generate_colormap(length=256): def get_bit(number, index): @@ -126,7 +122,7 @@ def parse_label_map(path): return None label_map = OrderedDict() - with open(path, 'r') as f: + with open(path, 'r', encoding='utf-8') as f: for line in f: # skip empty and commented lines line = line.strip() @@ -163,7 +159,7 @@ def parse_label_map(path): return label_map def write_label_map(path, label_map): - with open(path, 'w') as f: + with open(path, 'w', encoding='utf-8') as f: f.write('# label:color_rgb:parts:actions\n') for label_name, label_desc in label_map.items(): if label_desc[0]: diff --git a/datumaro/plugins/voc_format/importer.py b/datumaro/plugins/voc_format/importer.py index 7da323249b6a..a9918d8c0248 100644 --- a/datumaro/plugins/voc_format/importer.py +++ b/datumaro/plugins/voc_format/importer.py @@ -3,75 +3,93 @@ # # SPDX-License-Identifier: MIT -from glob import glob import os.path as osp from datumaro.components.extractor import Importer -from .format import VocTask, VocPath +from .format import VocPath, VocTask -def find_path(root_path, path, depth=4): - level, is_found = 0, False - full_path = None - while level < depth and not is_found: - full_path = osp.join(root_path, path) - paths = glob(full_path) - if paths: - full_path = paths[0] # ignore all after the first one - is_found = osp.isdir(full_path) - 
else: - full_path = None - - level += 1 - root_path = osp.join(root_path, '*') - - return full_path class VocImporter(Importer): - _TASKS = [ - (VocTask.classification, 'voc_classification', 'Main'), - (VocTask.detection, 'voc_detection', 'Main'), - (VocTask.segmentation, 'voc_segmentation', 'Segmentation'), - (VocTask.person_layout, 'voc_layout', 'Layout'), - (VocTask.action_classification, 'voc_action', 'Action'), - ] + _TASKS = { + VocTask.classification: ('voc_classification', 'Main'), + VocTask.detection: ('voc_detection', 'Main'), + VocTask.segmentation: ('voc_segmentation', 'Segmentation'), + VocTask.person_layout: ('voc_layout', 'Layout'), + VocTask.action_classification: ('voc_action', 'Action'), + } def __call__(self, path, **extra_params): - from datumaro.components.project import Project # cyclic import + from datumaro.components.project import Project # cyclic import project = Project() - subset_paths = self.find_sources(path) - if len(subset_paths) == 0: + subsets = self.find_sources(path) + if len(subsets) == 0: raise Exception("Failed to find 'voc' dataset at '%s'" % path) - for task, extractor_type, subset_path in subset_paths: + for config in subsets: + subset_path = config['url'] + extractor_type = config['format'] + + task = extractor_type.split('_')[1] + + opts = dict(config.get('options') or {}) + opts.update(extra_params) + project.add_source('%s-%s' % - (task.name, osp.splitext(osp.basename(subset_path))[0]), + (task, osp.splitext(osp.basename(subset_path))[0]), { 'url': subset_path, 'format': extractor_type, - 'options': dict(extra_params), + 'options': opts, }) return project @classmethod def find_sources(cls, path): - # find root path for the dataset - root_path = path - for task, extractor_type, task_dir in cls._TASKS: - task_path = find_path(root_path, osp.join(VocPath.SUBSETS_DIR, task_dir)) - if task_path: - root_path = osp.dirname(osp.dirname(task_path)) - break - - subset_paths = [] - for task, extractor_type, task_dir in 
cls._TASKS: - task_path = osp.join(root_path, VocPath.SUBSETS_DIR, task_dir) - - if not osp.isdir(task_path): + subsets = [] + + # find root path for the dataset and use it for all tasks + root_path = None + for extractor_type, task_dir in cls._TASKS.values(): + if osp.isfile(path) and \ + not osp.basename(osp.dirname(path)) == task_dir: + continue + + task_subsets = cls._find_sources_recursive(root_path or path, + 'txt', extractor_type, + dirname=osp.join(VocPath.SUBSETS_DIR, task_dir), + file_filter=lambda p: '_' not in osp.basename(p), + max_depth=0 if root_path else 3) + + if not task_subsets: continue - task_subsets = [p for p in glob(osp.join(task_path, '*.txt')) - if '_' not in osp.basename(p)] - subset_paths += [(task, extractor_type, p) for p in task_subsets] - return subset_paths + + subsets.extend(task_subsets) + + if not root_path: + root_path = osp.dirname(osp.dirname( + osp.dirname(task_subsets[0]['url']))) + + return subsets + +class VocClassificationImporter(VocImporter): + _TASK = VocTask.classification + _TASKS = { _TASK: VocImporter._TASKS[_TASK] } + +class VocDetectionImporter(VocImporter): + _TASK = VocTask.detection + _TASKS = { _TASK: VocImporter._TASKS[_TASK] } + +class VocSegmentationImporter(VocImporter): + _TASK = VocTask.segmentation + _TASKS = { _TASK: VocImporter._TASKS[_TASK] } + +class VocLayoutImporter(VocImporter): + _TASK = VocTask.person_layout + _TASKS = { _TASK: VocImporter._TASKS[_TASK] } + +class VocActionImporter(VocImporter): + _TASK = VocTask.action_classification + _TASKS = { _TASK: VocImporter._TASKS[_TASK] } \ No newline at end of file diff --git a/datumaro/plugins/widerface_format.py b/datumaro/plugins/widerface_format.py index a8439dc83ce0..829bf4502676 100644 --- a/datumaro/plugins/widerface_format.py +++ b/datumaro/plugins/widerface_format.py @@ -8,8 +8,10 @@ import re from datumaro.components.converter import Converter -from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem, - Importer, Label, 
LabelCategories, SourceExtractor) +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Importer, Label, LabelCategories, + SourceExtractor, +) from datumaro.util import str_to_bool diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py index 71f021f0e695..4e768943d98f 100644 --- a/datumaro/plugins/yolo_format/converter.py +++ b/datumaro/plugins/yolo_format/converter.py @@ -3,15 +3,16 @@ # # SPDX-License-Identifier: MIT +from collections import OrderedDict import logging as log import os import os.path as osp -from collections import OrderedDict from datumaro.components.converter import Converter from datumaro.components.dataset import ItemStatus -from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME, - DatasetItem) +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, DatasetItem, +) from .format import YoloPath @@ -93,7 +94,8 @@ def apply(self): subset_list_name = '%s.txt' % subset_name subset_lists[subset_name] = subset_list_name - with open(osp.join(save_dir, subset_list_name), 'w', encoding='utf-8') as f: + with open(osp.join(save_dir, subset_list_name), + 'w', encoding='utf-8') as f: f.writelines('%s\n' % s for s in image_paths.values()) with open(osp.join(save_dir, 'obj.data'), 'w', encoding='utf-8') as f: diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py index 77a7d958b2e2..ad1d902e6d3a 100644 --- a/datumaro/plugins/yolo_format/extractor.py +++ b/datumaro/plugins/yolo_format/extractor.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT @@ -7,11 +7,14 @@ import os.path as osp import re -from datumaro.components.extractor import (SourceExtractor, Extractor, - DatasetItem, AnnotationType, Bbox, LabelCategories, Importer +from datumaro.components.extractor import ( + AnnotationType, Bbox, 
DatasetItem, Extractor, Importer, LabelCategories, + SourceExtractor, +) +from datumaro.util.image import ( + DEFAULT_IMAGE_META_FILE_NAME, Image, load_image_meta_file, ) from datumaro.util.os_util import split_path -from datumaro.util.image import Image from .format import YoloPath @@ -46,17 +49,12 @@ def __init__(self, config_path, image_info=None): assert image_info is None or isinstance(image_info, (str, dict)) if image_info is None: - image_info = osp.join(rootpath, YoloPath.IMAGE_META_FILE) + image_info = osp.join(rootpath, DEFAULT_IMAGE_META_FILE_NAME) if not osp.isfile(image_info): image_info = {} if isinstance(image_info, str): - if not osp.isfile(image_info): - raise Exception("Can't read image meta file '%s'" % image_info) - with open(image_info, encoding='utf-8') as f: - image_info = {} - for line in f: - image_name, h, w = line.strip().rsplit(maxsplit=2) - image_info[image_name] = (int(h), int(w)) + image_info = load_image_meta_file(image_info) + self._image_info = image_info with open(config_path, 'r', encoding='utf-8') as f: @@ -120,7 +118,7 @@ def name_from_path(cls, path): # NOTE: when path is like [data/]_obj/ # drop everything but # can be , so not just basename() - path = osp.join(*parts[1:]) + path = osp.join(*parts[1:]) # pylint: disable=no-value-for-parameter return osp.splitext(path)[0] def _get(self, item_id, subset_name): @@ -196,4 +194,4 @@ def get_subset(self, name): class YoloImporter(Importer): @classmethod def find_sources(cls, path): - return cls._find_sources_recursive(path, '.data', 'yolo') \ No newline at end of file + return cls._find_sources_recursive(path, '.data', 'yolo') diff --git a/datumaro/plugins/yolo_format/format.py b/datumaro/plugins/yolo_format/format.py index 02a07669bb12..8c88951ec872 100644 --- a/datumaro/plugins/yolo_format/format.py +++ b/datumaro/plugins/yolo_format/format.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT 
@@ -7,5 +7,3 @@ class YoloPath: DEFAULT_SUBSET_NAME = 'train' SUBSET_NAMES = ['train', 'valid'] - - IMAGE_META_FILE = 'images.meta' \ No newline at end of file diff --git a/datumaro/util/__init__.py b/datumaro/util/__init__.py index 79190a0fbb21..7d1af4031aa9 100644 --- a/datumaro/util/__init__.py +++ b/datumaro/util/__init__.py @@ -1,18 +1,20 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT -import attr from contextlib import ExitStack -from distutils.util import strtobool as str_to_bool # pylint: disable=unused-import from functools import partial, wraps from itertools import islice from typing import Iterable, Tuple +import distutils.util +import attr NOTSET = object() +str_to_bool = distutils.util.strtobool + def find(iterable, pred=lambda x: True, default=None): return next((x for x in iterable if pred(x)), default) @@ -104,6 +106,10 @@ def unescape(s: str, escapes: Iterable[Tuple[str, str]]) -> str: s = s.replace(sub, pattern) return s +def is_member_redefined(member_name, base_class, target_class) -> bool: + return getattr(target_class, member_name) != \ + getattr(base_class, member_name) + def optional_arg_decorator(fn): @wraps(fn) def wrapped_decorator(*args, **kwargs): diff --git a/datumaro/util/annotation_util.py b/datumaro/util/annotation_util.py index a9e50306dd03..aecafaee27d3 100644 --- a/datumaro/util/annotation_util.py +++ b/datumaro/util/annotation_util.py @@ -6,8 +6,9 @@ import numpy as np -from datumaro.components.extractor import (LabelCategories, _Shape, Mask, - AnnotationType, RleMask) +from datumaro.components.extractor import ( + AnnotationType, LabelCategories, Mask, RleMask, _Shape, +) from datumaro.util.mask_tools import mask_to_rle diff --git a/datumaro/util/command_targets.py b/datumaro/util/command_targets.py index 50c854f271e0..eb9fbc9ec2b7 100644 --- a/datumaro/util/command_targets.py +++ b/datumaro/util/command_targets.py @@ -1,17 +1,21 @@ -# Copyright (C) 
2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from enum import Enum, auto import argparse -from enum import Enum from datumaro.components.project import Project from datumaro.util.image import load_image -TargetKinds = Enum('TargetKinds', - ['project', 'source', 'external_dataset', 'inference', 'image']) +class TargetKinds(Enum): + project = auto() + source = auto() + external_dataset = auto() + inference = auto() + image = auto() def is_project_name(value, project): return value == project.config.project_name @@ -21,7 +25,7 @@ def is_project_path(value): try: Project.load(value) return True - except Exception: + except Exception: # nosec - disable B110:try_except_pass check pass return False diff --git a/datumaro/util/image.py b/datumaro/util/image.py index e1acd4792d88..fa10040484b8 100644 --- a/datumaro/util/image.py +++ b/datumaro/util/image.py @@ -1,22 +1,28 @@ - -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT -# pylint: disable=unused-import - -from enum import Enum +from enum import Enum, auto from io import BytesIO -from typing import Any, Callable, Iterator, Iterable, Optional, Tuple, Union -import numpy as np +from typing import ( + Any, Callable, Dict, Iterable, Iterator, Optional, Tuple, Union, +) +import importlib import os import os.path as osp +import shlex +import shutil -_IMAGE_BACKENDS = Enum('_IMAGE_BACKENDS', ['cv2', 'PIL']) +import numpy as np + + +class _IMAGE_BACKENDS(Enum): + cv2 = auto() + PIL = auto() _IMAGE_BACKEND = None _image_loading_errors = (FileNotFoundError, ) try: - import cv2 + importlib.import_module('cv2') _IMAGE_BACKEND = _IMAGE_BACKENDS.cv2 except ImportError: import PIL @@ -241,7 +247,7 @@ def __init__(self, data: Union[None, Callable, np.ndarray] = None, assert size is None or len(size) == 2, size if size is not None: assert len(size) == 2 and 0 < size[0] and 0 < size[1], size - size = 
tuple(size) + size = tuple(map(int, size)) self._size = size # (H, W) assert path is None or isinstance(path, str), path @@ -278,7 +284,7 @@ def data(self) -> np.ndarray: data = self._data if self._size is None and data is not None: - self._size = data.shape[:2] + self._size = tuple(map(int, data.shape[:2])) return data @property @@ -297,7 +303,7 @@ def size(self) -> Optional[Tuple[int, int]]: except _image_loading_errors: return None if data is not None: - self._size = data.shape[:2] + self._size = tuple(map(int, data.shape[:2])) return self._size def __eq__(self, other): @@ -312,6 +318,17 @@ def __eq__(self, other): (self.has_data and np.array_equal(self.data, other.data) or \ not self.has_data) + def save(self, path): + src_ext = self.ext.lower() + dst_ext = osp.splitext(osp.basename(path))[1].lower() + + os.makedirs(osp.dirname(path), exist_ok=True) + if src_ext == dst_ext and osp.isfile(self.path): + if self.path != path: + shutil.copyfile(self.path, path) + else: + save_image(path, self.data) + class ByteImage(Image): def __init__(self, data=None, path=None, ext=None, cache=None, size=None): loader = None @@ -351,4 +368,54 @@ def __eq__(self, other): (np.array_equal(self.size, other.size)) and \ (self.has_data == other.has_data) and \ (self.has_data and self.get_bytes() == other.get_bytes() or \ - not self.has_data) \ No newline at end of file + not self.has_data) + + def save(self, path): + src_ext = self.ext.lower() + dst_ext = osp.splitext(osp.basename(path))[1].lower() + + os.makedirs(osp.dirname(path), exist_ok=True) + if src_ext == dst_ext and osp.isfile(self.path): + if self.path != path: + shutil.copyfile(self.path, path) + elif src_ext == dst_ext: + with open(path, 'wb') as f: + f.write(self.get_bytes()) + else: + save_image(path, self.data) + +ImageMeta = Dict[str, Tuple[int, int]] + +DEFAULT_IMAGE_META_FILE_NAME = 'images.meta' + +def load_image_meta_file(image_meta_path: str) -> ImageMeta: + """ + Loads image metadata from a file with the 
following format: + + + + ... + + Shell-like comments and quoted fields are allowed. + + This can be useful to support datasets in which image dimensions are + required to interpret annotations. + """ + assert isinstance(image_meta_path, str) + + if not osp.isfile(image_meta_path): + raise Exception("Can't read image meta file '%s'" % image_meta_path) + + image_meta = {} + + with open(image_meta_path, encoding='utf-8') as f: + for line in f: + fields = shlex.split(line, comments=True) + if not fields: + continue + + # ignore extra fields, so that the format can be extended later + image_name, h, w = fields[:3] + image_meta[image_name] = (int(h), int(w)) + + return image_meta diff --git a/datumaro/util/image_cache.py b/datumaro/util/image_cache.py index 08f025828937..e37bd1358e02 100644 --- a/datumaro/util/image_cache.py +++ b/datumaro/util/image_cache.py @@ -4,7 +4,6 @@ from collections import OrderedDict - _instance = None DEFAULT_CAPACITY = 2 diff --git a/datumaro/util/log_utils.py b/datumaro/util/log_utils.py index 6c8d8421e7e9..b4e1c5c7db4b 100644 --- a/datumaro/util/log_utils.py +++ b/datumaro/util/log_utils.py @@ -6,6 +6,7 @@ from contextlib import contextmanager import logging + @contextmanager def logging_disabled(max_level=logging.CRITICAL): previous_level = logging.root.manager.disable diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py index bd763dffea6c..928011515b99 100644 --- a/datumaro/util/mask_tools.py +++ b/datumaro/util/mask_tools.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: MIT from itertools import chain + import numpy as np from datumaro.util.image import lazy_image, load_image @@ -114,8 +115,8 @@ def make_binary_mask(mask): return mask.astype(bool) def bgr2index(img): - if img.dtype.kind not in {'b', 'i', 'u'}: - img = img.astype(np.uint8) + if img.dtype.kind not in {'b', 'i', 'u'} or img.dtype.itemsize < 4: + img = img.astype(np.uint32) return (img[..., 0] << 16) + (img[..., 1] << 8) + img[..., 2] def index2bgr(id_map): 
diff --git a/datumaro/util/os_util.py b/datumaro/util/os_util.py index 094329206a17..6260f07890e3 100644 --- a/datumaro/util/os_util.py +++ b/datumaro/util/os_util.py @@ -8,7 +8,6 @@ import subprocess import sys - DEFAULT_MAX_DEPTH = 10 def check_instruction_set(instruction): diff --git a/datumaro/util/test_utils.py b/datumaro/util/test_utils.py index 8c5cf05af24c..e8b45f96bb39 100644 --- a/datumaro/util/test_utils.py +++ b/datumaro/util/test_utils.py @@ -1,8 +1,8 @@ - -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT +from enum import Enum, auto import inspect import os import os.path as osp @@ -12,15 +12,19 @@ # Use rmtree from GitPython to avoid the problem with removal of # readonly files on Windows, which Git uses extensively # It double checks if a file cannot be removed because of readonly flag - from git.util import rmtree, rmfile + from git.util import rmfile, rmtree except ImportError: from shutil import rmtree from os import remove as rmfile -from datumaro.components.extractor import AnnotationType from datumaro.components.dataset import Dataset -from datumaro.util import find +from datumaro.components.extractor import AnnotationType +from datumaro.util import filter_dict, find + +class Dimensions(Enum): + dim_2d = auto() + dim_3d = auto() def current_function_name(depth=1): return inspect.getouterframes(inspect.currentframe())[depth].function @@ -90,8 +94,8 @@ def _compare_annotations(expected, actual, ignored_attrs=None): a_attr = expected.attributes b_attr = actual.attributes - expected.attributes = {k:v for k,v in a_attr.items() if k not in ignored_attrs} - actual.attributes = {k:v for k,v in b_attr.items() if k not in ignored_attrs} + expected.attributes = filter_dict(a_attr, exclude_keys=ignored_attrs) + actual.attributes = filter_dict(b_attr, exclude_keys=ignored_attrs) r = expected == actual expected.attributes = a_attr @@ -108,12 +112,20 @@ def compare_datasets(test, 
expected, actual, ignored_attrs=None, item_b = find(actual, lambda x: x.id == item_a.id and \ x.subset == item_a.subset) test.assertFalse(item_b is None, item_a.id) - test.assertEqual(item_a.attributes, item_b.attributes, item_a.id) + + if ignored_attrs: + test.assertEqual(item_a.attributes, + filter_dict(item_b.attributes, exclude_keys=ignored_attrs), + item_a.id) + else: + test.assertEqual(item_a.attributes, item_b.attributes, item_a.id) + if (require_images and item_a.has_image and item_a.image.has_data) or \ item_a.has_image and item_a.image.has_data and \ item_b.has_image and item_b.image.has_data: test.assertEqual(item_a.image, item_b.image, item_a.id) - test.assertEqual(len(item_a.annotations), len(item_b.annotations)) + test.assertEqual(len(item_a.annotations), len(item_b.annotations), + item_a.id) for ann_a in item_a.annotations: # We might find few corresponding items, so check them all ann_b_matches = [x for x in item_b.annotations @@ -143,6 +155,46 @@ def compare_datasets_strict(test, expected, actual): '%s:\n%s\nvs.\n%s\n' % \ (idx, item_a, item_b)) +def compare_datasets_3d(test, expected, actual, ignored_attrs=None, + require_point_cloud=False): + compare_categories(test, expected.categories(), actual.categories()) + + if actual.subsets(): + test.assertEqual(sorted(expected.subsets()), sorted(actual.subsets())) + + test.assertEqual(len(expected), len(actual)) + for item_a in expected: + item_b = find(actual, lambda x: x.id == item_a.id) + test.assertFalse(item_b is None, item_a.id) + + if ignored_attrs: + test.assertEqual(item_a.attributes, + filter_dict(item_b.attributes, exclude_keys=ignored_attrs), + item_a.id) + else: + test.assertEqual(item_a.attributes, item_b.attributes, item_a.id) + + if (require_point_cloud and item_a.has_point_cloud) or \ + (item_a.has_point_cloud and item_b.has_point_cloud): + test.assertEqual(item_a.point_cloud, item_b.point_cloud, item_a.id) + test.assertEqual( + set(img.path for img in item_a.related_images), + 
set(img.path for img in item_b.related_images), + item_a.id) + test.assertEqual(len(item_a.annotations), len(item_b.annotations)) + for ann_a in item_a.annotations: + # We might find few corresponding items, so check them all + ann_b_matches = [x for x in item_b.annotations + if x.type == ann_a.type] + test.assertFalse(len(ann_b_matches) == 0, 'ann id: %s' % ann_a.id) + + ann_b = find(ann_b_matches, lambda x: + _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs)) + if ann_b is None: + test.fail('ann %s, candidates %s' % (ann_a, ann_b_matches)) + item_b.annotations.remove(ann_b) # avoid repeats + + def test_save_and_load(test, source_dataset, converter, test_dir, importer, target_dataset=None, importer_args=None, compare=None, **kwargs): converter(source_dataset, test_dir) @@ -154,6 +206,9 @@ def test_save_and_load(test, source_dataset, converter, test_dir, importer, if target_dataset is None: target_dataset = source_dataset - if not compare: + if not compare and kwargs.get("dimension") is Dimensions.dim_3d: + compare = compare_datasets_3d + del kwargs["dimension"] + elif not compare: compare = compare_datasets - compare(test, expected=target_dataset, actual=parsed_dataset, **kwargs) \ No newline at end of file + compare(test, expected=target_dataset, actual=parsed_dataset, **kwargs) diff --git a/datumaro/util/tf_util.py b/datumaro/util/tf_util.py index 9eda97bab9c8..a0fd22e2a9d0 100644 --- a/datumaro/util/tf_util.py +++ b/datumaro/util/tf_util.py @@ -4,6 +4,8 @@ # SPDX-License-Identifier: MIT +enable_tf_check = False + def check_import(): # Workaround for checking import availability: # Official TF builds include AVX instructions. 
Once we try to import, @@ -14,7 +16,8 @@ def check_import(): from .os_util import check_instruction_set - result = subprocess.run([sys.executable, '-c', 'import tensorflow'], + # Disable B603:subprocess_without_shell_equals_true - the command line is controlled + result = subprocess.run([sys.executable, '-c', 'import tensorflow'], # nosec timeout=60, universal_newlines=True, # use text mode for output stream stdout=subprocess.PIPE, stderr=subprocess.PIPE) # capture output @@ -32,13 +35,13 @@ def check_import(): raise ImportError(message) -def import_tf(check=True): +def import_tf(check=None): import sys not_found = object() tf = sys.modules.get('tensorflow', not_found) if tf is None: - import tensorflow as tf # emit default error + import tensorflow as tf # emit default error elif tf is not not_found: return tf @@ -46,6 +49,9 @@ def import_tf(check=True): import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + if check is None: + check = enable_tf_check + if check: try: check_import() diff --git a/datumaro/version.py b/datumaro/version.py index aae69457aadb..4aca18f8a459 100644 --- a/datumaro/version.py +++ b/datumaro/version.py @@ -1 +1 @@ -VERSION = '0.1.9' \ No newline at end of file +VERSION = '0.1.10' \ No newline at end of file diff --git a/docs/design.md b/docs/design.md index b24a57a5958f..4b0da717ae46 100644 --- a/docs/design.md +++ b/docs/design.md @@ -51,7 +51,8 @@ Datumaro is: - Provision of iterators for user code - Dataset downloading - Dataset generation -- Dataset building (export in a specific format, indexation, statistics, documentation) +- Dataset building (export in a specific format, indexation, statistics, +documentation) - Dataset exporting to other formats - Dataset debugging (run inference, generate dataset slices, compute statistics) - "Explainable AI" - highlight network attention areas ([paper](https://arxiv.org/abs/1901.04592)) @@ -65,7 +66,8 @@ Datumaro is: Use case: explanation of network "quality", "stability", "certainty" - 
adversarial attacks on networks - dataset minification / reduction - Use case: removal of redundant information to reach the same network quality with lesser training time + Use case: removal of redundant information to reach the same network quality + with lesser training time - dataset expansion and filtration of additions Use case: add only important data - guidance for key frame selection for tracking ([paper](https://arxiv.org/abs/1903.11779)) @@ -73,7 +75,7 @@ Datumaro is: ## RC 1 vision -*CVAT integration* +### CVAT integration Datumaro needs to be integrated with [CVAT](https://github.com/openvinotoolkit/cvat), extending CVAT UI capabilities regarding task and project operations. diff --git a/docs/developer_guide.md b/docs/developer_guide.md index e8eff1bcf85f..38efb4a9d1bd 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -144,8 +144,8 @@ The framework provides functions to read and write datasets in specific formats. It is supported by `Extractor`s, `Importer`s, and `Converter`s. Dataset reading is supported by `Extractor`s and `Importer`s: -- An `Extractor` produces a list of `DatasetItem`s corresponding -to the dataset. Annotations are available in the `DatasetItem.annotations` list +- An `Extractor` produces a list of `DatasetItem`s corresponding to the + dataset. Annotations are available in the `DatasetItem.annotations` list - An `Importer` creates a project from a data source location It is possible to add custom `Extractor`s and `Importer`s. 
To do this, you need diff --git a/docs/formats/cifar_user_manual.md b/docs/formats/cifar_user_manual.md new file mode 100644 index 000000000000..0ec72e14e5b9 --- /dev/null +++ b/docs/formats/cifar_user_manual.md @@ -0,0 +1,170 @@ +# CIFAR user manual + +## Contents + +- [Format specification](#format-specification) +- [Load CIFAR dataset](#load-CIFAR-dataset) +- [Export to other formats](#export-to-other-formats) +- [Export to CIFAR](#export-to-CIFAR) +- [Particular use cases](#particular-use-cases) + +## Format specification + +CIFAR format specification available [here](https://www.cs.toronto.edu/~kriz/cifar.html). + +CIFAR dataset format supports `Labels` annotations. + +Datumaro supports Python version CIFAR-10/100. + +## Load CIFAR dataset + +The CIFAR dataset is available for free download: + +- [cifar-10-python.tar.gz](https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz): + CIFAR-10 python version +- [cifar-100-python.tar.gz](https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz): + CIFAR-100 python version + +There are two ways to create Datumaro project and add CIFAR dataset to it: + +``` bash +datum import --format cifar --input-path +# or +datum create +datum add path -f cifar +``` + +It is possible to specify project name and project directory run +`datum create --help` for more information. + +CIFAR-10 dataset directory should have the following structure: + + +``` +└─ Dataset/ + ├── batches.meta + ├── data_batch_1 + ├── data_batch_2 + ├── data_batch_3 + ├── data_batch_4 + ├── data_batch_5 + └── test_batch +``` + +CIFAR-100 dataset directory should have the following structure: + + +``` +└─ Dataset/ + ├── meta + ├── test + └── train +``` + +CIFAR format only supports 32 x 32 images. + +The 100 classes in the CIFAR-100 are grouped into 20 superclasses. 
Each image +comes with a "fine" label (the class to which it belongs) and a "coarse" label +(the superclass to which it belongs) + +The difference between CIFAR-10 and CIFAR-100 is how labels are stored +in the meta file (batches.meta or meta) and in the annotation file (train, +data_batch_1, test_batch, etc.). + +``` +meta file: +CIFAR-10: num_cases_per_batch: 1000 + label_names: ['airplane', 'automobile', 'bird', ...] + num_vis: 3072 +CIFAR-100: fine_label_names: ['apple', 'aquarium_fish', 'baby', ...] + coarse_label_names: ['aquatic_mammals', 'fish', 'flowers', ...] + +annotation file: +'batch_label': 'training batch 1 of 5' +'data': ndarray +'filenames': list +CIFAR-10: 'labels': list +CIFAR-100: 'fine_labels': list + 'coarse_labels': list +``` + +## Export to other formats + +Datumaro can convert CIFAR dataset into any other format [Datumaro supports](../user_manual.md#supported-formats). +To get the expected result, the dataset needs to be converted to formats +that support the classification task (e.g. MNIST, ImageNet, PascalVOC, +etc.) There are few ways to convert CIFAR dataset to other dataset format: + +``` bash +datum project import -f cifar -i +datum export -f imagenet -o +# or +datum convert -if cifar -i -f imagenet -o +``` + +## Export to CIFAR + +There are few ways to convert dataset to CIFAR format: + +``` bash +# export dataset into CIFAR format from existing project +datum export -p -f cifar -o \ + -- --save-images +# converting to CIFAR format from other format +datum convert -if imagenet -i \ + -f cifar -o -- --save-images +``` + +Extra options for export to CIFAR format: + +- `--save-images` allow to export dataset with saving images +(by default `False`); +- `--image-ext ` allow to specify image extension +for exporting dataset (by default `.png`). + +The format (CIFAR-10 or CIFAR-100) in which the dataset will be +exported depends on the presence of superclasses in the `LabelCategories`. 
+ +## Particular use cases + +Datumaro supports filtering, transformation, merging etc. for all formats +and for the CIFAR format in particular. Follow [user manual](../user_manual.md) +to get more information about these operations. + +There are few examples of using Datumaro operations to solve +particular problems with CIFAR dataset: + +### Example 1. How to create custom CIFAR-like dataset + +```python +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import Label, DatasetItem + +dataset = Dataset.from_iterable([ + DatasetItem(id=0, image=np.ones((32, 32, 3)), + annotations=[Label(3)] + ), + DatasetItem(id=1, image=np.ones((32, 32, 3)), + annotations=[Label(8)] + ) +], categories=[['airplane', 'automobile', 'bird', 'cat', 'deer', + 'dog', 'frog', 'horse', 'ship', 'truck']]) + +dataset.export('./dataset', format='cifar') +``` + +### Example 2. How to filter and convert CIFAR dataset to ImageNet + +Convert CIFAR dataset to ImageNet format, keep only images with `dog` class +presented: + +``` bash +# Download CIFAR-10 dataset: +# https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz +datum convert --input-format cifar --input-path \ + --output-format imagenet \ + --filter '/item[annotation/label="dog"]' +``` + +More examples of working with CIFAR dataset from code can be found in +[tests_cifar](../../tests/test_cifar_format.py) diff --git a/docs/formats/cityscapes_user_manual.md b/docs/formats/cityscapes_user_manual.md index 534f91c726a8..e1719a6c1b6f 100644 --- a/docs/formats/cityscapes_user_manual.md +++ b/docs/formats/cityscapes_user_manual.md @@ -15,7 +15,7 @@ Cityscapes format specification available [here](https://github.com/mcordts/city Cityscapes dataset format supports `Masks` (segmentations tasks) annotations. -## Load Cityscapes dataset +## Load Cityscapes dataset The Cityscapes dataset is available for free [download](https://www.cityscapes-dataset.com/downloads/). 
@@ -67,10 +67,10 @@ Annotated files description: then the pixels have the regular ID of that class 1. *labelIds.png - class labels are encoded by its ID -To make sure that the selected dataset has been added to the project, you can run -`datum info`, which will display the project and dataset information. +To make sure that the selected dataset has been added to the project, you can +run `datum info`, which will display the project and dataset information. -## Export to other formats +## Export to other formats Datumaro can convert Cityscapes dataset into any other format [Datumaro supports](../user_manual.md#supported-formats). To get the expected result, the dataset needs to be converted to formats @@ -90,7 +90,7 @@ To get information about them, run `datum export -f -- -h` -## Export to Cityscapes +## Export to Cityscapes There are few ways to convert dataset to Cityscapes format: @@ -139,7 +139,7 @@ particular problems with Cityscapes dataset: datum create -o project datum add path -p project -f cityscapes ./Cityscapes/ datum stats -p project -datum export -p final_project -o dataset -f voc --overwrite -- --save-images +datum export -p final_project -o dataset -f voc -- --save-images ``` ### Example 2. 
How to create custom Cityscapes-like dataset diff --git a/docs/formats/coco_user_manual.md b/docs/formats/coco_user_manual.md index 5b2d37901665..e3a17b406225 100644 --- a/docs/formats/coco_user_manual.md +++ b/docs/formats/coco_user_manual.md @@ -16,7 +16,7 @@ COCO dataset format supports `captions`, `image_info`, `instances`, `panoptic`, `person_keypoints`, `stuff` annotation tasks and, as Datumaro extension, `label` (like `instances` with only `category_id`) -## Load COCO dataset +## Load COCO dataset The COCO dataset is available for free download: @@ -103,16 +103,16 @@ datum import --format coco_stuff --input-path Datumaro supports the following COCO tasks: - [Image Captioning](https://cocodataset.org/#captions-2015) (`coco_caption`) - [Object Detection](https://cocodataset.org/#detection-2020) (`coco_instances`) -- Image classification (our extension) (`coco_labels`) - a format like Object Detection, which uses - only `category_id` and `score` annotation fields +- Image classification (our extension) (`coco_labels`) - a format like + Object Detection, which uses only `category_id` and `score` annotation fields - [Panoptic Segmentation](https://cocodataset.org/#panoptic-2020) (`coco_panoptic`) - [Keypoint Detection](https://cocodataset.org/#keypoints-2020) (`coco_person_keypoints`) - [Stuff Segmentation](https://cocodataset.org/#stuff-2019) (`coco_stuff`) -To make sure that the selected dataset has been added to the project, you can run -`datum info`, which will display the project and dataset information. +To make sure that the selected dataset has been added to the project, you can +run `datum info`, which will display the project and dataset information. -## Export to other formats +## Export to other formats Datumaro can convert COCO dataset into any other format [Datumaro supports](../user_manual.md#supported-formats). 
To get the expected result, the dataset needs to be converted to formats @@ -132,7 +132,7 @@ To get information about them, run `datum export -f -- -h` -## Export to COCO +## Export to COCO There are few ways to convert dataset to COCO format: @@ -147,24 +147,26 @@ datum convert -if voc -i \ Extra options for export to COCO format: - `--save-images` allow to export dataset with saving images -(by default `False`); + (by default `False`); - `--image-ext IMAGE_EXT` allow to specify image extension -for exporting dataset (by default - keep original or use `.jpg`, if none); -- `--segmentation-mode MODE` allow to specify save mode for instance segmentation: - - 'guess': guess the mode for each instance (using 'is_crowd' attribute as hint) - - 'polygons': save polygons( merge and convert masks, prefer polygons) - - 'mask': save masks (merge and convert polygons, prefer masks) + for exporting dataset (by default - keep original or use `.jpg`, if none); +- `--segmentation-mode MODE` allow to specify save mode for instance + segmentation: + - 'guess': guess the mode for each instance + (using 'is_crowd' attribute as hint) + - 'polygons': save polygons( merge and convert masks, prefer polygons) + - 'mask': save masks (merge and convert polygons, prefer masks) (by default `guess`); -- `--crop-covered` allow to crop covered segments so that background objects -segmentation was more accurate (by default `False`); +- `--crop-covered` allow to crop covered segments so that background objects + segmentation was more accurate (by default `False`); - `--allow-attributes ALLOW_ATTRIBUTES` allow export of attributes -(by default `True`); + (by default `True`); - `--reindex REINDEX` allow to assign new indices to images and annotations, -useful to avoid merge conflicts (by default `False`); + useful to avoid merge conflicts (by default `False`); - `--merge-images` allow to save all images into a single directory -(by default `False`); + (by default `False`); - `--tasks TASKS` allow to 
specify tasks for export dataset, -by default Datumaro uses all tasks. Example: + by default Datumaro uses all tasks. Example: ```bash datum import -o project -f coco -i diff --git a/docs/formats/image_zip_user_manual.md b/docs/formats/image_zip_user_manual.md new file mode 100644 index 000000000000..5e93403e2dcc --- /dev/null +++ b/docs/formats/image_zip_user_manual.md @@ -0,0 +1,87 @@ +# Image zip user manual + +## Contents +- [Format specification](#format-specification) +- [Load image zip dataset](#load-image-zip-dataset) +- [Export to other formats](#export-to-other-formats) +- [Export unannotated dataset to zip archive](#export-unannotated-dataset-to-zip-archive) + +## Format specification + +- The image zip format allow to export/import unannotated datasets + with images to/from zip archive. + +- The image zip format doesn't support any types of annotations + and attributes. + +## Load Image zip dataset + +Few ways to load unannotated datasets to your Datumaro project: + +- From existing archive: + +```bash +datum import -o project -f image_zip -i ./images.zip +``` + +- From directory with zip archives. Datumaro will loaded images from + all zip files in the directory: + +```bash +datum import -o project -f image_zip -i ./foo +``` + +The directory with zip archives should have the following structure: + +``` +├── foo/ +| ├── archive1.zip/ +| | ├── image_1.jpg +| | ├── image_2.png +| | ├── subdir/ +| | | ├── image_3.jpg +| | | ├── ... +| | ├── ... +| ├── archive2.zip/ +| | ├── image_101.jpg +| | ├── image_102.jpg +| | ├── ... +| ... +``` + +Images in a archives should have supported extension, +follow the [user manual](../user_manual.md#data-formats) to see the supported +extensions. 
+ +## Export to other formats + +Datumaro can load dataset images from a zip archive and convert it to +[another supported dataset format](../user_manual.md#supported-formats), +for example: + +```bash +datum import -o project -f image_zip -i ./images.zip +datum export -f coco -o ./new_dir -- --save-images +``` + +## Export unannotated dataset to zip archive + +Example: exporting images from VOC dataset to zip archives: +```bash +datum import -o project -f voc -i ./VOC2012 +datum export -f image_zip -o ./ --overwrite -- --name voc_images.zip \ + --compression ZIP_DEFLATED +``` + +Extra options for export to image_zip format: + +- `--save-images` allow to export dataset with saving images + (default: `False`); +- `--image-ext ` allow to specify image extension + for exporting dataset (default: use original or `.jpg`, if none); +- `--name` name of output zipfile (default: `default.zip`); +- `--compression` allow to specify archive compression method. + Available methods: + `ZIP_STORED`, `ZIP_DEFLATED`, `ZIP_BZIP2`, `ZIP_LZMA` (default: `ZIP_STORED`). + Follow [zip documentation](https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) + for more information. diff --git a/docs/formats/kitti_raw_user_manual.md b/docs/formats/kitti_raw_user_manual.md new file mode 100644 index 000000000000..351e832d7557 --- /dev/null +++ b/docs/formats/kitti_raw_user_manual.md @@ -0,0 +1,181 @@ +# Velodyne Points / KITTI Raw 3D user manual + +## Contents + +- [Format specification](#format-specification) +- [Import KITTI Raw 3D dataset](#import-kitti-raw-dataset) +- [Export to other formats](#export-to-other-formats) +- [Export to KITTI Raw 3d](#export-to-kitti-raw) +- [Examples](#examples) + +## Format specification + +Velodyne Points / KITTI Raw 3D data format: +- [home page](http://www.cvlibs.net/datasets/kitti/raw_data.php). 
+- [specification](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_raw_data.zip) + +This dataset format supports the following types of annotations: +- `cuboid_3d` (tracks) + +Supported annotation attributes: +- `truncation` (write, string), possible values: `truncation_unset`, + `in_image`, `truncated`, `out_image`, `behind_image` (case-independent). +- `occlusion` (write, string), possible values: `occlusion_unset`, `visible`, + `partly`, `fully` (case-independent). This atribute has priority + over `occluded`. +- `occluded` (read/write, boolean) +- `keyframe` (read/write, boolean). Resposible for `occlusion_kf` field. +- `track_id` (read/write, integer). Indicates the group over frames for + annotations. + +Supported image attributes: +- `frame` (read/write, integer). Indicates frame number of the image. + +## Import KITTI Raw dataset + +The velodyne points/KITTI Raw dataset is available for download: + + + + + +KITTI Raw dataset directory should have the following structure: + + +``` +└─ Dataset/ + ├── image_00/ # optional, aligned images from different cameras + │ └── data/ + │ ├── + │ └── + ├── image_01/ + │ └── data/ + │ ├── + │ └── + ... + │ + ├── velodyne_points/ # optional, 3d point clouds + │ └── data/ + │ ├── + │ └── + ├── tracklet_labels.xml + └── frame_list.txt # optional, required for custom image names + +``` + +The format does not support arbitrary image names and paths, but Datumaro +provides an option to use a special index file to allow this. + +`frame_list.txt` contents: +``` +12345 relative/path/to/name1/from/data +46 relative/path/to/name2/from/data +... +``` + +There are two ways to create Datumaro project and add KITTI dataset to it: + +```bash +datum import --format kitti_raw --input-path +# or +datum create +datum add path -f kitti_raw +``` + +To make sure that the selected dataset has been added to the project, +you can run `datum info`, which will display the project and dataset +information. 
+ +## Export to other formats + +Datumaro can convert KITTI Raw dataset into any other +format [Datumaro supports](../user_manual.md#supported-formats). + +Such conversion will only be successful if the output +format can represent the type of dataset you want to convert, +e.g. 3D point clouds can be saved in Supervisely Point Clouds format, +but not in COCO keypoints. + +There are few ways to convert KITTI Raw dataset to other dataset format: + +``` bash +datum import -f kitti_raw -i -o proj/ +datum export -f sly_pointcloud -o -p proj/ +# or +datum convert -if kitti_raw -i -f sly_pointcloud +``` + +Some formats provide extra options for conversion. +These options are passed after double dash (`--`) in the command line. +To get information about them, run + +`datum export -f -- -h` + +## Export to KITTI Raw + +There are few ways to convert dataset to KITTI Raw format: + +``` bash +# export dataset into KITTI Raw format from existing project +datum export -p -f kitti_raw -o \ + -- --save-images +# converting to KITTI Raw format from other format +datum convert -if sly_pointcloud -i \ + -f kitti_raw -o -- --save-images --reindex +``` + +Extra options for exporting in KITTI Raw format: + +- `--save-images` allow to export dataset with saving images. This will + include point clouds and related images (by default `False`) +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original or use `.png`, if none) +- `--reindex` assigns new indices to frames and tracks. Allows annotations + without `track_id` attribute (they will be exported as single-frame tracks). +- `--allow-attrs` allows writing arbitrary annotation attributes. They will + be written in `` section of `` + (disabled by default) + +## Examples + +### Example 1. Import dataset, compute statistics + +```bash +datum create -o project +datum add path -p project -f kitti_raw ../../kitti_raw/ +datum stats -p project +``` + +### Example 2.
Convert Supervisely Pointclouds to KITTI Raw + +``` bash +datum convert -if sly_pointcloud -i ../sly_pcd/ \ + -f kitti_raw -o my_kitti/ -- --save-images --allow-attrs +``` + +### Example 3. Create a custom dataset + +``` python +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import Cuboid3d, DatasetItem + +dataset = Dataset.from_iterable([ + DatasetItem(id='some/name/qq', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={'occluded': False, 'track_id': 1}), + + Cuboid3d(position=[3.4, -2.11, 4.4], label=1, + attributes={'occluded': True, 'track_id': 2}) + ], + pcd='path/to/pcd1.pcd', + related_images=[np.ones((10, 10)), 'path/to/image2.png', 'image3.jpg'], + attributes={'frame': 0} + ), +], categories=['cat', 'dog']) + +dataset.export('my_dataset/', format='kitti_raw', save_images=True) +``` + +More examples of working with KITTI Raw dataset from code can be found in +[tests](../../tests/test_kitti_raw_format.py) diff --git a/docs/formats/kitti_user_manual.md b/docs/formats/kitti_user_manual.md new file mode 100644 index 000000000000..a024be5629d3 --- /dev/null +++ b/docs/formats/kitti_user_manual.md @@ -0,0 +1,212 @@ +# KITTI user manual + +## Contents + +- [Format specification](#format-specification) +- [Load KITTI dataset](#load-KITTI-dataset) +- [Export to other formats](#export-to-other-formats) +- [Export to KITTI](#export-to-KITTI) +- [Particular use cases](#particular-use-cases) + +## Format specification + +- Original KITTI dataset format support the following types of annotations: + - `Bounding boxes` (for [object detection](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark)); + - `Masks` (for [segmentation](http://www.cvlibs.net/datasets/kitti/eval_semseg.php?benchmark=semantics2015) task). 
+ +- Supported attributes: + - `truncated`: indicates that the bounding box specified for the object does + not correspond to the full extent of the object; + - `occluded`: indicates that a significant portion of the object within the + bounding box is occluded by another object. + +KITTI segmentations format specification available in `README.md` [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_semantics.zip). + +KITTI object detection format specification available in `README.md` [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip). + +## Load KITTI dataset + +The KITTI left color images for object detection are available [here](http://www.cvlibs.net/download.php?file=data_object_image_2.zip). + +The KITTI object detection labels are available [here](http://www.cvlibs.net/download.php?file=data_object_label_2.zip). + +The KITTI segmentations dataset is available [here](http://www.cvlibs.net/download.php?file=data_semantics.zip). + +There are two ways to create Datumaro project and add KITTI dataset to it: + +``` bash +datum import --format kitti --input-path +# or +datum create +datum add path -f kitti +``` + +It is possible to specify project name and project directory run +`datum create --help` for more information. + +KITTI segmentation dataset directory should have the following structure: + + +``` +└─ Dataset/ + ├── testing/ + │ └── image_2/ + │ ├── . + │ ├── . + │ └── ... + ├── training/ + │ ├── image_2/ # left color camera images + │ │ ├── . + │ │ ├── . + │ │ └── ... + │ ├── label_2/ # left color camera label files + │ │ ├── .txt + │ │ ├── .txt + │ │ └── ... + │ ├── instance/ # instance segmentation masks + │ │ ├── .png + │ │ ├── .png + │ │ └── ... + │ ├── semantic/ # semantic segmentation masks (labels are encoded by its id) + │ │ ├── .png + │ │ ├── .png + │ │ └── ... + │ └── semantic_rgb/ # semantic segmentation masks (labels are encoded by its color) + │ ├── .png + │ ├── .png + │ └── ... 
+``` + +You can import dataset for specific tasks +of KITTI dataset instead of the whole dataset, +for example: + +``` bash +datum add path -f kitti_detection +``` + +Datumaro supports the following KITTI tasks: +- Object detection (`kitti_detection`) +- Class and instance segmentation (`kitti_segmentation`) +- [3d point clouds / velodyne points (`kitti_raw`)](./kitti_raw_user_manual.md) + +To make sure that the selected dataset has been added to the project, you can +run `datum info`, which will display the project and dataset information. + +## Export to other formats + +Datumaro can convert KITTI dataset into any other format [Datumaro supports](../user_manual.md#supported-formats). + +Such conversion will only be successful if the output +format can represent the type of dataset you want to convert, +e.g. segmentation annotations can be +saved in `Cityscapes` format, but not as `COCO keypoints`. + +There are few ways to convert KITTI dataset to other dataset format: + +``` bash +datum project import -f kitti -i +datum export -f cityscapes -o +# or +datum convert -if kitti -i -f cityscapes -o +``` + +Some formats provide extra options for conversion. +These options are passed after double dash (`--`) in the command line. +To get information about them, run + +`datum export -f -- -h` + +## Export to KITTI + +There are few ways to convert dataset to KITTI format: + +``` bash +# export dataset into KITTI format from existing project +datum export -p -f kitti -o \ + -- --save-images +# converting to KITTI format from other format +datum convert -if cityscapes -i \ + -f kitti -o -- --save-images +``` + +Extra options for export to KITTI format: +- `--save-images` allow to export dataset with saving images + (by default `False`); +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original or use `.png`, if none).
+- `--apply-colormap APPLY_COLORMAP` allow to use colormap for class masks + (in folder `semantic_rgb`, by default `True`); +- `--label_map` allow to define a custom colormap. Example + +``` bash +# mycolormap.txt : +# 0 0 255 sky +# 255 0 0 person +#... +datum export -f kitti -- --label-map mycolormap.txt + +# or you can use original kitti colormap: +datum export -f kitti -- --label-map kitti +``` +- `--tasks TASKS` allow to specify tasks for export dataset, +by default Datumaro uses all tasks. Example: + +```bash +datum import -o project -f kitti -i +datum export -p project -f kitti -- --tasks detection +``` +- `--allow-attributes ALLOW_ATTRIBUTES` allow export of attributes +(by default `True`). + +## Particular use cases + +Datumaro supports filtering, transformation, merging etc. for all formats +and for the KITTI format in particular. Follow +[user manual](../user_manual.md) +to get more information about these operations. + +There are few examples of using Datumaro operations to solve +particular problems with KITTI dataset: + +### Example 1. How to load an original KITTI dataset and convert to Cityscapes + +```bash +datum create -o project +datum add path -p project -f kitti ./KITTI/ +datum stats -p project +datum export -p final_project -o dataset -f cityscapes -- --save-images +``` + +### Example 2.
How to create custom KITTI-like dataset + +```python +import numpy as np +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import Mask, DatasetItem + +import datumaro.plugins.kitti_format as KITTI + +label_map = {} +label_map['background'] = (0, 0, 0) +label_map['label_1'] = (1, 2, 3) +label_map['label_2'] = (3, 2, 1) +categories = KITTI.make_kitti_categories(label_map) + +dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((1, 5, 3)), + annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), label=1, id=0, + attributes={'is_crowd': False}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=2, id=0, + attributes={'is_crowd': False}), + ] + ), + ], categories=categories) + +dataset.export('./dataset', format='kitti') +``` + +More examples of working with KITTI dataset from code can be found in +[tests](../../tests/test_kitti_format.py) diff --git a/docs/formats/mnist_user_manual.md b/docs/formats/mnist_user_manual.md index 01645e2827e1..e842504dc989 100644 --- a/docs/formats/mnist_user_manual.md +++ b/docs/formats/mnist_user_manual.md @@ -16,21 +16,29 @@ MNIST in CSV format specification available [here](https://pjreddie.com/project MNIST dataset format supports `Labels` annotations. 
-## Load MNIST dataset +## Load MNIST dataset The MNIST dataset is available for free download: -- [train-images-idx3-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz): training set images -- [train-labels-idx1-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz): training set labels -- [t10k-images-idx3-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz): test set images -- [t10k-labels-idx1-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz): test set labels +- [train-images-idx3-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz): + training set images +- [train-labels-idx1-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz): + training set labels +- [t10k-images-idx3-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz): + test set images +- [t10k-labels-idx1-ubyte.gz](https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz): + test set labels The Fashion MNIST dataset is available for free download: -- [train-images-idx3-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz): training set images -- [train-labels-idx1-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz): training set labels -- [t10k-images-idx3-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz): test set images -- [t10k-labels-idx1-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz): test set labels +- [train-images-idx3-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz): + training set images +- [train-labels-idx1-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz): + training set labels +- 
[t10k-images-idx3-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz): + test set images +- [t10k-labels-idx1-ubyte.gz](http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz): + test set labels The MNIST in CSV dataset is available for free download: @@ -46,7 +54,8 @@ datum create datum add path -f mnist ``` -There are two ways to create Datumaro project and add MNIST in CSV dataset to it: +There are two ways to create Datumaro project and add MNIST in CSV dataset +to it: ``` bash datum import --format mnist_csv --input-path @@ -64,9 +73,9 @@ MNIST dataset directory should have the following structure: ``` └─ Dataset/ ├── labels.txt # list of non-digit labels (optional) - ├── t10k-images-idx3-ubyte.gz - ├── t10k-labels-idx1-ubyte.gz - ├── train-images-idx3-ubyte.gz + ├── t10k-images-idx3-ubyte.gz + ├── t10k-labels-idx1-ubyte.gz + ├── train-images-idx3-ubyte.gz └── train-labels-idx1-ubyte.gz ``` MNIST in CSV dataset directory should have the following structure: @@ -75,7 +84,7 @@ MNIST in CSV dataset directory should have the following structure: ``` └─ Dataset/ ├── labels.txt # list of non-digit labels (optional) - ├── mnist_test.csv + ├── mnist_test.csv └── mnist_train.csv ``` If the dataset needs non-digit labels, you need to add the labels.txt @@ -97,12 +106,12 @@ Ankle boot MNIST format only supports single channel 28 x 28 images. -## Export to other formats +## Export to other formats Datumaro can convert MNIST dataset into any other format [Datumaro supports](../user_manual.md#supported-formats). To get the expected result, the dataset needs to be converted to formats -that support the classification task (e.g. CIFAR-10/100, ImageNet, PascalVOC, etc.) -There are few ways to convert MNIST dataset to other dataset format: +that support the classification task (e.g. CIFAR-10/100, ImageNet, PascalVOC, +etc.) 
There are few ways to convert MNIST dataset to other dataset format: ``` bash datum project import -f mnist -i @@ -113,7 +122,7 @@ datum convert -if mnist -i -f imagenet -o These commands also work for MNIST in CSV if you use `mnist_csv` instead of `mnist`. -## Export to MNIST +## Export to MNIST There are few ways to convert dataset to MNIST format: @@ -135,7 +144,7 @@ for exporting dataset (by default `.png`). These commands also work for MNIST in CSV if you use `mnist_csv` instead of `mnist`. -## Particular use cases +## Particular use cases Datumaro supports filtering, transformation, merging etc. for all formats and for the MNIST format in particular. Follow [user manual](../user_manual.md) @@ -144,7 +153,7 @@ to get more information about these operations. There are few examples of using Datumaro operations to solve particular problems with MNIST dataset: -### Example 1. How to create custom MNIST-like dataset +### Example 1. How to create custom MNIST-like dataset ```python from datumaro.components.dataset import Dataset @@ -162,9 +171,10 @@ dataset = Dataset.from_iterable([ dataset.export('./dataset', format='mnist') ``` -### Example 2. How to filter and convert MNIST dataset to ImageNet +### Example 2. 
How to filter and convert MNIST dataset to ImageNet -Convert MNIST dataset to ImageNet format, keep only images with `3` class presented: +Convert MNIST dataset to ImageNet format, keep only images with `3` class +presented: ``` bash # Download MNIST dataset: diff --git a/docs/formats/open_images_user_manual.md b/docs/formats/open_images_user_manual.md new file mode 100644 index 000000000000..a8ea76662183 --- /dev/null +++ b/docs/formats/open_images_user_manual.md @@ -0,0 +1,182 @@ +# Open Images user manual + +## Contents + +- [Format specification](#format-specification) +- [Load Open Images dataset](#load-open-images-dataset) +- [Export to other formats](#export-to-other-formats) +- [Export to Open Images](#export-to-open-images) +- [Particular use cases](#particular-use-cases) + +## Format specification + +A description of the Open Images Dataset (OID) format is available +on [its website](https://storage.googleapis.com/openimages/web/download.html). +Datumaro supports versions 4, 5 and 6. + +Datumaro currently supports only the human-verified image-level label +annotations from this dataset. + +## Load Open Images dataset + +The Open Images dataset is available for free download. + +See the [`open-images-dataset` GitHub repository](https://github.com/cvdfoundation/open-images-dataset) +for information on how to download the images. + +Datumaro also requires the image description files, +which can be downloaded from the following URLs: + +- [complete set](https://storage.googleapis.com/openimages/2018_04/image_ids_and_rotation.csv) +- [train set](https://storage.googleapis.com/openimages/v6/oidv6-train-images-with-labels-with-rotation.csv) +- [validation set](https://storage.googleapis.com/openimages/2018_04/validation/validation-images-with-rotation.csv) +- [test set](https://storage.googleapis.com/openimages/2018_04/test/test-images-with-rotation.csv) + +Datumaro expects at least one of the files above to be present. 
+ +In addition, the following metadata file must be present as well: + +- [class descriptions](https://storage.googleapis.com/openimages/v6/oidv6-class-descriptions.csv) + +You can optionally download the following additional metadata file: + +- [class hierarchy](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json) + +Annotations can be downloaded from the following URLs: + +- [train image labels](https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-human-imagelabels.csv) +- [validation image labels](https://storage.googleapis.com/openimages/v5/validation-annotations-human-imagelabels.csv) +- [test image labels](https://storage.googleapis.com/openimages/v5/test-annotations-human-imagelabels.csv) + +The annotations are optional. + +There are two ways to create Datumaro project and add OID to it: + +``` bash +datum import --format open_images --input-path +# or +datum create +datum add path -f open_images +``` + +It is possible to specify project name and project directory; run +`datum create --help` for more information. + +Open Images dataset directory should have the following structure: + +``` +└─ Dataset/ + ├── annotations/ + │ └── bbox_labels_600_hierarchy.json + │ └── image_ids_and_rotation.csv + │ └── oidv6-class-descriptions.csv + │ └── *-human-imagelabels.csv + └── images/ + ├── test + │ ├── + │ ├── + │ └── ... + ├── train + │ ├── + │ ├── + │ └── ... + └── validation + ├── + ├── + └── ... +``` + +To use per-subset image description files instead of `image_ids_and_rotation.csv`, +place them in the `annotations` subdirectory. + +## Export to other formats + +Datumaro can convert OID into any other format [Datumaro supports](../user_manual.md#supported-formats). +To get the expected result, the dataset needs to be converted to a format +that supports image-level labels. 
+There are a few ways to convert OID to other dataset format: + +``` bash +datum project import -f open_images -i +datum export -f cvat -o +# or +datum convert -if open_images -i -f cvat -o +``` + +Some formats provide extra options for conversion. +These options are passed after double dash (`--`) in the command line. +To get information about them, run + +`datum export -f -- -h` + +## Export to Open Images + +There are few ways to convert an existing dataset to the Open Images format: + +``` bash +# export dataset into Open Images format from existing project +datum export -p -f open_images -o \ + -- --save-images + +# convert a dataset in another format to the Open Images format +datum convert -if imagenet -i \ + -f open_images -o \ + -- --save-images +``` + +Extra options for export to the Open Images format: + +- `--save-images` - save image files when exporting the dataset + (by default, `False`) + +- `--image-ext IMAGE_EXT` - save image files with the specified extension + when exporting the dataset (by default, uses the original extension + or `.jpg` if there isn't one) + +## Particular use cases + +Datumaro supports filtering, transformation, merging etc. for all formats +and for the Open Images format in particular. Follow +[user manual](../user_manual.md) +to get more information about these operations. + +Here are a few examples of using Datumaro operations to solve +particular problems with the Open Images dataset: + +### Example 1. How to load the Open Images dataset and convert to the format used by CVAT + +```bash
datum create -o project
+datum add path -p project -f open_images ./open-images-dataset/ +datum stats -p project +datum export -p project -o dataset -f cvat --overwrite -- --save-images +``` + +### Example 2.
How to create a custom OID-like dataset + +```python +import numpy as np +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, Label, LabelCategories, DatasetItem, +) + +dataset = Dataset.from_iterable( + [ + DatasetItem( + id='0000000000000001', + image=np.ones((1, 5, 3)), + subset='validation', + annotations=[ + Label(0, attributes={'score': 1}), + Label(1, attributes={'score': 0}), + ], + ), + ], + categories=['/m/0', '/m/1'], +) +dataset.export('./dataset', format='open_images') +``` + +More examples of working with OID from code can be found in +[tests](../../tests/test_open_images_format.py). diff --git a/docs/formats/pascal_voc_user_manual.md b/docs/formats/pascal_voc_user_manual.md index 0e5db0e9258d..f02816b4e198 100644 --- a/docs/formats/pascal_voc_user_manual.md +++ b/docs/formats/pascal_voc_user_manual.md @@ -10,20 +10,20 @@ ## Format specification - Pascal VOC format specification available -[here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). + [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf). -- Original Pascal VOC dataset format support the followoing types of annotations: - - `Labels` (for classification tasks); - - `Bounding boxes` (for detection, action detection and person layout tasks); - - `Masks` (for segmentations tasks). +- Original Pascal VOC dataset format support the following types of annotations: + - `Labels` (for classification tasks); + - `Bounding boxes` (for detection, action detection and person layout tasks); + - `Masks` (for segmentations tasks). 
- Supported attributes: - - `occluded`: indicates that a significant portion of the object within the + - `occluded`: indicates that a significant portion of the object within the bounding box is occluded by another object; - - `truncated`: indicates that the bounding box specified for the object does + - `truncated`: indicates that the bounding box specified for the object does not correspond to the full extent of the object; - - `difficult`: indicates that the object is considered difficult to recognize; - - action attributes (`jumping`, `reading`, `phoning` and + - `difficult`: indicates that the object is considered difficult to recognize; + - action attributes (`jumping`, `reading`, `phoning` and [more](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/actionexamples/index.html)). ## Load Pascal VOC dataset @@ -85,10 +85,11 @@ Pascal VOC dataset directory should have the following structure: The `ImageSets` directory should contain at least one of the directories: `Main`, `Layout`, `Action`, `Segmentation`. -These directories contain `.txt` files -with a list of images in a subset, the subset name is the same as the `.txt` file name. +These directories contain `.txt` files with a list of images in a subset, +the subset name is the same as the `.txt` file name. -In `label_map.txt` you can define custom color map and non-pascal labels, for example: +In `label_map.txt` you can define custom color map and non-pascal labels, +for example: ``` # label_map [label : color_rgb : parts : actions] @@ -130,8 +131,8 @@ Datumaro supports the following Pascal VOC tasks: - Class and instance segmentation (`voc_segmentation`) - Person layout detection (`voc_layout`) -To make sure that the selected dataset has been added to the project, you can run -`datum info`, which will display the project and dataset information. +To make sure that the selected dataset has been added to the project, you +can run `datum info`, which will display the project and dataset information. 
## Export to other formats @@ -175,20 +176,23 @@ datum convert -if imagenet -i \ Extra options for export to Pascal VOC format: -- `--save-images` allow to export dataset with saving images -(by default `False`); +- `--save-images` - allow to export dataset with saving images + (by default `False`) + +- `--image-ext IMAGE_EXT` - allow to specify image extension + for exporting dataset (by default use original or `.jpg` if none) -- `--image-ext IMAGE_EXT` allow to specify image extension -for exporting dataset (by default use original or `.jpg` if none); +- `--apply-colormap APPLY_COLORMAP` - allow to use colormap for class + and instance masks (by default `True`) -- `--apply-colormap APPLY_COLORMAP` allow to use colormap for class -and instance masks (by default `True`); +- `--allow-attributes ALLOW_ATTRIBUTES` - allow export of attributes + (by default `True`) -- `--allow-attributes ALLOW_ATTRIBUTES` allow export of attributes -(by default `True`); +- `--keep-empty KEEP_EMPTY` - write subset lists even if they are empty + (by default: `False`) -- `--tasks TASKS` allow to specify tasks for export dataset, -by default Datumaro uses all tasks. Example: +- `--tasks TASKS` - allow to specify tasks for export dataset, + by default Datumaro uses all tasks. Example: ```bash datum import -o project -f voc -i ./VOC2012 @@ -218,7 +222,8 @@ There are few examples of using Datumaro operations to solve particular problems with Pascal VOC dataset: ### Example 1. How to prepare an original dataset for training. 
-In this example, preparing the original dataset to train the semantic segmentation model includes: +In this example, preparing the original dataset to train the semantic +segmentation model includes: loading, checking duplicate images, setting the number of images, diff --git a/docs/formats/sly_pointcloud_user_manual.md b/docs/formats/sly_pointcloud_user_manual.md new file mode 100644 index 000000000000..5a0f1882c470 --- /dev/null +++ b/docs/formats/sly_pointcloud_user_manual.md @@ -0,0 +1,183 @@ +# Supervisely Point Cloud user manual + +## Contents + +- [Format specification](#format-specification) +- [Import Supervisely Point Cloud](#import-supervisely-point-cloud-dataset) +- [Export to other formats](#export-to-other-formats) +- [Export to Supervisely Point Cloud](#export-to-supervisely-point-cloud) +- [Examples](#examples) + +## Format specification + +Point Cloud data format: +- [specification](https://docs.supervise.ly/data-organization/00_ann_format_navi). +- [example](https://drive.google.com/file/d/1BtZyffWtWNR-mk_PHNPMnGgSlAkkQpBl/view). + +This dataset format supports the following types of annotations: +- `cuboid_3d` + +Supported annotation attributes: +- `track_id` (read/write, integer), responsible for `object` field +- `createdAt` (write, string), +- `updatedAt` (write, string), +- `labelerLogin` (write, string), responsible for the corresponding fields + in the annotation file. +- arbitrary attributes + +Supported image attributes: +- `description` (read/write, string), +- `createdAt` (write, string), +- `updatedAt` (write, string), +- `labelerLogin` (write, string), responsible for the corresponding fields + in the annotation file. +- `frame` (read/write, integer). Indicates frame number of the image. 
+- arbitrary attributes + +## Import Supervisely Point Cloud dataset + +An example dataset in Supervisely Point Cloud format is available for download: + + + +Point Cloud dataset directory should have the following structure: + + +``` +└─ Dataset/ + ├── ds0/ + │ ├── ann/ + │ │ ├── + │ │ ├── + │ │ └── ... + │ ├── pointcloud/ + │ │ ├── + │ │ ├── + │ │ └── ... + │ ├── related_images/ + │ │ ├── / + │ │ | ├── + │ │ | ├── + │ │ └── ... + ├── key_id_map.json + └── meta.json +``` + +There are two ways to import Supervisely Point Cloud dataset: + +```bash +datum import --format sly_pointcloud --input-path +# or +datum create +datum add path -f sly_pointcloud +``` + +To make sure that the selected dataset has been added to the project, +you can run `datum info`, which will display the project and dataset +information. + +## Export to other formats + +Datumaro can convert Supervisely Point Cloud dataset into any other +format [Datumaro supports](../user_manual.md#supported-formats). + +Such conversion will only be successful if the output +format can represent the type of dataset you want to convert, +e.g. 3D point clouds can be saved in KITTI Raw format, +but not in COCO keypoints. + +There are few ways to convert Supervisely Point Cloud dataset +to other dataset formats: + +``` bash +datum import -f sly_pointcloud -i -o proj/ +datum export -f kitti_raw -o -p proj/ +# or +datum convert -if sly_pointcloud -i -f kitti_raw +``` + +Some formats provide extra options for conversion. +These options are passed after double dash (`--`) in the command line. 
+To get information about them, run + +`datum export -f -- -h` + +## Export to Supervisely Point Cloud + +There are few ways to convert dataset to Supervisely Point Cloud format: + +``` bash +# export dataset into Supervisely Point Cloud format from existing project +datum export -p -f sly_pointcloud -o \ + -- --save-images +# converting to Supervisely Point Cloud format from other format +datum convert -if kitti_raw -i \ + -f sly_pointcloud -o -- --save-images +``` + +Extra options for exporting in Supervisely Point Cloud format: + +- `--save-images` allow to export dataset with saving images. This will + include point clouds and related images (by default `False`) +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original or use `.png`, if none) +- `--reindex` assigns new indices to frames and annotations. +- `--allow-undeclared-attrs` allows writing arbitrary annotation attributes. + By default, only attributes specified in the input dataset metainfo + will be written. + +## Examples + +### Example 1. Import dataset, compute statistics + +```bash +datum create -o project +datum add path -p project -f sly_pointcloud ../sly_dataset/ +datum stats -p project +``` + +### Example 2. Convert Supervisely Pointclouds to KITTI Raw + +``` bash +datum convert -if sly_pointcloud -i ../sly_pcd/ \ + -f kitti_raw -o my_kitti/ -- --save-images --reindex --allow-attrs +``` + +### Example 3. 
Create a custom dataset + +``` python +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import Cuboid3d, DatasetItem + +dataset = Dataset.from_iterable([ + DatasetItem(id='frame_1', + annotations=[ + Cuboid3d(id=206, label=0, + position=[320.86, 979.18, 1.04], + attributes={'occluded': False, 'track_id': 1, 'x': 1}), + + Cuboid3d(id=207, label=1, + position=[318.19, 974.65, 1.29], + attributes={'occluded': True, 'track_id': 2}), + ], + pcd='path/to/pcd1.pcd', + attributes={'frame': 0, 'description': 'zzz'} + ), + + DatasetItem(id='frm2', + annotations=[ + Cuboid3d(id=208, label=1, + position=[23.04, 8.75, -0.78], + attributes={'occluded': False, 'track_id': 2}) + ], + pcd='path/to/pcd2.pcd', related_images=['image2.png'], + attributes={'frame': 1} + ), +], categories=['cat', 'dog']) + +dataset.export('my_dataset/', format='sly_pointcloud', save_images=True, + allow_undeclared_attrs=True) +``` + +More examples of working with Supervisely Point Cloud dataset from code can +be found in [tests](../../tests/test_sly_pointcloud_format.py) diff --git a/docs/formats/yolo_user_manual.md b/docs/formats/yolo_user_manual.md index 266547945f56..9da1999f8ec3 100644 --- a/docs/formats/yolo_user_manual.md +++ b/docs/formats/yolo_user_manual.md @@ -9,14 +9,14 @@ ## Format specification -- The YOLO dataset format is for training and validating object detection models. -Specification for this format available -[here](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects). -And also you can find some official examples on working with YOLO dataset -[here](https://pjreddie.com/darknet/yolo/); +- The YOLO dataset format is for training and validating object detection + models. Specification for this format available + [here](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects). 
+ And also you can find some official examples on working with YOLO dataset + [here](https://pjreddie.com/darknet/yolo/); - The YOLO dataset format support the following types of annotations: - - `Bounding boxes` + - `Bounding boxes` - YOLO format doesn't support attributes for annotations; @@ -63,14 +63,14 @@ YOLO dataset directory should have the following structure: │ ├── ... ``` > YOLO dataset cannot contain a subset with a name other than `train` or `valid`. -If imported dataset contains such subsets, they will be ignored. -If you are exporting a project into yolo format, -all subsets different from `train` and `valid` will be skipped. -If there is no subset separation in a project, the data -will be saved in `train` subset. + If imported dataset contains such subsets, they will be ignored. + If you are exporting a project into yolo format, + all subsets different from `train` and `valid` will be skipped. + If there is no subset separation in a project, the data + will be saved in `train` subset. - `obj.data` should have the following content, it is not necessary to have both -subsets, but necessary to have one of them: + subsets, but necessary to have one of them: ``` classes = 5 # optional names = @@ -97,11 +97,13 @@ should contain information about labeled bounding boxes for images: ``` # image1.txt: -# +# 0 0.250000 0.400000 0.300000 0.400000 3 0.600000 0.400000 0.400000 0.266667 ``` -Here `x`, `y`, `width`, and `height` are relative to the image's width and height. +Here `x_center`, `y_center`, `width`, and `height` are relative to the image's +width and height. The `x_center` and `y_center` are center of rectangle +(are not top-left corner). 
## Export to other formats @@ -207,4 +209,4 @@ And If you want complete information about each items you can run: ```bash datum import -o project -f yolo -i ./yolo_dataset datum filter -p project --dry-run -e '/item' -``` \ No newline at end of file +``` diff --git a/docs/user_manual.md b/docs/user_manual.md index db7c28f9d590..63ac2fce60d9 100644 --- a/docs/user_manual.md +++ b/docs/user_manual.md @@ -85,12 +85,14 @@ import datumaro ## Supported Formats List of supported formats: -- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) +- MS COCO + (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`,`panoptic`, `stuff`) - [Format specification](http://cocodataset.org/#format-data) - [Dataset example](../tests/assets/coco_dataset) - `labels` are our extension - like `instances` with only `category_id` - [Format documentation](./formats/coco_user_manual.md) -- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`) +- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), + `action_classification`, `person_layout`) - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) - [Dataset example](../tests/assets/voc_dataset) - [Format documentation](./formats/pascal_voc_user_manual.md) @@ -99,7 +101,8 @@ List of supported formats: - [Dataset example](../tests/assets/yolo_dataset) - [Format documentation](./formats/yolo_user_manual.md) - TF Detection API (`bboxes`, `masks`) - - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) + - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), + 
[masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) - [Dataset example](../tests/assets/tf_detection_api_dataset) - WIDER Face (`bboxes`) - [Format specification](http://shuoyang1213.me/WIDERFACE/) @@ -120,6 +123,7 @@ List of supported formats: - CIFAR-10/100 (`classification` (python version)) - [Format specification](https://www.cs.toronto.edu/~kriz/cifar.html) - [Dataset example](../tests/assets/cifar_dataset) + - [Format documentation](./formats/cifar_user_manual.md) - MNIST (`classification`) - [Format specification](http://yann.lecun.com/exdb/mnist/) - [Dataset example](../tests/assets/mnist_dataset) @@ -135,6 +139,18 @@ List of supported formats: - [Format specification](https://www.cityscapes-dataset.com/dataset-overview/) - [Dataset example](../tests/assets/cityscapes_dataset) - [Format documentation](./formats/cityscapes_user_manual.md) +- KITTI (`segmentation`, `detection`) + - [Format specification](http://www.cvlibs.net/datasets/kitti/index.php) + - [Dataset example](../tests/assets/kitti_dataset) + - [Format documentation](./formats/kitti_user_manual.md) +- KITTI 3D (`raw`/`tracklets`/`velodyne points`) + - [Format specification](http://www.cvlibs.net/datasets/kitti/raw_data.php) + - [Dataset example](../tests/assets/kitti_dataset/kitti_raw) + - [Format documentation](./formats/kitti_raw_user_manual.md) +- Supervisely (`pointcloud`) + - [Format specification](https://docs.supervise.ly/data-organization/00_ann_format_navi) + - [Dataset example](../tests/assets/sly_pointcloud) + - [Format documentation](./formats/sly_pointcloud_user_manual.md) - CVAT - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md) - [Dataset example](../tests/assets/cvat_dataset) @@ -254,10 +270,11 @@ Available CLI commands: ### Convert datasets -This command allows to convert a dataset from one format into another. 
In fact, this -command is a combination of `project import` and `project export` and just provides a simpler -way to obtain the same result when no extra options is needed. A list of supported -formats can be found in the `--help` output of this command. +This command allows to convert a dataset from one format into another. +In fact, this command is a combination of `project import` and `project export` +and just provides a simpler way to obtain the same result when no extra options +is needed. A list of supported formats can be found in the `--help` output of +this command. Usage: @@ -437,7 +454,8 @@ datum project filter \ -e '/item[subset="train"]' ``` -Example: extract a dataset with only large annotations of class `cat` and any non-`persons` +Example: extract a dataset with only large annotations of class `cat` and any +non-`persons` ``` bash datum filter \ @@ -516,10 +534,11 @@ datum merge \ ### Merge projects -This command merges items from 2 or more projects and checks annotations for errors. +This command merges items from 2 or more projects and checks annotations for +errors. -Spatial annotations are compared by distance and intersected, labels and attributes -are selected by voting. +Spatial annotations are compared by distance and intersected, labels and +attributes are selected by voting. Merge conflicts, missing items and annotations, other errors are saved into a `.json` file. Usage: @@ -533,7 +552,8 @@ datum merge Example: merge 4 (partially-)intersecting projects, - consider voting succeeded when there are 3+ same votes - consider shapes intersecting when IoU >= 0.6 -- check annotation groups to have `person`, `hand`, `head` and `foot` (`?` for optional) +- check annotation groups to have `person`, `hand`, `head` and `foot` +(`?` for optional) ``` bash datum merge project1/ project2/ project3/ project4/ \ @@ -903,20 +923,43 @@ and stores the result in JSON file. The task types supported are `classification`, `detection`, and `segmentation`. 
The validation result contains -- annotation statistics based on the task type -- validation reports, such as - - items not having annotations - - items having undefined annotations - - imbalanced distribution in class/attributes - - too small or large values -- summary +- `annotation statistics` based on the task type +- `validation reports`, such as + - items not having annotations + - items having undefined annotations + - imbalanced distribution in class/attributes + - too small or large values +- `summary` Usage: +- There are five configurable parameters for validation + - `few_samples_thr` : threshold for giving a warning for minimum number of + samples per class + - `imbalance_ratio_thr` : threshold for giving imbalance data warning + - `far_from_mean_thr` : threshold for giving a warning that data is far + from mean + - `dominance_ratio_thr` : threshold for giving a warning bounding box + imbalance + - `topk_bins` : ratio of bins with the highest number of data to total bins + in the histogram ``` bash datum validate --help -datum validate -p +datum validate -p -t -- \ + -fs \ + -ir \ + -m \ + -dr \ + -k +``` + +Example : give warning when imbalance ratio of data with classification task +over 40 + +``` bash +datum validate -p prj-cls -t classification -- \ + -ir 40 ``` Here is the list of validation items(a.k.a. anomaly types). @@ -1225,7 +1268,7 @@ datum explain -t image.png -m mymodel \ ``` > Note: this algorithm requires the model to return -> *all* (or a _reasonable_ amount) the outputs and confidences unfiltered, +> _all_ (or a _reasonable_ amount) the outputs and confidences unfiltered, > i.e. all the `Label` annotations for classification models and > all the `Bbox`es for detection models. 
> You can find examples of the expected model outputs in [`tests/test_RISE.py`](../tests/test_RISE.py) @@ -1333,7 +1376,8 @@ datum transform -t polygons_to_masks datum transform -t shapes_to_boxes ``` -Example: remap dataset labels, `person` to `car` and `cat` to `dog`, keep `bus`, remove others +Example: remap dataset labels, `person` to `car` and `cat` to `dog`, +keep `bus`, remove others ``` bash datum transform -t remap_labels -- \ @@ -1350,13 +1394,16 @@ datum transform -t rename -- -e '|pattern|replacement|' datum transform -t rename -- -e '|frame_(\d+)|\\1|' ``` -Example: sampling dataset items as many as the number of target samples with sampling method entered by the user, divide into `sampled` and `unsampled` subsets +Example: sampling dataset items as many as the number of target samples with +sampling method entered by the user, divide into `sampled` and `unsampled` +subsets - There are five methods of sampling the m option. - - `topk`: Return the k with high uncertainty data - - `lowk`: Return the k with low uncertainty data - - `randk`: Return the random k data - - `mixk`: Return half to topk method and the rest to lowk method - - `randtopk`: First, select 3 times the number of k randomly, and return the topk among them. + - `topk`: Return the k with high uncertainty data + - `lowk`: Return the k with low uncertainty data + - `randk`: Return the random k data + - `mixk`: Return half to topk method and the rest to lowk method + - `randtopk`: First, select 3 times the number of k randomly, and return + the topk among them. 
``` bash datum transform -t sampler -- \ @@ -1370,11 +1417,11 @@ datum transform -t sampler -- \ Example : control number of outputs to 100 after NDR - There are two methods in NDR e option - - `random`: sample from removed data randomly - - `similarity`: sample from removed data with ascending + - `random`: sample from removed data randomly + - `similarity`: sample from removed data with ascending - There are two methods in NDR u option - - `uniform`: sample data with uniform distribution - - `inverse`: sample data with reciprocal of the number + - `uniform`: sample data with uniform distribution + - `inverse`: sample data with reciprocal of the number ```bash datum transform -t ndr -- \ @@ -1387,9 +1434,10 @@ datum transform -t ndr -- \ ## Extending -There are few ways to extend and customize Datumaro behaviour, which is supported by plugins. -Check [our contribution guide](../CONTRIBUTING.md) for details on plugin implementation. -In general, a plugin is a Python code file. It must be put into a plugin directory: +There are few ways to extend and customize Datumaro behaviour, which is +supported by plugins. Check [our contribution guide](../CONTRIBUTING.md) for +details on plugin implementation. In general, a plugin is a Python code file. +It must be put into a plugin directory: - `/.datumaro/plugins` for project-specific plugins - `/plugins` for global plugins @@ -1445,15 +1493,15 @@ implementation script to a plugin directory. ### Dataset Conversions ("Transforms") -A Transform is a function for altering a dataset and producing a new one. It can update -dataset items, annotations, classes, and other properties. -A list of available transforms for dataset conversions can be extended by adding a Transform -implementation script into a plugin directory. +A Transform is a function for altering a dataset and producing a new one. +It can update dataset items, annotations, classes, and other properties. 
+A list of available transforms for dataset conversions can be extended by +adding a Transform implementation script into a plugin directory. ### Model launchers -A list of available launchers for model execution can be extended by adding a Launcher -implementation script into a plugin directory. +A list of available launchers for model execution can be extended by adding +a Launcher implementation script into a plugin directory. ## Links - [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 000000000000..8a599cea063e --- /dev/null +++ b/package-lock.json @@ -0,0 +1,2594 @@ +{ + "name": "datumaro", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "@babel/code-frame": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.14.5.tgz", + "integrity": "sha512-9pzDqyc6OLDaqe+zbACgFkb6fKMNG6CObKpnYXChRsvYGyEdc7CA2BaqeOM+vOtCS5ndmJicPJhKAwYRI6UfFw==", + "dev": true, + "requires": { + "@babel/highlight": "^7.14.5" + } + }, + "@babel/helper-validator-identifier": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.5.tgz", + "integrity": "sha512-5lsetuxCLilmVGyiLEfoHBRX8UCFD+1m2x3Rj97WrW3V7H3u4RWRXA4evMjImCsin2J2YT0QaVDGf+z8ondbAg==", + "dev": true + }, + "@babel/highlight": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.5.tgz", + "integrity": "sha512-qf9u2WFWVV0MppaL877j2dBtQIDgmidgjGk5VIMw3OadXvYaXn66U1BFlH2t4+t3i+8PhedppRv+i40ABzd+gg==", + "dev": true, + "requires": { + "@babel/helper-validator-identifier": "^7.14.5", + "chalk": "^2.0.0", + "js-tokens": "^4.0.0" + }, + "dependencies": { + "ansi-styles": { + "version": "3.2.1", + "resolved": 
"https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "requires": { + "color-convert": "^1.9.0" + } + }, + "chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + } + }, + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "requires": { + "color-name": "1.1.3" + } + }, + "color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true + }, + "supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + } + } + }, + "@sindresorhus/is": { + "version": "0.14.0", + "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-0.14.0.tgz", + "integrity": "sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==", + "dev": true + }, + "@szmarczak/http-timer": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-1.1.2.tgz", + 
"integrity": "sha512-XIB2XbzHTN6ieIjfIMV9hlVcfPU26s2vafYWQcZHWXHOxiaRZYEDKEwdl129Zyg50+foYV2jCgtrqSA6qNuNSA==", + "dev": true, + "requires": { + "defer-to-connect": "^1.0.1" + } + }, + "@types/mdast": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-3.0.3.tgz", + "integrity": "sha512-SXPBMnFVQg1s00dlMCc/jCdvPqdE4mXaMMCeRlxLDmTAEoegHT53xKtkDnzDTOcmMHUfcjyf36/YYZ6SxRdnsw==", + "dev": true, + "requires": { + "@types/unist": "*" + } + }, + "@types/unist": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.3.tgz", + "integrity": "sha512-FvUupuM3rlRsRtCN+fDudtmytGO6iHJuuRKS1Ss0pG5z8oX0diNEw94UEL7hgDbpN94rgaK5R7sWm6RrSkZuAQ==", + "dev": true + }, + "aggregate-error": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz", + "integrity": "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==", + "dev": true, + "requires": { + "clean-stack": "^2.0.0", + "indent-string": "^4.0.0" + } + }, + "ansi-regex": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", + "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "dev": true + }, + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "anymatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.2.tgz", + "integrity": "sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg==", + "dev": true, + "requires": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + } + }, + "argparse": { + "version": "1.0.10", + "resolved": 
"https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "requires": { + "sprintf-js": "~1.0.2" + } + }, + "bail": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/bail/-/bail-1.0.5.tgz", + "integrity": "sha512-xFbRxM1tahm08yHBP16MMjVUAvDaBMD38zsM9EMAUN61omwLmKlOpB/Zku5QkjZ8TZ4vn53pj+t518cH0S03RQ==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "binary-extensions": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", + "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dev": true, + "requires": { + "fill-range": "^7.0.1" + } + }, + "buffer-from": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", + "dev": true + }, + "cacheable-request": { + "version": "6.1.0", + "resolved": 
"https://registry.npmjs.org/cacheable-request/-/cacheable-request-6.1.0.tgz", + "integrity": "sha512-Oj3cAGPCqOZX7Rz64Uny2GYAZNliQSqfbePrgAQ1wKAihYmCUnraBtJtKcGR4xz7wF+LoJC+ssFZvv5BgF9Igg==", + "dev": true, + "requires": { + "clone-response": "^1.0.2", + "get-stream": "^5.1.0", + "http-cache-semantics": "^4.0.0", + "keyv": "^3.0.0", + "lowercase-keys": "^2.0.0", + "normalize-url": "^4.1.0", + "responselike": "^1.0.2" + }, + "dependencies": { + "get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dev": true, + "requires": { + "pump": "^3.0.0" + } + }, + "lowercase-keys": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", + "integrity": "sha512-tqNXrS78oMOE73NMxK4EMLQsQowWf8jKooH9g7xPavRT706R6bkQJ6DY2Te7QukaZsulxa30wQ7bk0pm4XiHmA==", + "dev": true + } + } + }, + "camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "dev": true + }, + "ccount": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/ccount/-/ccount-1.1.0.tgz", + "integrity": "sha512-vlNK021QdI7PNeiUh/lKkC/mNHHfV0m/Ad5JoI0TYtlBnJAslM/JIkm/tGC88bkLIwO6OQ5uV6ztS6kVAtCDlg==", + "dev": true + }, + "chalk": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz", + "integrity": "sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==", + "dev": true, + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + } + }, + "character-entities": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-1.2.4.tgz", + "integrity": 
"sha512-iBMyeEHxfVnIakwOuDXpVkc54HijNgCyQB2w0VfGQThle6NXn50zU6V/u+LDhxHcDUPojn6Kpga3PTAD8W1bQw==", + "dev": true + }, + "character-entities-legacy": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-1.1.4.tgz", + "integrity": "sha512-3Xnr+7ZFS1uxeiUDvV02wQ+QDbc55o97tIV5zHScSPJpcLm/r0DFPcoY3tYRp+VZukxuMeKgXYmsXQHO05zQeA==", + "dev": true + }, + "character-reference-invalid": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-1.1.4.tgz", + "integrity": "sha512-mKKUkUbhPpQlCOfIuZkvSEgktjPFIsZKRRbC6KWVEMvlzblj3i3asQv5ODsrwt0N3pHAEvjP8KTQPHkp0+6jOg==", + "dev": true + }, + "check-links": { + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/check-links/-/check-links-1.1.8.tgz", + "integrity": "sha512-lxt1EeQ1CVkmiZzPfbPufperYK0t7MvhdLs3zlRH9areA6NVT1tcGymAdJONolNWQBdCFU/sek59RpeLmVHCnw==", + "dev": true, + "requires": { + "got": "^9.6.0", + "is-relative-url": "^2.0.0", + "p-map": "^2.0.0", + "p-memoize": "^2.1.0" + } + }, + "chokidar": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.2.tgz", + "integrity": "sha512-ekGhOnNVPgT77r4K/U3GDhu+FQ2S8TnK/s2KbIGXi0SZWuwkZ2QNyfWdZW+TVfn84DpEP7rLeCt2UI6bJ8GwbQ==", + "dev": true, + "requires": { + "anymatch": "~3.1.2", + "braces": "~3.0.2", + "fsevents": "~2.3.2", + "glob-parent": "~5.1.2", + "is-binary-path": "~2.1.0", + "is-glob": "~4.0.1", + "normalize-path": "~3.0.0", + "readdirp": "~3.6.0" + } + }, + "clean-stack": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz", + "integrity": "sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==", + "dev": true + }, + "clone-response": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/clone-response/-/clone-response-1.0.2.tgz", + "integrity": "sha1-0dyXOSAxTfZ/vrlCI7TuNQI56Ws=", + "dev": 
true, + "requires": { + "mimic-response": "^1.0.0" + } + }, + "co": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/co/-/co-3.1.0.tgz", + "integrity": "sha1-TqVOpaCJOBUxheFSEMaNkJK8G3g=", + "dev": true + }, + "collapse-white-space": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/collapse-white-space/-/collapse-white-space-1.0.6.tgz", + "integrity": "sha512-jEovNnrhMuqyCcjfEJA56v0Xq8SkIoPKDyaHahwo3POf4qcSXqMYuwNcOTzp74vTsR9Tn08z4MxWqAhcekogkQ==", + "dev": true + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "dev": true, + "requires": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + } + }, + "debug": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", + "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", + "dev": true, + "requires": { + "ms": "2.1.2" + } + }, + "decompress-response": { + "version": "3.3.0", + "resolved": 
"https://registry.npmjs.org/decompress-response/-/decompress-response-3.3.0.tgz", + "integrity": "sha1-gKTdMjdIOEv6JICDYirt7Jgq3/M=", + "dev": true, + "requires": { + "mimic-response": "^1.0.0" + } + }, + "defer-to-connect": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-1.1.3.tgz", + "integrity": "sha512-0ISdNousHvZT2EiFlZeZAHBUvSxmKswVCEf8hW7KWgG4a8MVEu/3Vb6uWYozkjylyCxe0JBIiRB1jV45S70WVQ==", + "dev": true + }, + "dns-packet": { + "version": "5.2.4", + "resolved": "https://registry.npmjs.org/dns-packet/-/dns-packet-5.2.4.tgz", + "integrity": "sha512-vgu5Bx5IV8mXmh/9cn1lzn+J7okFlXe1vBRp+kCBJXg1nBED6Z/Q4e+QaDxQRSozMr14p/VQmdXwsf/I2wGjUA==", + "dev": true, + "requires": { + "ip": "^1.1.5" + } + }, + "dns-socket": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/dns-socket/-/dns-socket-4.2.2.tgz", + "integrity": "sha512-BDeBd8najI4/lS00HSKpdFia+OvUMytaVjfzR9n5Lq8MlZRSvtbI+uLtx1+XmQFls5wFU9dssccTmQQ6nfpjdg==", + "dev": true, + "requires": { + "dns-packet": "^5.2.4" + } + }, + "duplexer3": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/duplexer3/-/duplexer3-0.1.4.tgz", + "integrity": "sha1-7gHdHKwO08vH/b6jfcCo8c4ALOI=", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "end-of-stream": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "dev": true, + "requires": { + "once": "^1.4.0" + } + }, + "error-ex": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", + "integrity": 
"sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, + "requires": { + "is-arrayish": "^0.2.1" + } + }, + "escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true + }, + "esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true + }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "dev": true + }, + "fault": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/fault/-/fault-1.0.4.tgz", + "integrity": "sha512-CJ0HCB5tL5fYTEA7ToAq5+kTwd++Borf1/bifxd9iT70QcXr4MRrO3Llf8Ifs70q+SJcGHFtnIE/Nw6giCtECA==", + "dev": true, + "requires": { + "format": "^0.2.0" + } + }, + "figgy-pudding": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/figgy-pudding/-/figgy-pudding-3.5.2.tgz", + "integrity": "sha512-0btnI/H8f2pavGMN8w40mlSKOfTK2SVJmBfBeVIj3kNw0swwgzyRq0d5TJVOwodFmtvpPeWPN/MCcfuWF0Ezbw==", + "dev": true + }, + "figures": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/figures/-/figures-3.2.0.tgz", + "integrity": "sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg==", + "dev": true, + "requires": { + "escape-string-regexp": "^1.0.5" + } + }, + "fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dev": true, + "requires": { + "to-regex-range": "^5.0.1" + } + 
}, + "find-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-3.0.0.tgz", + "integrity": "sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==", + "dev": true, + "requires": { + "locate-path": "^3.0.0" + } + }, + "format": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/format/-/format-0.2.2.tgz", + "integrity": "sha1-1hcBB+nv3E7TDJ3DkBbflCtctYs=", + "dev": true + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "optional": true + }, + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", + "dev": true, + "requires": { + "pump": "^3.0.0" + } + }, + "glob": { + "version": "7.1.7", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", + "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "requires": { + "is-glob": "^4.0.1" + } + }, + "got": { + "version": "9.6.0", + "resolved": 
"https://registry.npmjs.org/got/-/got-9.6.0.tgz", + "integrity": "sha512-R7eWptXuGYxwijs0eV+v3o6+XH1IqVK8dJOEecQfTmkncw9AV4dcw/Dhxi8MdlqPthxxpZyizMzyg8RTmEsG+Q==", + "dev": true, + "requires": { + "@sindresorhus/is": "^0.14.0", + "@szmarczak/http-timer": "^1.1.2", + "cacheable-request": "^6.0.0", + "decompress-response": "^3.3.0", + "duplexer3": "^0.1.4", + "get-stream": "^4.1.0", + "lowercase-keys": "^1.0.1", + "mimic-response": "^1.0.1", + "p-cancelable": "^1.0.0", + "to-readable-stream": "^1.0.0", + "url-parse-lax": "^3.0.0" + } + }, + "has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "http-cache-semantics": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz", + "integrity": "sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ==", + "dev": true + }, + "ignore": { + "version": "5.1.8", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.1.8.tgz", + "integrity": "sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==", + "dev": true + }, + "indent-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", + "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", + "dev": true + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": 
"sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "dev": true + }, + "ip": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/ip/-/ip-1.1.5.tgz", + "integrity": "sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo=", + "dev": true + }, + "ip-regex": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-4.3.0.tgz", + "integrity": "sha512-B9ZWJxHHOHUhUjCPrMpLD4xEq35bUTClHM1S6CBU5ixQnkZmwipwgc96vAd7AAGM9TGHvJR+Uss+/Ak6UphK+Q==", + "dev": true + }, + "is-absolute-url": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-absolute-url/-/is-absolute-url-2.1.0.tgz", + "integrity": "sha1-UFMN+4T8yap9vnhS6Do3uTufKqY=", + "dev": true + }, + "is-alphabetical": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-1.0.4.tgz", + "integrity": "sha512-DwzsA04LQ10FHTZuL0/grVDk4rFoVH1pjAToYwBrHSxcrBIGQuXrQMtD5U1b0U2XVgKZCTLLP8u2Qxqhy3l2Vg==", + "dev": true + }, + "is-alphanumerical": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-1.0.4.tgz", + "integrity": "sha512-UzoZUr+XfVz3t3v4KyGEniVL9BDRoQtY7tOyrRybkVNjDFWyo1yhXNGrrBTQxp3ib9BLAWs7k2YKBQsFRkZG9A==", + "dev": true, + "requires": { + "is-alphabetical": "^1.0.0", + "is-decimal": "^1.0.0" + } + }, + "is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=", + "dev": true + }, + "is-binary-path": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", + "integrity": 
"sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==", + "dev": true, + "requires": { + "binary-extensions": "^2.0.0" + } + }, + "is-buffer": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", + "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", + "dev": true + }, + "is-decimal": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-1.0.4.tgz", + "integrity": "sha512-RGdriMmQQvZ2aqaQq3awNA6dCGtKpiDFcOzrTWrDAT2MiWrKQVPmxLGHl7Y2nNu6led0kEyoX0enY0qXYsv9zw==", + "dev": true + }, + "is-empty": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/is-empty/-/is-empty-1.2.0.tgz", + "integrity": "sha1-3pu1snhzigWgsJpX4ftNSjQan2s=", + "dev": true + }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "dev": true + }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true + }, + "is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dev": true, + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-hexadecimal": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-1.0.4.tgz", + "integrity": "sha512-gyPJuv83bHMpocVYoqof5VDiZveEoGoFL8m3BXNb2VW8Xs+rz9kqO8LOQ5DH6EsuvilT1ApazU0pyl+ytbPtlw==", + "dev": true + }, + "is-ip": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/is-ip/-/is-ip-3.1.0.tgz", + 
"integrity": "sha512-35vd5necO7IitFPjd/YBeqwWnyDWbuLH9ZXQdMfDA8TEo7pv5X8yfrvVO3xbJbLUlERCMvf6X0hTUamQxCYJ9Q==", + "dev": true, + "requires": { + "ip-regex": "^4.0.0" + } + }, + "is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true + }, + "is-online": { + "version": "8.5.1", + "resolved": "https://registry.npmjs.org/is-online/-/is-online-8.5.1.tgz", + "integrity": "sha512-RKyTQx/rJqw2QOXHwy7TmXdlkpe0Hhj7GBsr6TQJaj4ebNOfameZCMspU5vYbwBBzJ2brWArdSvNVox6T6oCTQ==", + "dev": true, + "requires": { + "got": "^9.6.0", + "p-any": "^2.0.0", + "p-timeout": "^3.0.0", + "public-ip": "^4.0.1" + } + }, + "is-plain-obj": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", + "integrity": "sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==", + "dev": true + }, + "is-relative-url": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/is-relative-url/-/is-relative-url-2.0.0.tgz", + "integrity": "sha1-cpAtf+BLPUeS59sV+duEtyBMnO8=", + "dev": true, + "requires": { + "is-absolute-url": "^2.0.0" + } + }, + "js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "requires": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + } + }, + "json-buffer": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.0.tgz", + "integrity": 
"sha1-Wx85evx11ne96Lz8Dkfh+aPZqJg=", + "dev": true + }, + "json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, + "json5": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.0.tgz", + "integrity": "sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==", + "dev": true, + "requires": { + "minimist": "^1.2.5" + } + }, + "keyv": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-3.1.0.tgz", + "integrity": "sha512-9ykJ/46SN/9KPM/sichzQ7OvXyGDYKGTaDlKMGCAlg2UK8KRy4jb0d8sFc+0Tt0YYnThq8X2RZgCg74RPxgcVA==", + "dev": true, + "requires": { + "json-buffer": "3.0.0" + } + }, + "libnpmconfig": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/libnpmconfig/-/libnpmconfig-1.2.1.tgz", + "integrity": "sha512-9esX8rTQAHqarx6qeZqmGQKBNZR5OIbl/Ayr0qQDy3oXja2iFVQQI81R6GZ2a02bSNZ9p3YOGX1O6HHCb1X7kA==", + "dev": true, + "requires": { + "figgy-pudding": "^3.5.1", + "find-up": "^3.0.0", + "ini": "^1.3.5" + } + }, + "lines-and-columns": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.1.6.tgz", + "integrity": "sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=", + "dev": true + }, + "load-plugin": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/load-plugin/-/load-plugin-3.0.0.tgz", + "integrity": "sha512-od7eKCCZ62ITvFf8nHHrIiYmgOHb4xVNDRDqxBWSaao5FZyyZVX8OmRCbwjDGPrSrgIulwPNyBsWCGnhiDC0oQ==", + "dev": true, + "requires": { + "libnpmconfig": "^1.0.0", + "resolve-from": "^5.0.0" + } + }, + "locate-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz", + "integrity": 
"sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==", + "dev": true, + "requires": { + "p-locate": "^3.0.0", + "path-exists": "^3.0.0" + } + }, + "longest-streak": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-2.0.4.tgz", + "integrity": "sha512-vM6rUVCVUJJt33bnmHiZEvr7wPT78ztX7rojL+LW51bHtLh6HTjx84LA5W4+oa6aKEJA7jJu5LR6vQRBpA5DVg==", + "dev": true + }, + "lowercase-keys": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-1.0.1.tgz", + "integrity": "sha512-G2Lj61tXDnVFFOi8VZds+SoQjtQC3dgokKdDG2mTm1tx4m50NUHBOZSBwQQHyy0V12A0JTG4icfZQH+xPyh8VA==", + "dev": true + }, + "map-age-cleaner": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.3.tgz", + "integrity": "sha512-bJzx6nMoP6PDLPBFmg7+xRKeFZvFboMrGlxmNj9ClvX53KrmvM5bXFXEWjbz4cz1AFn+jWJ9z/DJSz7hrs0w3w==", + "dev": true, + "requires": { + "p-defer": "^1.0.0" + } + }, + "markdown-extensions": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/markdown-extensions/-/markdown-extensions-1.1.1.tgz", + "integrity": "sha512-WWC0ZuMzCyDHYCasEGs4IPvLyTGftYwh6wIEOULOF0HXcqZlhwRzrK0w2VUlxWA98xnvb/jszw4ZSkJ6ADpM6Q==", + "dev": true + }, + "markdown-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-2.0.0.tgz", + "integrity": "sha512-Ezda85ToJUBhM6WGaG6veasyym+Tbs3cMAw/ZhOPqXiYsr0jgocBV3j3nx+4lk47plLlIqjwuTm/ywVI+zjJ/A==", + "dev": true, + "requires": { + "repeat-string": "^1.0.0" + } + }, + "mdast-comment-marker": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/mdast-comment-marker/-/mdast-comment-marker-1.1.2.tgz", + "integrity": "sha512-vTFXtmbbF3rgnTh3Zl3irso4LtvwUq/jaDvT2D1JqTGAwaipcS7RpTxzi6KjoRqI9n2yuAhzLDAC8xVTF3XYVQ==", + "dev": true + }, + "mdast-util-find-and-replace": { + "version": "1.1.1", + "resolved": 
"https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-1.1.1.tgz", + "integrity": "sha512-9cKl33Y21lyckGzpSmEQnIDjEfeeWelN5s1kUW1LwdB0Fkuq2u+4GdqcGEygYxJE8GVqCl0741bYXHgamfWAZA==", + "dev": true, + "requires": { + "escape-string-regexp": "^4.0.0", + "unist-util-is": "^4.0.0", + "unist-util-visit-parents": "^3.0.0" + }, + "dependencies": { + "escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true + } + } + }, + "mdast-util-from-markdown": { + "version": "0.8.5", + "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-0.8.5.tgz", + "integrity": "sha512-2hkTXtYYnr+NubD/g6KGBS/0mFmBcifAsI0yIWRiRo0PjVs6SSOSOdtzbp6kSGnShDN6G5aWZpKQ2lWRy27mWQ==", + "dev": true, + "requires": { + "@types/mdast": "^3.0.0", + "mdast-util-to-string": "^2.0.0", + "micromark": "~2.11.0", + "parse-entities": "^2.0.0", + "unist-util-stringify-position": "^2.0.0" + } + }, + "mdast-util-frontmatter": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/mdast-util-frontmatter/-/mdast-util-frontmatter-0.2.0.tgz", + "integrity": "sha512-FHKL4w4S5fdt1KjJCwB0178WJ0evnyyQr5kXTM3wrOVpytD0hrkvd+AOOjU9Td8onOejCkmZ+HQRT3CZ3coHHQ==", + "dev": true, + "requires": { + "micromark-extension-frontmatter": "^0.2.0" + } + }, + "mdast-util-gfm": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-0.1.2.tgz", + "integrity": "sha512-NNkhDx/qYcuOWB7xHUGWZYVXvjPFFd6afg6/e2g+SV4r9q5XUcCbV4Wfa3DLYIiD+xAEZc6K4MGaE/m0KDcPwQ==", + "dev": true, + "requires": { + "mdast-util-gfm-autolink-literal": "^0.1.0", + "mdast-util-gfm-strikethrough": "^0.2.0", + "mdast-util-gfm-table": "^0.1.0", + "mdast-util-gfm-task-list-item": "^0.1.0", + "mdast-util-to-markdown": "^0.6.1" + } + }, + 
"mdast-util-gfm-autolink-literal": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-0.1.3.tgz", + "integrity": "sha512-GjmLjWrXg1wqMIO9+ZsRik/s7PLwTaeCHVB7vRxUwLntZc8mzmTsLVr6HW1yLokcnhfURsn5zmSVdi3/xWWu1A==", + "dev": true, + "requires": { + "ccount": "^1.0.0", + "mdast-util-find-and-replace": "^1.1.0", + "micromark": "^2.11.3" + } + }, + "mdast-util-gfm-strikethrough": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-0.2.3.tgz", + "integrity": "sha512-5OQLXpt6qdbttcDG/UxYY7Yjj3e8P7X16LzvpX8pIQPYJ/C2Z1qFGMmcw+1PZMUM3Z8wt8NRfYTvCni93mgsgA==", + "dev": true, + "requires": { + "mdast-util-to-markdown": "^0.6.0" + } + }, + "mdast-util-gfm-table": { + "version": "0.1.6", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-0.1.6.tgz", + "integrity": "sha512-j4yDxQ66AJSBwGkbpFEp9uG/LS1tZV3P33fN1gkyRB2LoRL+RR3f76m0HPHaby6F4Z5xr9Fv1URmATlRRUIpRQ==", + "dev": true, + "requires": { + "markdown-table": "^2.0.0", + "mdast-util-to-markdown": "~0.6.0" + } + }, + "mdast-util-gfm-task-list-item": { + "version": "0.1.6", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-0.1.6.tgz", + "integrity": "sha512-/d51FFIfPsSmCIRNp7E6pozM9z1GYPIkSy1urQ8s/o4TC22BZ7DqfHFWiqBD23bc7J3vV1Fc9O4QIHBlfuit8A==", + "dev": true, + "requires": { + "mdast-util-to-markdown": "~0.6.0" + } + }, + "mdast-util-heading-style": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/mdast-util-heading-style/-/mdast-util-heading-style-1.0.6.tgz", + "integrity": "sha512-8ZuuegRqS0KESgjAGW8zTx4tJ3VNIiIaGFNEzFpRSAQBavVc7AvOo9I4g3crcZBfYisHs4seYh0rAVimO6HyOw==", + "dev": true + }, + "mdast-util-to-markdown": { + "version": "0.6.5", + "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-0.6.5.tgz", + "integrity": 
"sha512-XeV9sDE7ZlOQvs45C9UKMtfTcctcaj/pGwH8YLbMHoMOXNNCn2LsqVQOqrF1+/NU8lKDAqozme9SCXWyo9oAcQ==", + "dev": true, + "requires": { + "@types/unist": "^2.0.0", + "longest-streak": "^2.0.0", + "mdast-util-to-string": "^2.0.0", + "parse-entities": "^2.0.0", + "repeat-string": "^1.0.0", + "zwitch": "^1.0.0" + } + }, + "mdast-util-to-string": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-2.0.0.tgz", + "integrity": "sha512-AW4DRS3QbBayY/jJmD8437V1Gombjf8RSOUCMFBuo5iHi58AGEgVCKQ+ezHkZZDpAQS75hcBMpLqjpJTjtUL7w==", + "dev": true + }, + "mem": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/mem/-/mem-4.3.0.tgz", + "integrity": "sha512-qX2bG48pTqYRVmDB37rn/6PT7LcR8T7oAX3bf99u1Tt1nzxYfxkgqDwUwolPlXweM0XzBOBFzSx4kfp7KP1s/w==", + "dev": true, + "requires": { + "map-age-cleaner": "^0.1.1", + "mimic-fn": "^2.0.0", + "p-is-promise": "^2.0.0" + }, + "dependencies": { + "mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true + } + } + }, + "micromark": { + "version": "2.11.4", + "resolved": "https://registry.npmjs.org/micromark/-/micromark-2.11.4.tgz", + "integrity": "sha512-+WoovN/ppKolQOFIAajxi7Lu9kInbPxFuTBVEavFcL8eAfVstoc5MocPmqBeAdBOJV00uaVjegzH4+MA0DN/uA==", + "dev": true, + "requires": { + "debug": "^4.0.0", + "parse-entities": "^2.0.0" + } + }, + "micromark-extension-frontmatter": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/micromark-extension-frontmatter/-/micromark-extension-frontmatter-0.2.2.tgz", + "integrity": "sha512-q6nPLFCMTLtfsctAuS0Xh4vaolxSFUWUWR6PZSrXXiRy+SANGllpcqdXFv2z07l0Xz/6Hl40hK0ffNCJPH2n1A==", + "dev": true, + "requires": { + "fault": "^1.0.0" + } + }, + "micromark-extension-gfm": { + "version": "0.3.3", + "resolved": 
"https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-0.3.3.tgz", + "integrity": "sha512-oVN4zv5/tAIA+l3GbMi7lWeYpJ14oQyJ3uEim20ktYFAcfX1x3LNlFGGlmrZHt7u9YlKExmyJdDGaTt6cMSR/A==", + "dev": true, + "requires": { + "micromark": "~2.11.0", + "micromark-extension-gfm-autolink-literal": "~0.5.0", + "micromark-extension-gfm-strikethrough": "~0.6.5", + "micromark-extension-gfm-table": "~0.4.0", + "micromark-extension-gfm-tagfilter": "~0.3.0", + "micromark-extension-gfm-task-list-item": "~0.3.0" + } + }, + "micromark-extension-gfm-autolink-literal": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-0.5.7.tgz", + "integrity": "sha512-ePiDGH0/lhcngCe8FtH4ARFoxKTUelMp4L7Gg2pujYD5CSMb9PbblnyL+AAMud/SNMyusbS2XDSiPIRcQoNFAw==", + "dev": true, + "requires": { + "micromark": "~2.11.3" + } + }, + "micromark-extension-gfm-strikethrough": { + "version": "0.6.5", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-0.6.5.tgz", + "integrity": "sha512-PpOKlgokpQRwUesRwWEp+fHjGGkZEejj83k9gU5iXCbDG+XBA92BqnRKYJdfqfkrRcZRgGuPuXb7DaK/DmxOhw==", + "dev": true, + "requires": { + "micromark": "~2.11.0" + } + }, + "micromark-extension-gfm-table": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-0.4.3.tgz", + "integrity": "sha512-hVGvESPq0fk6ALWtomcwmgLvH8ZSVpcPjzi0AjPclB9FsVRgMtGZkUcpE0zgjOCFAznKepF4z3hX8z6e3HODdA==", + "dev": true, + "requires": { + "micromark": "~2.11.0" + } + }, + "micromark-extension-gfm-tagfilter": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-0.3.0.tgz", + "integrity": "sha512-9GU0xBatryXifL//FJH+tAZ6i240xQuFrSL7mYi8f4oZSbc+NvXjkrHemeYP0+L4ZUT+Ptz3b95zhUZnMtoi/Q==", + "dev": true + }, + 
"micromark-extension-gfm-task-list-item": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-0.3.3.tgz", + "integrity": "sha512-0zvM5iSLKrc/NQl84pZSjGo66aTGd57C1idmlWmE87lkMcXrTxg1uXa/nXomxJytoje9trP0NDLvw4bZ/Z/XCQ==", + "dev": true, + "requires": { + "micromark": "~2.11.0" + } + }, + "mimic-fn": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-1.2.0.tgz", + "integrity": "sha512-jf84uxzwiuiIVKiOLpfYk7N46TSy8ubTonmneY9vrpHNAnp0QBt2BxWV9dO3/j+BoVAb+a5G6YDPW3M5HOdMWQ==", + "dev": true + }, + "mimic-response": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-1.0.1.tgz", + "integrity": "sha512-j5EctnkH7amfV/q5Hgmoal1g2QHFJRraOtmx0JpIqkxhBhI/lJSl1nMpQ45hVarwNETOoWEimndZ4QK0RHxuxQ==", + "dev": true + }, + "minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true + }, + "normalize-url": { + "version": "4.5.1", + "resolved": 
"https://registry.npmjs.org/normalize-url/-/normalize-url-4.5.1.tgz", + "integrity": "sha512-9UZCFRHQdNrfTpGg8+1INIg93B6zE0aXMVFkw1WFwvO4SlZywU6aLg5Of0Ap/PgcbSw4LNxvMWXMeugwMCX0AA==", + "dev": true + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "p-any": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-any/-/p-any-2.1.0.tgz", + "integrity": "sha512-JAERcaMBLYKMq+voYw36+x5Dgh47+/o7yuv2oQYuSSUml4YeqJEFznBrY2UeEkoSHqBua6hz518n/PsowTYLLg==", + "dev": true, + "requires": { + "p-cancelable": "^2.0.0", + "p-some": "^4.0.0", + "type-fest": "^0.3.0" + }, + "dependencies": { + "p-cancelable": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", + "integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==", + "dev": true + } + } + }, + "p-cancelable": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-1.1.0.tgz", + "integrity": "sha512-s73XxOZ4zpt1edZYZzvhqFa6uvQc1vwUa0K0BdtIZgQMAJj9IbebH+JkgKZc9h+B05PKHLOTl4ajG1BmNrVZlw==", + "dev": true + }, + "p-defer": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-defer/-/p-defer-1.0.0.tgz", + "integrity": "sha1-n26xgvbJqozXQwBKfU+WsZaw+ww=", + "dev": true + }, + "p-finally": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", + "integrity": "sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4=", + "dev": true + }, + "p-is-promise": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-is-promise/-/p-is-promise-2.1.0.tgz", + "integrity": "sha512-Y3W0wlRPK8ZMRbNq97l4M5otioeA5lm1z7bkNkxCka8HSPjR0xRWmpCmc9utiaLP9Jb1eD8BgeIxTW4AIF45Pg==", + "dev": true + }, + "p-limit": { + "version": "2.3.0", + "resolved": 
"https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "requires": { + "p-try": "^2.0.0" + } + }, + "p-locate": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-3.0.0.tgz", + "integrity": "sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==", + "dev": true, + "requires": { + "p-limit": "^2.0.0" + } + }, + "p-map": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-map/-/p-map-2.1.0.tgz", + "integrity": "sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==", + "dev": true + }, + "p-memoize": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-memoize/-/p-memoize-2.1.0.tgz", + "integrity": "sha512-c6+a2iV4JyX0r4+i2IBJYO0r6LZAT2fg/tcB6GQbv1uzZsfsmKT7Ej5DRT1G6Wi7XUJSV2ZiP9+YEtluvhCmkg==", + "dev": true, + "requires": { + "mem": "^4.0.0", + "mimic-fn": "^1.0.0" + } + }, + "p-some": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-some/-/p-some-4.1.0.tgz", + "integrity": "sha512-MF/HIbq6GeBqTrTIl5OJubzkGU+qfFhAFi0gnTAK6rgEIJIknEiABHOTtQu4e6JiXjIwuMPMUFQzyHh5QjCl1g==", + "dev": true, + "requires": { + "aggregate-error": "^3.0.0", + "p-cancelable": "^2.0.0" + }, + "dependencies": { + "p-cancelable": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", + "integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==", + "dev": true + } + } + }, + "p-timeout": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", + "integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", + "dev": true, + "requires": { + "p-finally": "^1.0.0" + } + }, + "p-try": { + "version": "2.2.0", + 
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true + }, + "parse-entities": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-2.0.0.tgz", + "integrity": "sha512-kkywGpCcRYhqQIchaWqZ875wzpS/bMKhz5HnN3p7wveJTkTtyAB/AlnS0f8DFSqYW1T82t6yEAkEcB+A1I3MbQ==", + "dev": true, + "requires": { + "character-entities": "^1.0.0", + "character-entities-legacy": "^1.0.0", + "character-reference-invalid": "^1.0.0", + "is-alphanumerical": "^1.0.0", + "is-decimal": "^1.0.0", + "is-hexadecimal": "^1.0.0" + } + }, + "parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + } + }, + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true + }, + "picomatch": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.0.tgz", + "integrity": "sha512-lY1Q/PiJGC2zOv/z391WOTD+Z02bCgsFfvxoXXf6h7kv9o+WmsmzYqrAwY63sNgOxE4xEdq0WyUnXfKeBrSvYw==", + "dev": true + }, + "pluralize": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/pluralize/-/pluralize-8.0.0.tgz", + "integrity": "sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==", + "dev": true + }, + 
"prepend-http": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/prepend-http/-/prepend-http-2.0.0.tgz", + "integrity": "sha1-6SQ0v6XqjBn0HN/UAddBo8gZ2Jc=", + "dev": true + }, + "public-ip": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/public-ip/-/public-ip-4.0.4.tgz", + "integrity": "sha512-EJ0VMV2vF6Cu7BIPo3IMW1Maq6ME+fbR0NcPmqDfpfNGIRPue1X8QrGjrg/rfjDkOsIkKHIf2S5FlEa48hFMTA==", + "dev": true, + "requires": { + "dns-socket": "^4.2.2", + "got": "^9.6.0", + "is-ip": "^3.1.0" + } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "readable-stream": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", + "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==", + "dev": true, + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + }, + "readdirp": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", + "integrity": "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==", + "dev": true, + "requires": { + "picomatch": "^2.2.1" + } + }, + "remark": { + "version": "13.0.0", + "resolved": "https://registry.npmjs.org/remark/-/remark-13.0.0.tgz", + "integrity": "sha512-HDz1+IKGtOyWN+QgBiAT0kn+2s6ovOxHyPAFGKVE81VSzJ+mq7RwHFledEvB5F1p4iJvOah/LOKdFuzvRnNLCA==", + "dev": true, + "requires": { + "remark-parse": "^9.0.0", + "remark-stringify": "^9.0.0", + "unified": "^9.1.0" + } + }, + "remark-cli": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/remark-cli/-/remark-cli-9.0.0.tgz", + "integrity": 
"sha512-y6kCXdwZoMoh0Wo4Och1tDW50PmMc86gW6GpF08v9d+xUCEJE2wwXdQ+TnTaUamRnfFdU+fE+eNf2PJ53cyq8g==", + "dev": true, + "requires": { + "markdown-extensions": "^1.1.0", + "remark": "^13.0.0", + "unified-args": "^8.0.0" + } + }, + "remark-frontmatter": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-frontmatter/-/remark-frontmatter-3.0.0.tgz", + "integrity": "sha512-mSuDd3svCHs+2PyO29h7iijIZx4plX0fheacJcAoYAASfgzgVIcXGYSq9GFyYocFLftQs8IOmmkgtOovs6d4oA==", + "dev": true, + "requires": { + "mdast-util-frontmatter": "^0.2.0", + "micromark-extension-frontmatter": "^0.2.0" + } + }, + "remark-gfm": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-1.0.0.tgz", + "integrity": "sha512-KfexHJCiqvrdBZVbQ6RopMZGwaXz6wFJEfByIuEwGf0arvITHjiKKZ1dpXujjH9KZdm1//XJQwgfnJ3lmXaDPA==", + "dev": true, + "requires": { + "mdast-util-gfm": "^0.1.0", + "micromark-extension-gfm": "^0.3.0" + } + }, + "remark-lint": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/remark-lint/-/remark-lint-8.0.0.tgz", + "integrity": "sha512-ESI8qJQ/TIRjABDnqoFsTiZntu+FRifZ5fJ77yX63eIDijl/arvmDvT+tAf75/Nm5BFL4R2JFUtkHRGVjzYUsg==", + "dev": true, + "requires": { + "remark-message-control": "^6.0.0" + } + }, + "remark-lint-blockquote-indentation": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-blockquote-indentation/-/remark-lint-blockquote-indentation-2.0.1.tgz", + "integrity": "sha512-uJ9az/Ms9AapnkWpLSCJfawBfnBI2Tn1yUsPNqIFv6YM98ymetItUMyP6ng9NFPqDvTQBbiarulkgoEo0wcafQ==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "pluralize": "^8.0.0", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": 
"sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", + "dev": true + } + } + }, + "remark-lint-checkbox-character-style": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-checkbox-character-style/-/remark-lint-checkbox-character-style-3.0.0.tgz", + "integrity": "sha512-691OJ5RdBRXVpvnOEiBhMB4uhHJSHVttw83O4qyAkNBiqxa1Axqhsz8FgmzYgRLQbOGd2ncVUcXG1LOJt6C0DQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-code-block-style": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-code-block-style/-/remark-lint-code-block-style-2.0.1.tgz", + "integrity": "sha512-eRhmnColmSxJhO61GHZkvO67SpHDshVxs2j3+Zoc5Y1a4zQT2133ZAij04XKaBFfsVLjhbY/+YOWxgvtjx2nmA==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-definition-case": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-definition-case/-/remark-lint-definition-case-2.0.1.tgz", + "integrity": "sha512-M+XlThtQwEJLQnQb5Gi6xZdkw92rGp7m2ux58WMw/Qlcg02WgHR/O0OcHPe5VO5hMJrtI+cGG5T0svsCgRZd3w==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-definition-spacing": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-definition-spacing/-/remark-lint-definition-spacing-2.0.1.tgz", + "integrity": "sha512-xK9DOQO5MudITD189VyUiMHBIKltW1oc55L7Fti3i9DedXoBG7Phm+V9Mm7IdWzCVkquZVgVk63xQdqzSQRrSQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + 
"remark-lint-emphasis-marker": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-emphasis-marker/-/remark-lint-emphasis-marker-2.0.0.tgz", + "integrity": "sha512-O7/8xeie/dkazeSjty+kxQ5n3kxw+YjeK81F3lbZ88J8L7bRIK4q84hTB2bzeHddOmX8zRzwvw8Y+BNesBU2/g==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-fenced-code-flag": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-fenced-code-flag/-/remark-lint-fenced-code-flag-2.0.1.tgz", + "integrity": "sha512-+COnWHlS/h02FMxoZWxNlZW3Y8M0cQQpmx3aNCbG7xkyMyCKsMLg9EmRvYHHIbxQCuF3JT0WWx5AySqlc7d+NA==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-fenced-code-marker": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-fenced-code-marker/-/remark-lint-fenced-code-marker-2.0.1.tgz", + "integrity": "sha512-lujpjm04enn3ma6lITlttadld6eQ1OWAEcT3qZzvFHp+zPraC0yr0eXlvtDN/0UH8mrln/QmGiZp3i8IdbucZg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-file-extension": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/remark-lint-file-extension/-/remark-lint-file-extension-1.0.5.tgz", + "integrity": "sha512-oVQdf5vEomwHkfQ7R/mgmsWW2H/t9kSvnrxtVoNOHr+qnOEafKKDn+AFhioN2kqtjCZBAjSSrePs6xGKmXKDTw==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-final-definition": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/remark-lint-final-definition/-/remark-lint-final-definition-2.1.0.tgz", + "integrity": 
"sha512-83K7n2icOHPfBzbR5Mr1o7cu8gOjD8FwJkFx/ly+rW+8SHfjCj4D3WOFGQ1xVdmHjfomBDXXDSNo2oiacADVXQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-final-newline": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/remark-lint-final-newline/-/remark-lint-final-newline-1.0.5.tgz", + "integrity": "sha512-rfLlW8+Fz2dqnaEgU4JwLA55CQF1T4mfSs/GwkkeUCGPenvEYwSkCN2KO2Gr1dy8qPoOdTFE1rSufLjmeTW5HA==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-hard-break-spaces": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-hard-break-spaces/-/remark-lint-hard-break-spaces-2.0.1.tgz", + "integrity": "sha512-Qfn/BMQFamHhtbfLrL8Co/dbYJFLRL4PGVXZ5wumkUO5f9FkZC2RsV+MD9lisvGTkJK0ZEJrVVeaPbUIFM0OAw==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-heading-increment": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-heading-increment/-/remark-lint-heading-increment-2.0.1.tgz", + "integrity": "sha512-bYDRmv/lk3nuWXs2VSD1B4FneGT6v7a74FuVmb305hyEMmFSnneJvVgnOJxyKlbNlz12pq1IQ6MhlJBda/SFtQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-heading-style": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-heading-style/-/remark-lint-heading-style-2.0.1.tgz", + "integrity": "sha512-IrFLNs0M5Vbn9qg51AYhGUfzgLAcDOjh2hFGMz3mx664dV6zLcNZOPSdJBBJq3JQR4gKpoXcNwN1+FFaIATj+A==", + "dev": true, + "requires": { + "mdast-util-heading-style": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + 
"remark-lint-link-title-style": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-link-title-style/-/remark-lint-link-title-style-2.0.1.tgz", + "integrity": "sha512-+Q7Ew8qpOQzjqbDF6sUHmn9mKgje+m2Ho8Xz7cEnGIRaKJgtJzkn/dZqQM/az0gn3zaN6rOuwTwqw4EsT5EsIg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0", + "vfile-location": "^3.0.0" + } + }, + "remark-lint-list-item-bullet-indent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-list-item-bullet-indent/-/remark-lint-list-item-bullet-indent-3.0.0.tgz", + "integrity": "sha512-X2rleWP8XReC4LXKF7Qi5vYiPJkA4Grx5zxsjHofFrVRz6j0PYOCuz7vsO+ZzMunFMfom6FODnscSWz4zouDVw==", + "dev": true, + "requires": { + "pluralize": "^8.0.0", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-list-item-content-indent": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-list-item-content-indent/-/remark-lint-list-item-content-indent-2.0.1.tgz", + "integrity": "sha512-OzUMqavxyptAdG7vWvBSMc9mLW9ZlTjbW4XGayzczd3KIr6Uwp3NEFXKx6MLtYIM/vwBqMrPQUrObOC7A2uBpQ==", + "dev": true, + "requires": { + "pluralize": "^8.0.0", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-list-item-indent": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-list-item-indent/-/remark-lint-list-item-indent-2.0.1.tgz", + "integrity": "sha512-4IKbA9GA14Q9PzKSQI6KEHU/UGO36CSQEjaDIhmb9UOhyhuzz4vWhnSIsxyI73n9nl9GGRAMNUSGzr4pQUFwTA==", + "dev": true, + "requires": { + "pluralize": "^8.0.0", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-list-item-spacing": { + 
"version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-list-item-spacing/-/remark-lint-list-item-spacing-2.0.0.tgz", + "integrity": "sha512-IxIAFIJA0BqciY28OZEJXe6oeMsonVA8hcw/JEx4pTJidEBKz/LLZ8t5ZLNFPh8Tdt/XnzvcmNPqPCoa05XDhg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-maximum-heading-length": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-maximum-heading-length/-/remark-lint-maximum-heading-length-2.0.0.tgz", + "integrity": "sha512-EuRam7uUQdWI3DXPKk7Kbu//l+IiYcTcZqBbswdpsHBzMQWF/R2e5SJVObdszKdEySJx/zktaYCKJDxqZjPnxQ==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": "sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", + "dev": true + } + } + }, + "remark-lint-maximum-line-length": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-maximum-line-length/-/remark-lint-maximum-line-length-2.0.0.tgz", + "integrity": "sha512-Qhe1QwDGisMP/UraUexWIPNBXJO8VQ7LIelz4NdftBQl/FxDVoXn3477Fm+8bGtcTXkMPF+QfllE4L1U7kJQgQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-auto-link-without-protocol": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-auto-link-without-protocol/-/remark-lint-no-auto-link-without-protocol-2.0.1.tgz", + "integrity": 
"sha512-TFcXxzucsfBb/5uMqGF1rQA+WJJqm1ZlYQXyvJEXigEZ8EAxsxZGPb/gOQARHl/y0vymAuYxMTaChavPKaBqpQ==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": "sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", + "dev": true + } + } + }, + "remark-lint-no-blockquote-without-marker": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-no-blockquote-without-marker/-/remark-lint-no-blockquote-without-marker-4.0.0.tgz", + "integrity": "sha512-Y59fMqdygRVFLk1gpx2Qhhaw5IKOR9T38Wf7pjR07bEFBGUNfcoNVIFMd1TCJfCPQxUyJzzSqfZz/KT7KdUuiQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.0.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0", + "vfile-location": "^3.0.0" + } + }, + "remark-lint-no-consecutive-blank-lines": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-no-consecutive-blank-lines/-/remark-lint-no-consecutive-blank-lines-3.0.0.tgz", + "integrity": "sha512-kmzLlOLrapBKEngwYFTdCZDmeOaze6adFPB7G0EdymD9V1mpAlnneINuOshRLEDKK5fAhXKiZXxdGIaMPkiXrA==", + "dev": true, + "requires": { + "pluralize": "^8.0.0", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-dead-urls": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/remark-lint-no-dead-urls/-/remark-lint-no-dead-urls-1.0.2.tgz", + "integrity": "sha512-YfTrZB3D5SMyoasvq9zMLXHnh/DD6/wOqpCEmzHRz/GL8Ps3SwvaTODGT48er7HoSUylX3t2BeCZw+ib3o1cDg==", + "dev": true, + "requires": { + "check-links": "^1.1.8", + 
"is-online": "^8.2.1", + "unified-lint-rule": "^1.0.4", + "unist-util-visit": "^2.0.1" + } + }, + "remark-lint-no-duplicate-definitions": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-duplicate-definitions/-/remark-lint-no-duplicate-definitions-2.0.1.tgz", + "integrity": "sha512-XL22benJZB01m+aOse91nsu1IMFqeWJWme9QvoJuxIcBROO1BG1VoqLOkwNcawE/M/0CkvTo5rfx0eMlcnXOIw==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-stringify-position": "^2.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-duplicate-headings": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-duplicate-headings/-/remark-lint-no-duplicate-headings-2.0.1.tgz", + "integrity": "sha512-F6AP0FJcHIlkmq0pHX0J5EGvLA9LfhuYTvnNO8y3kvflHeRjFkDyt2foz/taXR8OcLQR51n/jIJiwrrSMbiauw==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-stringify-position": "^2.0.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": "sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", + "dev": true + } + } + }, + "remark-lint-no-emphasis-as-heading": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-emphasis-as-heading/-/remark-lint-no-emphasis-as-heading-2.0.1.tgz", + "integrity": "sha512-z86+yWtVivtuGIxIC4g9RuATbgZgOgyLcnaleonJ7/HdGTYssjJNyqCJweaWSLoaI0akBQdDwmtJahW5iuX3/g==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-file-name-articles": { + "version": "1.0.5", + 
"resolved": "https://registry.npmjs.org/remark-lint-no-file-name-articles/-/remark-lint-no-file-name-articles-1.0.5.tgz", + "integrity": "sha512-AQk5eTb3s3TAPPjiglZgqlQj4ycao+gPs8/XkdN1VCPUtewW0GgwoQe7YEuBKayJ6ioN8dGP37Kg/P/PlKaRQA==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-no-file-name-consecutive-dashes": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/remark-lint-no-file-name-consecutive-dashes/-/remark-lint-no-file-name-consecutive-dashes-1.0.5.tgz", + "integrity": "sha512-Mg2IDsi790/dSdAzwnBnsMYdZm3qC2QgGwqOWcr0TPABJhhjC3p8r5fX4MNMTXI5It7B7bW9+ImmCeLOZiXkLg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-no-file-name-irregular-characters": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/remark-lint-no-file-name-irregular-characters/-/remark-lint-no-file-name-irregular-characters-1.0.3.tgz", + "integrity": "sha512-b4xIy1Yi8qZpM2vnMN+6gEujagPGxUBAs1judv6xJQngkl5d5zT8VQZsYsTGHku4NWHjjh3b7vK5mr0/yp4JSg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-no-file-name-mixed-case": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/remark-lint-no-file-name-mixed-case/-/remark-lint-no-file-name-mixed-case-1.0.5.tgz", + "integrity": "sha512-ilrUCbHZin/ENwr8c3SC2chgkFsizXjBQIB/oZ7gnm1IkCkZPiMyXZAHdpwC/DjbrpGxfMYh9JmIHao4giS5+A==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-no-file-name-outer-dashes": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/remark-lint-no-file-name-outer-dashes/-/remark-lint-no-file-name-outer-dashes-1.0.6.tgz", + "integrity": "sha512-rT8CmcIlenegS0Yst4maYXdZfqIjBOiRUY8j/KJkORF5tKH+3O1/S07025qPGmcRihzK3w4yO0K8rgkKQw0b9w==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0" + } + }, + "remark-lint-no-heading-content-indent": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/remark-lint-no-heading-content-indent/-/remark-lint-no-heading-content-indent-3.0.0.tgz", + "integrity": "sha512-yULDoVSIqKylLDfW6mVUbrHlyEWUSFtVFiKc+/BA412xDIhm8HZLUnP+FsuBC0OzbIZ+bO9Txy52WtO3LGnK1A==", + "dev": true, + "requires": { + "mdast-util-heading-style": "^1.0.2", + "pluralize": "^8.0.0", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-heading-punctuation": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-heading-punctuation/-/remark-lint-no-heading-punctuation-2.0.1.tgz", + "integrity": "sha512-lY/eF6GbMeGu4cSuxfGHyvaQQBIq/6T/o+HvAR5UfxSTxmxZFwbZneAI2lbeR1zPcqOU87NsZ5ZZzWVwdLpPBw==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": "sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", + "dev": true + } + } + }, + "remark-lint-no-inline-padding": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-no-inline-padding/-/remark-lint-no-inline-padding-3.0.0.tgz", + "integrity": "sha512-3s9uW3Yux9RFC0xV81MQX3bsYs+UY7nPnRuMxeIxgcVwxQ4E/mTJd9QjXUwBhU9kdPtJ5AalngdmOW2Tgar8Cg==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": "sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", 
+ "dev": true + } + } + }, + "remark-lint-no-literal-urls": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-literal-urls/-/remark-lint-no-literal-urls-2.0.1.tgz", + "integrity": "sha512-IDdKtWOMuKVQIlb1CnsgBoyoTcXU3LppelDFAIZePbRPySVHklTtuK57kacgU5grc7gPM04bZV96eliGrRU7Iw==", + "dev": true, + "requires": { + "mdast-util-to-string": "^1.0.2", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + }, + "dependencies": { + "mdast-util-to-string": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-1.1.0.tgz", + "integrity": "sha512-jVU0Nr2B9X3MU4tSK7JP1CMkSvOj7X5l/GboG1tKRw52lLF1x2Ju92Ms9tNetCcbfX3hzlM73zYo2NKkWSfF/A==", + "dev": true + } + } + }, + "remark-lint-no-multiple-toplevel-headings": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-multiple-toplevel-headings/-/remark-lint-no-multiple-toplevel-headings-2.0.1.tgz", + "integrity": "sha512-VKSItR6c+u3OsE5pUiSmNusERNyQS9Nnji26ezoQ1uvy06k3RypIjmzQqJ/hCkSiF+hoyC3ibtrrGT8gorzCmQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-stringify-position": "^2.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-shell-dollars": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/remark-lint-no-shell-dollars/-/remark-lint-no-shell-dollars-2.0.2.tgz", + "integrity": "sha512-zhkHZOuyaD3r/TUUkkVqW0OxsR9fnSrAnHIF63nfJoAAUezPOu8D1NBsni6rX8H2DqGbPYkoeWrNsTwiKP0yow==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-shortcut-reference-image": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/remark-lint-no-shortcut-reference-image/-/remark-lint-no-shortcut-reference-image-2.0.1.tgz", + "integrity": "sha512-2jcZBdnN6ecP7u87gkOVFrvICLXIU5OsdWbo160FvS/2v3qqqwF2e/n/e7D9Jd+KTq1mR1gEVVuTqkWWuh3cig==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-shortcut-reference-link": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-no-shortcut-reference-link/-/remark-lint-no-shortcut-reference-link-2.0.1.tgz", + "integrity": "sha512-pTZbslG412rrwwGQkIboA8wpBvcjmGFmvugIA+UQR+GfFysKtJ5OZMPGJ98/9CYWjw9Z5m0/EktplZ5TjFjqwA==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-no-table-indentation": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-no-table-indentation/-/remark-lint-no-table-indentation-3.0.0.tgz", + "integrity": "sha512-+l7GovI6T+3LhnTtz/SmSRyOb6Fxy6tmaObKHrwb/GAebI/4MhFS1LVo3vbiP/RpPYtyQoFbbuXI55hqBG4ibQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0", + "vfile-location": "^3.0.0" + } + }, + "remark-lint-no-undefined-references": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-no-undefined-references/-/remark-lint-no-undefined-references-3.0.0.tgz", + "integrity": "sha512-0hzaJS9GuzSQVOeeNdJr/s66LRQOzp618xuOQPYWHcJdd+SCaRTyWbjMrTM/cCI5L1sYjgurp410NkIBQ32Vqg==", + "dev": true, + "requires": { + "collapse-white-space": "^1.0.4", + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.1.0", + "unist-util-visit": "^2.0.0", + "vfile-location": "^3.1.0" + } + }, + "remark-lint-no-unused-definitions": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/remark-lint-no-unused-definitions/-/remark-lint-no-unused-definitions-2.0.1.tgz", + "integrity": "sha512-+BMc0BOjc364SvKYLkspmxDch8OaKPbnUGgQBvK0Bmlwy42baR4C9zhwAWBxm0SBy5Z4AyM4G4jKpLXPH40Oxg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-ordered-list-marker-style": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-ordered-list-marker-style/-/remark-lint-ordered-list-marker-style-2.0.0.tgz", + "integrity": "sha512-zYMZA8tQD/slJYKqsstZv0/Q34Hkdlf4DjC8SOr92PSA60R/xr7JdVd/AHHisbMsFvdnHZrxaB8oIOtbAUJCSw==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-ordered-list-marker-value": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-ordered-list-marker-value/-/remark-lint-ordered-list-marker-value-2.0.1.tgz", + "integrity": "sha512-blt9rS7OKxZ2NW8tqojELeyNEwPhhTJGVa+YpUkdEH+KnrdcD7Nzhnj6zfLWOx6jFNZk3jpq5nvLFAPteHaNKg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-rule-style": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-rule-style/-/remark-lint-rule-style-2.0.1.tgz", + "integrity": "sha512-hz4Ff9UdlYmtO6Czz99WJavCjqCer7Cav4VopXt+yVIikObw96G5bAuLYcVS7hvMUGqC9ZuM02/Y/iq9n8pkAg==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-strong-marker": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-strong-marker/-/remark-lint-strong-marker-2.0.0.tgz", + "integrity": 
"sha512-1gl6vZF5BvV4kvS4xxhl8cw90La5Cio9ZFDQuspZMRA2KjzpwoU5RlTUbeHv8OqlKJJ2p7s0MDs8bLZNTzzjHA==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-table-cell-padding": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-table-cell-padding/-/remark-lint-table-cell-padding-3.0.0.tgz", + "integrity": "sha512-sEKrbyFZPZpxI39R8/r+CwUrin9YtyRwVn0SQkNQEZWZcIpylK+bvoKIldvLIXQPob+ZxklL0GPVRzotQMwuWQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-table-pipe-alignment": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/remark-lint-table-pipe-alignment/-/remark-lint-table-pipe-alignment-2.0.1.tgz", + "integrity": "sha512-O89U7bp0ja6uQkT2uQrNB76GaPvFabrHiUGhqEUnld21yEdyj7rgS57kn84lZNSuuvN1Oor6bDyCwWQGzzpoOQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-table-pipes": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-table-pipes/-/remark-lint-table-pipes-3.0.0.tgz", + "integrity": "sha512-QPokSazEdl0Y8ayUV9UB0Ggn3Jos/RAQwIo0z1KDGnJlGDiF80Jc6iU9RgDNUOjlpQffSLIfSVxH5VVYF/K3uQ==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-lint-unordered-list-marker-style": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-unordered-list-marker-style/-/remark-lint-unordered-list-marker-style-2.0.0.tgz", + "integrity": "sha512-s+ZiBgBDbIiScPPxWG/r2E/4YY+xP6EFLsLXPV/uPx7JqegIP/4+MAPi7Nz2zLmnQ2eekssZrEXma3uDb/dE1Q==", + "dev": true, + 
"requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + }, + "remark-message-control": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/remark-message-control/-/remark-message-control-6.0.0.tgz", + "integrity": "sha512-k9bt7BYc3G7YBdmeAhvd3VavrPa/XlKWR3CyHjr4sLO9xJyly8WHHT3Sp+8HPR8lEUv+/sZaffL7IjMLV0f6BA==", + "dev": true, + "requires": { + "mdast-comment-marker": "^1.0.0", + "unified-message-control": "^3.0.0" + } + }, + "remark-parse": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-9.0.0.tgz", + "integrity": "sha512-geKatMwSzEXKHuzBNU1z676sGcDcFoChMK38TgdHJNAYfFtsfHDQG7MoJAjs6sgYMqyLduCYWDIWZIxiPeafEw==", + "dev": true, + "requires": { + "mdast-util-from-markdown": "^0.8.0" + } + }, + "remark-preset-lint-consistent": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/remark-preset-lint-consistent/-/remark-preset-lint-consistent-4.0.0.tgz", + "integrity": "sha512-1euNZfRanM3wysHAR0vYX6uMbbKTlmTc+QvrymgRayKV8uhslQBISa+XduWk7mSz68ylS8CRR7JGvBfi6kDQjg==", + "dev": true, + "requires": { + "remark-lint": "^8.0.0", + "remark-lint-blockquote-indentation": "^2.0.0", + "remark-lint-checkbox-character-style": "^3.0.0", + "remark-lint-code-block-style": "^2.0.0", + "remark-lint-emphasis-marker": "^2.0.0", + "remark-lint-fenced-code-marker": "^2.0.0", + "remark-lint-heading-style": "^2.0.0", + "remark-lint-link-title-style": "^2.0.0", + "remark-lint-list-item-content-indent": "^2.0.0", + "remark-lint-ordered-list-marker-style": "^2.0.0", + "remark-lint-rule-style": "^2.0.0", + "remark-lint-strong-marker": "^2.0.0", + "remark-lint-table-cell-padding": "^3.0.0" + } + }, + "remark-preset-lint-markdown-style-guide": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/remark-preset-lint-markdown-style-guide/-/remark-preset-lint-markdown-style-guide-4.0.0.tgz", + "integrity": 
"sha512-gczDlfZ28Fz0IN/oddy0AH4CiTu9S8d3pJWUsrnwFiafjhJjPGobGE1OD3bksi53md1Bp4K0fzo99YYfvB4Sjw==", + "dev": true, + "requires": { + "remark-lint": "^8.0.0", + "remark-lint-blockquote-indentation": "^2.0.0", + "remark-lint-code-block-style": "^2.0.0", + "remark-lint-definition-case": "^2.0.0", + "remark-lint-definition-spacing": "^2.0.0", + "remark-lint-emphasis-marker": "^2.0.0", + "remark-lint-fenced-code-flag": "^2.0.0", + "remark-lint-fenced-code-marker": "^2.0.0", + "remark-lint-file-extension": "^1.0.0", + "remark-lint-final-definition": "^2.0.0", + "remark-lint-hard-break-spaces": "^2.0.0", + "remark-lint-heading-increment": "^2.0.0", + "remark-lint-heading-style": "^2.0.0", + "remark-lint-link-title-style": "^2.0.0", + "remark-lint-list-item-content-indent": "^2.0.0", + "remark-lint-list-item-indent": "^2.0.0", + "remark-lint-list-item-spacing": "^3.0.0", + "remark-lint-maximum-heading-length": "^2.0.0", + "remark-lint-maximum-line-length": "^2.0.0", + "remark-lint-no-auto-link-without-protocol": "^2.0.0", + "remark-lint-no-blockquote-without-marker": "^4.0.0", + "remark-lint-no-consecutive-blank-lines": "^3.0.0", + "remark-lint-no-duplicate-headings": "^2.0.0", + "remark-lint-no-emphasis-as-heading": "^2.0.0", + "remark-lint-no-file-name-articles": "^1.0.0", + "remark-lint-no-file-name-consecutive-dashes": "^1.0.0", + "remark-lint-no-file-name-irregular-characters": "^1.0.0", + "remark-lint-no-file-name-mixed-case": "^1.0.0", + "remark-lint-no-file-name-outer-dashes": "^1.0.0", + "remark-lint-no-heading-punctuation": "^2.0.0", + "remark-lint-no-inline-padding": "^3.0.0", + "remark-lint-no-literal-urls": "^2.0.0", + "remark-lint-no-multiple-toplevel-headings": "^2.0.0", + "remark-lint-no-shell-dollars": "^2.0.0", + "remark-lint-no-shortcut-reference-image": "^2.0.0", + "remark-lint-no-shortcut-reference-link": "^2.0.0", + "remark-lint-no-table-indentation": "^3.0.0", + "remark-lint-ordered-list-marker-style": "^2.0.0", + 
"remark-lint-ordered-list-marker-value": "^2.0.0", + "remark-lint-rule-style": "^2.0.0", + "remark-lint-strong-marker": "^2.0.0", + "remark-lint-table-cell-padding": "^3.0.0", + "remark-lint-table-pipe-alignment": "^2.0.0", + "remark-lint-table-pipes": "^3.0.0", + "remark-lint-unordered-list-marker-style": "^2.0.0" + }, + "dependencies": { + "remark-lint-list-item-spacing": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/remark-lint-list-item-spacing/-/remark-lint-list-item-spacing-3.0.0.tgz", + "integrity": "sha512-SRUVonwdN3GOSFb6oIYs4IfJxIVR+rD0nynkX66qEO49/qDDT1PPvkndis6Nyew5+t+2V/Db9vqllL6SWbnEtw==", + "dev": true, + "requires": { + "unified-lint-rule": "^1.0.0", + "unist-util-generated": "^1.1.0", + "unist-util-position": "^3.0.0", + "unist-util-visit": "^2.0.0" + } + } + } + }, + "remark-preset-lint-recommended": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/remark-preset-lint-recommended/-/remark-preset-lint-recommended-5.0.0.tgz", + "integrity": "sha512-uu+Ab8JCwMMaKvvB0LOWTWtM3uAvJbKQM/oyWCEJqj7lUVNTKZS575Ro5rKM3Dx7kQjjR1iw0e99bpAYTc5xNA==", + "dev": true, + "requires": { + "remark-lint": "^8.0.0", + "remark-lint-final-newline": "^1.0.0", + "remark-lint-hard-break-spaces": "^2.0.0", + "remark-lint-list-item-bullet-indent": "^3.0.0", + "remark-lint-list-item-indent": "^2.0.0", + "remark-lint-no-auto-link-without-protocol": "^2.0.0", + "remark-lint-no-blockquote-without-marker": "^4.0.0", + "remark-lint-no-duplicate-definitions": "^2.0.0", + "remark-lint-no-heading-content-indent": "^3.0.0", + "remark-lint-no-inline-padding": "^3.0.0", + "remark-lint-no-literal-urls": "^2.0.0", + "remark-lint-no-shortcut-reference-image": "^2.0.0", + "remark-lint-no-shortcut-reference-link": "^2.0.0", + "remark-lint-no-undefined-references": "^3.0.0", + "remark-lint-no-unused-definitions": "^2.0.0", + "remark-lint-ordered-list-marker-style": "^2.0.0" + } + }, + "remark-stringify": { + "version": "9.0.1", + "resolved": 
"https://registry.npmjs.org/remark-stringify/-/remark-stringify-9.0.1.tgz", + "integrity": "sha512-mWmNg3ZtESvZS8fv5PTvaPckdL4iNlCHTt8/e/8oN08nArHRHjNZMKzA/YW3+p7/lYqIw4nx1XsjCBo/AxNChg==", + "dev": true, + "requires": { + "mdast-util-to-markdown": "^0.6.0" + } + }, + "repeat-string": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz", + "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=", + "dev": true + }, + "resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true + }, + "responselike": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/responselike/-/responselike-1.0.2.tgz", + "integrity": "sha1-kYcg7ztjHFZCvgaPFa3lpG9Loec=", + "dev": true, + "requires": { + "lowercase-keys": "^1.0.0" + } + }, + "safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "dev": true + }, + "sliced": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/sliced/-/sliced-1.0.1.tgz", + "integrity": "sha1-CzpmK10Ewxd7GSa+qCsD+Dei70E=", + "dev": true + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + }, + "source-map-support": { + "version": "0.5.19", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.19.tgz", + "integrity": "sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw==", + "dev": true, + "requires": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, 
+ "sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", + "dev": true + }, + "string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + } + }, + "string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "dev": true, + "requires": { + "safe-buffer": "~5.2.0" + } + }, + "strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.0" + } + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + }, + "text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", + "dev": true + }, + "to-readable-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/to-readable-stream/-/to-readable-stream-1.0.0.tgz", + "integrity": "sha512-Iq25XBt6zD5npPhlLVXGFN3/gyR2/qODcKNNyTMd4vbm39HUaOiAM4PMq0eMVC/Tkxz+Zjdsc55g9yyz+Yq00Q==", + "dev": true + }, + "to-regex-range": { + "version": "5.0.1", + "resolved": 
"https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "requires": { + "is-number": "^7.0.0" + } + }, + "to-vfile": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/to-vfile/-/to-vfile-6.1.0.tgz", + "integrity": "sha512-BxX8EkCxOAZe+D/ToHdDsJcVI4HqQfmw0tCkp31zf3dNP/XWIAjU4CmeuSwsSoOzOTqHPOL0KUzyZqJplkD0Qw==", + "dev": true, + "requires": { + "is-buffer": "^2.0.0", + "vfile": "^4.0.0" + } + }, + "trough": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/trough/-/trough-1.0.5.tgz", + "integrity": "sha512-rvuRbTarPXmMb79SmzEp8aqXNKcK+y0XaB298IXueQ8I2PsrATcPBCSPyK/dDNa2iWOhKlfNnOjdAOTBU/nkFA==", + "dev": true + }, + "type-fest": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.3.1.tgz", + "integrity": "sha512-cUGJnCdr4STbePCgqNFbpVNCepa+kAVohJs1sLhxzdH+gnEoOd8VhbYa7pD3zZYGiURWM2xzEII3fQcRizDkYQ==", + "dev": true + }, + "typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=", + "dev": true + }, + "unified": { + "version": "9.2.1", + "resolved": "https://registry.npmjs.org/unified/-/unified-9.2.1.tgz", + "integrity": "sha512-juWjuI8Z4xFg8pJbnEZ41b5xjGUWGHqXALmBZ3FC3WX0PIx1CZBIIJ6mXbYMcf6Yw4Fi0rFUTA1cdz/BglbOhA==", + "dev": true, + "requires": { + "bail": "^1.0.0", + "extend": "^3.0.0", + "is-buffer": "^2.0.0", + "is-plain-obj": "^2.0.0", + "trough": "^1.0.0", + "vfile": "^4.0.0" + } + }, + "unified-args": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/unified-args/-/unified-args-8.1.0.tgz", + "integrity": "sha512-t1HPS1cQPsVvt/6EtyWIbQGurza5684WGRigNghZRvzIdHm3LPgMdXPyGx0npORKzdiy5+urkF0rF5SXM8lBuQ==", + "dev": true, + "requires": { + "camelcase": "^5.0.0", + "chalk": "^3.0.0", + "chokidar": "^3.0.0", + "fault": "^1.0.2", + 
"json5": "^2.0.0", + "minimist": "^1.2.0", + "text-table": "^0.2.0", + "unified-engine": "^8.0.0" + } + }, + "unified-engine": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/unified-engine/-/unified-engine-8.2.0.tgz", + "integrity": "sha512-ZlMm62ejrf+tJHdyOjQfljszngQjRor95q2XZMGk6rpJUYi7ZIHY/EXEhOcj9PZkMKKdLIM+dqL4s0ceyk9wbA==", + "dev": true, + "requires": { + "concat-stream": "^2.0.0", + "debug": "^4.0.0", + "fault": "^1.0.0", + "figures": "^3.0.0", + "glob": "^7.0.3", + "ignore": "^5.0.0", + "is-buffer": "^2.0.0", + "is-empty": "^1.0.0", + "is-plain-obj": "^2.0.0", + "js-yaml": "^3.6.1", + "load-plugin": "^3.0.0", + "parse-json": "^5.0.0", + "to-vfile": "^6.0.0", + "trough": "^1.0.0", + "unist-util-inspect": "^5.0.0", + "vfile-reporter": "^6.0.0", + "vfile-statistics": "^1.1.0" + } + }, + "unified-lint-rule": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/unified-lint-rule/-/unified-lint-rule-1.0.6.tgz", + "integrity": "sha512-YPK15YBFwnsVorDFG/u0cVVQN5G2a3V8zv5/N6KN3TCG+ajKtaALcy7u14DCSrJI+gZeyYquFL9cioJXOGXSvg==", + "dev": true, + "requires": { + "wrapped": "^1.0.1" + } + }, + "unified-message-control": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/unified-message-control/-/unified-message-control-3.0.3.tgz", + "integrity": "sha512-oY5z2n8ugjpNHXOmcgrw0pQeJzavHS0VjPBP21tOcm7rc2C+5Q+kW9j5+gqtf8vfW/8sabbsK5+P+9QPwwEHDA==", + "dev": true, + "requires": { + "unist-util-visit": "^2.0.0", + "vfile-location": "^3.0.0" + } + }, + "unist-util-generated": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/unist-util-generated/-/unist-util-generated-1.1.6.tgz", + "integrity": "sha512-cln2Mm1/CZzN5ttGK7vkoGw+RZ8VcUH6BtGbq98DDtRGquAAOXig1mrBQYelOwMXYS8rK+vZDyyojSjp7JX+Lg==", + "dev": true + }, + "unist-util-inspect": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/unist-util-inspect/-/unist-util-inspect-5.0.1.tgz", + "integrity": 
"sha512-fPNWewS593JSmg49HbnE86BJKuBi1/nMWhDSccBvbARfxezEuJV85EaARR9/VplveiwCoLm2kWq+DhP8TBaDpw==", + "dev": true, + "requires": { + "is-empty": "^1.0.0" + } + }, + "unist-util-is": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.1.0.tgz", + "integrity": "sha512-ZOQSsnce92GrxSqlnEEseX0gi7GH9zTJZ0p9dtu87WRb/37mMPO2Ilx1s/t9vBHrFhbgweUwb+t7cIn5dxPhZg==", + "dev": true + }, + "unist-util-position": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-3.1.0.tgz", + "integrity": "sha512-w+PkwCbYSFw8vpgWD0v7zRCl1FpY3fjDSQ3/N/wNd9Ffa4gPi8+4keqt99N3XW6F99t/mUzp2xAhNmfKWp95QA==", + "dev": true + }, + "unist-util-stringify-position": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-2.0.3.tgz", + "integrity": "sha512-3faScn5I+hy9VleOq/qNbAd6pAx7iH5jYBMS9I1HgQVijz/4mv5Bvw5iw1sC/90CODiKo81G/ps8AJrISn687g==", + "dev": true, + "requires": { + "@types/unist": "^2.0.2" + } + }, + "unist-util-visit": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-2.0.3.tgz", + "integrity": "sha512-iJ4/RczbJMkD0712mGktuGpm/U4By4FfDonL7N/9tATGIF4imikjOuagyMY53tnZq3NP6BcmlrHhEKAfGWjh7Q==", + "dev": true, + "requires": { + "@types/unist": "^2.0.0", + "unist-util-is": "^4.0.0", + "unist-util-visit-parents": "^3.0.0" + } + }, + "unist-util-visit-parents": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-3.1.1.tgz", + "integrity": "sha512-1KROIZWo6bcMrZEwiH2UrXDyalAa0uqzWCxCJj6lPOvTve2WkfgCytoDTPaMnodXh1WrXOq0haVYHj99ynJlsg==", + "dev": true, + "requires": { + "@types/unist": "^2.0.0", + "unist-util-is": "^4.0.0" + } + }, + "url-parse-lax": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz", + "integrity": "sha1-FrXK/Afb42dsGxmZF3gj1lA6yww=", + "dev": 
true, + "requires": { + "prepend-http": "^2.0.0" + } + }, + "util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", + "dev": true + }, + "vfile": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-4.2.1.tgz", + "integrity": "sha512-O6AE4OskCG5S1emQ/4gl8zK586RqA3srz3nfK/Viy0UPToBc5Trp9BVFb1u0CjsKrAWwnpr4ifM/KBXPWwJbCA==", + "dev": true, + "requires": { + "@types/unist": "^2.0.0", + "is-buffer": "^2.0.0", + "unist-util-stringify-position": "^2.0.0", + "vfile-message": "^2.0.0" + } + }, + "vfile-location": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-3.2.0.tgz", + "integrity": "sha512-aLEIZKv/oxuCDZ8lkJGhuhztf/BW4M+iHdCwglA/eWc+vtuRFJj8EtgceYFX4LRjOhCAAiNHsKGssC6onJ+jbA==", + "dev": true + }, + "vfile-message": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-2.0.4.tgz", + "integrity": "sha512-DjssxRGkMvifUOJre00juHoP9DPWuzjxKuMDrhNbk2TdaYYBNMStsNhEOt3idrtI12VQYM/1+iM0KOzXi4pxwQ==", + "dev": true, + "requires": { + "@types/unist": "^2.0.0", + "unist-util-stringify-position": "^2.0.0" + } + }, + "vfile-reporter": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/vfile-reporter/-/vfile-reporter-6.0.2.tgz", + "integrity": "sha512-GN2bH2gs4eLnw/4jPSgfBjo+XCuvnX9elHICJZjVD4+NM0nsUrMTvdjGY5Sc/XG69XVTgLwj7hknQVc6M9FukA==", + "dev": true, + "requires": { + "repeat-string": "^1.5.0", + "string-width": "^4.0.0", + "supports-color": "^6.0.0", + "unist-util-stringify-position": "^2.0.0", + "vfile-sort": "^2.1.2", + "vfile-statistics": "^1.1.0" + }, + "dependencies": { + "has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true + }, + "supports-color": { + "version": "6.1.0", + "resolved": 
"https://registry.npmjs.org/supports-color/-/supports-color-6.1.0.tgz", + "integrity": "sha512-qe1jfm1Mg7Nq/NSh6XE24gPXROEVsWHxC1LIx//XNlD9iw7YZQGjZNjYN7xGaEG6iKdA8EtNFW6R0gjnVXp+wQ==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + } + } + }, + "vfile-reporter-json": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/vfile-reporter-json/-/vfile-reporter-json-2.0.2.tgz", + "integrity": "sha512-L9s5WLxOFCygydfGAaItZtgd2eQi/HVgo0ChYVbm02EnFMzybr3PZYUCmSqcCWMKWCX5FLjvSwjkSVS47Eph6g==", + "dev": true + }, + "vfile-sort": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/vfile-sort/-/vfile-sort-2.2.2.tgz", + "integrity": "sha512-tAyUqD2R1l/7Rn7ixdGkhXLD3zsg+XLAeUDUhXearjfIcpL1Hcsj5hHpCoy/gvfK/Ws61+e972fm0F7up7hfYA==", + "dev": true + }, + "vfile-statistics": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/vfile-statistics/-/vfile-statistics-1.1.4.tgz", + "integrity": "sha512-lXhElVO0Rq3frgPvFBwahmed3X03vjPF8OcjKMy8+F1xU/3Q3QU3tKEDp743SFtb74PdF0UWpxPvtOP0GCLheA==", + "dev": true + }, + "wrapped": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/wrapped/-/wrapped-1.0.1.tgz", + "integrity": "sha1-x4PZ2Aeyc+mwHoUWgKk4yHyQckI=", + "dev": true, + "requires": { + "co": "3.1.0", + "sliced": "^1.0.1" + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + }, + "zwitch": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-1.0.5.tgz", + "integrity": "sha512-V50KMwwzqJV0NpZIZFwfOD5/lyny3WlSzRiXgA0G7VUnRlqttta1L6UQIHzd6EuBY/cHGfwTIck7w1yH6Q5zUw==", + "dev": true + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 000000000000..1003838ec9bf --- /dev/null +++ b/package.json @@ -0,0 +1,25 @@ +{ + "name": "datumaro", + "version": "1.0.0", + "description": "Remarklint dependencies", + "dependencies": {}, + "devDependencies": { + 
"remark-cli": "^9.0.0", + "remark-frontmatter": "^3.0.0", + "remark-gfm": "^1.0.0", + "remark-lint-emphasis-marker": "^2.0.0", + "remark-lint-list-item-spacing": "^2.0.0", + "remark-lint-maximum-heading-length": "^2.0.0", + "remark-lint-maximum-line-length": "^2.0.0", + "remark-lint-no-dead-urls": "^1.0.2", + "remark-lint-no-file-name-irregular-characters": "^1.0.3", + "remark-lint-ordered-list-marker-style": "^2.0.0", + "remark-lint-strong-marker": "^2.0.0", + "remark-lint-unordered-list-marker-style": "^2.0.0", + "remark-preset-lint-consistent": "^4.0.0", + "remark-preset-lint-markdown-style-guide": "^4.0.0", + "remark-preset-lint-recommended": "^5.0.0", + "source-map-support": "^0.5.19", + "vfile-reporter-json": "^2.0.2" + } +} diff --git a/requirements-core.txt b/requirements-core.txt new file mode 100644 index 000000000000..8fa8c7619b50 --- /dev/null +++ b/requirements-core.txt @@ -0,0 +1,23 @@ +attrs>=19.3.0 +defusedxml>=0.6.0 +GitPython>=3.0.8 +lxml>=4.4.1 +matplotlib>=3.3.1 +numpy>=1.17.3 +Pillow>=6.1.0 + +# Avoid 2.0.2 Linux binary distribution because of +# a conflict in numpy versions with TensorFlow: +# - TF is compiled with numpy 1.19 ABI +# - pycocotools is compiled with numpy 1.20 ABI +# Using a previous version allows to force package rebuilding. 
+# +# https://github.com/openvinotoolkit/datumaro/issues/253 +pycocotools>=2.0.0,!=2.0.2; platform_system != "Windows" + +pycocotools-windows; platform_system == "Windows" +PyYAML>=5.3.1 +scikit-image>=0.15.0 + +# 2.3 has an unlisted dependency on PyTorch, which we don't need +tensorboardX>=1.8,!=2.3 diff --git a/requirements.txt b/requirements.txt index b5490d77d2d5..fff66a6aa6df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,10 @@ -attrs>=19.3.0 Cython>=0.27.3 # include before pycocotools -defusedxml>=0.6.0 -GitPython>=3.0.8 -lxml>=4.4.1 -matplotlib>=3.3.1 +-r requirements-core.txt --no-binary=pycocotools # https://github.com/openvinotoolkit/datumaro/issues/253 opencv-python-headless>=4.1.0.25 -Pillow>=6.1.0 -pycocotools>=2.0.0 --no-binary=pycocotools # https://github.com/openvinotoolkit/datumaro/issues/253 -PyYAML>=5.3.1 -scikit-image>=0.15.0 -tensorboardX>=1.8 pandas>=1.1.5 -pytest>=5.3.5 \ No newline at end of file +pytest>=5.3.5 + +# linters +bandit>=1.7.0 +isort~=5.9 +pylint>=2.7.0 diff --git a/setup.py b/setup.py index 68591d9242be..dfa124e9bb18 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT @@ -7,6 +7,7 @@ import os import os.path as osp import re + import setuptools # Snyk scan integration @@ -34,29 +35,9 @@ def find_version(project_dir=None): return version def get_requirements(): - requirements = [ - 'attrs>=19.3.0', - 'defusedxml', - 'GitPython', - 'lxml', - 'matplotlib', - 'numpy>=1.17.3', - 'Pillow', - - # Avoid 2.0.2 Linux binary distribution because of - # a conflict in numpy versions with TensorFlow: - # - TF is compiled with numpy 1.19 ABI - # - pycocotools is compiled with numpy 1.20 ABI - # Using a previous version allows to force package rebuilding. 
- # - # https://github.com/openvinotoolkit/datumaro/issues/253 - 'pycocotools!=2.0.2; platform_system != "Windows"', - 'pycocotools-windows; platform_system == "Windows"', - - 'PyYAML', - 'scikit-image', - 'tensorboardX', - ] + with open('requirements-core.txt') as fh: + requirements = [fh.read()] + if strtobool(os.getenv('DATUMARO_HEADLESS', '0').lower()): requirements.append('opencv-python-headless') else: diff --git a/tests/assets/cifar_dataset/data_batch_1 b/tests/assets/cifar_dataset/data_batch_1 index e4ed1edc0b8f..361d15a734de 100644 Binary files a/tests/assets/cifar_dataset/data_batch_1 and b/tests/assets/cifar_dataset/data_batch_1 differ diff --git a/tests/assets/cifar_dataset/test_batch b/tests/assets/cifar_dataset/test_batch index e3776023189c..1f494f9c366c 100644 Binary files a/tests/assets/cifar_dataset/test_batch and b/tests/assets/cifar_dataset/test_batch differ diff --git a/tests/assets/coco_dataset/coco/annotations/captions_train.json b/tests/assets/coco_dataset/coco/annotations/captions_train.json new file mode 100644 index 000000000000..a568285a5517 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/captions_train.json @@ -0,0 +1,40 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":0, + "caption":"hello" + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/captions_val.json b/tests/assets/coco_dataset/coco/annotations/captions_val.json new file mode 100644 index 000000000000..68ad11857e1f --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/captions_val.json @@ -0,0 +1,46 @@ +{ + "licenses":[ + { + "name":"", 
+ "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":40, + "category_id":0, + "caption":"world" + }, + { + "id":2, + "image_id":40, + "category_id":0, + "caption":"text" + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/image_info_train.json b/tests/assets/coco_dataset/coco/annotations/image_info_train.json new file mode 100644 index 000000000000..7753ea13e0b0 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/image_info_train.json @@ -0,0 +1,35 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/image_info_val.json b/tests/assets/coco_dataset/coco/annotations/image_info_val.json new file mode 100644 index 000000000000..f854a6f760e2 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/image_info_val.json @@ -0,0 +1,35 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + + ] + } \ No newline at end of file diff --git 
a/tests/assets/coco_dataset/coco/annotations/instances_train.json b/tests/assets/coco_dataset/coco/annotations/instances_train.json new file mode 100644 index 000000000000..f2fe3e9ee189 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/instances_train.json @@ -0,0 +1,64 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + }, + { + "id":3, + "name":"c", + "supercategory":"" + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":2, + "segmentation":[ + + ], + "area":3.0, + "bbox":[ + 2.0, + 2.0, + 3.0, + 1.0 + ], + "iscrowd":0 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/instances_val.json b/tests/assets/coco_dataset/coco/annotations/instances_val.json new file mode 100644 index 000000000000..3b9bd790e785 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/instances_val.json @@ -0,0 +1,101 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + }, + { + "id":3, + "name":"c", + "supercategory":"" + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":40, + "category_id":1, + "segmentation":[ + [ + 0.0, + 0.0, + 1.0, + 0.0, + 1.0, + 2.0, + 0.0, + 2.0 + ] + ], + "area":2.0, 
+ "bbox":[ + 0.0, + 0.0, + 1.0, + 2.0 + ], + "iscrowd":0, + "attributes":{ + "x":1, + "y":"hello" + } + }, + { + "id":2, + "image_id":40, + "category_id":2, + "segmentation":{ + "counts":[ + 0, + 20, + 30 + ], + "size":[ + 10, + 5 + ] + }, + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/labels_train.json b/tests/assets/coco_dataset/coco/annotations/labels_train.json new file mode 100644 index 000000000000..fc9ce7e6a899 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/labels_train.json @@ -0,0 +1,48 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":2 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/labels_val.json b/tests/assets/coco_dataset/coco/annotations/labels_val.json new file mode 100644 index 000000000000..01f120083846 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/labels_val.json @@ -0,0 +1,53 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + 
"id":1, + "image_id":40, + "category_id":1 + }, + { + "id":2, + "image_id":40, + "category_id":2 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/panoptic_train.json b/tests/assets/coco_dataset/coco/annotations/panoptic_train.json new file mode 100644 index 000000000000..4225c3453a89 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/panoptic_train.json @@ -0,0 +1,63 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "isthing":0 + }, + { + "id":2, + "name":"b", + "supercategory":"", + "isthing":0 + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "image_id":5, + "file_name":"a.png", + "segments_info":[ + { + "id":7, + "category_id":1, + "area":20.0, + "bbox":[ + 2.0, + 0.0, + 4.0, + 4.0 + ], + "iscrowd":0 + } + ] + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/panoptic_train/a.png b/tests/assets/coco_dataset/coco/annotations/panoptic_train/a.png new file mode 100644 index 000000000000..4de88c099119 Binary files /dev/null and b/tests/assets/coco_dataset/coco/annotations/panoptic_train/a.png differ diff --git a/tests/assets/coco_dataset/coco/annotations/panoptic_val.json b/tests/assets/coco_dataset/coco/annotations/panoptic_val.json new file mode 100644 index 000000000000..5d44f745a8a9 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/panoptic_val.json @@ -0,0 +1,75 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", 
+ "isthing":0 + }, + { + "id":2, + "name":"b", + "supercategory":"", + "isthing":0 + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "image_id":40, + "file_name":"b.png", + "segments_info":[ + { + "id":7, + "category_id":1, + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":0 + }, + { + "id":20, + "category_id":2, + "area":20.0, + "bbox":[ + 2.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1 + } + ] + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/panoptic_val/b.png b/tests/assets/coco_dataset/coco/annotations/panoptic_val/b.png new file mode 100644 index 000000000000..05b45f1c9d84 Binary files /dev/null and b/tests/assets/coco_dataset/coco/annotations/panoptic_val/b.png differ diff --git a/tests/assets/coco_dataset/coco/annotations/person_keypoints_train.json b/tests/assets/coco_dataset/coco/annotations/person_keypoints_train.json new file mode 100644 index 000000000000..7a040f99ff27 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/person_keypoints_train.json @@ -0,0 +1,97 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] + ] + }, + { + "id":2, + "name":"b", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] + ] + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":2, + "segmentation":[ + + ], + "area":3.0, + "bbox":[ + 2.0, + 2.0, + 3.0, + 1.0 + ], + "iscrowd":0, + 
"keypoints":[ + 0, + 0, + 0, + 0, + 2, + 1, + 4, + 1, + 2 + ], + "num_keypoints":2 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/person_keypoints_val.json b/tests/assets/coco_dataset/coco/annotations/person_keypoints_val.json new file mode 100644 index 000000000000..5a79259a4870 --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/person_keypoints_val.json @@ -0,0 +1,146 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] + ] + }, + { + "id":2, + "name":"b", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] + ] + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":40, + "category_id":1, + "segmentation":[ + [ + 0.0, + 0.0, + 1.0, + 0.0, + 1.0, + 2.0, + 0.0, + 2.0 + ] + ], + "area":2.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 2.0 + ], + "iscrowd":0, + "attributes":{ + "x":1, + "y":"hello" + }, + "keypoints":[ + 1, + 2, + 2, + 3, + 4, + 2, + 2, + 3, + 2 + ], + "num_keypoints":3 + }, + { + "id":2, + "image_id":40, + "category_id":2, + "segmentation":{ + "counts":[ + 0, + 20, + 30 + ], + "size":[ + 10, + 5 + ] + }, + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1, + "keypoints":[ + 2, + 4, + 2, + 4, + 4, + 2, + 4, + 2, + 2 + ], + "num_keypoints":3 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/stuff_train.json b/tests/assets/coco_dataset/coco/annotations/stuff_train.json new file mode 100644 index 000000000000..33c10c8fe851 --- /dev/null +++ 
b/tests/assets/coco_dataset/coco/annotations/stuff_train.json @@ -0,0 +1,69 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":7, + "image_id":5, + "category_id":1, + "segmentation":{ + "counts":[ + 10, + 10, + 5, + 10, + 15 + ], + "size":[ + 5, + 10 + ] + }, + "area":20.0, + "bbox":[ + 2.0, + 0.0, + 4.0, + 4.0 + ], + "iscrowd":1 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco/annotations/stuff_val.json b/tests/assets/coco_dataset/coco/annotations/stuff_val.json new file mode 100644 index 000000000000..10f309063e1d --- /dev/null +++ b/tests/assets/coco_dataset/coco/annotations/stuff_val.json @@ -0,0 +1,67 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":2, + "image_id":40, + "category_id":2, + "segmentation":{ + "counts":[ + 0, + 20, + 30 + ], + "size":[ + 10, + 5 + ] + }, + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_panoptic/images/val/000000000001.jpg b/tests/assets/coco_dataset/coco/images/train/a.jpg similarity index 89% rename 
from tests/assets/coco_dataset/coco_panoptic/images/val/000000000001.jpg rename to tests/assets/coco_dataset/coco/images/train/a.jpg index a082a80324c3..222682d80bf9 100644 Binary files a/tests/assets/coco_dataset/coco_panoptic/images/val/000000000001.jpg and b/tests/assets/coco_dataset/coco/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_instances/images/val/000000000001.jpg b/tests/assets/coco_dataset/coco/images/val/b.jpg similarity index 100% rename from tests/assets/coco_dataset/coco_instances/images/val/000000000001.jpg rename to tests/assets/coco_dataset/coco/images/val/b.jpg diff --git a/tests/assets/coco_dataset/coco_captions/annotations/captions_train.json b/tests/assets/coco_dataset/coco_captions/annotations/captions_train.json index e360262e0c2b..a568285a5517 100644 --- a/tests/assets/coco_dataset/coco_captions/annotations/captions_train.json +++ b/tests/assets/coco_dataset/coco_captions/annotations/captions_train.json @@ -1,54 +1,40 @@ { - "licenses": [{ - "name": "", - "id": 0, - "url": "" - }], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - "categories": [], - "images": [{ - "id": 1, - "width": 0, - "height": 0, - "file_name": "1.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 - }, { - "id": 2, - "width": 0, - "height": 0, - "file_name": "2.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 - }], - "annotations": [{ - "id": 1, - "image_id": 1, - "category_id": 0, - "caption": "hello", - "attributes": {} - }, { - "id": 2, - "image_id": 1, - "category_id": 0, - "caption": "world", - "attributes": {} - }, { - "id": 3, - "image_id": 2, - "category_id": 0, - "caption": "test", - "attributes": {} - }] -} \ No newline at end of 
file + "categories":[ + + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":0, + "caption":"hello" + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_captions/annotations/captions_val.json b/tests/assets/coco_dataset/coco_captions/annotations/captions_val.json index 47d071a57daf..68ad11857e1f 100644 --- a/tests/assets/coco_dataset/coco_captions/annotations/captions_val.json +++ b/tests/assets/coco_dataset/coco_captions/annotations/captions_val.json @@ -1,33 +1,46 @@ { - "licenses": [{ - "name": "", - "id": 0, - "url": "" - }], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - "categories": [], - "images": [{ - "id": 1, - "width": 0, - "height": 0, - "file_name": "3.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 - }], - "annotations": [{ - "id": 1, - "image_id": 1, - "category_id": 0, - "caption": "word", - "attributes": {} - }] -} \ No newline at end of file + "categories":[ + + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":40, + "category_id":0, + "caption":"world" + }, + { + "id":2, + "image_id":40, + "category_id":0, + "caption":"text" + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_captions/images/train/a.jpg b/tests/assets/coco_dataset/coco_captions/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and 
b/tests/assets/coco_dataset/coco_captions/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_stuff/images/val/000000000001.jpg b/tests/assets/coco_dataset/coco_captions/images/val/b.jpg similarity index 100% rename from tests/assets/coco_dataset/coco_stuff/images/val/000000000001.jpg rename to tests/assets/coco_dataset/coco_captions/images/val/b.jpg diff --git a/tests/assets/coco_dataset/coco_image_info/annotations/image_info_default.json b/tests/assets/coco_dataset/coco_image_info/annotations/image_info_default.json deleted file mode 100644 index f2fc85a73f12..000000000000 --- a/tests/assets/coco_dataset/coco_image_info/annotations/image_info_default.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "licenses": [{ - "name": "", - "id": 0, - "url": "" - }], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" - }, - "categories": [], - "images": [{ - "id": 1, - "width": 15, - "height": 10, - "file_name": "1.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 - }], - "annotations": [] -} \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_image_info/annotations/image_info_train.json b/tests/assets/coco_dataset/coco_image_info/annotations/image_info_train.json new file mode 100644 index 000000000000..7753ea13e0b0 --- /dev/null +++ b/tests/assets/coco_dataset/coco_image_info/annotations/image_info_train.json @@ -0,0 +1,35 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_image_info/annotations/image_info_val.json 
b/tests/assets/coco_dataset/coco_image_info/annotations/image_info_val.json new file mode 100644 index 000000000000..f854a6f760e2 --- /dev/null +++ b/tests/assets/coco_dataset/coco_image_info/annotations/image_info_val.json @@ -0,0 +1,35 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_image_info/images/train/a.jpg b/tests/assets/coco_dataset/coco_image_info/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and b/tests/assets/coco_dataset/coco_image_info/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_image_info/images/val/b.jpg b/tests/assets/coco_dataset/coco_image_info/images/val/b.jpg new file mode 100644 index 000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/coco_dataset/coco_image_info/images/val/b.jpg differ diff --git a/tests/assets/coco_dataset/coco_instances/annotations/instances_train.json b/tests/assets/coco_dataset/coco_instances/annotations/instances_train.json new file mode 100644 index 000000000000..f2fe3e9ee189 --- /dev/null +++ b/tests/assets/coco_dataset/coco_instances/annotations/instances_train.json @@ -0,0 +1,64 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + }, + { + "id":3, + "name":"c", + "supercategory":"" + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + 
"file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":2, + "segmentation":[ + + ], + "area":3.0, + "bbox":[ + 2.0, + 2.0, + 3.0, + 1.0 + ], + "iscrowd":0 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_instances/annotations/instances_val.json b/tests/assets/coco_dataset/coco_instances/annotations/instances_val.json index 74de288d8ed0..3b9bd790e785 100644 --- a/tests/assets/coco_dataset/coco_instances/annotations/instances_val.json +++ b/tests/assets/coco_dataset/coco_instances/annotations/instances_val.json @@ -1,62 +1,101 @@ { - "licenses": [ + "licenses":[ { - "name": "", - "id": 0, - "url": "" + "name":"", + "id":0, + "url":"" } ], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - "categories": [ + "categories":[ { - "id": 1, - "name": "TEST", - "supercategory": "" + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + }, + { + "id":3, + "name":"c", + "supercategory":"" } ], - "images": [ + "images":[ { - "id": 1, - "width": 5, - "height": 10, - "file_name": "000000000001.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 } ], - "annotations": [ + "annotations":[ { - "id": 1, - "image_id": 1, - "category_id": 1, - "segmentation": [[0, 0, 1, 0, 1, 2, 0, 2]], - "area": 2, - "bbox": [0, 0, 1, 2], - "iscrowd": 0, - "attributes": { - "x": 1, "y": "hello" + "id":1, + "image_id":40, + "category_id":1, + "segmentation":[ + [ + 0.0, + 0.0, + 1.0, + 0.0, + 1.0, + 2.0, + 0.0, + 2.0 + ] + ], + "area":2.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 2.0 
+ ], + "iscrowd":0, + "attributes":{ + "x":1, + "y":"hello" } }, { - "id": 2, - "image_id": 1, - "category_id": 1, - "segmentation": { - "counts": [0, 10, 5, 5, 5, 5, 0, 10, 10, 0], - "size": [10, 5] + "id":2, + "image_id":40, + "category_id":2, + "segmentation":{ + "counts":[ + 0, + 20, + 30 + ], + "size":[ + 10, + 5 + ] }, - "area": 30, - "bbox": [0, 0, 10, 4], - "iscrowd": 1 + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1 } ] - } + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_instances/images/train/a.jpg b/tests/assets/coco_dataset/coco_instances/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and b/tests/assets/coco_dataset/coco_instances/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_instances/images/val/b.jpg b/tests/assets/coco_dataset/coco_instances/images/val/b.jpg new file mode 100644 index 000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/coco_dataset/coco_instances/images/val/b.jpg differ diff --git a/tests/assets/coco_dataset/coco_labels/annotations/labels_train.json b/tests/assets/coco_dataset/coco_labels/annotations/labels_train.json index 1f790645c3fa..fc9ce7e6a899 100644 --- a/tests/assets/coco_dataset/coco_labels/annotations/labels_train.json +++ b/tests/assets/coco_dataset/coco_labels/annotations/labels_train.json @@ -1,44 +1,48 @@ { - "licenses": [{ - "name": "", - "id": 0, - "url": "" - }], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - "categories": [{ - "id": 1, - "name": "a", - "supercategory": "" - }, { - "id": 2, - "name": "b", - "supercategory": "" - }], - "images": [{ - "id": 1, - "width": 0, - "height": 0, - "file_name": "1.jpg", - "license": 0, - 
"flickr_url": "", - "coco_url": "", - "date_captured": 0 - }], - "annotations": [{ - "id": 1, - "image_id": 1, - "category_id": 2 - }, { - "id": 2, - "image_id": 1, - "category_id": 1, - "attributes": {} - }] -} \ No newline at end of file + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":2 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_labels/annotations/labels_val.json b/tests/assets/coco_dataset/coco_labels/annotations/labels_val.json new file mode 100644 index 000000000000..01f120083846 --- /dev/null +++ b/tests/assets/coco_dataset/coco_labels/annotations/labels_val.json @@ -0,0 +1,53 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":40, + "category_id":1 + }, + { + "id":2, + "image_id":40, + "category_id":2 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_labels/images/train/a.jpg b/tests/assets/coco_dataset/coco_labels/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and b/tests/assets/coco_dataset/coco_labels/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_labels/images/val/b.jpg b/tests/assets/coco_dataset/coco_labels/images/val/b.jpg new file mode 100644 index 
000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/coco_dataset/coco_labels/images/val/b.jpg differ diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_train.json b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_train.json new file mode 100644 index 000000000000..9aa1b35cba7e --- /dev/null +++ b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_train.json @@ -0,0 +1,63 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "isthing":0 + }, + { + "id":2, + "name":"b", + "supercategory":"", + "isthing":0 + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "image_id":5, + "file_name":"a.png", + "segments_info":[ + { + "id":460551, + "category_id":1, + "area":20.0, + "bbox":[ + 2.0, + 0.0, + 4.0, + 4.0 + ], + "iscrowd":0 + } + ] + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_train/a.png b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_train/a.png new file mode 100644 index 000000000000..1692eab8340d Binary files /dev/null and b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_train/a.png differ diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json index c945de7ca5ff..5d44f745a8a9 100644 --- a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json +++ b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val.json @@ -1,75 +1,75 @@ { - "licenses": [{ - "name": "", - "id": 0, - "url": "" - }], - "info": { - "contributor": "", - "date_created": "", - 
"description": "", - "url": "", - "version": "", - "year": "" - }, - "categories": [ - { - "id": 1, - "name": "a", - "supercategory": "", - "isthing": 1 + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - { - "id": 2, - "name": "b", - "supercategory": "", - "isthing": 1 - }, - { - "id": 3, - "name": "c", - "supercategory": "", - "isthing": 1 - }, - { - "id": 4, - "name": "d", - "supercategory": "", - "isthing": 1 - } - ], - "images": [ - { - "id": 40, - "width": 5, - "height": 1, - "file_name": "000000000001.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 - } - ], - "annotations": [ - { - "image_id": 40, - "file_name": "000000000001.png", - "segments_info": [ - { - "id": 7, - "category_id": 4, - "area": 2.0, - "bbox": [2.0, 0.0, 1.0, 0.0], - "iscrowd": 0 - }, - { - "id": 20, - "category_id": 2, - "area": 2.0, - "bbox": [1.0, 0.0, 3.0, 0.0], - "iscrowd": 1 - } - ] - } - ] -} \ No newline at end of file + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "isthing":0 + }, + { + "id":2, + "name":"b", + "supercategory":"", + "isthing":0 + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "image_id":40, + "file_name":"b.png", + "segments_info":[ + { + "id":7, + "category_id":1, + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":0 + }, + { + "id":20, + "category_id":2, + "area":20.0, + "bbox":[ + 2.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1 + } + ] + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/000000000001.png b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/000000000001.png deleted file mode 100644 index e471bfed4162..000000000000 Binary files 
a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/000000000001.png and /dev/null differ diff --git a/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/b.png b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/b.png new file mode 100644 index 000000000000..05b45f1c9d84 Binary files /dev/null and b/tests/assets/coco_dataset/coco_panoptic/annotations/panoptic_val/b.png differ diff --git a/tests/assets/coco_dataset/coco_panoptic/images/train/a.jpg b/tests/assets/coco_dataset/coco_panoptic/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and b/tests/assets/coco_dataset/coco_panoptic/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_panoptic/images/val/b.jpg b/tests/assets/coco_dataset/coco_panoptic/images/val/b.jpg new file mode 100644 index 000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/coco_dataset/coco_panoptic/images/val/b.jpg differ diff --git a/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_train.json b/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_train.json index e5c2238d17eb..7a040f99ff27 100644 --- a/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_train.json +++ b/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_train.json @@ -1,87 +1,97 @@ { - "licenses": [{ - "name": "", - "id": 0, - "url": "" - }], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - "categories": [{ - "id": 1, - "name": "a", - "supercategory": "", - "keypoints": [], - "skeleton": [ - [0, 1], - [1, 2] + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "keypoints":[ + + ], + 
"skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] ] - }, { - "id": 2, - "name": "b", - "supercategory": "", - "keypoints": [], - "skeleton": [ - [0, 1], - [1, 2] + }, + { + "id":2, + "name":"b", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] ] - }], - "images": [{ - "id": 1, - "width": 5, - "height": 5, - "file_name": "1.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 - }], - "annotations": [{ - "id": 3, - "image_id": 1, - "category_id": 1, - "segmentation": [], - "area": 4.0, - "bbox": [0.0, 1.0, 4.0, 1.0], - "iscrowd": 0, - "keypoints": [1, 2, 2, 0, 2, 2, 4, 1, 2], - "num_keypoints": 3 - }, { - "id": 5, - "image_id": 1, - "category_id": 0, - "segmentation": [], - "area": 4.0, - "bbox": [1.0, 2.0, 2.0, 2.0], - "iscrowd": 0, - "keypoints": [0, 0, 0, 1, 2, 1, 3, 4, 2], - "num_keypoints": 2 - }, { - "id": 1, - "image_id": 1, - "category_id": 2, - "segmentation": [ - [0.0, 0.0, 4.0, 0.0, 4.0, 4.0] + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":5, + "category_id":2, + "segmentation":[ + + ], + "area":3.0, + "bbox":[ + 2.0, + 2.0, + 3.0, + 1.0 + ], + "iscrowd":0, + "keypoints":[ + 0, + 0, + 0, + 0, + 2, + 1, + 4, + 1, + 2 ], - "area": 6.0, - "bbox": [0.0, 0.0, 4.0, 4.0], - "iscrowd": 0, - "keypoints": [0, 0, 0, 0, 2, 1, 4, 1, 2], - "num_keypoints": 2 - }, { - "id": 2, - "image_id": 1, - "category_id": 0, - "segmentation": [], - "area": 4.0, - "bbox": [1.0, 2.0, 2.0, 2.0], - "iscrowd": 0, - "keypoints": [1, 2, 2, 3, 4, 2, 2, 3, 2], - "num_keypoints": 3 - }] -} \ No newline at end of file + "num_keypoints":2 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_val.json b/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_val.json 
new file mode 100644 index 000000000000..5a79259a4870 --- /dev/null +++ b/tests/assets/coco_dataset/coco_person_keypoints/annotations/person_keypoints_val.json @@ -0,0 +1,146 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] + ] + }, + { + "id":2, + "name":"b", + "supercategory":"", + "keypoints":[ + + ], + "skeleton":[ + [ + 0, + 1 + ], + [ + 1, + 2 + ] + ] + } + ], + "images":[ + { + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":1, + "image_id":40, + "category_id":1, + "segmentation":[ + [ + 0.0, + 0.0, + 1.0, + 0.0, + 1.0, + 2.0, + 0.0, + 2.0 + ] + ], + "area":2.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 2.0 + ], + "iscrowd":0, + "attributes":{ + "x":1, + "y":"hello" + }, + "keypoints":[ + 1, + 2, + 2, + 3, + 4, + 2, + 2, + 3, + 2 + ], + "num_keypoints":3 + }, + { + "id":2, + "image_id":40, + "category_id":2, + "segmentation":{ + "counts":[ + 0, + 20, + 30 + ], + "size":[ + 10, + 5 + ] + }, + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1, + "keypoints":[ + 2, + 4, + 2, + 4, + 4, + 2, + 4, + 2, + 2 + ], + "num_keypoints":3 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_person_keypoints/images/train/a.jpg b/tests/assets/coco_dataset/coco_person_keypoints/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and b/tests/assets/coco_dataset/coco_person_keypoints/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_person_keypoints/images/val/b.jpg b/tests/assets/coco_dataset/coco_person_keypoints/images/val/b.jpg new file mode 100644 index 
000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/coco_dataset/coco_person_keypoints/images/val/b.jpg differ diff --git a/tests/assets/coco_dataset/coco_stuff/annotations/stuff_train.json b/tests/assets/coco_dataset/coco_stuff/annotations/stuff_train.json new file mode 100644 index 000000000000..33c10c8fe851 --- /dev/null +++ b/tests/assets/coco_dataset/coco_stuff/annotations/stuff_train.json @@ -0,0 +1,69 @@ +{ + "licenses":[ + { + "name":"", + "id":0, + "url":"" + } + ], + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" + }, + "categories":[ + { + "id":1, + "name":"a", + "supercategory":"" + }, + { + "id":2, + "name":"b", + "supercategory":"" + } + ], + "images":[ + { + "id":5, + "width":10, + "height":5, + "file_name":"a.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 + } + ], + "annotations":[ + { + "id":7, + "image_id":5, + "category_id":1, + "segmentation":{ + "counts":[ + 10, + 10, + 5, + 10, + 15 + ], + "size":[ + 5, + 10 + ] + }, + "area":20.0, + "bbox":[ + 2.0, + 0.0, + 4.0, + 4.0 + ], + "iscrowd":1 + } + ] + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json b/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json index 51a654f9d11c..10f309063e1d 100644 --- a/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json +++ b/tests/assets/coco_dataset/coco_stuff/annotations/stuff_val.json @@ -1,50 +1,67 @@ { - "licenses": [ + "licenses":[ { - "name": "", - "id": 0, - "url": "" + "name":"", + "id":0, + "url":"" } ], - "info": { - "contributor": "", - "date_created": "", - "description": "", - "url": "", - "version": "", - "year": "" + "info":{ + "contributor":"", + "date_created":"", + "description":"", + "url":"", + "version":"", + "year":"" }, - "categories": [ + "categories":[ { - "id": 1, - "name": "TEST", - "supercategory": "" + "id":1, + "name":"a", + "supercategory":"" + }, 
+ { + "id":2, + "name":"b", + "supercategory":"" } ], - "images": [ + "images":[ { - "id": 1, - "width": 5, - "height": 10, - "file_name": "000000000001.jpg", - "license": 0, - "flickr_url": "", - "coco_url": "", - "date_captured": 0 + "id":40, + "width":5, + "height":10, + "file_name":"b.jpg", + "license":0, + "flickr_url":"", + "coco_url":"", + "date_captured":0 } ], - "annotations": [ + "annotations":[ { - "id": 2, - "image_id": 1, - "category_id": 1, - "segmentation": { - "counts": [0, 10, 5, 5, 5, 5, 0, 10, 10, 0], - "size": [10, 5] + "id":2, + "image_id":40, + "category_id":2, + "segmentation":{ + "counts":[ + 0, + 20, + 30 + ], + "size":[ + 10, + 5 + ] }, - "area": 30, - "bbox": [0, 0, 10, 4], - "iscrowd": 0 + "area":20.0, + "bbox":[ + 0.0, + 0.0, + 1.0, + 9.0 + ], + "iscrowd":1 } ] - } + } \ No newline at end of file diff --git a/tests/assets/coco_dataset/coco_stuff/images/train/a.jpg b/tests/assets/coco_dataset/coco_stuff/images/train/a.jpg new file mode 100644 index 000000000000..222682d80bf9 Binary files /dev/null and b/tests/assets/coco_dataset/coco_stuff/images/train/a.jpg differ diff --git a/tests/assets/coco_dataset/coco_stuff/images/val/b.jpg b/tests/assets/coco_dataset/coco_stuff/images/val/b.jpg new file mode 100644 index 000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/coco_dataset/coco_stuff/images/val/b.jpg differ diff --git a/tests/assets/image_zip_dataset/1.zip b/tests/assets/image_zip_dataset/1.zip new file mode 100644 index 000000000000..0db4b2b066fb Binary files /dev/null and b/tests/assets/image_zip_dataset/1.zip differ diff --git a/tests/assets/image_zip_dataset/2.zip b/tests/assets/image_zip_dataset/2.zip new file mode 100644 index 000000000000..832ffa55b172 Binary files /dev/null and b/tests/assets/image_zip_dataset/2.zip differ diff --git a/tests/assets/imagenet_txt_dataset/train.txt b/tests/assets/imagenet_txt_dataset/train.txt index 624d1113460f..e7b972634fd8 100644 --- 
a/tests/assets/imagenet_txt_dataset/train.txt +++ b/tests/assets/imagenet_txt_dataset/train.txt @@ -1,4 +1,4 @@ -1 0 -2 5 -3 3 -4 5 \ No newline at end of file +1.jpg 0 +2.jpg 5 +3.jpg 3 +4.jpg 5 diff --git a/tests/assets/kitti_dataset/kitti_detection/training/image_2/000030_10.png b/tests/assets/kitti_dataset/kitti_detection/training/image_2/000030_10.png new file mode 100644 index 000000000000..e6f3cff877de Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_detection/training/image_2/000030_10.png differ diff --git a/tests/assets/kitti_dataset/kitti_detection/training/image_2/000030_11.png b/tests/assets/kitti_dataset/kitti_detection/training/image_2/000030_11.png new file mode 100644 index 000000000000..e6f3cff877de Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_detection/training/image_2/000030_11.png differ diff --git a/tests/assets/kitti_dataset/kitti_detection/training/label_2/000030_10.txt b/tests/assets/kitti_dataset/kitti_detection/training/label_2/000030_10.txt new file mode 100644 index 000000000000..5eb7d27d8fc7 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_detection/training/label_2/000030_10.txt @@ -0,0 +1,2 @@ +Truck 1.0 0 -1 0 1 2 3 -1 -1 -1 -1 -1 -1 -1 +Van 0.0 0 -1 0 5 1 8 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/assets/kitti_dataset/kitti_detection/training/label_2/000030_11.txt b/tests/assets/kitti_dataset/kitti_detection/training/label_2/000030_11.txt new file mode 100644 index 000000000000..f34013fa3d21 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_detection/training/label_2/000030_11.txt @@ -0,0 +1,3 @@ +Van 1.0 1 -1 0 0 2 2 -1 -1 -1 -1 -1 -1 -1 +Van 0.0 0 -1 4 4 6 6 -1 -1 -1 -1 -1 -1 -1 +Van 0.0 1 -1 6 6 7 9 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000000.png b/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000000.png new file mode 100644 index 000000000000..7e7271738e86 Binary files /dev/null and 
b/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000000.png differ diff --git a/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000001.png b/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000001.png new file mode 100644 index 000000000000..7e7271738e86 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000001.png differ diff --git a/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000002.png b/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000002.png new file mode 100644 index 000000000000..4634ee1cf09a Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_raw/IMAGE_00/data/0000000002.png differ diff --git a/tests/assets/kitti_dataset/kitti_raw/tracklet_labels.xml b/tests/assets/kitti_dataset/kitti_raw/tracklet_labels.xml new file mode 100644 index 000000000000..afed1ebaadd6 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_raw/tracklet_labels.xml @@ -0,0 +1,112 @@ + + + + + 4 + 1 + + car + 7.95 + -3.62 + -1.03 + 0 + + 1 + 2 + + 1.0 + 2.0 + 3.0 + 0.0 + 0.0 + 0.0 + 2 + -1 + 0 + -1 + -1 + -1 + -1 + -1 + -1 + + + 1 + + + bus + 8.34 + 23.01 + -0.76 + 0 + + 2 + 3 + + 1.0 + 1.0 + 0.0 + 0.0 + 0.0 + 0.0 + 2 + -1 + 0 + -1 + -1 + -1 + -1 + -1 + -1 + + + 0.0 + 1.0 + 0.0 + 1.0 + 1.0 + 3.0 + 2 + 1 + 0 + -1 + -1 + -1 + -1 + -1 + -1 + + + 1 + + + car + -9.41 + 13.54 + 0.24 + 2 + + 1 + 5 + + 1.0 + 2.0 + 3.0 + 0.0 + 0.0 + 0.0 + 2 + -1 + 0 + -1 + -1 + -1 + -1 + -1 + -1 + + + 1 + + + \ No newline at end of file diff --git a/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000000.pcd b/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000000.pcd new file mode 100644 index 000000000000..dca7790513e0 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000000.pcd @@ -0,0 +1,111 @@ +# .PCD v0.7 - Point Cloud Data file format +VERSION 0.7 +FIELDS x y z +SIZE 4 4 4 +TYPE F F F +COUNT 1 1 1 +WIDTH 100 +HEIGHT 1 +VIEWPOINT 0 0 0 1 0 0 0 +POINTS 100 +DATA 
ascii +0.5791356657 0.4114198749 0.7830189931 +0.08433272236 0.08169951458 0.5180490411 +0.2666649493 0.7924615988 0.2992874419 +0.2509502762 0.007282780613 0.9179724786 +0.0009519548103 0.5188591285 0.7461032855 +0.3341777395 0.7447405092 0.8507401902 +0.7737981441 0.8829159846 0.09243559982 +0.1151518369 0.1081754904 0.3861891806 +0.5522852638 0.4644139805 0.9158114954 +0.262019557 0.799949328 0.8619102084 +0.7817190286 0.3586988399 0.9762494608 +0.9533059971 0.3382646669 0.3340770417 +0.0494714741 0.03615227093 0.8568413846 +0.5430795611 0.5657840884 0.6231871341 +0.4526048699 0.3327443159 0.2934205997 +0.7491301442 0.6164146172 0.5041072455 +0.7411897262 0.9413747728 0.8853690873 +0.9546086163 0.8780641512 0.9457557961 +0.522717644 0.2523471196 0.7768039063 +0.09745947412 0.9427579717 0.1866185785 +0.8838772197 0.05067927336 0.3026229952 +0.276616514 0.3074261016 0.3658778032 +0.3857137087 0.3437450623 0.5808424098 +0.1504121976 0.7565417305 0.1940801831 +0.04206309677 0.5601714351 0.4928550054 +0.8959001711 0.7637477404 0.1656459291 +0.7559768424 0.08387528166 0.43365922 +0.1263603805 0.075636126 0.4639358595 +0.1611063239 0.5822180034 0.6677265751 +0.2382706459 0.7488458973 0.7196503691 +0.2787486381 0.714237112 0.5095637026 +0.07576261419 0.8975986259 0.4009473249 +0.1493201826 0.3648264844 0.4162678288 +0.3424474069 0.343425388 0.8997863722 +0.3584413167 0.7608157605 0.02584696926 +0.8259046259 0.6827611696 0.1364630583 +0.5417442348 0.2665643659 0.3894770748 +0.5693883343 0.1702286099 0.5118786166 +0.4586303573 0.4419680205 0.8718587973 +0.2555126271 0.8234680873 0.2937022288 +0.9152112333 0.8613987559 0.8785694019 +0.8139096568 0.03429760443 0.9530865347 +0.5076202239 0.8322589488 0.7961599799 +0.3527430534 0.7680710878 0.1901746714 +0.7410094529 0.5911577662 0.3328401293 +0.9708675231 0.8183511845 0.6995216067 +0.75449086 0.852550303 0.5772490648 +0.3603820772 0.09753456254 0.7285356732 +0.8426145021 0.1952573918 0.8855362182 +0.6102935227 0.9111583781 
0.4135836035 +0.2668344631 0.9490736905 0.02373416445 +0.1672606785 0.9949630093 0.4559031019 +0.8567499065 0.097355228 0.4352994641 +0.3035360294 0.6682905699 0.9484998586 +0.8089234504 0.8366173575 0.8503950069 +0.6155459006 0.1000898629 0.399354125 +0.5392563896 0.5547267467 0.7073454857 +0.1256843851 0.9060443549 0.5367478021 +0.4528039557 0.8630172674 0.5704376168 +5.760227847e-05 0.8224073727 0.831449511 +0.1796916377 0.1734055202 0.7948878217 +0.810883435 0.3504472342 0.5144820089 +0.4906365019 0.794028057 0.355677471 +0.5212473715 0.5706615529 0.2866615709 +0.3698783992 0.4972479285 0.3559716539 +0.3664268911 0.1538402824 0.885025765 +0.7758925012 0.1232736316 0.5100987213 +0.90661068 0.4825559595 0.5327944691 +0.3090078216 0.1232007303 0.9469650131 +0.8596991207 0.4695369551 0.7954928839 +0.7400730096 0.7135225924 0.2317399864 +0.778502767 0.3236370572 0.5596816689 +0.7679065915 0.3203264538 0.2953326152 +0.1991905042 0.01979999711 0.4024950469 +0.5651530305 0.006185065383 0.1037946953 +0.1213076145 0.6169948617 0.7118041952 +0.255929024 0.6806643078 0.9724859382 +0.2868946845 0.459908593 0.5664612242 +0.8664995968 0.1661929971 0.8145931169 +0.3919819451 0.9655103479 0.1762528486 +0.01953937346 0.7905914014 0.4506287472 +0.9032500112 0.02406993994 0.6870933919 +0.6152027148 0.7198489321 0.9014066366 +0.5966142981 0.6270936512 0.9856644628 +0.6653253993 0.2587640894 0.3100885862 +0.5300796677 0.5051933038 0.984268778 +0.1756002886 0.6819248687 0.06743131528 +0.179217615 0.1716973763 0.6952441806 +0.6383691627 0.9291349941 0.8980673717 +0.7222629086 0.1277282925 0.457114115 +0.4357276007 0.3084016413 0.459899917 +0.1053334612 0.8258583608 0.5701472456 +0.05711539535 0.03161173035 0.5881848971 +0.7015011012 0.3480210232 0.3311890844 +0.4008349469 0.06827356637 0.7021136691 +0.1573765336 0.5639430863 0.9812992363 +0.5943477904 0.8583753679 0.7757091751 +0.937860795 0.4849899101 0.2587898971 +0.3640163008 0.3619259288 0.2099822029 +0.1939276021 0.6007837647 
0.4454387266 diff --git a/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000001.pcd b/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000001.pcd new file mode 100644 index 000000000000..2f9779c546c5 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000001.pcd differ diff --git a/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000002.pcd b/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000002.pcd new file mode 100644 index 000000000000..dca7790513e0 --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_raw/velodyne_points/data/0000000002.pcd @@ -0,0 +1,111 @@ +# .PCD v0.7 - Point Cloud Data file format +VERSION 0.7 +FIELDS x y z +SIZE 4 4 4 +TYPE F F F +COUNT 1 1 1 +WIDTH 100 +HEIGHT 1 +VIEWPOINT 0 0 0 1 0 0 0 +POINTS 100 +DATA ascii +0.5791356657 0.4114198749 0.7830189931 +0.08433272236 0.08169951458 0.5180490411 +0.2666649493 0.7924615988 0.2992874419 +0.2509502762 0.007282780613 0.9179724786 +0.0009519548103 0.5188591285 0.7461032855 +0.3341777395 0.7447405092 0.8507401902 +0.7737981441 0.8829159846 0.09243559982 +0.1151518369 0.1081754904 0.3861891806 +0.5522852638 0.4644139805 0.9158114954 +0.262019557 0.799949328 0.8619102084 +0.7817190286 0.3586988399 0.9762494608 +0.9533059971 0.3382646669 0.3340770417 +0.0494714741 0.03615227093 0.8568413846 +0.5430795611 0.5657840884 0.6231871341 +0.4526048699 0.3327443159 0.2934205997 +0.7491301442 0.6164146172 0.5041072455 +0.7411897262 0.9413747728 0.8853690873 +0.9546086163 0.8780641512 0.9457557961 +0.522717644 0.2523471196 0.7768039063 +0.09745947412 0.9427579717 0.1866185785 +0.8838772197 0.05067927336 0.3026229952 +0.276616514 0.3074261016 0.3658778032 +0.3857137087 0.3437450623 0.5808424098 +0.1504121976 0.7565417305 0.1940801831 +0.04206309677 0.5601714351 0.4928550054 +0.8959001711 0.7637477404 0.1656459291 +0.7559768424 0.08387528166 0.43365922 +0.1263603805 0.075636126 0.4639358595 +0.1611063239 0.5822180034 
0.6677265751 +0.2382706459 0.7488458973 0.7196503691 +0.2787486381 0.714237112 0.5095637026 +0.07576261419 0.8975986259 0.4009473249 +0.1493201826 0.3648264844 0.4162678288 +0.3424474069 0.343425388 0.8997863722 +0.3584413167 0.7608157605 0.02584696926 +0.8259046259 0.6827611696 0.1364630583 +0.5417442348 0.2665643659 0.3894770748 +0.5693883343 0.1702286099 0.5118786166 +0.4586303573 0.4419680205 0.8718587973 +0.2555126271 0.8234680873 0.2937022288 +0.9152112333 0.8613987559 0.8785694019 +0.8139096568 0.03429760443 0.9530865347 +0.5076202239 0.8322589488 0.7961599799 +0.3527430534 0.7680710878 0.1901746714 +0.7410094529 0.5911577662 0.3328401293 +0.9708675231 0.8183511845 0.6995216067 +0.75449086 0.852550303 0.5772490648 +0.3603820772 0.09753456254 0.7285356732 +0.8426145021 0.1952573918 0.8855362182 +0.6102935227 0.9111583781 0.4135836035 +0.2668344631 0.9490736905 0.02373416445 +0.1672606785 0.9949630093 0.4559031019 +0.8567499065 0.097355228 0.4352994641 +0.3035360294 0.6682905699 0.9484998586 +0.8089234504 0.8366173575 0.8503950069 +0.6155459006 0.1000898629 0.399354125 +0.5392563896 0.5547267467 0.7073454857 +0.1256843851 0.9060443549 0.5367478021 +0.4528039557 0.8630172674 0.5704376168 +5.760227847e-05 0.8224073727 0.831449511 +0.1796916377 0.1734055202 0.7948878217 +0.810883435 0.3504472342 0.5144820089 +0.4906365019 0.794028057 0.355677471 +0.5212473715 0.5706615529 0.2866615709 +0.3698783992 0.4972479285 0.3559716539 +0.3664268911 0.1538402824 0.885025765 +0.7758925012 0.1232736316 0.5100987213 +0.90661068 0.4825559595 0.5327944691 +0.3090078216 0.1232007303 0.9469650131 +0.8596991207 0.4695369551 0.7954928839 +0.7400730096 0.7135225924 0.2317399864 +0.778502767 0.3236370572 0.5596816689 +0.7679065915 0.3203264538 0.2953326152 +0.1991905042 0.01979999711 0.4024950469 +0.5651530305 0.006185065383 0.1037946953 +0.1213076145 0.6169948617 0.7118041952 +0.255929024 0.6806643078 0.9724859382 +0.2868946845 0.459908593 0.5664612242 +0.8664995968 0.1661929971 
0.8145931169 +0.3919819451 0.9655103479 0.1762528486 +0.01953937346 0.7905914014 0.4506287472 +0.9032500112 0.02406993994 0.6870933919 +0.6152027148 0.7198489321 0.9014066366 +0.5966142981 0.6270936512 0.9856644628 +0.6653253993 0.2587640894 0.3100885862 +0.5300796677 0.5051933038 0.984268778 +0.1756002886 0.6819248687 0.06743131528 +0.179217615 0.1716973763 0.6952441806 +0.6383691627 0.9291349941 0.8980673717 +0.7222629086 0.1277282925 0.457114115 +0.4357276007 0.3084016413 0.459899917 +0.1053334612 0.8258583608 0.5701472456 +0.05711539535 0.03161173035 0.5881848971 +0.7015011012 0.3480210232 0.3311890844 +0.4008349469 0.06827356637 0.7021136691 +0.1573765336 0.5639430863 0.9812992363 +0.5943477904 0.8583753679 0.7757091751 +0.937860795 0.4849899101 0.2587898971 +0.3640163008 0.3619259288 0.2099822029 +0.1939276021 0.6007837647 0.4454387266 diff --git a/tests/assets/kitti_dataset/kitti_segmentation/training/image_2/000030_10.png b/tests/assets/kitti_dataset/kitti_segmentation/training/image_2/000030_10.png new file mode 100644 index 000000000000..528f10546704 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_segmentation/training/image_2/000030_10.png differ diff --git a/tests/assets/kitti_dataset/kitti_segmentation/training/image_2/000030_11.png b/tests/assets/kitti_dataset/kitti_segmentation/training/image_2/000030_11.png new file mode 100644 index 000000000000..528f10546704 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_segmentation/training/image_2/000030_11.png differ diff --git a/tests/assets/kitti_dataset/kitti_segmentation/training/instance/000030_10.png b/tests/assets/kitti_dataset/kitti_segmentation/training/instance/000030_10.png new file mode 100644 index 000000000000..e9d94abbd64c Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_segmentation/training/instance/000030_10.png differ diff --git a/tests/assets/kitti_dataset/kitti_segmentation/training/instance/000030_11.png 
b/tests/assets/kitti_dataset/kitti_segmentation/training/instance/000030_11.png new file mode 100644 index 000000000000..80dfa6925033 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_segmentation/training/instance/000030_11.png differ diff --git a/tests/assets/lfw_dataset/test/pairs.txt b/tests/assets/lfw_dataset/test/pairs.txt index 401afe36b3f0..16907857a00c 100644 --- a/tests/assets/lfw_dataset/test/pairs.txt +++ b/tests/assets/lfw_dataset/test/pairs.txt @@ -2,4 +2,3 @@ name1 1 2 name0 1 name1 1 name0 1 name1 2 - \ No newline at end of file diff --git a/tests/assets/open_images_dataset_v5/annotations/class-descriptions.csv b/tests/assets/open_images_dataset_v5/annotations/class-descriptions.csv new file mode 100644 index 000000000000..c6f3141a6e2b --- /dev/null +++ b/tests/assets/open_images_dataset_v5/annotations/class-descriptions.csv @@ -0,0 +1,2 @@ +/m/0,Generic label #0 +/m/1,Generic label #1 diff --git a/tests/assets/open_images_dataset_v5/annotations/test-images-with-rotation.csv b/tests/assets/open_images_dataset_v5/annotations/test-images-with-rotation.csv new file mode 100644 index 000000000000..accbc944afbb --- /dev/null +++ b/tests/assets/open_images_dataset_v5/annotations/test-images-with-rotation.csv @@ -0,0 +1,2 @@ +ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation +cc,test,,,,,Intel,Test Image CC,,,,0 diff --git a/tests/assets/open_images_dataset_v5/annotations/train-images-with-labels-with-rotation.csv b/tests/assets/open_images_dataset_v5/annotations/train-images-with-labels-with-rotation.csv new file mode 100644 index 000000000000..445b9864fdd7 --- /dev/null +++ b/tests/assets/open_images_dataset_v5/annotations/train-images-with-labels-with-rotation.csv @@ -0,0 +1,2 @@ +ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation +aa,train,,,,,Intel,Test Image AA,,,,0 diff 
--git a/tests/assets/open_images_dataset_v5/images/test/cc.jpg b/tests/assets/open_images_dataset_v5/images/test/cc.jpg new file mode 100644 index 000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/open_images_dataset_v5/images/test/cc.jpg differ diff --git a/tests/assets/open_images_dataset_v5/images/train/aa.jpg b/tests/assets/open_images_dataset_v5/images/train/aa.jpg new file mode 100644 index 000000000000..864035b7f434 Binary files /dev/null and b/tests/assets/open_images_dataset_v5/images/train/aa.jpg differ diff --git a/tests/assets/open_images_dataset_v6/annotations/bbox_labels_600_hierarchy.json b/tests/assets/open_images_dataset_v6/annotations/bbox_labels_600_hierarchy.json new file mode 100644 index 000000000000..526c5765d8c1 --- /dev/null +++ b/tests/assets/open_images_dataset_v6/annotations/bbox_labels_600_hierarchy.json @@ -0,0 +1,13 @@ +{ + "LabelName": "/m/x", + "Subcategory": [ + { + "LabelName": "/m/0", + "Subcategory": [ + { + "LabelName": "/m/1" + } + ] + } + ] +} diff --git a/tests/assets/open_images_dataset_v6/annotations/image_ids_and_rotation.csv b/tests/assets/open_images_dataset_v6/annotations/image_ids_and_rotation.csv new file mode 100644 index 000000000000..0bd3d8c7c278 --- /dev/null +++ b/tests/assets/open_images_dataset_v6/annotations/image_ids_and_rotation.csv @@ -0,0 +1,5 @@ +ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation +a,train,,,,,Intel,Test Image A,,,,0 +b,train,,,,,Intel,Test Image B,,,,0 +c,test,,,,,Intel,Test Image C,,,,0 +d,validation,,,,Intel,Test Image D,,,,0 diff --git a/tests/assets/open_images_dataset_v6/annotations/oidv6-class-descriptions.csv b/tests/assets/open_images_dataset_v6/annotations/oidv6-class-descriptions.csv new file mode 100644 index 000000000000..e73dc711361d --- /dev/null +++ b/tests/assets/open_images_dataset_v6/annotations/oidv6-class-descriptions.csv @@ -0,0 +1,5 @@ +LabelName,DisplayName 
+/m/0,Generic label #0 +/m/1,Generic label #1 +/m/2,Generic label #2 +/m/3,Generic label #3 diff --git a/tests/assets/open_images_dataset_v6/annotations/oidv6-train-annotations-human-imagelabels.csv b/tests/assets/open_images_dataset_v6/annotations/oidv6-train-annotations-human-imagelabels.csv new file mode 100644 index 000000000000..56d92fe4d288 --- /dev/null +++ b/tests/assets/open_images_dataset_v6/annotations/oidv6-train-annotations-human-imagelabels.csv @@ -0,0 +1,3 @@ +ImageID,Source,LabelName,Confidence +a,verification,/m/0,1 +b,verification,/m/0,0 diff --git a/tests/assets/open_images_dataset_v6/annotations/test-annotations-human-imagelabels.csv b/tests/assets/open_images_dataset_v6/annotations/test-annotations-human-imagelabels.csv new file mode 100644 index 000000000000..33317cc1d931 --- /dev/null +++ b/tests/assets/open_images_dataset_v6/annotations/test-annotations-human-imagelabels.csv @@ -0,0 +1,3 @@ +ImageID,Source,LabelName,Confidence +c,verification,/m/1,1 +c,verification,/m/3,1 diff --git a/tests/assets/open_images_dataset_v6/images/test/c.jpg b/tests/assets/open_images_dataset_v6/images/test/c.jpg new file mode 100644 index 000000000000..8bce84d3bf50 Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/test/c.jpg differ diff --git a/tests/assets/open_images_dataset_v6/images/train/a.jpg b/tests/assets/open_images_dataset_v6/images/train/a.jpg new file mode 100644 index 000000000000..864035b7f434 Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/train/a.jpg differ diff --git a/tests/assets/open_images_dataset_v6/images/train/b.jpg b/tests/assets/open_images_dataset_v6/images/train/b.jpg new file mode 100644 index 000000000000..0ab7dbe4a419 Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/train/b.jpg differ diff --git a/tests/assets/open_images_dataset_v6/images/validation/d.png b/tests/assets/open_images_dataset_v6/images/validation/d.png new file mode 100644 index 
000000000000..528f10546704 Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/validation/d.png differ diff --git a/tests/assets/sly_pointcloud_dataset/ds0/ann/frame1.pcd.json b/tests/assets/sly_pointcloud_dataset/ds0/ann/frame1.pcd.json new file mode 100644 index 000000000000..c44c86e2d068 --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/ds0/ann/frame1.pcd.json @@ -0,0 +1,134 @@ +{ + "description": "", + "key": "e78200a6-2a02-4717-838e-33bed1eef292", + "tags": [ + { + "name": "tag2", + "value": 65, + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:26:45.979Z", + "createdAt": "2021-06-25T14:26:45.979Z", + "key": "de972ed110904e88b27df03cc6182a30" + }, + { + "name": "tag1", + "value": "25dsd", + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:26:37.211Z", + "createdAt": "2021-06-25T14:26:37.211Z", + "key": "b366fada5028420196917aa09df68021" + } + ], + "objects": [ + { + "key": "95001e75-b37d-4d8e-95aa-066b639ccb3b", + "classTitle": "car", + "tags": [], + "labelerLogin": "user", + "createdAt": "2021-05-23 13:03:36.293651+00:00", + "updatedAt": "2021-05-23 13:06:38.474723+00:00" + }, + { + "key": "76aadd13-e606-4bcc-891f-38385e22e17b", + "classTitle": "bus", + "tags": [], + "labelerLogin": "user", + "createdAt": "2021-05-23 13:03:36.293651+00:00", + "updatedAt": "2021-05-23 13:06:38.474723+00:00" + }, + { + "key": "a5079ec44c004723ba8c8ffe4981eacd", + "classTitle": "car", + "tags": [ + { + "name": "tag3", + "value": "4s", + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:27:18.071Z", + "createdAt": "2021-06-25T14:27:18.071Z", + "key": "4358d8c22a14468e9e767117cdc9989a" + }, + { + "name": "tag1", + "value": "fd", + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:27:13.132Z", + "createdAt": "2021-06-25T14:27:13.132Z", + "key": "68c707fb9cfb45809ed31499d18a9d1a" + } + ], + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:24:53.418Z", + "createdAt": "2021-06-25T14:24:53.418Z" + }, + { + "key": 
"372929e9a944488aa955fd6e742a4d34", + "classTitle": "car", + "tags": [ + { + "name": "tag1", + "value": "v12", + "labelerLogin": "user", + "updatedAt": "2021-06-26T06:39:27.228Z", + "createdAt": "2021-06-26T06:39:27.228Z", + "key": "690565f74f9643f290cf28a9a2c80460" + } + ], + "labelerLogin": "user", + "updatedAt": "2021-06-26T06:37:24.190Z", + "createdAt": "2021-06-26T06:37:24.190Z" + } + ], + "figures": [ + { + "key": "3b761530437f44e5ba2e822e1223c5f4", + "objectKey": "a5079ec44c004723ba8c8ffe4981eacd", + "geometryType": "cuboid_3d", + "geometry": { + "position": { + "x": 0.47322101780194115, + "y": 0.23004135338593013, + "z": 0.7857368231792693 + }, + "rotation": { + "x": 0, + "y": 0, + "z": 0 + }, + "dimensions": { + "x": 0.01, + "y": 0.01, + "z": 0.01 + } + }, + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:24:53.622Z", + "createdAt": "2021-06-25T14:24:53.622Z" + }, + { + "key": "4fbceef3e486495c95dd02c9891ba87b", + "objectKey": "372929e9a944488aa955fd6e742a4d34", + "geometryType": "cuboid_3d", + "geometry": { + "position": { + "x": 0.36073632206016004, + "y": 0.6367435566638207, + "z": 0.9294467506049543 + }, + "rotation": { + "x": 0, + "y": 0, + "z": 0 + }, + "dimensions": { + "x": 0.01, + "y": 0.01, + "z": 0.01 + } + }, + "labelerLogin": "user", + "updatedAt": "2021-06-26T06:37:24.395Z", + "createdAt": "2021-06-26T06:37:24.395Z" + } + ] +} \ No newline at end of file diff --git a/tests/assets/sly_pointcloud_dataset/ds0/ann/frame2.pcd.json b/tests/assets/sly_pointcloud_dataset/ds0/ann/frame2.pcd.json new file mode 100644 index 000000000000..67e039a4f3c1 --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/ds0/ann/frame2.pcd.json @@ -0,0 +1,66 @@ +{ + "description": "", + "key": "648d1b96-2b93-4b37-bddd-690a012e6c0f", + "tags": [], + "objects": [ + { + "key": "95001e75-b37d-4d8e-95aa-066b639ccb3b", + "classTitle": "car", + "tags": [], + "labelerLogin": "user", + "createdAt": "2021-05-23 13:03:36.293651+00:00", + "updatedAt": "2021-05-23 
13:06:38.474723+00:00" + }, + { + "key": "76aadd13-e606-4bcc-891f-38385e22e17b", + "classTitle": "bus", + "tags": [], + "labelerLogin": "user", + "createdAt": "2021-05-23 13:03:36.293651+00:00", + "updatedAt": "2021-05-23 13:06:38.474723+00:00" + }, + { + "key": "a5079ec44c004723ba8c8ffe4981eacd", + "classTitle": "car", + "tags": [], + "labelerLogin": "user", + "updatedAt": "2021-06-25T14:24:53.418Z", + "createdAt": "2021-06-25T14:24:53.418Z" + }, + { + "key": "372929e9a944488aa955fd6e742a4d34", + "classTitle": "car", + "tags": [], + "labelerLogin": "user", + "updatedAt": "2021-06-26T06:37:24.190Z", + "createdAt": "2021-06-26T06:37:24.190Z" + } + ], + "figures": [ + { + "key": "2584530b-0e75-4e56-bee8-d16321c976d3", + "objectKey": "76aadd13-e606-4bcc-891f-38385e22e17b", + "geometryType": "cuboid_3d", + "geometry": { + "position": { + "x": 0.59, + "y": 14.41, + "z": -0.61 + }, + "rotation": { + "x": 0.0, + "y": 0.0, + "z": 0.0 + }, + "dimensions": { + "x": 1.0, + "y": 1.0, + "z": 1.0 + } + }, + "labelerLogin": "user", + "createdAt": "2021-05-23 13:03:36.293651+00:00", + "updatedAt": "2021-05-23 13:06:38.474723+00:00" + } + ] +} \ No newline at end of file diff --git a/tests/assets/sly_pointcloud_dataset/ds0/pointcloud/frame1.pcd b/tests/assets/sly_pointcloud_dataset/ds0/pointcloud/frame1.pcd new file mode 100644 index 000000000000..dca7790513e0 --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/ds0/pointcloud/frame1.pcd @@ -0,0 +1,111 @@ +# .PCD v0.7 - Point Cloud Data file format +VERSION 0.7 +FIELDS x y z +SIZE 4 4 4 +TYPE F F F +COUNT 1 1 1 +WIDTH 100 +HEIGHT 1 +VIEWPOINT 0 0 0 1 0 0 0 +POINTS 100 +DATA ascii +0.5791356657 0.4114198749 0.7830189931 +0.08433272236 0.08169951458 0.5180490411 +0.2666649493 0.7924615988 0.2992874419 +0.2509502762 0.007282780613 0.9179724786 +0.0009519548103 0.5188591285 0.7461032855 +0.3341777395 0.7447405092 0.8507401902 +0.7737981441 0.8829159846 0.09243559982 +0.1151518369 0.1081754904 0.3861891806 +0.5522852638 0.4644139805 
0.9158114954 +0.262019557 0.799949328 0.8619102084 +0.7817190286 0.3586988399 0.9762494608 +0.9533059971 0.3382646669 0.3340770417 +0.0494714741 0.03615227093 0.8568413846 +0.5430795611 0.5657840884 0.6231871341 +0.4526048699 0.3327443159 0.2934205997 +0.7491301442 0.6164146172 0.5041072455 +0.7411897262 0.9413747728 0.8853690873 +0.9546086163 0.8780641512 0.9457557961 +0.522717644 0.2523471196 0.7768039063 +0.09745947412 0.9427579717 0.1866185785 +0.8838772197 0.05067927336 0.3026229952 +0.276616514 0.3074261016 0.3658778032 +0.3857137087 0.3437450623 0.5808424098 +0.1504121976 0.7565417305 0.1940801831 +0.04206309677 0.5601714351 0.4928550054 +0.8959001711 0.7637477404 0.1656459291 +0.7559768424 0.08387528166 0.43365922 +0.1263603805 0.075636126 0.4639358595 +0.1611063239 0.5822180034 0.6677265751 +0.2382706459 0.7488458973 0.7196503691 +0.2787486381 0.714237112 0.5095637026 +0.07576261419 0.8975986259 0.4009473249 +0.1493201826 0.3648264844 0.4162678288 +0.3424474069 0.343425388 0.8997863722 +0.3584413167 0.7608157605 0.02584696926 +0.8259046259 0.6827611696 0.1364630583 +0.5417442348 0.2665643659 0.3894770748 +0.5693883343 0.1702286099 0.5118786166 +0.4586303573 0.4419680205 0.8718587973 +0.2555126271 0.8234680873 0.2937022288 +0.9152112333 0.8613987559 0.8785694019 +0.8139096568 0.03429760443 0.9530865347 +0.5076202239 0.8322589488 0.7961599799 +0.3527430534 0.7680710878 0.1901746714 +0.7410094529 0.5911577662 0.3328401293 +0.9708675231 0.8183511845 0.6995216067 +0.75449086 0.852550303 0.5772490648 +0.3603820772 0.09753456254 0.7285356732 +0.8426145021 0.1952573918 0.8855362182 +0.6102935227 0.9111583781 0.4135836035 +0.2668344631 0.9490736905 0.02373416445 +0.1672606785 0.9949630093 0.4559031019 +0.8567499065 0.097355228 0.4352994641 +0.3035360294 0.6682905699 0.9484998586 +0.8089234504 0.8366173575 0.8503950069 +0.6155459006 0.1000898629 0.399354125 +0.5392563896 0.5547267467 0.7073454857 +0.1256843851 0.9060443549 0.5367478021 +0.4528039557 0.8630172674 
0.5704376168 +5.760227847e-05 0.8224073727 0.831449511 +0.1796916377 0.1734055202 0.7948878217 +0.810883435 0.3504472342 0.5144820089 +0.4906365019 0.794028057 0.355677471 +0.5212473715 0.5706615529 0.2866615709 +0.3698783992 0.4972479285 0.3559716539 +0.3664268911 0.1538402824 0.885025765 +0.7758925012 0.1232736316 0.5100987213 +0.90661068 0.4825559595 0.5327944691 +0.3090078216 0.1232007303 0.9469650131 +0.8596991207 0.4695369551 0.7954928839 +0.7400730096 0.7135225924 0.2317399864 +0.778502767 0.3236370572 0.5596816689 +0.7679065915 0.3203264538 0.2953326152 +0.1991905042 0.01979999711 0.4024950469 +0.5651530305 0.006185065383 0.1037946953 +0.1213076145 0.6169948617 0.7118041952 +0.255929024 0.6806643078 0.9724859382 +0.2868946845 0.459908593 0.5664612242 +0.8664995968 0.1661929971 0.8145931169 +0.3919819451 0.9655103479 0.1762528486 +0.01953937346 0.7905914014 0.4506287472 +0.9032500112 0.02406993994 0.6870933919 +0.6152027148 0.7198489321 0.9014066366 +0.5966142981 0.6270936512 0.9856644628 +0.6653253993 0.2587640894 0.3100885862 +0.5300796677 0.5051933038 0.984268778 +0.1756002886 0.6819248687 0.06743131528 +0.179217615 0.1716973763 0.6952441806 +0.6383691627 0.9291349941 0.8980673717 +0.7222629086 0.1277282925 0.457114115 +0.4357276007 0.3084016413 0.459899917 +0.1053334612 0.8258583608 0.5701472456 +0.05711539535 0.03161173035 0.5881848971 +0.7015011012 0.3480210232 0.3311890844 +0.4008349469 0.06827356637 0.7021136691 +0.1573765336 0.5639430863 0.9812992363 +0.5943477904 0.8583753679 0.7757091751 +0.937860795 0.4849899101 0.2587898971 +0.3640163008 0.3619259288 0.2099822029 +0.1939276021 0.6007837647 0.4454387266 diff --git a/tests/assets/sly_pointcloud_dataset/ds0/pointcloud/frame2.pcd b/tests/assets/sly_pointcloud_dataset/ds0/pointcloud/frame2.pcd new file mode 100644 index 000000000000..2f9779c546c5 Binary files /dev/null and b/tests/assets/sly_pointcloud_dataset/ds0/pointcloud/frame2.pcd differ diff --git 
a/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame1_pcd/img2.png b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame1_pcd/img2.png new file mode 100644 index 000000000000..4634ee1cf09a Binary files /dev/null and b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame1_pcd/img2.png differ diff --git a/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame1_pcd/img2.png.json b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame1_pcd/img2.png.json new file mode 100644 index 000000000000..4739443fafee --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame1_pcd/img2.png.json @@ -0,0 +1,32 @@ +{ + "name": "img2.png", + "meta": { + "sensorsData": { + "extrinsicMatrix": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "intrinsicMatrix": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + } +} \ No newline at end of file diff --git a/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame2_pcd/img1.png b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame2_pcd/img1.png new file mode 100644 index 000000000000..7e7271738e86 Binary files /dev/null and b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame2_pcd/img1.png differ diff --git a/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame2_pcd/img1.png.json b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame2_pcd/img1.png.json new file mode 100644 index 000000000000..e0a9fab664bb --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/ds0/related_images/frame2_pcd/img1.png.json @@ -0,0 +1,32 @@ +{ + "name": "img1.png", + "meta": { + "sensorsData": { + "extrinsicMatrix": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "intrinsicMatrix": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + } +} \ No newline at end of file diff --git a/tests/assets/sly_pointcloud_dataset/key_id_map.json b/tests/assets/sly_pointcloud_dataset/key_id_map.json new file mode 100644 index 
000000000000..bdb3ccabdfb8 --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/key_id_map.json @@ -0,0 +1,21 @@ +{ + "tags": { + "de972ed110904e88b27df03cc6182a30": 108173310, + "b366fada5028420196917aa09df68021": 108173308 + }, + "objects": { + "95001e75-b37d-4d8e-95aa-066b639ccb3b": 35, + "76aadd13-e606-4bcc-891f-38385e22e17b": 36, + "a5079ec44c004723ba8c8ffe4981eacd": 231825, + "372929e9a944488aa955fd6e742a4d34": 231831 + }, + "figures": { + "2584530b-0e75-4e56-bee8-d16321c976d3": 216, + "3b761530437f44e5ba2e822e1223c5f4": 755220128, + "4fbceef3e486495c95dd02c9891ba87b": 755337225 + }, + "videos": { + "e78200a6-2a02-4717-838e-33bed1eef292": 0, + "648d1b96-2b93-4b37-bddd-690a012e6c0f": 1 + } +} \ No newline at end of file diff --git a/tests/assets/sly_pointcloud_dataset/meta.json b/tests/assets/sly_pointcloud_dataset/meta.json new file mode 100644 index 000000000000..65ec6d3899a2 --- /dev/null +++ b/tests/assets/sly_pointcloud_dataset/meta.json @@ -0,0 +1,48 @@ +{ + "classes": [ + { + "id": 35, + "title": "car", + "color": "#fa3253", + "shape": "cuboid_3d", + "geometry_config": {} + }, + { + "id": 36, + "title": "bus", + "color": "#fafa37", + "shape": "cuboid_3d", + "geometry_config": {} + } + ], + "tags": [ + { + "name": "tag1", + "value_type": "any_string", + "color": "#243175", + "id": 29185993, + "hotkey": "", + "applicable_type": "all", + "classes": [] + }, + { + "name": "tag2", + "value_type": "any_number", + "color": "#4DD562", + "id": 29185994, + "hotkey": "", + "applicable_type": "imagesOnly", + "classes": [] + }, + { + "name": "tag3", + "value_type": "any_string", + "color": "#3A3FE9", + "id": 29185995, + "hotkey": "", + "applicable_type": "objectsOnly", + "classes": [] + } + ], + "projectType": "point_clouds" +} \ No newline at end of file diff --git a/tests/assets/tf_detection_api_dataset/label_map.pbtxt b/tests/assets/tf_detection_api_dataset/label_map.pbtxt index dbf2b339b7b2..de7fbb9ec47a 100644 --- 
a/tests/assets/tf_detection_api_dataset/label_map.pbtxt +++ b/tests/assets/tf_detection_api_dataset/label_map.pbtxt @@ -1,50 +1,49 @@ item { - id: 1 - name: 'label_0' + id: 1 + name: 'label_0' } item { - id: 2 - name: 'label_1' + id: 2 + name: 'label_1' } item { - id: 3 - name: 'label_2' + id: 3 + name: 'label_2' } item { - id: 4 - name: 'label_3' + id: 4 + name: 'label_3' } item { - id: 5 - name: 'label_4' + id: 5 + name: 'label_4' } item { - id: 6 - name: 'label_5' + id: 6 + name: 'label_5' } item { - id: 7 - name: 'label_6' + id: 7 + name: 'label_6' } item { - id: 8 - name: 'label_7' + id: 8 + name: 'label_7' } item { - id: 9 - name: 'label_8' + id: 9 + name: 'label_8' } item { - id: 10 - name: 'label_9' + id: 10 + name: 'label_9' } - diff --git a/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_test.csv b/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_test.csv new file mode 100644 index 000000000000..99d3c718aee0 --- /dev/null +++ b/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_test.csv @@ -0,0 +1,2 @@ +NAME_ID,X,Y,W,H +n000003/0003_01,1,1,1,1 diff --git a/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_train.csv b/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_train.csv index 365734f28095..d554613e8bee 100644 --- a/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_train.csv +++ b/tests/assets/vgg_face2_dataset/bb_landmark/loose_bb_train.csv @@ -1,3 +1,4 @@ NAME_ID,X,Y,W,H n000001/0001_01,2,2,1,2 +n000002/0001_01,2,4,2,2 n000002/0002_01,1,3,1,1 diff --git a/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_test.csv b/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_test.csv new file mode 100644 index 000000000000..f5f656e52785 --- /dev/null +++ b/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_test.csv @@ -0,0 +1,2 @@ +NAME_ID,P1X,P1Y,P2X,P2Y,P3X,P3Y,P4X,P4Y,P5X,P5Y +n000003/0003_01,0.2,2.8,0.8,2.9,0.5,2.6,0.4,2.3,0.6,2.3 diff --git a/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_train.csv 
b/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_train.csv index 7ca5c1a3b0e3..0f316a60d82b 100644 --- a/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_train.csv +++ b/tests/assets/vgg_face2_dataset/bb_landmark/loose_landmark_train.csv @@ -1,3 +1,4 @@ NAME_ID,P1X,P1Y,P2X,P2Y,P3X,P3Y,P4X,P4Y,P5X,P5Y n000001/0001_01,2.787,2.898,2.965,2.79,2.8,2.456,2.81,2.32,2.89,2.3 +n000002/0001_01,2.3,4.9,2.9,4.93,2.62,4.745,2.54,4.45,2.76,4.43 n000002/0002_01,1.2,3.8,1.8,3.82,1.51,3.634,1.43,3.34,1.65,3.32 diff --git a/tests/assets/vgg_face2_dataset/labels.txt b/tests/assets/vgg_face2_dataset/labels.txt index cdd15b2026de..1eeec4249f54 100644 --- a/tests/assets/vgg_face2_dataset/labels.txt +++ b/tests/assets/vgg_face2_dataset/labels.txt @@ -1,2 +1,3 @@ -n000001 car -n000002 person \ No newline at end of file +n000001 Karl +n000002 Jay +n000003 Pol \ No newline at end of file diff --git a/tests/assets/vgg_face2_dataset/test/n000003/0003_01.jpg b/tests/assets/vgg_face2_dataset/test/n000003/0003_01.jpg new file mode 100644 index 000000000000..5ef534816125 Binary files /dev/null and b/tests/assets/vgg_face2_dataset/test/n000003/0003_01.jpg differ diff --git a/tests/assets/vgg_face2_dataset/train/n000002/0001_01.jpg b/tests/assets/vgg_face2_dataset/train/n000002/0001_01.jpg new file mode 100644 index 000000000000..8689b9563119 Binary files /dev/null and b/tests/assets/vgg_face2_dataset/train/n000002/0001_01.jpg differ diff --git a/tests/assets/voc_dataset/voc_dataset1/Annotations/2007_000001.xml b/tests/assets/voc_dataset/voc_dataset1/Annotations/2007_000001.xml index 4f1e25a2112f..04995b5736b0 100644 --- a/tests/assets/voc_dataset/voc_dataset1/Annotations/2007_000001.xml +++ b/tests/assets/voc_dataset/voc_dataset1/Annotations/2007_000001.xml @@ -3,8 +3,8 @@ VOC2007 2007_000001.jpg - 10 - 20 + 20 + 10 3 1 diff --git a/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000001.jpg b/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000001.jpg index 
6c07340b733a..cd08aa30386f 100644 Binary files a/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000001.jpg and b/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000001.jpg differ diff --git a/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000002.jpg b/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000002.jpg index 3c81296b31dc..cd08aa30386f 100644 Binary files a/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000002.jpg and b/tests/assets/voc_dataset/voc_dataset1/JPEGImages/2007_000002.jpg differ diff --git a/tests/assets/voc_dataset/voc_dataset1/SegmentationClass/2007_000001.png b/tests/assets/voc_dataset/voc_dataset1/SegmentationClass/2007_000001.png index 0b9205145239..65f71759e682 100644 Binary files a/tests/assets/voc_dataset/voc_dataset1/SegmentationClass/2007_000001.png and b/tests/assets/voc_dataset/voc_dataset1/SegmentationClass/2007_000001.png differ diff --git a/tests/assets/voc_dataset/voc_dataset1/SegmentationObject/2007_000001.png b/tests/assets/voc_dataset/voc_dataset1/SegmentationObject/2007_000001.png index ebbeee61dd68..67b82d980a00 100644 Binary files a/tests/assets/voc_dataset/voc_dataset1/SegmentationObject/2007_000001.png and b/tests/assets/voc_dataset/voc_dataset1/SegmentationObject/2007_000001.png differ diff --git a/tests/assets/widerface_dataset/wider_face_split/wider_face_train_bbx_gt.txt b/tests/assets/widerface_dataset/wider_face_split/wider_face_train_bbx_gt.txt index 09109f70101c..813e43b117f4 100644 --- a/tests/assets/widerface_dataset/wider_face_split/wider_face_train_bbx_gt.txt +++ b/tests/assets/widerface_dataset/wider_face_split/wider_face_train_bbx_gt.txt @@ -1,7 +1,7 @@ 0--Parade/0_Parade_image_01.jpg 1 -1 2 2 2 0 0 0 0 0 0 +1 2 2 2 0 0 0 0 0 0 1--Handshaking/1_Handshaking_image_02.jpg 2 -1 1 2 2 0 0 1 0 0 0 -5 1 2 2 0 0 1 0 0 0 \ No newline at end of file +1 1 2 2 0 0 1 0 0 0 +5 1 2 2 0 0 1 0 0 0 diff --git a/tests/assets/widerface_dataset/wider_face_split/wider_face_val_bbx_gt.txt 
b/tests/assets/widerface_dataset/wider_face_split/wider_face_val_bbx_gt.txt index 04573e82685b..095b845257c0 100644 --- a/tests/assets/widerface_dataset/wider_face_split/wider_face_val_bbx_gt.txt +++ b/tests/assets/widerface_dataset/wider_face_split/wider_face_val_bbx_gt.txt @@ -1,5 +1,5 @@ 0--Parade/0_Parade_image_03.jpg 3 -0 0 1 1 2 0 0 0 2 0 -3 2 1 2 0 0 0 1 0 0 -5 6 1 1 2 0 0 0 2 0 \ No newline at end of file +0 0 1 1 2 0 0 0 2 0 +3 2 1 2 0 0 0 1 0 0 +5 6 1 1 2 0 0 0 2 0 diff --git a/tests/cli/test_diff.py b/tests/cli/test_diff.py index 96bf97fa2587..b80394021a8a 100644 --- a/tests/cli/test_diff.py +++ b/tests/cli/test_diff.py @@ -1,20 +1,19 @@ from unittest import TestCase - import os import os.path as osp import numpy as np from datumaro.cli.contexts.project.diff import DatasetDiffVisualizer +from datumaro.components.extractor import ( + AnnotationType, Bbox, Caption, DatasetItem, Label, LabelCategories, Mask, + MaskCategories, Points, PointsCategories, Polygon, PolyLine, +) from datumaro.components.operations import DistanceComparator from datumaro.components.project import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Label, Mask, Points, Polygon, - PolyLine, Bbox, Caption, - LabelCategories, MaskCategories, PointsCategories -) from datumaro.util.image import Image from datumaro.util.test_utils import TestDir + from ..requirements import Requirements, mark_requirement diff --git a/tests/cli/test_image_zip_format.py b/tests/cli/test_image_zip_format.py new file mode 100644 index 000000000000..07b742eadc86 --- /dev/null +++ b/tests/cli/test_image_zip_format.py @@ -0,0 +1,92 @@ +from unittest import TestCase +from zipfile import ZipFile +import os +import os.path as osp + +import numpy as np + +from datumaro.cli.__main__ import main +from datumaro.components.dataset import Dataset, DatasetItem +from datumaro.util.test_utils import TestDir, compare_datasets + +from ..requirements import Requirements, mark_requirement + + 
+def run(test, *args, expected_code=0): + test.assertEqual(expected_code, main(args), str(args)) + +def make_zip_archive(src_path, dst_path): + with ZipFile(dst_path, 'w') as archive: + for (dirpath, _, filenames) in os.walk(src_path): + for name in filenames: + path = osp.join(dirpath, name) + archive.write(path, osp.relpath(path, src_path)) + +class ImageZipIntegrationScenarios(TestCase): + @mark_requirement(Requirements.DATUM_267) + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((5, 5, 3))), + DatasetItem(id='2', image=np.ones((2, 8, 3))) + ]) + + with TestDir() as test_dir: + source_dataset.export(test_dir, format='image_dir') + zip_path = osp.join(test_dir, 'images.zip') + make_zip_archive(test_dir, zip_path) + + run(self, 'create', '-o', test_dir) + run(self, 'add', 'path', '-p', test_dir, '-f', 'image_zip', zip_path) + + export_path = osp.join(test_dir, 'export.zip') + run(self, 'export', '-p', test_dir, '-f', 'image_zip', + '-o', test_dir, '--overwrite', '--', + '--name', osp.basename(export_path) + ) + + parsed_dataset = Dataset.import_from(export_path, format='image_zip') + compare_datasets(self, source_dataset, parsed_dataset) + + @mark_requirement(Requirements.DATUM_267) + def test_can_export_zip_images_from_coco_dataset(self): + with TestDir() as test_dir: + coco_dir = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))], + 'tests', 'assets', 'coco_dataset') + + run(self, 'create', '-o', test_dir) + run(self, 'add', 'path', '-p', test_dir, '-f', 'coco', coco_dir) + + export_path = osp.join(test_dir, 'export.zip') + run(self, 'export', '-p', test_dir, '-f', 'image_zip', + '-o', test_dir, '--overwrite', '--', + '--name', osp.basename(export_path)) + + self.assertTrue(osp.isfile(export_path)) + with ZipFile(export_path, 'r') as zf: + images = {f.filename for f in zf.filelist} + self.assertTrue(images == {'a.jpg', 'b.jpg'}) + + @mark_requirement(Requirements.DATUM_267) + def 
test_can_change_extension_for_images_in_zip(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((5, 5, 3))), + DatasetItem(id='2', image=np.ones((2, 8, 3))) + ]) + + with TestDir() as test_dir: + source_dataset.export(test_dir, format='image_dir', image_ext='.jpg') + zip_path = osp.join(test_dir, 'images.zip') + make_zip_archive(test_dir, zip_path) + + run(self, 'create', '-o', test_dir) + run(self, 'add', 'path', '-p', test_dir, '-f', 'image_zip', zip_path) + + export_path = osp.join(test_dir, 'export.zip') + run(self, 'export', '-p', test_dir, '-f', 'image_zip', + '-o', test_dir, '--overwrite', '--', + '--name', osp.basename(export_path), '--image-ext', '.png') + + self.assertTrue(osp.isfile(export_path)) + with ZipFile(export_path, 'r') as zf: + images = {f.filename for f in zf.filelist} + self.assertTrue(images == {'1.png', '2.png'}) diff --git a/tests/cli/test_kitti_raw_format.py b/tests/cli/test_kitti_raw_format.py new file mode 100644 index 000000000000..82522369ec00 --- /dev/null +++ b/tests/cli/test_kitti_raw_format.py @@ -0,0 +1,90 @@ +from unittest import TestCase +import os.path as osp + +from datumaro.cli.__main__ import main +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, Cuboid3d, DatasetItem, LabelCategories, +) +from datumaro.util.test_utils import TestDir, compare_datasets_3d + +from ..requirements import Requirements, mark_requirement + +DUMMY_DATASET_DIR = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))], + 'tests', 'assets', 'kitti_dataset', 'kitti_raw') + +def run(test, *args, expected_code=0): + test.assertEqual(expected_code, main(args), str(args)) + +class KittiRawIntegrationScenarios(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_convert_to_kitti_raw(self): + with TestDir() as test_dir: + export_dir = osp.join(test_dir, 'export_dir') + expected_label_cat = LabelCategories(attributes={'occluded'}) + 
expected_label_cat.add('bus') + expected_label_cat.add('car') + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[1, 2, 3], + scale=[7.95, -3.62, -1.03], + label=1, attributes={'occluded': False, + 'track_id': 1}), + + Cuboid3d(position=[1, 1, 0], + scale=[8.34, 23.01, -0.76], + label=0, attributes={'occluded': False, + 'track_id': 2}) + ], + point_cloud=osp.join(export_dir, 'ds0', 'pointcloud', + '0000000000.pcd'), + related_images=[osp.join(export_dir, 'ds0', + 'related_images', '0000000000_pcd', '0000000000.png') + ], + attributes={'frame': 0, 'description': ''} + ), + + DatasetItem(id='0000000001', + annotations=[ + Cuboid3d(position=[0, 1, 0], + scale=[8.34, 23.01, -0.76], + rotation=[1, 1, 3], + label=0, attributes={'occluded': True, + 'track_id': 2}) + ], + point_cloud=osp.join(export_dir, 'ds0', 'pointcloud', + '0000000001.pcd'), + related_images=[osp.join(export_dir, 'ds0', + 'related_images', '0000000001_pcd', '0000000001.png') + ], + attributes={'frame': 1, 'description': ''} + ), + + DatasetItem(id='0000000002', + annotations=[ + Cuboid3d(position=[1, 2, 3], + scale=[-9.41, 13.54, 0.24], + label=1, attributes={'occluded': False, + 'track_id': 3}) + ], + point_cloud=osp.join(export_dir, 'ds0', 'pointcloud', + '0000000002.pcd'), + related_images=[osp.join(export_dir, 'ds0', + 'related_images', '0000000002_pcd', '0000000002.png') + ], + attributes={'frame': 2, 'description': ''} + ), + ], categories={AnnotationType.label: expected_label_cat}) + + run(self, 'import', '-f', 'kitti_raw', + '-i', DUMMY_DATASET_DIR, '-o', test_dir) + + run(self, 'export', '-p', test_dir, + '-f', 'sly_pointcloud', '-o', export_dir, + '--', '--save-images') + + parsed_dataset = Dataset.import_from(export_dir, + format='sly_pointcloud') + compare_datasets_3d(self, expected_dataset, parsed_dataset, + require_point_cloud=True) diff --git a/tests/cli/test_sly_point_cloud_format.py b/tests/cli/test_sly_point_cloud_format.py 
new file mode 100644 index 000000000000..5544387b59a3 --- /dev/null +++ b/tests/cli/test_sly_point_cloud_format.py @@ -0,0 +1,75 @@ +from unittest import TestCase +import os.path as osp + +from datumaro.cli.__main__ import main +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, Cuboid3d, DatasetItem, LabelCategories, +) +from datumaro.util.test_utils import TestDir, compare_datasets_3d + +from ..requirements import Requirements, mark_requirement + +DUMMY_DATASET_DIR = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))], + 'tests', 'assets', 'sly_pointcloud_dataset') + +def run(test, *args, expected_code=0): + test.assertEqual(expected_code, main(args), str(args)) + +class SlyPointCloudIntegrationScenarios(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_convert_to_kitti_raw(self): + with TestDir() as test_dir: + export_dir = osp.join(test_dir, 'export_dir') + expected_label_cat = LabelCategories(attributes={'occluded'}) + expected_label_cat.add('bus', attributes={'tag1', 'tag3'}) + expected_label_cat.add('car', attributes={'tag1', 'tag3'}) + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='frame1', + annotations=[ + Cuboid3d(label=1, + position=[0.47, 0.23, 0.79], + scale=[0.01, 0.01, 0.01], + attributes={'track_id': 2, + 'tag1': 'fd', 'tag3': '4s', 'occluded': False}), + + Cuboid3d(label=1, + position=[0.36, 0.64, 0.93], + scale=[0.01, 0.01, 0.01], + attributes={'track_id': 3, + 'tag1': 'v12', 'tag3': '', 'occluded': False}), + ], + point_cloud=osp.join(export_dir, 'velodyne_points', 'data', + 'frame1.pcd'), + related_images=[osp.join(export_dir, 'image_00', 'data', + 'frame1.png') + ], + attributes={'frame': 0} + ), + + DatasetItem(id='frame2', + annotations=[ + Cuboid3d(label=0, + position=[0.59, 14.41, -0.61], + attributes={'track_id': 1, + 'tag1': '', 'tag3': '', 'occluded': False}) + ], + point_cloud=osp.join(export_dir, 'velodyne_points', 'data', + 
'frame2.pcd'), + related_images=[osp.join(export_dir, 'image_00', 'data', + 'frame2.png') + ], + attributes={'frame': 1} + ), + ], categories={AnnotationType.label: expected_label_cat}) + + run(self, 'import', '-f', 'sly_pointcloud', + '-i', DUMMY_DATASET_DIR, '-o', test_dir) + + run(self, 'export', '-p', test_dir, + '-f', 'kitti_raw', '-o', export_dir, + '--', '--save-images', '--allow-attrs') + + parsed_dataset = Dataset.import_from(export_dir, format='kitti_raw') + compare_datasets_3d(self, expected_dataset, parsed_dataset, + require_point_cloud=True) diff --git a/tests/cli/test_voc_format.py b/tests/cli/test_voc_format.py index a707a4651336..08149f9934e2 100644 --- a/tests/cli/test_voc_format.py +++ b/tests/cli/test_voc_format.py @@ -1,40 +1,61 @@ -import os.path as osp -import numpy as np from collections import OrderedDict - from unittest import TestCase +import os.path as osp + +import numpy as np -import datumaro.plugins.voc_format.format as VOC -from datumaro.components.dataset import Dataset, DatasetItem -from datumaro.components.extractor import Bbox, Mask, Image, Label from datumaro.cli.__main__ import main +from datumaro.components.dataset import Dataset, DatasetItem +from datumaro.components.extractor import Bbox, Label, Mask from datumaro.util.test_utils import TestDir, compare_datasets +import datumaro.plugins.voc_format.format as VOC + from ..requirements import Requirements, mark_requirement DUMMY_DATASETS_DIR = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))], - 'tests', 'assets', 'voc_dataset') + 'tests', 'assets', 'voc_dataset') def run(test, *args, expected_code=0): test.assertEqual(expected_code, main(args), str(args)) - class VocIntegrationScenarios(TestCase): - def _test_can_save_and_load(self, project_path, source_path, source_dataset, - dataset_format, result_path=None, label_map=None): + def _test_can_save_and_load(self, project_path, source_path, expected_dataset, + dataset_format, result_path='', label_map=None): run(self, 
'create', '-o', project_path) - run(self, 'add', 'path', '-p', project_path, '-f', dataset_format, source_path) + run(self, 'add', 'path', '-p', project_path, '-f', dataset_format, + source_path) - result_dir = osp.join(project_path, 'voc_dataset') + result_dir = osp.join(project_path, 'result') + extra_args = ['--', '--save-images'] + if label_map: + extra_args += ['--label-map', label_map] run(self, 'export', '-f', dataset_format, '-p', project_path, - '-o', result_dir, '--', '--label-map', label_map) + '-o', result_dir, *extra_args) - result_path = osp.join(result_dir, result_path) if result_path else result_dir - target_dataset = Dataset.import_from(result_path, dataset_format) - compare_datasets(self, source_dataset, target_dataset) + result_path = osp.join(result_dir, result_path) + parsed_dataset = Dataset.import_from(result_path, dataset_format) + compare_datasets(self, expected_dataset, parsed_dataset, + require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_preparing_dataset_for_train_model(self): - source_dataset = Dataset.from_iterable([ + """ + Description: + Testing a particular example of working with VOC dataset. + + Expected results: + A VOC dataset that matches the expected result. + + Steps: + 1. Get path to the source dataset from assets. + 2. Create a datumaro project and add source dataset to it. + 3. Leave only non-occluded annotations with `filter` command. + 4. Split the dataset into subsets with `transform` command. + 5. Export the project to a VOC dataset with `export` command. + 6. Verify that the resulting dataset is equal to the expected result. 
+ """ + + expected_dataset = Dataset.from_iterable([ DatasetItem(id='c', subset='train', annotations=[ Bbox(3.0, 1.0, 8.0, 5.0, @@ -81,16 +102,16 @@ def test_preparing_dataset_for_train_model(self): '-o', export_path, '--', '--label-map', 'voc') parsed_dataset = Dataset.import_from(export_path, format='voc') - compare_datasets(self, source_dataset, parsed_dataset) + compare_datasets(self, expected_dataset, parsed_dataset) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_convert_to_voc_format(self): - label_map = OrderedDict(('label_' + str(i), [None, [], []]) for i in range(10)) + def test_export_to_voc_format(self): + label_map = OrderedDict(('label_%s' % i, [None, [], []]) for i in range(10)) label_map['background'] = [None, [], []] label_map.move_to_end('background', last=False) - source_dataset = Dataset.from_iterable([ - DatasetItem(id='1', subset='train', + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='1', subset='train', image=np.ones((10, 15, 3)), annotations=[ Bbox(0.0, 2.0, 4.0, 2.0, attributes={ @@ -121,18 +142,104 @@ def test_convert_to_voc_format(self): voc_export = osp.join(test_dir, 'voc_export') run(self, 'export', '-p', test_dir, '-f', 'voc', - '-o', voc_export) + '-o', voc_export, '--', '--save-images') parsed_dataset = Dataset.import_from(voc_export, format='voc') - compare_datasets(self, source_dataset, parsed_dataset) + compare_datasets(self, expected_dataset, parsed_dataset, + require_images=True) + + @mark_requirement(Requirements.DATUM_283) + def test_convert_to_voc_format(self): + """ + Description: + Ensure that the dataset can be converted to VOC format with + command `datum convert`. + + Expected results: + A VOC dataset that matches the expected dataset. + + Steps: + 1. Get path to the source dataset from assets. + 2. Convert source dataset to VOC format, using the `convert` command. + 3. Verify that resulting dataset is equal to the expected dataset. 
+ """ + + label_map = OrderedDict(('label_' + str(i), [None, [], []]) for i in range(10)) + label_map['background'] = [None, [], []] + label_map.move_to_end('background', last=False) + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='1', subset='default', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0.0, 4.0, 4.0, 8.0, + attributes={ + 'difficult': False, + 'truncated': False, + 'occluded': False, + 'visibility': '1.0', + 'ignored': 'False' + }, + id=1, label=3, group=1 + ) + ] + ) + ], categories=VOC.make_voc_categories(label_map)) + + mot_dir = osp.join(__file__[:__file__.rfind(osp.join('tests', ''))], + 'tests', 'assets', 'mot_dataset') + with TestDir() as test_dir: + voc_dir = osp.join(test_dir, 'voc') + run(self, 'convert', '-if', 'mot_seq', '-i', mot_dir, + '-f', 'voc', '-o', voc_dir, '--', '--save-images') + + target_dataset = Dataset.import_from(voc_dir, format='voc') + compare_datasets(self, expected_dataset, target_dataset, + require_images=True) + + @mark_requirement(Requirements.DATUM_283) + def test_convert_from_voc_format(self): + """ + Description: + Ensure that the dataset can be converted from VOC format with + command `datum convert`. + + Expected results: + A ImageNet dataset that matches the expected dataset. + + Steps: + 1. Get path to the source dataset from assets. + 2. Convert source dataset to LabelMe format, using the `convert` command. + 3. Verify that resulting dataset is equal to the expected dataset. 
+ """ + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='2007_000001', subset='default', + image=np.ones((10, 20, 3)), + annotations=[Label(i) for i in range(11)] + ), + DatasetItem(id='2007_000002', subset='default', + image=np.ones((10, 20, 3)) + ) + ], categories=sorted([l.name for l in VOC.VocLabel if l.value % 2 == 1])) + + voc_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') + with TestDir() as test_dir: + imagenet_dir = osp.join(test_dir, 'imagenet') + run(self, 'convert', '-if', 'voc', '-i', voc_dir, + '-f', 'imagenet', '-o', imagenet_dir, '--', '--save-image') + + target_dataset = Dataset.import_from(imagenet_dir, format='imagenet') + compare_datasets(self, expected_dataset, target_dataset, + require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_voc_dataset(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='2007_000001', subset='train', - image=Image(path='2007_000001.jpg', size=(10, 20)), + image=np.ones((10, 20, 3)), annotations=[Label(i) for i in range(22) if i % 2 == 1] + [ - Bbox(4.0, 5.0, 2.0, 2.0, label=15, + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=1, group=1, attributes={ 'difficult': False, 'truncated': False, @@ -142,26 +249,21 @@ def test_can_save_and_load_voc_dataset(self): for a in VOC.VocAction } }, - id=1, group=1 ), - Bbox(1.0, 2.0, 2.0, 2.0, label=8, + Bbox(1.0, 2.0, 2.0, 2.0, label=8, id=2, group=2, attributes={ 'difficult': False, 'truncated': True, 'occluded': False, 'pose': 'Unspecified' - }, - id=2, group=2 - ), - Bbox(5.5, 6.0, 2.0, 2.0, label=22, - id=0, group=1 + } ), - Mask(image=np.ones([5, 10]), label=2, group=1) - ] - ), + Bbox(5.5, 6.0, 2.0, 2.0, label=22, id=0, group=1), + Mask(image=np.ones([10, 20]), label=2, group=1), + ]), + DatasetItem(id='2007_000002', subset='test', - image=np.ones((10, 20, 3)) - ) + image=np.ones((10, 20, 3))) ], categories=VOC.make_voc_categories()) voc_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') @@ -173,9 +275,9 @@ def 
test_can_save_and_load_voc_dataset(self): def test_can_save_and_load_voc_layout_dataset(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='2007_000001', subset='train', - image=Image(path='2007_000001.jpg', size=(10, 20)), + image=np.ones((10, 20, 3)), annotations=[ - Bbox(4.0, 5.0, 2.0, 2.0, label=15, + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=1, group=1, attributes={ 'difficult': False, 'truncated': False, @@ -184,31 +286,70 @@ def test_can_save_and_load_voc_layout_dataset(self): a.name : a.value % 2 == 1 for a in VOC.VocAction } - }, - id=1, group=1 + } ), - Bbox(5.5, 6.0, 2.0, 2.0, label=22, - id=0, group=1 - ), - ] - ), + Bbox(5.5, 6.0, 2.0, 2.0, label=22, id=0, group=1), + ]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), ], categories=VOC.make_voc_categories()) - voc_layout_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1', - 'ImageSets', 'Layout', 'train.txt') + dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') + rpath = osp.join('ImageSets', 'Layout', 'train.txt') + matrix = [ + ('voc_layout', '', ''), + ('voc_layout', 'train', rpath), + ('voc', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + source = source_dataset.get_subset(subset) + else: + source = source_dataset + + with TestDir() as test_dir: + self._test_can_save_and_load(test_dir, + osp.join(dataset_dir, path), source, + format, result_path=path, label_map='voc') - with TestDir() as test_dir: - result_voc_path = osp.join('ImageSets', 'Layout', 'train.txt') - self._test_can_save_and_load(test_dir, voc_layout_path, source_dataset, - 'voc_layout', result_path=result_voc_path, label_map='voc') + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_voc_classification_dataset(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), + annotations=[Label(i) for i in 
range(22) if i % 2 == 1]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) + + dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') + rpath = osp.join('ImageSets', 'Main', 'train.txt') + matrix = [ + ('voc_classification', '', ''), + ('voc_classification', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + source = source_dataset.get_subset(subset) + else: + source = source_dataset + + with TestDir() as test_dir: + self._test_can_save_and_load(test_dir, + osp.join(dataset_dir, path), source, + format, result_path=path, label_map='voc') @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_save_and_load_voc_detect_dataset(self): + def test_can_save_and_load_voc_detection_dataset(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='2007_000001', subset='train', - image=Image(path='2007_000001.jpg', size=(10, 20)), + image=np.ones((10, 20, 3)), annotations=[ - Bbox(4.0, 5.0, 2.0, 2.0, label=15, + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=2, group=2, attributes={ 'difficult': False, 'truncated': False, @@ -217,56 +358,79 @@ def test_can_save_and_load_voc_detect_dataset(self): a.name : a.value % 2 == 1 for a in VOC.VocAction } - }, - id=2, group=2 + } ), - Bbox(1.0, 2.0, 2.0, 2.0, label=8, + Bbox(1.0, 2.0, 2.0, 2.0, label=8, id=1, group=1, attributes={ 'difficult': False, 'truncated': True, 'occluded': False, 'pose': 'Unspecified' - }, - id=1, group=1 + } ) - ] - ), - ], categories=VOC.make_voc_categories()) + ]), - voc_detection_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1', - 'ImageSets', 'Main', 'train.txt') + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) - with TestDir() as test_dir: - result_voc_path = osp.join('ImageSets', 'Main', 'train.txt') - self._test_can_save_and_load(test_dir, voc_detection_path, 
source_dataset, - 'voc_detection', result_path=result_voc_path, label_map='voc') + dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') + rpath = osp.join('ImageSets', 'Main', 'train.txt') + matrix = [ + ('voc_detection', '', ''), + ('voc_detection', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + source = source_dataset.get_subset(subset) + else: + source = source_dataset + + with TestDir() as test_dir: + self._test_can_save_and_load(test_dir, + osp.join(dataset_dir, path), source, + format, result_path=path, label_map='voc') @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_voc_segmentation_dataset(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='2007_000001', subset='train', - image=Image(path='2007_000001.jpg', size=(10, 20)), + image=np.ones((10, 20, 3)), annotations=[ - Mask(image=np.ones([5, 10]), label=2, group=1) - ] - ) - ], categories=VOC.make_voc_categories()) + Mask(image=np.ones([10, 20]), label=2, group=1) + ]), - voc_segm_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1', - 'ImageSets', 'Segmentation', 'train.txt') + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) - with TestDir() as test_dir: - result_voc_path = osp.join('ImageSets', 'Segmentation', 'train.txt') - self._test_can_save_and_load(test_dir, voc_segm_path, source_dataset, - 'voc_segmentation', result_path=result_voc_path, label_map='voc') + dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') + rpath = osp.join('ImageSets', 'Segmentation', 'train.txt') + matrix = [ + ('voc_segmentation', '', ''), + ('voc_segmentation', 'train', rpath), + ('voc', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + source = source_dataset.get_subset(subset) + else: + source = source_dataset + + with TestDir() as 
test_dir: + self._test_can_save_and_load(test_dir, + osp.join(dataset_dir, path), source, + format, result_path=path, label_map='voc') @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_voc_action_dataset(self): - source_dataset = Dataset.from_iterable([ + expected_dataset = Dataset.from_iterable([ DatasetItem(id='2007_000001', subset='train', - image=Image(path='2007_000001.jpg', size=(10, 20)), + image=np.ones((10, 20, 3)), annotations=[ - Bbox(4.0, 5.0, 2.0, 2.0, label=15, + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=1, group=1, attributes={ 'difficult': False, 'truncated': False, @@ -275,17 +439,29 @@ def test_can_save_and_load_voc_action_dataset(self): a.name : a.value % 2 == 1 for a in VOC.VocAction } - }, - id=1, group=1 + } ) - ] - ) - ], categories=VOC.make_voc_categories()) + ]), - voc_act_path = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1', - 'ImageSets', 'Action', 'train.txt') + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) - with TestDir() as test_dir: - result_voc_path = osp.join('ImageSets', 'Action', 'train.txt') - self._test_can_save_and_load(test_dir, voc_act_path, source_dataset, - 'voc_action', result_path=result_voc_path, label_map='voc') + dataset_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1') + rpath = osp.join('ImageSets', 'Action', 'train.txt') + matrix = [ + ('voc_action', '', ''), + ('voc_action', 'train', rpath), + ('voc', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with TestDir() as test_dir: + self._test_can_save_and_load(test_dir, + osp.join(dataset_dir, path), expected, + format, result_path=path, label_map='voc') diff --git a/tests/cli/test_yolo_format.py b/tests/cli/test_yolo_format.py index 2ff047a01066..ebe28ce2f8d6 100644 --- 
a/tests/cli/test_yolo_format.py +++ b/tests/cli/test_yolo_format.py @@ -1,16 +1,17 @@ -import numpy as np +from unittest import TestCase import os.path as osp -from unittest import TestCase +import numpy as np from datumaro.cli.__main__ import main from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Bbox) +from datumaro.components.extractor import AnnotationType, Bbox, DatasetItem from datumaro.util.test_utils import TestDir, compare_datasets import datumaro.plugins.voc_format.format as VOC + from ..requirements import Requirements, mark_requirement + def run(test, *args, expected_code=0): test.assertEqual(expected_code, main(args), str(args)) @@ -68,10 +69,11 @@ def test_can_export_mot_as_yolo(self): def test_can_convert_voc_to_yolo(self): target_dataset = Dataset.from_iterable([ DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), annotations=[ - Bbox(8.0, 2.5, 4.0, 1.0, label=15), - Bbox(2.0, 1.0, 4.0, 1.0, label=8), - Bbox(11.0, 3.0, 4.0, 1.0, label=22) + Bbox(1.0, 2.0, 2.0, 2.0, label=8), + Bbox(4.0, 5.0, 2.0, 2.0, label=15), + Bbox(5.5, 6, 2, 2, label=22), ] ) ], categories=[label.name for label in @@ -86,7 +88,8 @@ def test_can_convert_voc_to_yolo(self): '-f', 'yolo', '-o', yolo_dir, '--', '--save-images') parsed_dataset = Dataset.import_from(yolo_dir, format='yolo') - compare_datasets(self, target_dataset, parsed_dataset) + compare_datasets(self, target_dataset, parsed_dataset, + require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_ignore_non_supported_subsets(self): diff --git a/tests/requirements.py b/tests/requirements.py index 49ab421d62eb..d85f9a92a9e4 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -2,30 +2,10 @@ # # SPDX-License-Identifier: MIT -import pytest - - -def mark_requirement(requirement): - def wrapper(test_func): - @pytest.mark.components(DatumaroComponent.Datumaro) - @pytest.mark.component - 
@pytest.mark.priority_medium - @pytest.mark.reqids(requirement) - def test_wrapper(*args, **kwargs): - return test_func(*args, **kwargs) - return test_wrapper - return wrapper +import typing -def mark_bug(bugs): - def wrapper(test_func): - @pytest.mark.components(DatumaroComponent.Datumaro) - @pytest.mark.component - @pytest.mark.priority_medium - @pytest.mark.bugs(bugs) - def test_wrapper(*args, **kwargs): - return test_func(*args, **kwargs) - return test_wrapper - return wrapper +from attr import attrs +import pytest class DatumaroComponent: @@ -38,12 +18,49 @@ class Requirements: # GitHub issues (not bugs) # https://github.com/openvinotoolkit/datumaro/issues + DATUM_231 = "Readable formats for CJK" DATUM_244 = "Add Snyk integration" + DATUM_267 = "Add Image zip format" + DATUM_274 = "Support the Open Images dataset" + DATUM_280 = "Support KITTI dataset formats" + DATUM_283 = "Create cli tests for testing convert command for VOC format" # GitHub issues (bugs) # https://github.com/openvinotoolkit/datumaro/issues DATUM_BUG_219 = "Return format is not uniform" + DATUM_BUG_257 = "Dataset.filter doesn't count removed items" + DATUM_BUG_259 = "Dataset.filter fails on merged datasets" class SkipMessages: NOT_IMPLEMENTED = "NOT IMPLEMENTED" + + +@attrs(auto_attribs=True) +class _CombinedDecorator: + decorators: typing.List[typing.Callable] + + def __call__(self, function): + for d in reversed(self.decorators): + function = d(function) + + return function + + +_SHARED_DECORATORS = [ + pytest.mark.components(DatumaroComponent.Datumaro), + pytest.mark.component, + pytest.mark.priority_medium, +] + +def mark_requirement(requirement): + return _CombinedDecorator([ + *_SHARED_DECORATORS, + pytest.mark.reqids(requirement), + ]) + +def mark_bug(bugs): + return _CombinedDecorator([ + *_SHARED_DECORATORS, + pytest.mark.bugs(bugs), + ]) diff --git a/tests/test_RISE.py b/tests/test_RISE.py index b32cc0b9459e..0c31031c29fe 100644 --- a/tests/test_RISE.py +++ b/tests/test_RISE.py @@ 
-1,11 +1,12 @@ from collections import namedtuple -import numpy as np - from unittest import TestCase -from datumaro.components.extractor import Label, Bbox -from datumaro.components.launcher import Launcher +import numpy as np + from datumaro.components.algorithms.rise import RISE +from datumaro.components.extractor import Bbox, Label +from datumaro.components.launcher import Launcher + from .requirements import Requirements, mark_requirement @@ -231,4 +232,4 @@ def DISABLED_test_roi_nms(): cv2.putText(image, 'p%s-%s-%.2f' % (i, roi.label, roi.conf), p1, cv2.FONT_HERSHEY_SIMPLEX, 0.25, c) cv2.imshow('nms_image', image) - cv2.waitKey(0) \ No newline at end of file + cv2.waitKey(0) diff --git a/tests/test_camvid_format.py b/tests/test_camvid_format.py index 73df7b2b592f..05fc12630ead 100644 --- a/tests/test_camvid_format.py +++ b/tests/test_camvid_format.py @@ -1,17 +1,21 @@ -import os.path as osp from collections import OrderedDict from functools import partial from unittest import TestCase +import os.path as osp -import datumaro.plugins.camvid_format as Camvid import numpy as np -from datumaro.components.extractor import (AnnotationType, DatasetItem, - Extractor, LabelCategories, Mask) + from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Extractor, LabelCategories, Mask, +) from datumaro.plugins.camvid_format import CamvidConverter, CamvidImporter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) +import datumaro.plugins.camvid_format as Camvid + from .requirements import Requirements, mark_requirement @@ -310,4 +314,4 @@ def categories(self): self._test_save_and_load(SrcExtractor(), partial(CamvidConverter.convert, save_images=True), test_dir, require_images=True, - target_dataset=DstExtractor()) \ No newline at end of file + 
target_dataset=DstExtractor()) diff --git a/tests/test_cifar_format.py b/tests/test_cifar_format.py index 480d795954ca..48653979564e 100644 --- a/tests/test_cifar_format.py +++ b/tests/test_cifar_format.py @@ -1,13 +1,16 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np + from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (AnnotationType, DatasetItem, Label, - LabelCategories) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, +) from datumaro.plugins.cifar_format import CifarConverter, CifarImporter from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement @@ -125,6 +128,48 @@ def test_can_save_and_load_empty_image(self): compare_datasets(self, dataset, parsed_dataset, require_images=True) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_cifar100(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='image_2', subset='test', + image=np.ones((32, 32, 3)), + annotations=[Label(0)] + ), + DatasetItem(id='image_3', subset='test', + image=np.ones((32, 32, 3)) + ), + DatasetItem(id='image_4', subset='test', + image=np.ones((32, 32, 3)), + annotations=[Label(1)] + ) + ], categories=[['class_0', 'superclass_0'], ['class_1', 'superclass_0']]) + + with TestDir() as test_dir: + CifarConverter.convert(source_dataset, test_dir, save_images=True) + parsed_dataset = Dataset.import_from(test_dir, 'cifar') + + compare_datasets(self, source_dataset, parsed_dataset, + require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_cifar100_without_saving_images(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='a', subset='train_1', + annotations=[Label(0)] + ), + DatasetItem(id='b', subset='train_1', + annotations=[Label(1)] + ), + ], 
categories=[['class_0', 'superclass_0'], ['class_1', 'superclass_0']]) + + with TestDir() as test_dir: + CifarConverter.convert(source_dataset, test_dir, save_images=False) + parsed_dataset = Dataset.import_from(test_dir, 'cifar') + + compare_datasets(self, source_dataset, parsed_dataset, + require_images=True) + + DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'cifar_dataset') class CifarImporterTest(TestCase): @@ -146,12 +191,17 @@ def test_can_import(self): DatasetItem(id='image_4', subset='test', image=np.ones((32, 32, 3)), annotations=[Label(2)] + ), + DatasetItem(id='image_5', subset='test', + image=np.array([[[1., 2., 3.], [4., 5., 6.]], + [[1., 2., 3.], [4., 5., 6.]]]), + annotations=[Label(3)] ) ], categories=['airplane', 'automobile', 'bird', 'cat']) dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'cifar') - compare_datasets(self, expected_dataset, dataset) + compare_datasets(self, expected_dataset, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_detect(self): diff --git a/tests/test_cityscapes_format.py b/tests/test_cityscapes_format.py index fd23de9d76b5..db3838d04074 100644 --- a/tests/test_cityscapes_format.py +++ b/tests/test_cityscapes_format.py @@ -1,23 +1,30 @@ -import os.path as osp from collections import OrderedDict from functools import partial from unittest import TestCase +import os.path as osp -import datumaro.plugins.cityscapes_format as Cityscapes import numpy as np -from datumaro.components.extractor import (AnnotationType, DatasetItem, - Extractor, LabelCategories, Mask) + from datumaro.components.dataset import Dataset -from datumaro.plugins.cityscapes_format import (CityscapesImporter, - CityscapesConverter) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Extractor, LabelCategories, Mask, +) +from datumaro.plugins.cityscapes_format import ( + CityscapesConverter, CityscapesImporter, +) from datumaro.util.image import Image -from 
datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) +import datumaro.plugins.cityscapes_format as Cityscapes + +from .requirements import Requirements, mark_requirement DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'cityscapes_dataset') class CityscapesFormatTest(TestCase): + @mark_requirement(Requirements.DATUM_267) def test_can_write_and_parse_labelmap(self): src_label_map = Cityscapes.CityscapesLabelMap @@ -30,6 +37,7 @@ def test_can_write_and_parse_labelmap(self): self.assertEqual(src_label_map, dst_label_map) class CityscapesImportTest(TestCase): + @mark_requirement(Requirements.DATUM_267) def test_can_import(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='defaultcity/defaultcity_000001_000031', @@ -82,6 +90,7 @@ def test_can_import(self): compare_datasets(self, source_dataset, parsed_dataset) + @mark_requirement(Requirements.DATUM_267) def test_can_detect_cityscapes(self): self.assertTrue(CityscapesImporter.detect(DUMMY_DATASET_DIR)) @@ -100,6 +109,7 @@ def _test_save_and_load(self, source_dataset, converter, test_dir, importer='cityscapes', target_dataset=target_dataset, importer_args=importer_args, **kwargs) + @mark_requirement(Requirements.DATUM_267) def test_can_save_cityscapes_segm(self): class TestExtractor(TestExtractorBase): def __iter__(self): @@ -126,6 +136,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_can_save_cityscapes_segm_unpainted(self): class TestExtractor(TestExtractorBase): def __iter__(self): @@ -146,6 +157,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True, apply_colormap=False), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_can_save_cityscapes_dataset_with_no_subsets(self): class 
TestExtractor(TestExtractorBase): def __iter__(self): @@ -172,6 +184,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_can_save_cityscapes_dataset_without_frame_and_sequence(self): class TestExtractor(TestExtractorBase): def __iter__(self): @@ -189,6 +202,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): class TestExtractor(TestExtractorBase): def __iter__(self): @@ -207,6 +221,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_can_save_cityscapes_dataset_with_strange_id(self): class TestExtractor(TestExtractorBase): def __iter__(self): @@ -225,6 +240,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_can_save_with_no_masks(self): class TestExtractor(TestExtractorBase): def __iter__(self): @@ -239,6 +255,7 @@ def __iter__(self): partial(CityscapesConverter.convert, label_map='cityscapes', save_images=True), test_dir) + @mark_requirement(Requirements.DATUM_267) def test_dataset_with_source_labelmap_undefined(self): class SrcExtractor(TestExtractorBase): def __iter__(self): @@ -281,6 +298,7 @@ def categories(self): partial(CityscapesConverter.convert, label_map='source', save_images=True), test_dir, target_dataset=DstExtractor()) + @mark_requirement(Requirements.DATUM_267) def test_dataset_with_source_labelmap_defined(self): class SrcExtractor(TestExtractorBase): def __iter__(self): @@ -321,6 +339,7 @@ def categories(self): partial(CityscapesConverter.convert, label_map='source', save_images=True), test_dir, target_dataset=DstExtractor()) + 
@mark_requirement(Requirements.DATUM_267) def test_can_save_and_load_image_with_arbitrary_extension(self): class TestExtractor(TestExtractorBase): def __iter__(self): diff --git a/tests/test_coco_format.py b/tests/test_coco_format.py index c1b033d4f425..fc6af9b02410 100644 --- a/tests/test_coco_format.py +++ b/tests/test_coco_format.py @@ -1,29 +1,31 @@ from functools import partial -import numpy as np +from itertools import product +from unittest import TestCase import os import os.path as osp -from unittest import TestCase +import numpy as np from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption, - LabelCategories, PointsCategories +from datumaro.components.extractor import ( + AnnotationType, Bbox, Caption, DatasetItem, Label, LabelCategories, Mask, + Points, PointsCategories, Polygon, ) from datumaro.plugins.coco_format.converter import ( - CocoConverter, - CocoImageInfoConverter, - CocoCaptionsConverter, - CocoInstancesConverter, - CocoPersonKeypointsConverter, - CocoLabelsConverter, - CocoPanopticConverter, - CocoStuffConverter, + CocoCaptionsConverter, CocoConverter, CocoImageInfoConverter, + CocoInstancesConverter, CocoLabelsConverter, CocoPanopticConverter, + CocoPersonKeypointsConverter, CocoStuffConverter, +) +from datumaro.plugins.coco_format.importer import ( + CocoCaptionsImporter, CocoImageInfoImporter, CocoImporter, + CocoInstancesImporter, CocoLabelsImporter, CocoPanopticImporter, + CocoPersonKeypointsImporter, CocoStuffImporter, ) -from datumaro.plugins.coco_format.importer import CocoImporter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 
'coco_dataset') @@ -33,168 +35,305 @@ class CocoImporterTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import_instances(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id='000000000001', image=np.ones((10, 5, 3)), - subset='val', attributes={'id': 1}, + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}, + annotations=[ + Bbox(2, 2, 3, 1, label=1, + group=1, id=1, attributes={'is_crowd': False}) + ] + ), + + DatasetItem(id='b', subset='val', image=np.ones((10, 5, 3)), + attributes={'id': 40}, annotations=[ Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0, id=1, group=1, attributes={'is_crowd': False, 'x': 1, 'y': 'hello'}), - Mask(np.array( - [[1, 0, 0, 1, 0]] * 5 + - [[1, 1, 1, 1, 0]] * 5 - ), label=0, + Mask(np.array( [[1, 1, 0, 0, 0]] * 10 ), label=1, id=2, group=2, attributes={'is_crowd': True}), ] ), - ], categories=['TEST',]) - - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_instances'), 'coco') - - compare_datasets(self, expected_dataset, dataset) + ], categories=['a', 'b', 'c']) + + formats = ['coco', 'coco_instances'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_instances')), + ('train', osp.join(DUMMY_DATASET_DIR, 'coco_instances', + 'annotations', 'instances_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_instances', + 'annotations', 'instances_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import_captions(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id=1, subset='train', + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}, 
annotations=[ Caption('hello', id=1, group=1), - Caption('world', id=2, group=2), - ], attributes={'id': 1}), - DatasetItem(id=2, subset='train', - annotations=[ - Caption('test', id=3, group=3), - ], attributes={'id': 2}), + ]), - DatasetItem(id=3, subset='val', + DatasetItem(id='b', subset='val', image=np.ones((10, 5, 3)), + attributes={'id': 40}, annotations=[ - Caption('word', id=1, group=1), - ], attributes={'id': 1}), - ]) - - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_captions'), 'coco') + Caption('world', id=1, group=1), + Caption('text', id=2, group=2), + ]), + ]) - compare_datasets(self, expected_dataset, dataset) + formats = ['coco', 'coco_captions'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_captions')), + ('train', osp.join(DUMMY_DATASET_DIR, 'coco_captions', + 'annotations', 'captions_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_captions', + 'annotations', 'captions_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import_labels(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id=1, subset='train', + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}, annotations=[ Label(1, id=1, group=1), - Label(0, id=2, group=2), - ], attributes={'id': 1}), - ], categories=['a', 'b']) + ]), - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_labels'), 'coco') + DatasetItem(id='b', subset='val', image=np.ones((10, 5, 3)), + attributes={'id': 40}, + annotations=[ + Label(0, id=1, group=1), + Label(1, id=2, group=2), + ]), + ], categories=['a', 'b']) - compare_datasets(self, expected_dataset, 
dataset) + formats = ['coco', 'coco_labels'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_labels')), + ('train', osp.join(DUMMY_DATASET_DIR, 'coco_labels', + 'annotations', 'labels_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_labels', + 'annotations', 'labels_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_can_import_points(self): + def test_can_import_keypoints(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id=1, subset='train', - image=Image(path='1.jpg', size=(5, 5)), + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}, annotations=[ - Points([0, 0, 0, 2, 4, 1], [0, 1, 2], - label=1, group=1, id=1, - attributes={'is_crowd': False}), - Polygon([0, 0, 4, 0, 4, 4], - label=1, group=1, id=1, - attributes={'is_crowd': False}), - - Points([1, 2, 3, 4, 2, 3], - group=2, id=2, - attributes={'is_crowd': False}), - Bbox(1, 2, 2, 2, - group=2, id=2, - attributes={'is_crowd': False}), - - Points([1, 2, 0, 2, 4, 1], - label=0, group=3, id=3, - attributes={'is_crowd': False}), - Bbox(0, 1, 4, 1, - label=0, group=3, id=3, - attributes={'is_crowd': False}), - - Points([0, 0, 1, 2, 3, 4], [0, 1, 2], - group=5, id=5, - attributes={'is_crowd': False}), - Bbox(1, 2, 2, 2, - group=5, id=5, - attributes={'is_crowd': False}), - ], attributes={'id': 1}), - ], categories={ - AnnotationType.label: LabelCategories.from_iterable(['a', 'b']), - AnnotationType.points: PointsCategories.from_iterable( - (i, None, [[0, 1], [1, 2]]) for i in range(2) - ), - }) + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], label=1, + id=1, group=1, attributes={'is_crowd': False}), + Bbox(2, 
2, 3, 1, label=1, + id=1, group=1, attributes={'is_crowd': False}), + ]), - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_person_keypoints'), 'coco') + DatasetItem(id='b', subset='val', image=np.ones((10, 5, 3)), + attributes={'id': 40}, + annotations=[ + Points([1, 2, 3, 4, 2, 3], label=0, + id=1, group=1, attributes={'is_crowd': False, + 'x': 1, 'y': 'hello'}), + Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0, + id=1, group=1, attributes={'is_crowd': False, + 'x': 1, 'y': 'hello'}), - compare_datasets(self, expected_dataset, dataset) + Points([2, 4, 4, 4, 4, 2], label=1, + id=2, group=2, attributes={'is_crowd': True}), + Mask(np.array( [[1, 1, 0, 0, 0]] * 10 ), label=1, + id=2, group=2, attributes={'is_crowd': True}), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable(['a', 'b']), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(2) + ), + }) + + formats = ['coco', 'coco_person_keypoints'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_person_keypoints')), + ('train', osp.join(DUMMY_DATASET_DIR, 'coco_person_keypoints', + 'annotations', 'person_keypoints_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_person_keypoints', + 'annotations', 'person_keypoints_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import_image_info(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), - attributes={'id': 1}), + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}), + DatasetItem(id='b', subset='val', image=np.ones((10, 
5, 3)), + attributes={'id': 40}) ]) - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_image_info'), 'coco') - - compare_datasets(self, expected_dataset, dataset) + formats = ['coco', 'coco_image_info'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_image_info')), + ('train', osp.join(DUMMY_DATASET_DIR, 'coco_image_info', + 'annotations', 'image_info_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_image_info', + 'annotations', 'image_info_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import_panoptic(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id='000000000001', - image=np.ones((1, 5, 3)), - subset='val', + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}, + annotations=[ + Mask(np.ones((5, 5)), label=0, id=460551, + group=460551, attributes={'is_crowd': False}), + ]), + + DatasetItem(id='b', subset='val', image=np.ones((10, 5, 3)), attributes={'id': 40}, annotations=[ - Mask(image=np.array([[0, 0, 1, 1, 0]]), label=3, + Mask(np.array( [[1, 1, 0, 0, 0]] * 10 ), label=0, id=7, group=7, attributes={'is_crowd': False}), - Mask(image=np.array([[0, 1, 0, 0, 1]]), label=1, + Mask(np.array( [[0, 0, 1, 1, 0]] * 10 ), label=1, id=20, group=20, attributes={'is_crowd': True}), - ] - ), - ], categories=['a', 'b', 'c', 'd']) - - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_panoptic'), 'coco') + ]), + ], categories=['a', 'b']) - compare_datasets(self, expected_dataset, dataset, require_images=True) + formats = ['coco', 'coco_panoptic'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_panoptic')), + 
('train', osp.join(DUMMY_DATASET_DIR, 'coco_panoptic', + 'annotations', 'panoptic_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_panoptic', + 'annotations', 'panoptic_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import_stuff(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id='000000000001', image=np.ones((10, 5, 3)), - subset='val', attributes={'id': 1}, + DatasetItem(id='a', subset='train', image=np.ones((5, 10, 3)), + attributes={'id': 5}, annotations=[ Mask(np.array( - [[1, 0, 0, 1, 0]] * 5 + - [[1, 1, 1, 1, 0]] * 5 + [[0, 0, 1, 1, 0, 1, 1, 0, 0, 0]] * 5 ), label=0, - id=2, group=2, attributes={'is_crowd': False}), - ] - ), - ], categories=['TEST',]) + id=7, group=7, attributes={'is_crowd': False}), + ]), - dataset = Dataset.import_from( - osp.join(DUMMY_DATASET_DIR, 'coco_stuff'), 'coco') + DatasetItem(id='b', subset='val', image=np.ones((10, 5, 3)), + attributes={'id': 40}, + annotations=[ + Mask(np.array( [[1, 1, 0, 0, 0]] * 10 ), label=1, + id=2, group=2, attributes={'is_crowd': False}), + ]), + ], categories=['a', 'b']) - compare_datasets(self, expected_dataset, dataset) + formats = ['coco', 'coco_stuff'] + paths = [ + ('', osp.join(DUMMY_DATASET_DIR, 'coco_stuff')), + ('train', osp.join(DUMMY_DATASET_DIR, 'coco_stuff', + 'annotations', 'stuff_train.json')), + ('val', osp.join(DUMMY_DATASET_DIR, 'coco_stuff', + 'annotations', 'stuff_val.json')), + ] + for format, (subset, path) in product(formats, paths): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + with self.subTest(path=path, format=format, subset=subset): + 
dataset = Dataset.import_from(path, format) + compare_datasets(self, expected, dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_detect(self): - self.assertTrue(CocoImporter.detect( - osp.join(DUMMY_DATASET_DIR, 'coco_instances'))) + dataset_dir = osp.join(DUMMY_DATASET_DIR, 'coco') + matrix = [ + # Whole dataset + (dataset_dir, CocoImporter), + + # Subformats + (dataset_dir, CocoLabelsImporter), + (dataset_dir, CocoInstancesImporter), + (dataset_dir, CocoPanopticImporter), + (dataset_dir, CocoStuffImporter), + (dataset_dir, CocoCaptionsImporter), + (dataset_dir, CocoImageInfoImporter), + (dataset_dir, CocoPersonKeypointsImporter), + + # Subsets of subformats + (osp.join(dataset_dir, 'annotations', 'labels_train.json'), + CocoLabelsImporter), + (osp.join(dataset_dir, 'annotations', 'instances_train.json'), + CocoInstancesImporter), + (osp.join(dataset_dir, 'annotations', 'panoptic_train.json'), + CocoPanopticImporter), + (osp.join(dataset_dir, 'annotations', 'stuff_train.json'), + CocoStuffImporter), + (osp.join(dataset_dir, 'annotations', 'captions_train.json'), + CocoCaptionsImporter), + (osp.join(dataset_dir, 'annotations', 'image_info_train.json'), + CocoImageInfoImporter), + (osp.join(dataset_dir, 'annotations', 'person_keypoints_train.json'), + CocoPersonKeypointsImporter), + ] + + for path, subtask in matrix: + with self.subTest(path=path, task=subtask): + self.assertTrue(subtask.detect(path)) class CocoConverterTest(TestCase): def _test_save_and_load(self, source_dataset, converter, test_dir, @@ -575,6 +714,36 @@ def test_can_save_and_load_images(self): self._test_save_and_load(expected_dataset, CocoImageInfoConverter.convert, test_dir) + @mark_requirement(Requirements.DATUM_231) + def test_can_save_dataset_with_cjk_categories(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Bbox(0, 1, 2, 2, + label=0, group=1, id=1, + attributes={ 
'is_crowd': False }), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Bbox(1, 0, 2, 2, label=1, group=2, id=2, + attributes={ 'is_crowd': False }), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Bbox(0, 1, 2, 2, label=2, group=3, id=3, + attributes={ 'is_crowd': False }), + ], attributes={'id': 3}), + ], + categories=[ + "고양이", "ネコ", "猫" + ] + ) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CocoInstancesConverter.convert, test_dir) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): expected_dataset = Dataset.from_iterable([ diff --git a/tests/test_command_targets.py b/tests/test_command_targets.py index f5241f1889a3..81c7c2c37417 100644 --- a/tests/test_command_targets.py +++ b/tests/test_command_targets.py @@ -1,13 +1,15 @@ -import numpy as np +from unittest import TestCase import os.path as osp -from unittest import TestCase +import numpy as np from datumaro.components.project import Project -from datumaro.util.command_targets import ProjectTarget, \ - ImageTarget, SourceTarget +from datumaro.util.command_targets import ( + ImageTarget, ProjectTarget, SourceTarget, +) from datumaro.util.image import save_image from datumaro.util.test_utils import TestDir + from .requirements import Requirements, mark_requirement @@ -138,4 +140,4 @@ def test_source_false_when_source_doesnt_exist(self): status = target.test(source_name + '123') - self.assertFalse(status) \ No newline at end of file + self.assertFalse(status) diff --git a/tests/test_config.py b/tests/test_config.py index 2fee6b237961..a62d55fca2a9 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,7 @@ from unittest import TestCase from datumaro.components.config import Config, DictConfig, SchemaBuilder + from .requirements import Requirements, mark_requirement diff --git 
a/tests/test_cvat_format.py b/tests/test_cvat_format.py index d95ed26304c2..90b599f7c249 100644 --- a/tests/test_cvat_format.py +++ b/tests/test_cvat_format.py @@ -1,19 +1,22 @@ from functools import partial +from unittest import TestCase import os import os.path as osp import numpy as np -from unittest import TestCase -from datumaro.components.project import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Points, Polygon, PolyLine, Bbox, Label, - LabelCategories, + +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Label, LabelCategories, Points, Polygon, + PolyLine, ) -from datumaro.plugins.cvat_format.extractor import CvatImporter +from datumaro.components.project import Dataset from datumaro.plugins.cvat_format.converter import CvatConverter +from datumaro.plugins.cvat_format.extractor import CvatImporter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement DUMMY_IMAGE_DATASET_DIR = osp.join(osp.dirname(__file__), @@ -392,4 +395,4 @@ def test_inplace_save_writes_only_updated_data(self): self.assertFalse(osp.isfile(osp.join(path, 'b.xml'))) self.assertTrue(osp.isfile(osp.join(path, 'c.xml'))) self.assertTrue(osp.isfile(osp.join(path, 'images', '2.jpg'))) - self.assertFalse(osp.isfile(osp.join(path, 'images', '3.jpg'))) \ No newline at end of file + self.assertFalse(osp.isfile(osp.join(path, 'images', '3.jpg'))) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index f8f7f0a0852c..e594a87f260e 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -1,20 +1,26 @@ -import numpy as np +from unittest import TestCase import os import os.path as osp -from unittest import TestCase +import numpy as np +from datumaro.components.dataset import ( + DEFAULT_FORMAT, 
Dataset, ItemStatus, eager_mode, +) from datumaro.components.dataset_filter import ( - XPathDatasetFilter, XPathAnnotationsFilter, DatasetItemEncoder) -from datumaro.components.dataset import (Dataset, DEFAULT_FORMAT, ItemStatus, - eager_mode) + DatasetItemEncoder, XPathAnnotationsFilter, XPathDatasetFilter, +) from datumaro.components.environment import Environment from datumaro.components.errors import DatumaroError, RepeatedItemError -from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, Extractor, - DatasetItem, Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, - LabelCategories, AnnotationType, Transform) +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, Bbox, Caption, DatasetItem, Extractor, + ItemTransform, Label, LabelCategories, Mask, Points, Polygon, PolyLine, + Transform, +) +from datumaro.components.launcher import Launcher from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement @@ -176,27 +182,76 @@ def test_can_export_by_string_format_name(self): with TestDir() as test_dir: dataset.export(format='qq', save_dir=test_dir) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_compute_length_when_created_from_scratch(self): + dataset = Dataset() + + dataset.put(DatasetItem(1)) + dataset.put(DatasetItem(2)) + dataset.put(DatasetItem(3)) + dataset.remove(1) + + self.assertEqual(2, len(dataset)) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_compute_length_when_created_from_extractor(self): + class TestExtractor(Extractor): + def __iter__(self): + yield from [ + DatasetItem(1), + DatasetItem(2), + DatasetItem(3), + ] + + dataset = Dataset.from_extractors(TestExtractor()) + + self.assertEqual(3, len(dataset)) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_compute_length_when_created_from_sequence(self): + dataset = Dataset.from_iterable([ + 
DatasetItem(1), + DatasetItem(2), + DatasetItem(3), + ]) + + self.assertEqual(3, len(dataset)) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_transform_by_string_name(self): expected = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ], attributes={'qq': 1}), - ], categories=['a', 'b', 'c']) + DatasetItem(id=1, attributes={'qq': 1}), + ]) - class TestTransform(Transform): + class TestTransform(ItemTransform): def transform_item(self, item): return self.wrap_item(item, attributes={'qq': 1}) env = Environment() - env.transforms.items = {'qq': TestTransform} + env.transforms.register('qq', TestTransform) - dataset = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ]), - ], categories=['a', 'b', 'c'], env=env) + dataset = Dataset.from_iterable([ DatasetItem(id=1) ], env=env) actual = dataset.transform('qq') compare_datasets(self, expected, actual) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_transform(self): + expected = Dataset.from_iterable([ + DatasetItem(id=1, attributes={'qq': 1}), + ]) + + class TestTransform(ItemTransform): + def transform_item(self, item): + return self.wrap_item(item, attributes={'qq': 1}) + + dataset = Dataset.from_iterable([ DatasetItem(id=1) ]) + + actual = dataset.transform(TestTransform) + + compare_datasets(self, expected, actual) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_join_annotations(self): a = Dataset.from_iterable([ @@ -315,8 +370,8 @@ def test_can_create_patch(self): self.assertEqual({ ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, - ('2', DEFAULT_SUBSET_NAME): ItemStatus.modified, - ('3', 'a'): ItemStatus.modified, + ('2', DEFAULT_SUBSET_NAME): ItemStatus.added, + ('3', 'a'): ItemStatus.added, }, patch.updated_items) self.assertEqual({ @@ -332,7 +387,7 @@ def test_can_create_patch(self): compare_datasets(self, expected, dataset) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def 
test_can_create_more_precise_patch_when_cached(self): + def test_can_create_patch_when_cached(self): expected = Dataset.from_iterable([ DatasetItem(2), DatasetItem(3, subset='a') @@ -349,6 +404,116 @@ def test_can_create_more_precise_patch_when_cached(self): patch = dataset.patch + self.assertEqual({ + ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, + + # Item was not changed from the original one. + # TODO: add item comparison and remove this line + ('2', DEFAULT_SUBSET_NAME): ItemStatus.modified, + + ('3', 'a'): ItemStatus.added, + }, patch.updated_items) + + self.assertEqual({ + 'default': ItemStatus.modified, + 'a': ItemStatus.modified, + }, patch.updated_subsets) + + self.assertEqual(2, len(patch.data)) + self.assertEqual(None, patch.data.get(1)) + self.assertEqual(dataset.get(2), patch.data.get(2)) + self.assertEqual(dataset.get(3, 'a'), patch.data.get(3, 'a')) + + compare_datasets(self, expected, dataset) + + @mark_requirement(Requirements.DATUM_BUG_257) + def test_can_create_patch_when_transforms_mixed(self): + expected = Dataset.from_iterable([ + DatasetItem(2), + DatasetItem(3, subset='a') + ]) + + dataset = Dataset.from_iterable([ + DatasetItem(1), + DatasetItem(2), + ]) + + class Remove1(Transform): + def __iter__(self): + for item in self._extractor: + if item.id != '1': + yield item + + class Add3(Transform): + def __iter__(self): + for item in self._extractor: + if item.id == '2': + yield item + yield DatasetItem(3, subset='a') + + dataset.transform(Remove1) + dataset.transform(Add3) + + patch = dataset.patch + + self.assertEqual({ + ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, + ('2', DEFAULT_SUBSET_NAME): ItemStatus.modified, + ('3', 'a'): ItemStatus.added, + }, patch.updated_items) + + self.assertEqual({ + 'default': ItemStatus.modified, + 'a': ItemStatus.modified, + }, patch.updated_subsets) + + self.assertEqual(2, len(patch.data)) + self.assertEqual(None, patch.data.get(1)) + self.assertEqual(dataset.get(2), patch.data.get(2)) + 
self.assertEqual(dataset.get(3, 'a'), patch.data.get(3, 'a')) + + compare_datasets(self, expected, dataset) + + @mark_requirement(Requirements.DATUM_BUG_257) + def test_can_create_patch_when_transforms_chained(self): + expected = Dataset.from_iterable([ + DatasetItem(2), + DatasetItem(3, subset='a') + ]) + + class TestExtractor(Extractor): + iter_called = 0 + def __iter__(self): + yield from [ + DatasetItem(1), + DatasetItem(2), + ] + + __class__.iter_called += 1 + + class Remove1(Transform): + iter_called = 0 + def __iter__(self): + for item in self._extractor: + if item.id != '1': + yield item + + __class__.iter_called += 1 + + class Add3(Transform): + iter_called = 0 + def __iter__(self): + yield from self._extractor + yield DatasetItem(3, subset='a') + + __class__.iter_called += 1 + + dataset = Dataset.from_extractors(TestExtractor()) + dataset.transform(Remove1) + dataset.transform(Add3) + + patch = dataset.patch + self.assertEqual({ ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, ('2', DEFAULT_SUBSET_NAME): ItemStatus.modified, @@ -365,6 +530,198 @@ def test_can_create_more_precise_patch_when_cached(self): self.assertEqual(dataset.get(2), patch.data.get(2)) self.assertEqual(dataset.get(3, 'a'), patch.data.get(3, 'a')) + self.assertEqual(TestExtractor.iter_called, 2) # 1 for items, 1 for list + self.assertEqual(Remove1.iter_called, 1) + self.assertEqual(Add3.iter_called, 1) + + compare_datasets(self, expected, dataset) + + @mark_requirement(Requirements.DATUM_BUG_257) + def test_can_create_patch_when_transforms_intermixed_with_direct_ops(self): + expected = Dataset.from_iterable([ + DatasetItem(3, subset='a'), + DatasetItem(4), + DatasetItem(5), + ]) + + class TestExtractor(Extractor): + iter_called = 0 + def __iter__(self): + yield from [ + DatasetItem(1), + DatasetItem(2), + ] + + __class__.iter_called += 1 + + class Remove1(Transform): + iter_called = 0 + def __iter__(self): + for item in self._extractor: + if item.id != '1': + yield item + + 
__class__.iter_called += 1 + + class Add3(Transform): + iter_called = 0 + def __iter__(self): + yield from self._extractor + yield DatasetItem(3, subset='a') + + __class__.iter_called += 1 + + dataset = Dataset.from_extractors(TestExtractor()) + dataset.init_cache() + dataset.put(DatasetItem(4)) + dataset.transform(Remove1) + dataset.put(DatasetItem(5)) + dataset.remove(2) + dataset.transform(Add3) + + patch = dataset.patch + + self.assertEqual({ + ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, + ('2', DEFAULT_SUBSET_NAME): ItemStatus.removed, + ('3', 'a'): ItemStatus.added, + ('4', DEFAULT_SUBSET_NAME): ItemStatus.added, + ('5', DEFAULT_SUBSET_NAME): ItemStatus.added, + }, patch.updated_items) + + self.assertEqual({ + 'default': ItemStatus.modified, + 'a': ItemStatus.modified, + }, patch.updated_subsets) + + self.assertEqual(3, len(patch.data)) + + self.assertEqual(None, patch.data.get(1)) + self.assertEqual(None, patch.data.get(2)) + self.assertEqual(dataset.get(3, 'a'), patch.data.get(3, 'a')) + self.assertEqual(dataset.get(4), patch.data.get(4)) + self.assertEqual(dataset.get(5), patch.data.get(5)) + + self.assertEqual(TestExtractor.iter_called, 1) + self.assertEqual(Remove1.iter_called, 1) + self.assertEqual(Add3.iter_called, 1) + + compare_datasets(self, expected, dataset) + + @mark_requirement(Requirements.DATUM_BUG_257) + def test_can_create_patch_when_local_transforms_stacked(self): + expected = Dataset.from_iterable([ + DatasetItem(4), + DatasetItem(5), + ]) + + class TestExtractor(Extractor): + iter_called = 0 + def __iter__(self): + yield from [ + DatasetItem(1), + DatasetItem(2), + ] + + __class__.iter_called += 1 + + class ShiftIds(ItemTransform): + def transform_item(self, item): + return item.wrap(id=int(item.id) + 1) + + dataset = Dataset.from_extractors(TestExtractor()) + dataset.remove(2) + dataset.transform(ShiftIds) + dataset.transform(ShiftIds) + dataset.transform(ShiftIds) + dataset.put(DatasetItem(5)) + + patch = dataset.patch + + 
self.assertEqual({ + ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, + ('2', DEFAULT_SUBSET_NAME): ItemStatus.removed, + ('4', DEFAULT_SUBSET_NAME): ItemStatus.added, + ('5', DEFAULT_SUBSET_NAME): ItemStatus.added, + }, patch.updated_items) + + self.assertEqual({ + 'default': ItemStatus.modified, + }, patch.updated_subsets) + + self.assertEqual(2, len(patch.data)) + + self.assertEqual(None, patch.data.get(1)) + self.assertEqual(None, patch.data.get(2)) + self.assertEqual(None, patch.data.get(3)) + self.assertEqual(dataset.get(4), patch.data.get(4)) + self.assertEqual(dataset.get(5), patch.data.get(5)) + + self.assertEqual(TestExtractor.iter_called, 1) + + compare_datasets(self, expected, dataset) + + @mark_requirement(Requirements.DATUM_BUG_257) + def test_can_create_patch_when_transforms_chained_and_source_cached(self): + expected = Dataset.from_iterable([ + DatasetItem(2), + DatasetItem(3, subset='a') + ]) + + class TestExtractor(Extractor): + iter_called = 0 + def __iter__(self): + yield from [ + DatasetItem(1), + DatasetItem(2), + ] + + __class__.iter_called += 1 + + class Remove1(Transform): + iter_called = 0 + def __iter__(self): + for item in self._extractor: + if item.id != '1': + yield item + + __class__.iter_called += 1 + + class Add3(Transform): + iter_called = 0 + def __iter__(self): + yield from self._extractor + yield DatasetItem(3, subset='a') + + __class__.iter_called += 1 + + dataset = Dataset.from_extractors(TestExtractor()) + dataset.init_cache() + dataset.transform(Remove1) + dataset.transform(Add3) + + patch = dataset.patch + + self.assertEqual({ + ('1', DEFAULT_SUBSET_NAME): ItemStatus.removed, + ('2', DEFAULT_SUBSET_NAME): ItemStatus.modified, # TODO: remove this + ('3', 'a'): ItemStatus.added, + }, patch.updated_items) + + self.assertEqual({ + 'default': ItemStatus.modified, + 'a': ItemStatus.modified, + }, patch.updated_subsets) + + self.assertEqual(2, len(patch.data)) + self.assertEqual(None, patch.data.get(1)) + 
self.assertEqual(dataset.get(2), patch.data.get(2)) + self.assertEqual(dataset.get(3, 'a'), patch.data.get(3, 'a')) + + self.assertEqual(TestExtractor.iter_called, 1) # 1 for items and list + self.assertEqual(Remove1.iter_called, 1) + self.assertEqual(Add3.iter_called, 1) + compare_datasets(self, expected, dataset) @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -443,12 +800,12 @@ class TestExtractor(Extractor): def __iter__(self): nonlocal iter_called iter_called = True - return iter([ + yield from [ DatasetItem(1), DatasetItem(2), DatasetItem(3), DatasetItem(4), - ]) + ] dataset = Dataset.from_extractors(TestExtractor()) with eager_mode(dataset=dataset): @@ -459,56 +816,170 @@ def __iter__(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_do_lazy_select(self): - iter_called = False + iter_called = 0 class TestExtractor(Extractor): def __iter__(self): nonlocal iter_called - iter_called = True - return iter([ + iter_called += 1 + yield from [ DatasetItem(1), DatasetItem(2), DatasetItem(3), DatasetItem(4), - ]) + ] dataset = Dataset.from_extractors(TestExtractor()) dataset.select(lambda item: int(item.id) < 3) dataset.select(lambda item: int(item.id) < 2) - self.assertFalse(iter_called) + self.assertEqual(iter_called, 0) self.assertEqual(1, len(dataset)) - self.assertTrue(iter_called) + self.assertEqual(iter_called, 1) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_chain_lazy_transforms(self): - iter_called = False + iter_called = 0 class TestExtractor(Extractor): def __iter__(self): nonlocal iter_called - iter_called = True - return iter([ + iter_called += 1 + yield from [ DatasetItem(1), DatasetItem(2), DatasetItem(3), DatasetItem(4), - ]) + ] dataset = Dataset.from_extractors(TestExtractor()) - class TestTransform(Transform): + class TestTransform(ItemTransform): def transform_item(self, item): return self.wrap_item(item, id=int(item.id) + 1) dataset.transform(TestTransform) dataset.transform(TestTransform) - 
self.assertFalse(iter_called) + self.assertEqual(iter_called, 0) self.assertEqual(4, len(dataset)) self.assertEqual(3, int(min(int(item.id) for item in dataset))) - self.assertTrue(iter_called) + self.assertEqual(iter_called, 1) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_get_len_after_local_transforms(self): + iter_called = 0 + class TestExtractor(Extractor): + def __iter__(self): + nonlocal iter_called + iter_called += 1 + yield from [ + DatasetItem(1), + DatasetItem(2), + DatasetItem(3), + DatasetItem(4), + ] + dataset = Dataset.from_extractors(TestExtractor()) + + class TestTransform(ItemTransform): + def transform_item(self, item): + return self.wrap_item(item, id=int(item.id) + 1) + + dataset.transform(TestTransform) + dataset.transform(TestTransform) + + self.assertEqual(iter_called, 0) + + self.assertEqual(4, len(dataset)) + + self.assertEqual(iter_called, 1) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_get_len_after_nonlocal_transforms(self): + iter_called = 0 + class TestExtractor(Extractor): + def __iter__(self): + nonlocal iter_called + iter_called += 1 + yield from [ + DatasetItem(1), + DatasetItem(2), + DatasetItem(3), + DatasetItem(4), + ] + dataset = Dataset.from_extractors(TestExtractor()) + + class TestTransform(Transform): + def __iter__(self): + for item in self._extractor: + yield self.wrap_item(item, id=int(item.id) + 1) + + dataset.transform(TestTransform) + dataset.transform(TestTransform) + + self.assertEqual(iter_called, 0) + + self.assertEqual(4, len(dataset)) + + self.assertEqual(iter_called, 2) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_get_subsets_after_local_transforms(self): + iter_called = 0 + class TestExtractor(Extractor): + def __iter__(self): + nonlocal iter_called + iter_called += 1 + yield from [ + DatasetItem(1), + DatasetItem(2), + DatasetItem(3), + DatasetItem(4), + ] + dataset = Dataset.from_extractors(TestExtractor()) + + class 
TestTransform(ItemTransform): + def transform_item(self, item): + return self.wrap_item(item, id=int(item.id) + 1, subset='a') + + dataset.transform(TestTransform) + dataset.transform(TestTransform) + + self.assertEqual(iter_called, 0) + + self.assertEqual({'a'}, set(dataset.subsets())) + + self.assertEqual(iter_called, 1) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_get_subsets_after_nonlocal_transforms(self): + iter_called = 0 + class TestExtractor(Extractor): + def __iter__(self): + nonlocal iter_called + iter_called += 1 + yield from [ + DatasetItem(1), + DatasetItem(2), + DatasetItem(3), + DatasetItem(4), + ] + dataset = Dataset.from_extractors(TestExtractor()) + + class TestTransform(Transform): + def __iter__(self): + for item in self._extractor: + yield self.wrap_item(item, id=int(item.id) + 1, subset='a') + + dataset.transform(TestTransform) + dataset.transform(TestTransform) + + self.assertEqual(iter_called, 0) + + self.assertEqual({'a'}, set(dataset.subsets())) + + self.assertEqual(iter_called, 2) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_raises_when_repeated_items_in_source(self): @@ -641,6 +1112,116 @@ def test_loader(): self.assertFalse(called) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_transform_labels(self): + expected = Dataset.from_iterable([], categories=['c', 'b']) + dataset = Dataset.from_iterable([], categories=['a', 'b']) + + actual = dataset.transform('remap_labels', {'a': 'c'}) + + compare_datasets(self, expected, actual) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_run_model(self): + dataset = Dataset.from_iterable([ + DatasetItem(i, image=np.array([i])) + for i in range(5) + ], categories=['label']) + + batch_size = 3 + + expected = Dataset.from_iterable([ + DatasetItem(i, image=np.array([i]), annotations=[ + Label(0, attributes={ 'idx': i % batch_size, 'data': i }) + ]) + for i in range(5) + ], categories=['label']) + + calls = 0 + + class 
TestLauncher(Launcher): + def launch(self, inputs): + nonlocal calls + calls += 1 + + for i, inp in enumerate(inputs): + yield [ Label(0, attributes={'idx': i, 'data': inp.item()}) ] + + model = TestLauncher() + + actual = dataset.run_model(model, batch_size=batch_size) + + compare_datasets(self, expected, actual, require_images=True) + self.assertEqual(2, calls) + + @mark_requirement(Requirements.DATUM_BUG_259) + def test_can_filter_items(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='train'), + DatasetItem(id=1, subset='test'), + ]) + + dataset.filter('/item[id > 0]') + + self.assertEqual(1, len(dataset)) + + @mark_requirement(Requirements.DATUM_BUG_257) + def test_filter_registers_changes(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='train'), + DatasetItem(id=1, subset='test'), + ]) + + dataset.filter('/item[id > 0]') + + self.assertEqual({ + ('0', 'train'): ItemStatus.removed, + ('1', 'test'): ItemStatus.modified, # TODO: remove this line + }, dataset.patch.updated_items) + + @mark_requirement(Requirements.DATUM_BUG_259) + def test_can_filter_annotations(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='train', annotations=[Label(0), Label(1)]), + DatasetItem(id=1, subset='val', annotations=[Label(2)]), + DatasetItem(id=2, subset='test', annotations=[Label(0), Label(2)]), + ], categories=['a', 'b', 'c']) + + dataset.filter('/item/annotation[label = "c"]', + filter_annotations=True, remove_empty=True) + + self.assertEqual(2, len(dataset)) + + @mark_requirement(Requirements.DATUM_BUG_259) + def test_can_filter_items_in_merged_dataset(self): + dataset = Dataset.from_extractors( + Dataset.from_iterable([ DatasetItem(id=0, subset='train') ]), + Dataset.from_iterable([ DatasetItem(id=1, subset='test') ]), + ) + + dataset.filter('/item[id > 0]') + + self.assertEqual(1, len(dataset)) + + @mark_requirement(Requirements.DATUM_BUG_259) + def test_can_filter_annotations_in_merged_dataset(self): + dataset 
= Dataset.from_extractors( + Dataset.from_iterable([ + DatasetItem(id=0, subset='train', annotations=[Label(0)]), + ], categories=['a', 'b', 'c']), + Dataset.from_iterable([ + DatasetItem(id=1, subset='val', annotations=[Label(1)]), + ], categories=['a', 'b', 'c']), + Dataset.from_iterable([ + DatasetItem(id=2, subset='test', annotations=[Label(2)]), + ], categories=['a', 'b', 'c']), + ) + + dataset.filter('/item/annotation[label = "c"]', + filter_annotations=True, remove_empty=True) + + self.assertEqual(1, len(dataset)) + class DatasetItemTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) diff --git a/tests/test_datumaro_format.py b/tests/test_datumaro_format.py index ae5a88781fad..c4403216f03f 100644 --- a/tests/test_datumaro_format.py +++ b/tests/test_datumaro_format.py @@ -1,31 +1,35 @@ from functools import partial +from unittest import TestCase import os import os.path as osp import numpy as np -from unittest import TestCase -from datumaro.components.project import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Label, Mask, Points, Polygon, - PolyLine, Bbox, Caption, - LabelCategories, MaskCategories, PointsCategories + +from datumaro.components.extractor import ( + AnnotationType, Bbox, Caption, Cuboid3d, DatasetItem, Label, + LabelCategories, Mask, MaskCategories, Points, PointsCategories, Polygon, + PolyLine, ) -from datumaro.plugins.datumaro_format.extractor import DatumaroImporter +from datumaro.components.project import Dataset from datumaro.plugins.datumaro_format.converter import DatumaroConverter -from datumaro.util.mask_tools import generate_colormap +from datumaro.plugins.datumaro_format.extractor import DatumaroImporter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets_strict, - test_save_and_load) +from datumaro.util.mask_tools import generate_colormap +from datumaro.util.test_utils import ( + Dimensions, TestDir, compare_datasets_strict, 
test_save_and_load, +) + from .requirements import Requirements, mark_requirement class DatumaroConverterTest(TestCase): def _test_save_and_load(self, source_dataset, converter, test_dir, - target_dataset=None, importer_args=None): + target_dataset=None, importer_args=None, + compare=compare_datasets_strict, **kwargs): return test_save_and_load(self, source_dataset, converter, test_dir, importer='datumaro', target_dataset=target_dataset, importer_args=importer_args, - compare=compare_datasets_strict) + compare=compare, **kwargs) @property @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -75,6 +79,13 @@ def test_dataset(self): Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), ]), + DatasetItem(id=1, subset='test', + annotations=[ + Cuboid3d([1.0, 2.0, 3.0], [2.0, 2.0, 4.0], [1.0, 3.0, 4.0], + id=6, label=0, attributes={'occluded': True}, group=6 + ) + ]), + DatasetItem(id=42, subset='test', attributes={'a1': 5, 'a2': '42'}), @@ -111,6 +122,37 @@ def test_relative_paths(self): self._test_save_and_load(test_dataset, partial(DatumaroConverter.convert, save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_231) + def test_can_save_dataset_with_cjk_categories(self): + expected = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Bbox(0, 1, 2, 2, + label=0, group=1, id=1, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Bbox(1, 0, 2, 2, label=1, group=2, id=2, + attributes={ 'is_crowd': False }), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Bbox(0, 1, 2, 2, label=2, group=3, id=3, + attributes={ 'is_crowd': False }), + ], attributes={'id': 3}), + ], + categories=[ + "고양이", "ネコ", "猫" + ] + ) + + with TestDir() as test_dir: + self._test_save_and_load(expected, + partial(DatumaroConverter.convert, save_images=True), test_dir) + 
@mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): test_dataset = Dataset.from_iterable([ @@ -160,4 +202,45 @@ def test_inplace_save_writes_only_updated_data(self): self.assertFalse(osp.isfile(osp.join(path, 'annotations', 'b.json'))) self.assertTrue(osp.isfile(osp.join(path, 'annotations', 'c.json'))) self.assertTrue(osp.isfile(osp.join(path, 'images', '2.jpg'))) - self.assertFalse(osp.isfile(osp.join(path, 'images', '3.jpg'))) \ No newline at end of file + self.assertFalse(osp.isfile(osp.join(path, 'images', '3.jpg'))) + + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_with_pointcloud(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='test', point_cloud='1.pcd', + related_images= [ + Image(data=np.ones((5, 5, 3)), path='1/a.jpg'), + Image(data=np.ones((5, 4, 3)), path='1/b.jpg'), + Image(size=(5, 3), path='1/c.jpg'), + '1/d.jpg', + ], + annotations=[ + Cuboid3d([2, 2, 2], [1, 1, 1], [3, 3, 1], + id=1, group=1, label=0, attributes={'x': True} + ) + ]), + ], categories=['label']) + + with TestDir() as test_dir: + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='test', + point_cloud=osp.join(test_dir, 'point_clouds', '1.pcd'), + related_images= [ + Image(data=np.ones((5, 5, 3)), path=osp.join( + test_dir, 'related_images', '1/a.jpg')), + Image(data=np.ones((5, 4, 3)), path=osp.join( + test_dir, 'related_images', '1/b.jpg')), + Image(size=(5, 3), path=osp.join( + test_dir, 'related_images', '1/c.jpg')), + osp.join(test_dir, 'related_images', '1/d.jpg'), + ], + annotations=[ + Cuboid3d([2, 2, 2], [1, 1, 1], [3, 3, 1], + id=1, group=1, label=0, attributes={'x': True} + ) + ]), + ], categories=['label']) + self._test_save_and_load(source_dataset, + partial(DatumaroConverter.convert, save_images=True), test_dir, + target_dataset, compare=None, dimension=Dimensions.dim_3d) diff --git a/tests/test_diff.py b/tests/test_diff.py 
index 83dfcde1b526..51f8aa32f923 100644 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -1,11 +1,13 @@ +from unittest import TestCase + import numpy as np -from datumaro.components.extractor import (DatasetItem, Label, Bbox, - Caption, Mask, Points, DEFAULT_SUBSET_NAME) -from datumaro.components.project import Dataset +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, Bbox, Caption, DatasetItem, Label, Mask, Points, +) from datumaro.components.operations import DistanceComparator, ExactComparator +from datumaro.components.project import Dataset -from unittest import TestCase from .requirements import Requirements, mark_requirement @@ -259,4 +261,4 @@ def test_image_comparison(self): self.assertEqual(5, len(unmatched_ann), unmatched_ann) self.assertEqual(1, len(a_unmatched), a_unmatched) self.assertEqual(1, len(b_unmatched), b_unmatched) - self.assertEqual(1, len(errors), errors) \ No newline at end of file + self.assertEqual(1, len(errors), errors) diff --git a/tests/test_icdar_format.py b/tests/test_icdar_format.py index 21bd8f0e0fd2..50a405107765 100644 --- a/tests/test_icdar_format.py +++ b/tests/test_icdar_format.py @@ -1,21 +1,26 @@ -import os.path as osp from functools import partial from unittest import TestCase +import os.path as osp import numpy as np -from datumaro.components.extractor import (Bbox, Caption, DatasetItem, Mask, - Polygon) +from datumaro.components.extractor import ( + Bbox, Caption, DatasetItem, Mask, Polygon, +) from datumaro.components.project import Dataset from datumaro.plugins.icdar_format.converter import ( IcdarTextLocalizationConverter, IcdarTextSegmentationConverter, - IcdarWordRecognitionConverter) + IcdarWordRecognitionConverter, +) from datumaro.plugins.icdar_format.extractor import ( - IcdarWordRecognitionImporter, IcdarTextLocalizationImporter, - IcdarTextSegmentationImporter) + IcdarTextLocalizationImporter, IcdarTextSegmentationImporter, + IcdarWordRecognitionImporter, +) from datumaro.util.image 
import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'icdar_dataset') diff --git a/tests/test_image.py b/tests/test_image.py index 1983b711d95c..7c18bec231f5 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -1,11 +1,12 @@ from itertools import product -import numpy as np +from unittest import TestCase import os.path as osp -from unittest import TestCase +import numpy as np -import datumaro.util.image as image_module from datumaro.util.test_utils import TestDir +import datumaro.util.image as image_module + from .requirements import Requirements, mark_requirement diff --git a/tests/test_image_dir_format.py b/tests/test_image_dir_format.py index 4cbea707ab12..cc4e04b3772b 100644 --- a/tests/test_image_dir_format.py +++ b/tests/test_image_dir_format.py @@ -1,14 +1,17 @@ -import numpy as np +from unittest import TestCase import os import os.path as osp -from unittest import TestCase +import numpy as np -from datumaro.components.project import Dataset from datumaro.components.extractor import DatasetItem +from datumaro.components.project import Dataset from datumaro.plugins.image_dir_format import ImageDirConverter from datumaro.util.image import Image, save_image -from datumaro.util.test_utils import TestDir, compare_datasets, test_save_and_load +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement @@ -73,4 +76,4 @@ def test_can_save_and_load_image_with_custom_extension(self): os.rename(image_path, osp.join(test_dir, 'a', '3.qq')) actual = Dataset.import_from(test_dir, 'image_dir', exts='qq') - compare_datasets(self, expected, actual, require_images=True) \ No newline at end of file + 
compare_datasets(self, expected, actual, require_images=True) diff --git a/tests/test_image_zip_format.py b/tests/test_image_zip_format.py new file mode 100644 index 000000000000..2970dfc65d68 --- /dev/null +++ b/tests/test_image_zip_format.py @@ -0,0 +1,108 @@ +from unittest import TestCase +import os.path as osp + +import numpy as np + +from datumaro.components.extractor import DatasetItem +from datumaro.components.project import Dataset +from datumaro.plugins.image_zip_format import ImageZipConverter, ImageZipPath +from datumaro.util.image import Image, save_image +from datumaro.util.test_utils import TestDir, compare_datasets + +from .requirements import Requirements, mark_requirement + + +class ImageZipConverterTest(TestCase): + @mark_requirement(Requirements.DATUM_267) + def _test_can_save_and_load(self, source_dataset, test_dir, + **kwargs): + archive_path = osp.join(test_dir, kwargs.get('name', + ImageZipPath.DEFAULT_ARCHIVE_NAME)) + ImageZipConverter.convert(source_dataset, test_dir, **kwargs) + parsed_dataset = Dataset.import_from(archive_path, 'image_zip') + + compare_datasets(self, source_dataset, parsed_dataset) + + @mark_requirement(Requirements.DATUM_267) + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((10, 6, 3))), + DatasetItem(id='2', image=np.ones((5, 4, 3))), + ]) + + with TestDir() as test_dir: + self._test_can_save_and_load(source_dataset, test_dir) + + @mark_requirement(Requirements.DATUM_267) + def test_can_save_and_load_with_custom_archive_name(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='img_1', image=np.ones((10, 10, 3))), + ]) + + with TestDir() as test_dir: + self._test_can_save_and_load(source_dataset, test_dir, + name='my_archive.zip') + + @mark_requirement(Requirements.DATUM_267) + def test_relative_paths(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((10, 10, 3))), + DatasetItem(id='a/2', image=np.ones((4, 
5, 3))), + DatasetItem(id='a/b/3', image=np.ones((20, 10, 3))) + ]) + + with TestDir() as test_dir: + self._test_can_save_and_load(source_dataset, test_dir) + + @mark_requirement(Requirements.DATUM_267) + def test_can_save_and_load_custom_compresion_method(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((5, 5, 3))), + DatasetItem(id='2', image=np.ones((4, 3, 3))), + ]) + + with TestDir() as test_dir: + self._test_can_save_and_load(source_dataset, test_dir, + compression='ZIP_DEFLATED') + + @mark_requirement(Requirements.DATUM_267) + def test_can_save_and_load_with_arbitrary_extensions(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='subset/1', + image=Image(data=np.ones((10, 10, 3)), path='subset/1.png')), + DatasetItem(id='2', + image=Image(data=np.ones((4, 5, 3)), path='2.jpg')), + ]) + + with TestDir() as test_dir: + save_image(osp.join(test_dir, '2.jpg'), + source_dataset.get('2').image.data) + save_image(osp.join(test_dir, 'subset', '1.png'), + source_dataset.get('subset/1').image.data, + create_dir=True) + + self._test_can_save_and_load(source_dataset, test_dir) + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'image_zip_dataset') + +class ImageZipImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_267) + def test_can_import(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((10, 10, 3))) + ]) + + zip_path = osp.join(DUMMY_DATASET_DIR, '1.zip') + parsed_dataset = Dataset.import_from(zip_path, format='image_zip') + compare_datasets(self, source_dataset, parsed_dataset) + + + @mark_requirement(Requirements.DATUM_267) + def test_can_import_from_directory(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((10, 10, 3))), + DatasetItem(id='2', image=np.ones((5, 10, 3))) + ]) + + parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, format='image_zip') + compare_datasets(self, source_dataset, parsed_dataset) diff 
--git a/tests/test_imagenet_format.py b/tests/test_imagenet_format.py index 6735f1812362..f8e009f44f90 100644 --- a/tests/test_imagenet_format.py +++ b/tests/test_imagenet_format.py @@ -1,15 +1,16 @@ from unittest import TestCase +import os.path as osp import numpy as np -import os.path as osp from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, Label, - LabelCategories, AnnotationType +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, ) from datumaro.plugins.imagenet_format import ImagenetConverter, ImagenetImporter from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_imagenet_txt_format.py b/tests/test_imagenet_txt_format.py index 15bfc2b81cd9..637bd132b1a9 100644 --- a/tests/test_imagenet_txt_format.py +++ b/tests/test_imagenet_txt_format.py @@ -1,16 +1,18 @@ from unittest import TestCase +import os.path as osp import numpy as np -import os.path as osp from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, Label, - LabelCategories, AnnotationType +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, +) +from datumaro.plugins.imagenet_txt_format import ( + ImagenetTxtConverter, ImagenetTxtImporter, ) -from datumaro.plugins.imagenet_txt_format import \ - ImagenetTxtConverter, ImagenetTxtImporter from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_images.py b/tests/test_images.py index a0c22d607c7c..54579648ac88 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -1,12 +1,15 @@ -import numpy as np +from unittest import TestCase import os.path as osp -from unittest import TestCase +import numpy as np 
-from datumaro.util.test_utils import TestDir -from datumaro.util.image import (lazy_image, load_image, save_image, \ - Image, ByteImage, encode_image) +from datumaro.util.image import ( + ByteImage, Image, encode_image, lazy_image, load_image, + load_image_meta_file, save_image, +) from datumaro.util.image_cache import ImageCache +from datumaro.util.test_utils import TestDir + from .requirements import Requirements, mark_requirement @@ -128,3 +131,28 @@ def test_ctors(self): if 'ext' in args or 'path' in args: self.assertEqual(img.ext, args.get('ext', '.png')) # pylint: enable=pointless-statement + +class ImageMetaTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_loading(self): + meta_file_contents = r""" + # this is a comment + + a 123 456 + 'b c' 10 20 # inline comment + """ + + meta_expected = { + 'a': (123, 456), + 'b c': (10, 20), + } + + with TestDir() as test_dir: + meta_path = osp.join(test_dir, 'images.meta') + + with open(meta_path, 'w') as meta_file: + meta_file.write(meta_file_contents) + + meta_loaded = load_image_meta_file(meta_path) + + self.assertEqual(meta_loaded, meta_expected) diff --git a/tests/test_kitti_format.py b/tests/test_kitti_format.py new file mode 100644 index 000000000000..ac0328c8b00a --- /dev/null +++ b/tests/test_kitti_format.py @@ -0,0 +1,437 @@ +from collections import OrderedDict +from functools import partial +from unittest import TestCase +import os.path as osp + +import numpy as np + +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Extractor, LabelCategories, Mask, +) +from datumaro.plugins.kitti_format.converter import KittiConverter +from datumaro.plugins.kitti_format.format import ( + KittiLabelMap, KittiPath, KittiTask, make_kitti_categories, + make_kitti_detection_categories, parse_label_map, write_label_map, +) +from datumaro.plugins.kitti_format.importer import ( + KittiDetectionImporter, KittiImporter, 
KittiSegmentationImporter, +) +from datumaro.util.image import Image +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + +from .requirements import Requirements, mark_requirement + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', + 'kitti_dataset') + + +class KittiFormatTest(TestCase): + @mark_requirement(Requirements.DATUM_280) + def test_can_write_and_parse_labelmap(self): + src_label_map = KittiLabelMap + + with TestDir() as test_dir: + file_path = osp.join(test_dir, 'label_colors.txt') + + write_label_map(file_path, src_label_map) + dst_label_map = parse_label_map(file_path) + + self.assertEqual(src_label_map, dst_label_map) + +class KittiImportTest(TestCase): + @mark_requirement(Requirements.DATUM_280) + def test_can_import_segmentation(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='000030_10', + subset='training', + image=np.ones((1, 5, 3)), + annotations=[ + Mask(image=np.array([[1, 1, 0, 0, 0]]), id=0, label=3, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 0, 1, 0, 0]]), id=1, label=27, + attributes={'is_crowd': False}), + Mask(image=np.array([[0, 0, 0, 1, 1]]), id=2, label=27, + attributes={'is_crowd': False}), + ] + ), + DatasetItem(id='000030_11', + subset='training', + image=np.ones((1, 5, 3)), + annotations=[ + Mask(image=np.array([[1, 1, 0, 0, 0]]), id=1, label=31, + attributes={'is_crowd': False}), + Mask(image=np.array([[0, 0, 1, 0, 0]]), id=1, label=12, + attributes={'is_crowd': False}), + Mask(image=np.array([[0, 0, 0, 1, 1]]), id=0, label=3, + attributes={'is_crowd': True}), + ] + ), + ], categories=make_kitti_categories()) + + parsed_dataset = Dataset.import_from( + osp.join(DUMMY_DATASET_DIR, 'kitti_segmentation'), 'kitti') + + compare_datasets(self, source_dataset, parsed_dataset) + + @mark_requirement(Requirements.DATUM_280) + def test_can_import_detection(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='000030_10', + 
subset='training', + image=np.ones((10, 10, 3)), + annotations=[ + Bbox(0, 1, 2, 2, label=2, id=0, + attributes={'truncated': True, 'occluded': False}), + Bbox(0, 5, 1, 3, label=1, id=1, + attributes={'truncated': False, 'occluded': False}), + ]), + DatasetItem(id='000030_11', + subset='training', + image=np.ones((10, 10, 3)), annotations=[ + Bbox(0, 0, 2, 2, label=1, id=0, + attributes={'truncated': True, 'occluded': True}), + Bbox(4, 4, 2, 2, label=1, id=1, + attributes={'truncated': False, 'occluded': False}), + Bbox(6, 6, 1, 3, label=1, id=2, + attributes={'truncated': False, 'occluded': True}), + ]), + ], categories=make_kitti_detection_categories()) + + parsed_dataset = Dataset.import_from( + osp.join(DUMMY_DATASET_DIR, 'kitti_detection'), 'kitti') + + compare_datasets(self, source_dataset, parsed_dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_detect_kitti(self): + matrix = [ + # Whole dataset + (DUMMY_DATASET_DIR, KittiImporter), + + # Subformats + (DUMMY_DATASET_DIR, KittiSegmentationImporter), + (DUMMY_DATASET_DIR, KittiDetectionImporter), + + # Subsets of subformats + (osp.join(DUMMY_DATASET_DIR, 'kitti_detection'), + KittiDetectionImporter), + (osp.join(DUMMY_DATASET_DIR, 'kitti_detection', 'training'), + KittiDetectionImporter), + (osp.join(DUMMY_DATASET_DIR, 'kitti_segmentation'), + KittiSegmentationImporter), + (osp.join(DUMMY_DATASET_DIR, 'kitti_segmentation', 'training'), + KittiSegmentationImporter), + ] + + for path, subtask in matrix: + with self.subTest(path=path, task=subtask): + self.assertTrue(subtask.detect(path)) + + +class TestExtractorBase(Extractor): + def _label(self, kitti_label): + return self.categories()[AnnotationType.label].find(kitti_label)[0] + + def categories(self): + return make_kitti_categories() + +class KittiConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None, **kwargs): + return test_save_and_load(self, 
source_dataset, converter, test_dir, + importer='kitti', + target_dataset=target_dataset, importer_args=importer_args, **kwargs) + + @mark_requirement(Requirements.DATUM_280) + def test_can_save_kitti_segm(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='1_2', subset='test', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=24, id=1, + attributes={'is_crowd': False}), + Mask(image=np.array([[1, 0, 0, 0, 1]]), label=15, id=0, + attributes={'is_crowd': True}), + ]), + DatasetItem(id='3', subset='val', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 1, 0, 1, 1]]), label=3, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 0, 1, 0, 0]]), label=5, id=0, + attributes={'is_crowd': True}), + ]), + ]) + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def test_can_save_kitti_detection(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='1_2', subset='test', + image=np.ones((10, 10, 3)), annotations=[ + Bbox(0, 1, 2, 2, label=2, id=0, + attributes={'truncated': False, 'occluded': False}), + ]), + DatasetItem(id='1_3', subset='test', + image=np.ones((10, 10, 3)), annotations=[ + Bbox(0, 0, 2, 2, label=1, id=0, + attributes={'truncated': True, 'occluded': False}), + Bbox(6, 2, 3, 4, label=1, id=1, + attributes={'truncated': False, 'occluded': True}), + ]), + ]) + + def categories(self): + return make_kitti_detection_categories() + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, + save_images=True, tasks=KittiTask.detection), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def 
test_can_save_kitti_segm_unpainted(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='1_2', subset='test', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=24, id=1, + attributes={'is_crowd': False}), + Mask(image=np.array([[1, 0, 0, 0, 1]]), label=15, id=0, + attributes={'is_crowd': True}), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True, apply_colormap=False), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def test_can_save_kitti_dataset_with_no_subsets(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='1_2', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 0]]), label=0, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 1]]), label=3, id=0, + attributes={'is_crowd': True}), + ]), + + DatasetItem(id='1_3', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 1, 0, 1, 0]]), label=1, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 0, 1, 0, 1]]), label=2, id=0, + attributes={'is_crowd': True}), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def test_can_save_kitti_dataset_without_frame_and_sequence(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='data', subset='test', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), label=3, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=24, id=1, + attributes={'is_crowd': False}), + ]), + 
]) + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='кириллица с пробелом', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), label=3, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=24, id=1, + attributes={'is_crowd': False}), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def test_can_save_kitti_dataset_with_complex_id(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/b/1', subset='test', + image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), label=3, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=24, id=1, + attributes={'is_crowd': False}), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def test_can_save_with_no_masks(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='city_1_2', subset='test', + image=np.ones((2, 5, 3)), + ), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, label_map='kitti', + save_images=True), test_dir) + + @mark_requirement(Requirements.DATUM_280) + def test_dataset_with_source_labelmap_undefined(self): + class SrcExtractor(TestExtractorBase): + def 
__iter__(self): + yield DatasetItem(id=1, image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), label=1, id=1, + attributes={'is_crowd': False}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=2, id=2, + attributes={'is_crowd': False}), + ]) + + def categories(self): + label_cat = LabelCategories() + label_cat.add('background') + label_cat.add('Label_1') + label_cat.add('label_2') + return { + AnnotationType.label: label_cat, + } + + class DstExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), + attributes={'is_crowd': False}, id=1, + label=self._label('Label_1')), + Mask(image=np.array([[0, 1, 1, 0, 0]]), + attributes={'is_crowd': False}, id=2, + label=self._label('label_2')), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['background'] = None + label_map['Label_1'] = None + label_map['label_2'] = None + return make_kitti_categories(label_map) + + with TestDir() as test_dir: + self._test_save_and_load(SrcExtractor(), + partial(KittiConverter.convert, label_map='source', + save_images=True), test_dir, target_dataset=DstExtractor()) + + @mark_requirement(Requirements.DATUM_280) + def test_dataset_with_source_labelmap_defined(self): + class SrcExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 1]]), label=1, id=1, + attributes={'is_crowd': False}), + Mask(image=np.array([[0, 1, 1, 0, 0]]), label=2, id=2, + attributes={'is_crowd': False}), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['background'] = (0, 0, 0) + label_map['label_1'] = (1, 2, 3) + label_map['label_2'] = (3, 2, 1) + return make_kitti_categories(label_map) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, image=np.ones((1, 5, 3)), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 
1]]), + attributes={'is_crowd': False}, id=1, + label=self._label('label_1')), + Mask(image=np.array([[0, 1, 1, 0, 0]]), + attributes={'is_crowd': False}, id=2, + label=self._label('label_2')), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['background'] = (0, 0, 0) + label_map['label_1'] = (1, 2, 3) + label_map['label_2'] = (3, 2, 1) + return make_kitti_categories(label_map) + + with TestDir() as test_dir: + self._test_save_and_load(SrcExtractor(), + partial(KittiConverter.convert, label_map='source', + save_images=True), test_dir, target_dataset=DstExtractor()) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_image_with_arbitrary_extension(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='q/1', image=Image(path='q/1.JPEG', + data=np.zeros((4, 3, 3)))), + + DatasetItem(id='a/b/c/2', image=Image( + path='a/b/c/2.bmp', data=np.ones((1, 5, 3)) + ), annotations=[ + Mask(image=np.array([[1, 0, 0, 1, 0]]), label=0, id=0, + attributes={'is_crowd': True}), + Mask(image=np.array([[0, 1, 1, 0, 1]]), label=1, id=0, + attributes={'is_crowd': True}), + ]), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['a'] = None + label_map['b'] = None + return make_kitti_categories(label_map) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(KittiConverter.convert, save_images=True), + test_dir, require_images=True) + + self.assertTrue(osp.isfile(osp.join(test_dir, 'default', + KittiPath.IMAGES_DIR, 'a/b/c/2.bmp'))) + self.assertTrue(osp.isfile(osp.join(test_dir, 'default', + KittiPath.IMAGES_DIR, 'q/1.JPEG'))) diff --git a/tests/test_kitti_raw_format.py b/tests/test_kitti_raw_format.py new file mode 100644 index 000000000000..94bcc16edd99 --- /dev/null +++ b/tests/test_kitti_raw_format.py @@ -0,0 +1,439 @@ +from functools import partial +from unittest import TestCase +import os +import os.path as osp + +from 
datumaro.components.extractor import ( + AnnotationType, Cuboid3d, DatasetItem, LabelCategories, +) +from datumaro.components.project import Dataset +from datumaro.plugins.kitti_raw_format.converter import KittiRawConverter +from datumaro.plugins.kitti_raw_format.extractor import KittiRawImporter +from datumaro.util.test_utils import ( + Dimensions, TestDir, compare_datasets_3d, test_save_and_load, +) + +from tests.requirements import Requirements, mark_requirement + +DUMMY_DATASET_DIR = osp.join(osp.dirname( + __file__), 'assets', 'kitti_dataset', 'kitti_raw') + + +class KittiRawImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_detect(self): + self.assertTrue(KittiRawImporter.detect(DUMMY_DATASET_DIR)) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_load(self): + pcd1 = osp.join(DUMMY_DATASET_DIR, + 'velodyne_points', 'data', '0000000000.pcd') + pcd2 = osp.join(DUMMY_DATASET_DIR, + 'velodyne_points', 'data', '0000000001.pcd') + pcd3 = osp.join(DUMMY_DATASET_DIR, + 'velodyne_points', 'data', '0000000002.pcd') + + image1 = osp.join(DUMMY_DATASET_DIR, + 'IMAGE_00', 'data', '0000000000.png') + image2 = osp.join(DUMMY_DATASET_DIR, + 'IMAGE_00', 'data', '0000000001.png') + image3 = osp.join(DUMMY_DATASET_DIR, + 'IMAGE_00', 'data', '0000000002.png') + + expected_label_cat = LabelCategories(attributes={'occluded'}) + expected_label_cat.add('bus') + expected_label_cat.add('car') + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[1, 2, 3], scale=[7.95, -3.62, -1.03], + label=1, attributes={'occluded': False, 'track_id': 1}), + + Cuboid3d(position=[1, 1, 0], scale=[8.34, 23.01, -0.76], + label=0, attributes={'occluded': False, 'track_id': 2}) + ], + point_cloud=pcd1, related_images=[image1], + attributes={'frame': 0}), + + DatasetItem(id='0000000001', + annotations=[ + Cuboid3d(position=[0, 1, 0], scale=[8.34, 23.01, -0.76], + rotation=[1, 1, 3], + 
label=0, attributes={'occluded': True, 'track_id': 2}) + ], + point_cloud=pcd2, related_images=[image2], + attributes={'frame': 1}), + + DatasetItem(id='0000000002', + annotations=[ + Cuboid3d(position=[1, 2, 3], scale=[-9.41, 13.54, 0.24], + label=1, attributes={'occluded': False, 'track_id': 3}) + ], + point_cloud=pcd3, related_images=[image3], + attributes={'frame': 2}) + + ], categories={AnnotationType.label: expected_label_cat}) + + parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'kitti_raw') + + compare_datasets_3d(self, expected_dataset, parsed_dataset, + require_point_cloud=True) + + +class KittiRawConverterTest(TestCase): + pcd1 = osp.abspath(osp.join(DUMMY_DATASET_DIR, + 'velodyne_points', 'data', '0000000000.pcd')) + pcd2 = osp.abspath(osp.join(DUMMY_DATASET_DIR, + 'velodyne_points', 'data', '0000000001.pcd')) + pcd3 = osp.abspath(osp.join(DUMMY_DATASET_DIR, + 'velodyne_points', 'data', '0000000002.pcd')) + + image1 = osp.abspath(osp.join(DUMMY_DATASET_DIR, + 'IMAGE_00', 'data', '0000000000.png')) + image2 = osp.abspath(osp.join(DUMMY_DATASET_DIR, + 'IMAGE_00', 'data', '0000000001.png')) + image3 = osp.abspath(osp.join(DUMMY_DATASET_DIR, + 'IMAGE_00', 'data', '0000000002.png')) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None, **kwargs): + kwargs.setdefault('dimension', Dimensions.dim_3d) + return test_save_and_load(self, source_dataset, converter, test_dir, + importer='kitti_raw', target_dataset=target_dataset, + importer_args=importer_args, **kwargs) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={'occluded': False, 'track_id': 1}), + + Cuboid3d(position=[3.4, -2.11, 4.4], label=1, + attributes={'occluded': True, 'track_id': 2}) + ], + 
point_cloud=self.pcd1, related_images=[self.image1], + attributes={'frame': 0} + ), + + DatasetItem(id='0000000001', + annotations=[ + Cuboid3d(position=[1.4, 2.1, 1.4], label=1, + attributes={'track_id': 2}), + + Cuboid3d(position=[11.4, -0.1, 4.2], scale=[2, 1, 2], + label=0, attributes={'track_id': 3}) + ], + ), + + DatasetItem(id='0000000002', + annotations=[ + Cuboid3d(position=[0.4, -1, 2.24], scale=[2, 1, 2], + label=0, attributes={'track_id': 3}), + ], + point_cloud=self.pcd3, + attributes={'frame': 2} + ), + ], categories=['cat', 'dog']) + + with TestDir() as test_dir: + target_label_cat = LabelCategories(attributes={'occluded'}) + target_label_cat.add('cat') + target_label_cat.add('dog') + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={ + 'occluded': False, 'track_id': 1}), + + Cuboid3d(position=[3.4, -2.11, 4.4], label=1, + attributes={ + 'occluded': True, 'track_id': 2}) + ], + point_cloud=osp.join(test_dir, + 'velodyne_points', 'data', '0000000000.pcd'), + related_images=[osp.join(test_dir, + 'image_00', 'data', '0000000000.png') + ], + attributes={'frame': 0} + ), + + DatasetItem(id='0000000001', + annotations=[ + Cuboid3d(position=[1.4, 2.1, 1.4], label=1, + attributes={'occluded': False, 'track_id': 2}), + + Cuboid3d(position=[11.4, -0.1, 4.2], scale=[2, 1, 2], + label=0, attributes={ + 'occluded': False, 'track_id': 3}) + ], + attributes={'frame': 1} + ), + + DatasetItem(id='0000000002', + annotations=[ + Cuboid3d(position=[0.4, -1, 2.24], scale=[2, 1, 2], + label=0, attributes={ + 'occluded': False, 'track_id': 3}), + ], + point_cloud=osp.join(test_dir, + 'velodyne_points', 'data', '0000000002.pcd'), + attributes={'frame': 2} + ), + ], categories={AnnotationType.label: target_label_cat}) + + self._test_save_and_load(source_dataset, + partial(KittiRawConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset, + 
require_point_cloud=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_preserve_frame_ids(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='abc', attributes={'frame': 40}) + ], categories=[]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + KittiRawConverter.convert, test_dir) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_reindex_frames(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='abc') + ], categories=[]) + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='abc', attributes={'frame': 0}) + ], categories=[]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(KittiRawConverter.convert, reindex=True), + test_dir, target_dataset=expected_dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_requires_track_id(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='abc', + annotations=[ + Cuboid3d(position=[0.4, -1, 2.24], label=0), + ] + ) + ], categories=['dog']) + + with TestDir() as test_dir: + with self.assertRaisesRegex(Exception, 'track_id'): + KittiRawConverter.convert(source_dataset, test_dir) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_reindex_allows_single_annotations(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='abc', + annotations=[ + Cuboid3d(position=[0.4, -1, 2.24], label=0), + ] + ) + ], categories=['dog']) + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='abc', + annotations=[ + Cuboid3d(position=[0.4, -1, 2.24], label=0, + attributes={'track_id': 1, 'occluded': False}), + ], + attributes={'frame': 0}) + ], categories=['dog']) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(KittiRawConverter.convert, reindex=True), + test_dir, target_dataset=expected_dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_attributes(self): + source_dataset = 
Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={'track_id': 1, + 'occluded': True, 'a': 'w', 'b': 5}) + ], + attributes={'frame': 0} + ) + ], categories=['cat']) + + target_label_cat = LabelCategories(attributes={'occluded'}) + target_label_cat.add('cat', attributes=['a', 'b']) + target_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={'track_id': 1, + 'occluded': True, 'a': 'w', 'b': 5}) + ], + attributes={'frame': 0} + ) + ], categories={AnnotationType.label: target_label_cat}) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(KittiRawConverter.convert, allow_attrs=True), + test_dir, target_dataset=target_dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_discard_attributes(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={'track_id': 1, 'a': 'w', 'b': 5}) + ], + attributes={'frame': 0} + ) + ], categories=['cat']) + + target_label_cat = LabelCategories(attributes={'occluded'}) + target_label_cat.add('cat') + target_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', + annotations=[ + Cuboid3d(position=[13.54, -9.41, 0.24], label=0, + attributes={'track_id': 1, 'occluded': False}) + ], + attributes={'frame': 0} + ) + ], categories={AnnotationType.label: target_label_cat}) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + KittiRawConverter.convert, + test_dir, target_dataset=target_dataset) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_without_annotations(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='0000000000', attributes={'frame': 0}) + ], categories=[]) + + with TestDir() as test_dir: + 
self._test_save_and_load(source_dataset, + KittiRawConverter.convert, test_dir) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_arbitrary_paths(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='a/d', + annotations=[ + Cuboid3d(position=[1, 2, 3], label=0, + attributes={'track_id': 1}) + ], + point_cloud=self.pcd1, related_images=[self.image1], + attributes={'frame': 3} + ), + ], categories=['cat']) + + with TestDir() as test_dir: + target_label_cat = LabelCategories(attributes={'occluded'}) + target_label_cat.add('cat') + target_dataset = Dataset.from_iterable([ + DatasetItem(id='a/d', + annotations=[ + Cuboid3d(position=[1, 2, 3], label=0, + attributes={'track_id': 1, 'occluded': False}) + ], + point_cloud=osp.join(test_dir, + 'velodyne_points', 'data', 'a', 'd.pcd'), + related_images=[ + osp.join(test_dir, 'image_00', 'data', 'a', 'd.png'), + ], + attributes={'frame': 3} + ), + ], categories={AnnotationType.label: target_label_cat}) + + self._test_save_and_load(source_dataset, + partial(KittiRawConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset, + require_point_cloud=True) + self.assertTrue(osp.isfile(osp.join( + test_dir, 'image_00', 'data', 'a', 'd.png'))) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_multiple_related_images(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='a/d', + annotations=[ + Cuboid3d(position=[1, 2, 3], label=0, + attributes={'track_id': 1}) + ], + point_cloud=self.pcd1, + related_images=[self.image1, self.image2, self.image3], + attributes={'frame': 3} + ), + ], categories=['cat']) + + with TestDir() as test_dir: + target_label_cat = LabelCategories(attributes={'occluded'}) + target_label_cat.add('cat') + target_dataset = Dataset.from_iterable([ + DatasetItem(id='a/d', + annotations=[ + Cuboid3d(position=[1, 2, 3], label=0, + attributes={'track_id': 1, 'occluded': False}) + ], + 
point_cloud=osp.join(test_dir, + 'velodyne_points', 'data', 'a', 'd.pcd'), + related_images=[ + osp.join(test_dir, 'image_00', 'data', 'a', 'd.png'), + osp.join(test_dir, 'image_01', 'data', 'a', 'd.png'), + osp.join(test_dir, 'image_02', 'data', 'a', 'd.png'), + ], + attributes={'frame': 3} + ), + ], categories={AnnotationType.label: target_label_cat}) + + self._test_save_and_load(source_dataset, + partial(KittiRawConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset, + require_point_cloud=True) + self.assertTrue(osp.isfile(osp.join( + test_dir, 'image_00', 'data', 'a', 'd.png'))) + self.assertTrue(osp.isfile(osp.join( + test_dir, 'image_01', 'data', 'a', 'd.png'))) + self.assertTrue(osp.isfile(osp.join( + test_dir, 'image_02', 'data', 'a', 'd.png'))) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_inplace_save_writes_only_updated_data(self): + with TestDir() as path: + dataset = Dataset.from_iterable([ + DatasetItem(id='frame1', + annotations=[ + Cuboid3d(position=[3.5, 9.8, 0.3], label=0, + attributes={'track_id': 1}) + ], + point_cloud=self.pcd1, related_images=[self.image1], + attributes={'frame': 0} + ) + ], categories=['car', 'bus']) + dataset.export(path, 'kitti_raw', save_images=True) + + dataset.put(DatasetItem('frame2', + annotations=[ + Cuboid3d(position=[1, 2, 0], label=1, + attributes={'track_id': 1}) + ], + point_cloud=self.pcd2, related_images=[self.image2], + attributes={'frame': 1} + )) + dataset.remove('frame1') + dataset.save(save_images=True) + + self.assertEqual({'frame2.png'}, set(os.listdir( + osp.join(path, 'image_00', 'data')))) + self.assertEqual({'frame2.pcd'}, set(os.listdir( + osp.join(path, 'velodyne_points', 'data')))) diff --git a/tests/test_labelme_format.py b/tests/test_labelme_format.py index ad80a9ecf5a8..be89add706f7 100644 --- a/tests/test_labelme_format.py +++ b/tests/test_labelme_format.py @@ -1,15 +1,18 @@ from functools import partial -import numpy as np +from unittest import 
TestCase import os import os.path as osp -from unittest import TestCase +import numpy as np + from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, Bbox, Mask, Polygon) -from datumaro.plugins.labelme_format import LabelMeImporter, LabelMeConverter +from datumaro.components.extractor import Bbox, DatasetItem, Mask, Polygon +from datumaro.plugins.labelme_format import LabelMeConverter, LabelMeImporter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement @@ -267,4 +270,4 @@ def test_can_import(self): ]) parsed = Dataset.import_from(DUMMY_DATASET_DIR, 'label_me') - compare_datasets(self, expected=target_dataset, actual=parsed) \ No newline at end of file + compare_datasets(self, expected=target_dataset, actual=parsed) diff --git a/tests/test_lfw_format.py b/tests/test_lfw_format.py index 64a37731a723..b1c445f1fa7e 100644 --- a/tests/test_lfw_format.py +++ b/tests/test_lfw_format.py @@ -1,12 +1,14 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np + from datumaro.components.dataset import Dataset from datumaro.components.extractor import DatasetItem, Label, Points from datumaro.plugins.lfw_format import LfwConverter, LfwImporter from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_market1501_format.py b/tests/test_market1501_format.py index 17fc3afeacd5..26f6adf714da 100644 --- a/tests/test_market1501_format.py +++ b/tests/test_market1501_format.py @@ -1,13 +1,16 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np + from datumaro.components.dataset import Dataset from 
datumaro.components.extractor import DatasetItem -from datumaro.plugins.market1501_format import (Market1501Converter, - Market1501Importer) +from datumaro.plugins.market1501_format import ( + Market1501Converter, Market1501Importer, +) from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_masks.py b/tests/test_masks.py index 1025927a4f49..5e2950b51725 100644 --- a/tests/test_masks.py +++ b/tests/test_masks.py @@ -1,9 +1,10 @@ -import numpy as np - from unittest import TestCase -import datumaro.util.mask_tools as mask_tools +import numpy as np + from datumaro.components.extractor import CompiledMask +import datumaro.util.mask_tools as mask_tools + from .requirements import Requirements, mark_requirement @@ -203,4 +204,4 @@ def test_can_decode_compiled_mask(self): labels = compiled_mask.get_instance_labels() - self.assertEqual({instance_idx: class_idx}, labels) \ No newline at end of file + self.assertEqual({instance_idx: class_idx}, labels) diff --git a/tests/test_mnist_csv_format.py b/tests/test_mnist_csv_format.py index 8fd258a8e5bd..b8ee635d30f7 100644 --- a/tests/test_mnist_csv_format.py +++ b/tests/test_mnist_csv_format.py @@ -1,14 +1,18 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np + from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (AnnotationType, DatasetItem, Label, - LabelCategories) -from datumaro.plugins.mnist_csv_format import (MnistCsvConverter, - MnistCsvImporter) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, +) +from datumaro.plugins.mnist_csv_format import ( + MnistCsvConverter, MnistCsvImporter, +) from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git 
a/tests/test_mnist_format.py b/tests/test_mnist_format.py index 41fe67286f6e..f3f219b42fab 100644 --- a/tests/test_mnist_format.py +++ b/tests/test_mnist_format.py @@ -1,13 +1,16 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np + from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (AnnotationType, DatasetItem, Label, - LabelCategories) +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, +) from datumaro.plugins.mnist_format import MnistConverter, MnistImporter from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_mot_format.py b/tests/test_mot_format.py index 6b428b40f88c..d133fb799161 100644 --- a/tests/test_mot_format.py +++ b/tests/test_mot_format.py @@ -1,16 +1,19 @@ from functools import partial -import numpy as np +from unittest import TestCase import os.path as osp -from unittest import TestCase +import numpy as np + from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Bbox, LabelCategories +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, LabelCategories, ) from datumaro.plugins.mot_format import MotSeqGtConverter, MotSeqImporter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement @@ -150,4 +153,4 @@ def test_can_import(self): dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'mot_seq') - compare_datasets(self, expected_dataset, dataset) \ No newline at end of file + compare_datasets(self, expected_dataset, dataset) diff --git a/tests/test_mots_format.py 
b/tests/test_mots_format.py index 51691a68c490..eddd909a657f 100644 --- a/tests/test_mots_format.py +++ b/tests/test_mots_format.py @@ -1,15 +1,17 @@ from functools import partial -import numpy as np +from unittest import TestCase import os.path as osp -from unittest import TestCase +import numpy as np -from datumaro.components.extractor import DatasetItem, Mask from datumaro.components.dataset import Dataset -from datumaro.plugins.mots_format import MotsPngConverter, MotsImporter +from datumaro.components.extractor import DatasetItem, Mask +from datumaro.plugins.mots_format import MotsImporter, MotsPngConverter from datumaro.util.image import Image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) + from .requirements import Requirements, mark_requirement DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mots_dataset') @@ -135,4 +137,4 @@ def test_can_import(self): ], categories=['a', 'b', 'c', 'd']) parsed = Dataset.import_from(DUMMY_DATASET_DIR, 'mots') - compare_datasets(self, expected=target, actual=parsed) \ No newline at end of file + compare_datasets(self, expected=target, actual=parsed) diff --git a/tests/test_ndr.py b/tests/test_ndr.py index 7180d1f5236a..731bf238d943 100644 --- a/tests/test_ndr.py +++ b/tests/test_ndr.py @@ -2,11 +2,12 @@ import numpy as np +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, +) from datumaro.components.project import Dataset -from datumaro.components.extractor import (DatasetItem, Label, - LabelCategories, AnnotationType) - import datumaro.plugins.ndr as ndr + from .requirements import Requirements, mark_requirement diff --git a/tests/test_open_images_format.py b/tests/test_open_images_format.py new file mode 100644 index 000000000000..c49a804ca27f --- /dev/null +++ b/tests/test_open_images_format.py @@ -0,0 +1,158 @@ +# Copyright 
(C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from unittest.case import TestCase +import os.path as osp + +import numpy as np + +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Label, LabelCategories, +) +from datumaro.plugins.open_images_format import ( + OpenImagesConverter, OpenImagesImporter, +) +from datumaro.util.image import Image +from datumaro.util.test_utils import TestDir, compare_datasets + +from tests.requirements import Requirements, mark_requirement + + +class OpenImagesFormatTest(TestCase): + @mark_requirement(Requirements.DATUM_274) + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable( + [ + DatasetItem(id='a', subset='train', + annotations=[Label(0, attributes={'score': 0.7})] + ), + DatasetItem(id='b', subset='train', image=np.zeros((8, 8, 3)), + annotations=[Label(1), Label(2, attributes={'score': 0})] + ), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable([ + '/m/0', + ('/m/1', '/m/0'), + '/m/2', + ]), + }, + ) + + expected_dataset = Dataset.from_extractors(source_dataset) + expected_dataset.put( + DatasetItem(id='b', subset='train', image=np.zeros((8, 8, 3)), + annotations=[ + # the converter assumes that labels without a score + # have a score of 100% + Label(1, attributes={'score': 1}), + Label(2, attributes={'score': 0}), + ] + ), + ) + + with TestDir() as test_dir: + OpenImagesConverter.convert(source_dataset, test_dir, + save_images=True) + + parsed_dataset = Dataset.import_from(test_dir, 'open_images') + + compare_datasets(self, expected_dataset, parsed_dataset, require_images=True) + + @mark_requirement(Requirements.DATUM_274) + def test_can_save_and_load_with_no_subsets(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='a', + annotations=[Label(0, attributes={'score': 0.7})] + ), + ], categories=['/m/0']) + + with TestDir() as test_dir: + 
OpenImagesConverter.convert(source_dataset, test_dir, + save_images=True) + + parsed_dataset = Dataset.import_from(test_dir, 'open_images') + + compare_datasets(self, source_dataset, parsed_dataset) + + @mark_requirement(Requirements.DATUM_274) + def test_can_save_and_load_image_with_arbitrary_extension(self): + dataset = Dataset.from_iterable([ + DatasetItem(id='a/1', image=Image(path='a/1.JPEG', + data=np.zeros((4, 3, 3)))), + DatasetItem(id='b/c/d/2', image=Image(path='b/c/d/2.bmp', + data=np.zeros((3, 4, 3)))), + ], categories=[]) + + with TestDir() as test_dir: + OpenImagesConverter.convert(dataset, test_dir, save_images=True) + + parsed_dataset = Dataset.import_from(test_dir, 'open_images') + + compare_datasets(self, dataset, parsed_dataset, require_images=True) + +ASSETS_DIR = osp.join(osp.dirname(__file__), 'assets') + +DUMMY_DATASET_DIR_V6 = osp.join(ASSETS_DIR, 'open_images_dataset_v6') +DUMMY_DATASET_DIR_V5 = osp.join(ASSETS_DIR, 'open_images_dataset_v5') + +class OpenImagesImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_274) + def test_can_import_v6(self): + expected_dataset = Dataset.from_iterable( + [ + DatasetItem(id='a', subset='train', image=np.zeros((8, 6, 3)), + annotations=[Label(label=0, attributes={'score': 1})]), + DatasetItem(id='b', subset='train', image=np.zeros((2, 8, 3)), + annotations=[Label(label=0, attributes={'score': 0})]), + DatasetItem(id='c', subset='test', image=np.ones((10, 5, 3)), + annotations=[ + Label(label=1, attributes={'score': 1}), + Label(label=3, attributes={'score': 1}), + ]), + DatasetItem(id='d', subset='validation', image=np.ones((1, 5, 3)), + annotations=[]), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable([ + # The hierarchy file in the test dataset also includes a fake label + # /m/x that is set to be /m/0's parent. This is to mimic the real + # Open Images dataset, that assigns a nonexistent label as a parent + # to all labels that don't have one. 
+ '/m/0', + ('/m/1', '/m/0'), + '/m/2', + '/m/3', + ]), + }, + ) + + dataset = Dataset.import_from(DUMMY_DATASET_DIR_V6, 'open_images') + + compare_datasets(self, expected_dataset, dataset, require_images=True) + + @mark_requirement(Requirements.DATUM_274) + def test_can_import_v5(self): + expected_dataset = Dataset.from_iterable( + [ + DatasetItem(id='aa', subset='train', image=np.zeros((8, 6, 3))), + DatasetItem(id='cc', subset='test', image=np.ones((10, 5, 3))), + ], + categories={ + AnnotationType.label: LabelCategories.from_iterable([ + '/m/0', + '/m/1', + ]), + }, + ) + + dataset = Dataset.import_from(DUMMY_DATASET_DIR_V5, 'open_images') + + compare_datasets(self, expected_dataset, dataset, require_images=True) + + @mark_requirement(Requirements.DATUM_274) + def test_can_detect(self): + self.assertTrue(OpenImagesImporter.detect(DUMMY_DATASET_DIR_V6)) + self.assertTrue(OpenImagesImporter.detect(DUMMY_DATASET_DIR_V5)) diff --git a/tests/test_ops.py b/tests/test_ops.py index 8658015dc8a6..35349c78090d 100644 --- a/tests/test_ops.py +++ b/tests/test_ops.py @@ -2,14 +2,19 @@ import numpy as np -from datumaro.components.extractor import (Bbox, Caption, DatasetItem, - Label, Mask, Points, Polygon, PolyLine, DEFAULT_SUBSET_NAME, - LabelCategories, PointsCategories, MaskCategories, AnnotationType) -from datumaro.components.operations import (FailedAttrVotingError, - IntersectMerge, NoMatchingAnnError, NoMatchingItemError, WrongGroupError, - compute_ann_statistics, mean_std, find_unique_images) from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, AnnotationType, Bbox, Caption, DatasetItem, Label, + LabelCategories, Mask, MaskCategories, Points, PointsCategories, Polygon, + PolyLine, +) +from datumaro.components.operations import ( + FailedAttrVotingError, IntersectMerge, NoMatchingAnnError, + NoMatchingItemError, WrongGroupError, compute_ann_statistics, + find_unique_images, mean_std, +) from 
datumaro.util.test_utils import compare_datasets + from .requirements import Requirements, mark_requirement @@ -82,6 +87,7 @@ def test_stats(self): 'mask': { 'count': 1, }, 'points': { 'count': 1, }, 'caption': { 'count': 2, }, + 'cuboid_3d': {'count': 0}, }, 'annotations': { 'labels': { @@ -164,6 +170,7 @@ def test_stats_with_empty_dataset(self): 'mask': { 'count': 0, }, 'points': { 'count': 0, }, 'caption': { 'count': 0, }, + 'cuboid_3d': {'count': 0}, }, 'annotations': { 'labels': { @@ -177,7 +184,7 @@ def test_stats_with_empty_dataset(self): 'attributes': {} }, 'segments': { - 'avg. area': 0, + 'avg. area': 0.0, 'area distribution': [], 'pixel distribution': { 'label_0': [0, 0.0], diff --git a/tests/test_project.py b/tests/test_project.py index 8a8ddbaccf51..29faf40909d4 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1,17 +1,19 @@ -import numpy as np +from unittest import TestCase import os import os.path as osp -from unittest import TestCase +import numpy as np -from datumaro.components.project import Project, Environment -from datumaro.components.config_model import Source, Model -from datumaro.components.launcher import Launcher, ModelTransform -from datumaro.components.extractor import (Extractor, DatasetItem, - Label, LabelCategories, AnnotationType) from datumaro.components.config import Config -from datumaro.components.dataset import Dataset, DEFAULT_FORMAT +from datumaro.components.config_model import Model, Source +from datumaro.components.dataset import DEFAULT_FORMAT, Dataset +from datumaro.components.extractor import ( + AnnotationType, DatasetItem, Extractor, Label, LabelCategories, +) +from datumaro.components.launcher import Launcher, ModelTransform +from datumaro.components.project import Environment, Project from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_sampler.py b/tests/test_sampler.py index 4d4c10a9c0ad..2378fcfa5a73 100644 
--- a/tests/test_sampler.py +++ b/tests/test_sampler.py @@ -1,21 +1,19 @@ from collections import defaultdict from unittest import TestCase, skipIf +import csv -from datumaro.components.project import Dataset from datumaro.components.extractor import ( - DatasetItem, - Label, - LabelCategories, - AnnotationType, + AnnotationType, DatasetItem, Label, LabelCategories, ) +from datumaro.components.project import Dataset from datumaro.util.image import Image -import csv - try: import pandas as pd + + from datumaro.plugins.sampler.algorithm.entropy import \ + SampleEntropy as entropy from datumaro.plugins.sampler.sampler import Sampler - from datumaro.plugins.sampler.algorithm.entropy import SampleEntropy as entropy has_libs = True except ImportError: has_libs = False diff --git a/tests/test_sly_pointcloud_format.py b/tests/test_sly_pointcloud_format.py new file mode 100644 index 000000000000..f8a3cd61a7ec --- /dev/null +++ b/tests/test_sly_pointcloud_format.py @@ -0,0 +1,310 @@ +from functools import partial +from unittest import TestCase +import os +import os.path as osp + +from datumaro.components.extractor import ( + AnnotationType, Cuboid3d, DatasetItem, LabelCategories, +) +from datumaro.components.project import Dataset +from datumaro.plugins.sly_pointcloud_format.converter import ( + SuperviselyPointCloudConverter, +) +from datumaro.plugins.sly_pointcloud_format.extractor import ( + SuperviselyPointCloudImporter, +) +from datumaro.util.test_utils import ( + Dimensions, TestDir, compare_datasets_3d, test_save_and_load, +) + +from .requirements import Requirements, mark_requirement + +DUMMY_DATASET_DIR = osp.join(osp.dirname( + __file__), 'assets', 'sly_pointcloud_dataset') + + +class SuperviselyPointcloudImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_detect(self): + self.assertTrue(SuperviselyPointCloudImporter.detect(DUMMY_DATASET_DIR)) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_load(self): + 
pcd1 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'pointcloud', 'frame1.pcd') + pcd2 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'pointcloud', 'frame2.pcd') + + image1 = osp.join(DUMMY_DATASET_DIR, + 'ds0', 'related_images', 'frame1_pcd', 'img2.png') + image2 = osp.join(DUMMY_DATASET_DIR, + 'ds0', 'related_images', 'frame2_pcd', 'img1.png') + + label_cat = LabelCategories(attributes={'tag1', 'tag3'}) + label_cat.add('car') + label_cat.add('bus') + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='frame1', + annotations=[ + Cuboid3d(id=755220128, label=0, + position=[0.47, 0.23, 0.79], scale=[0.01, 0.01, 0.01], + attributes={'track_id': 231825, + 'tag1': 'fd', 'tag3': '4s'}), + + Cuboid3d(id=755337225, label=0, + position=[0.36, 0.64, 0.93], scale=[0.01, 0.01, 0.01], + attributes={'track_id': 231831, + 'tag1': 'v12', 'tag3': ''}), + ], + point_cloud=pcd1, related_images=[image1], + attributes={'frame': 0, 'description': '', + 'tag1': '25dsd', 'tag2': 65} + ), + + DatasetItem(id='frame2', + annotations=[ + Cuboid3d(id=216, label=1, + position=[0.59, 14.41, -0.61], + attributes={'track_id': 36, 'tag1': '', 'tag3': ''}) + ], + point_cloud=pcd2, related_images=[image2], + attributes={'frame': 1, 'description': ''} + ), + ], categories={AnnotationType.label: label_cat}) + + parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'sly_pointcloud') + + compare_datasets_3d(self, expected_dataset, parsed_dataset, + require_point_cloud=True) + + +class PointCloudConverterTest(TestCase): + pcd1 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'pointcloud', 'frame1.pcd') + pcd2 = osp.join(DUMMY_DATASET_DIR, 'ds0', 'pointcloud', 'frame2.pcd') + + image1 = osp.join(DUMMY_DATASET_DIR, + 'ds0', 'related_images', 'frame1_pcd', 'img2.png') + image2 = osp.join(DUMMY_DATASET_DIR, + 'ds0', 'related_images', 'frame2_pcd', 'img1.png') + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None, 
**kwargs): + kwargs.setdefault('dimension', Dimensions.dim_3d) + return test_save_and_load(self, source_dataset, converter, test_dir, + importer='sly_pointcloud', target_dataset=target_dataset, + importer_args=importer_args, **kwargs) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load(self): + src_label_cat = LabelCategories(attributes={'occluded'}) + src_label_cat.add('car', attributes=['x']) + src_label_cat.add('bus') + + source_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_1', + annotations=[ + Cuboid3d(id=206, label=0, + position=[320.86, 979.18, 1.04], + attributes={'occluded': False, 'track_id': 1, 'x': 1}), + + Cuboid3d(id=207, label=1, + position=[318.19, 974.65, 1.29], + attributes={'occluded': True, 'track_id': 2}), + ], + point_cloud=self.pcd1, + attributes={'frame': 0, 'description': 'zzz'} + ), + + DatasetItem(id='frm2', + annotations=[ + Cuboid3d(id=208, label=1, + position=[23.04, 8.75, -0.78], + attributes={'occluded': False, 'track_id': 2}) + ], + point_cloud=self.pcd2, related_images=[self.image2], + attributes={'frame': 1} + ), + ], categories={ AnnotationType.label: src_label_cat }) + + with TestDir() as test_dir: + target_label_cat = LabelCategories(attributes={'occluded'}) + target_label_cat.add('car', attributes=['x']) + target_label_cat.add('bus') + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_1', + annotations=[ + Cuboid3d(id=206, label=0, + position=[320.86, 979.18, 1.04], + attributes={'occluded': False, + 'track_id': 1, 'x': 1}), + + Cuboid3d(id=207, label=1, + position=[318.19, 974.65, 1.29], + attributes={'occluded': True, 'track_id': 2}), + ], + point_cloud=osp.join(test_dir, + 'ds0', 'pointcloud', 'frame_1.pcd'), + attributes={'frame': 0, 'description': 'zzz'}), + + DatasetItem(id='frm2', + annotations=[ + Cuboid3d(id=208, label=1, + position=[23.04, 8.75, -0.78], + attributes={'occluded': False, 'track_id': 2}), + ], + point_cloud=osp.join(test_dir, + 'ds0', 
'pointcloud', 'frm2.pcd'), + related_images=[osp.join(test_dir, + 'ds0', 'related_images', 'frm2_pcd', 'img1.png') + ], + attributes={'frame': 1, 'description': ''}) + ], categories={ AnnotationType.label: target_label_cat }) + + self._test_save_and_load(source_dataset, + partial(SuperviselyPointCloudConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset, + require_point_cloud=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_preserve_frame_ids(self): + dataset = Dataset.from_iterable([ + DatasetItem(id='abc', attributes={'frame': 20}), + ], categories=[]) + + with TestDir() as test_dir: + self._test_save_and_load(dataset, + SuperviselyPointCloudConverter.convert, test_dir, + ignored_attrs={'description'}) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_reindex(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='somename', attributes={'frame': 1234}) + ]) + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='somename', attributes={'frame': 1}) + ], categories=[]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(SuperviselyPointCloudConverter.convert, reindex=True), + test_dir, target_dataset=expected_dataset, + ignored_attrs={'description'}) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_keep_undeclared_attributes(self): + src_label_cat = LabelCategories(attributes={'occluded'}) + src_label_cat.add('label1', attributes={'a'}) + + source_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_000000', + annotations=[ + Cuboid3d(id=206, label=0, position=[320.86, 979.18, 1.04], + attributes={'track_id': 1, 'occluded': False, + 'a': 5, 'undeclared': 'y'}), + ], + attributes={'frame': 0}), + ], categories={AnnotationType.label: src_label_cat}) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(SuperviselyPointCloudConverter.convert, save_images=True, + allow_undeclared_attrs=True), + 
test_dir, ignored_attrs=['description']) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_drop_undeclared_attributes(self): + src_label_cat = LabelCategories(attributes={'occluded'}) + src_label_cat.add('label1', attributes={'a'}) + + source_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_000000', + annotations=[ + Cuboid3d(id=206, label=0, position=[320.86, 979.18, 1.04], + attributes={'occluded': False, + 'a': 5, 'undeclared': 'y'}), + ], + attributes={'frame': 0}), + ], categories={AnnotationType.label: src_label_cat}) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_000000', + annotations=[ + Cuboid3d(id=206, label=0, position=[320.86, 979.18, 1.04], + attributes={'track_id': 206, 'occluded': False, 'a': 5}), + ], + attributes={'frame': 0}), + ], categories={AnnotationType.label: src_label_cat}) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(SuperviselyPointCloudConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset, + ignored_attrs=['description']) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_have_arbitrary_item_ids(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='a/b/c235', + point_cloud=self.pcd1, related_images=[self.image1], + attributes={'frame': 20}), + ]) + + with TestDir() as test_dir: + pcd_path = osp.join(test_dir, 'ds0', 'pointcloud', + 'a', 'b', 'c235.pcd') + img_path = osp.join(test_dir, 'ds0', 'related_images', + 'a', 'b', 'c235_pcd', 'img2.png') + target_dataset = Dataset.from_iterable([ + DatasetItem(id='a/b/c235', + point_cloud=pcd_path, related_images=[img_path], + attributes={'frame': 20}), + ], categories=[]) + + self._test_save_and_load(source_dataset, + partial(SuperviselyPointCloudConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset, + ignored_attrs={'description'}, require_point_cloud=True) + + self.assertTrue(osp.isfile( + osp.join(test_dir, 'ds0', 'ann', 'a', 
'b', 'c235.pcd.json'))) + self.assertTrue(osp.isfile(pcd_path)) + self.assertTrue({'img2.png', 'img2.png.json'}, + set(os.listdir(osp.join(test_dir, 'ds0', 'related_images', + 'a', 'b', 'c235_pcd')))) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_inplace_save_writes_only_updated_data(self): + with TestDir() as path: + dataset = Dataset.from_iterable([ + DatasetItem(id='frame1', + annotations=[ + Cuboid3d(id=215, + position=[320.59, 979.48, 1.03], label=0) + ], + point_cloud=self.pcd1, related_images=[self.image1], + attributes={'frame': 0}) + ], categories=['car', 'bus']) + dataset.export(path, 'sly_pointcloud', save_images=True) + + dataset.put(DatasetItem(id='frame2', + annotations=[ + Cuboid3d(id=216, position=[0.59, 14.41, -0.61], label=1) + ], + point_cloud=self.pcd2, related_images=[self.image2], + attributes={'frame': 1}) + ) + + dataset.remove('frame1') + dataset.save(save_images=True) + + self.assertEqual({'frame2.pcd.json'}, + set(os.listdir(osp.join(path, 'ds0', 'ann')))) + self.assertEqual({'frame2.pcd'}, + set(os.listdir(osp.join(path, 'ds0', 'pointcloud')))) + self.assertTrue(osp.isfile(osp.join(path, + 'ds0', 'related_images', 'frame2_pcd', 'img1.png'))) + self.assertFalse(osp.isfile(osp.join(path, + 'ds0', 'related_images', 'frame1_pcd', 'img2.png'))) diff --git a/tests/test_splitter.py b/tests/test_splitter.py index 21aa6ceb4df8..82ac0851d32c 100644 --- a/tests/test_splitter.py +++ b/tests/test_splitter.py @@ -1,20 +1,14 @@ -import numpy as np - from unittest import TestCase -from datumaro.components.project import Dataset +import numpy as np + from datumaro.components.extractor import ( - DatasetItem, - Label, - Bbox, - Mask, - Polygon, - LabelCategories, - AnnotationType, + AnnotationType, Bbox, DatasetItem, Label, LabelCategories, Mask, Polygon, ) - -import datumaro.plugins.splitter as splitter from datumaro.components.operations import compute_ann_statistics +from datumaro.components.project import Dataset +import 
datumaro.plugins.splitter as splitter + from .requirements import Requirements, mark_requirement diff --git a/tests/test_tfrecord_format.py b/tests/test_tfrecord_format.py index 1b147fa3e804..2a54e855e6db 100644 --- a/tests/test_tfrecord_format.py +++ b/tests/test_tfrecord_format.py @@ -1,25 +1,29 @@ from functools import partial -import numpy as np +from unittest import TestCase, skipIf import os import os.path as osp -from unittest import TestCase, skipIf +import numpy as np from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Bbox, Mask, LabelCategories +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, LabelCategories, Mask, +) +from datumaro.util.image import ByteImage, Image, encode_image +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, ) -from datumaro.util.image import Image, ByteImage, encode_image -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) from datumaro.util.tf_util import check_import + from .requirements import Requirements, mark_requirement try: - from datumaro.plugins.tf_detection_api_format.extractor import \ - TfDetectionApiExtractor, TfDetectionApiImporter - from datumaro.plugins.tf_detection_api_format.converter import \ - TfDetectionApiConverter + from datumaro.plugins.tf_detection_api_format.converter import ( + TfDetectionApiConverter, + ) + from datumaro.plugins.tf_detection_api_format.extractor import ( + TfDetectionApiExtractor, TfDetectionApiImporter, + ) import_failed = False except ImportError: import_failed = True diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 9ccd45023e3e..ea5748bd5a2b 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -1,15 +1,17 @@ +from unittest import TestCase import logging as log + import numpy as np -from unittest import TestCase -from datumaro.components.project import Dataset -from 
datumaro.components.extractor import (Extractor, DatasetItem, - Mask, Polygon, PolyLine, Points, Bbox, Label, - LabelCategories, MaskCategories, AnnotationType +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Label, LabelCategories, Mask, + MaskCategories, Points, Polygon, PolyLine, ) -import datumaro.util.mask_tools as mask_tools -import datumaro.plugins.transforms as transforms +from datumaro.components.project import Dataset from datumaro.util.test_utils import compare_datasets +import datumaro.plugins.transforms as transforms +import datumaro.util.mask_tools as mask_tools + from .requirements import Requirements, mark_requirement diff --git a/tests/test_util.py b/tests/test_util.py index 88f850eb3a38..c5048334dedf 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,11 +1,11 @@ +from unittest import TestCase import os import os.path as osp -from unittest import TestCase - from datumaro.util import Rollback, error_rollback -from datumaro.util.test_utils import TestDir from datumaro.util.os_util import walk +from datumaro.util.test_utils import TestDir + from .requirements import Requirements, mark_requirement @@ -33,7 +33,7 @@ def cb(): with Rollback() as on_error: on_error.do(cb) raise Exception('err') - except Exception: + except Exception: # nosec - disable B110:try_except_pass check pass finally: self.assertTrue(success) @@ -52,7 +52,7 @@ def foo(on_error=None): try: foo() - except Exception: + except Exception: # nosec - disable B110:try_except_pass check pass finally: self.assertTrue(success) @@ -86,7 +86,7 @@ def foo(): try: foo() - except Exception: + except Exception: # nosec - disable B110:try_except_pass check pass finally: self.assertTrue(success) @@ -111,7 +111,7 @@ def cb2(a1, a2=None, ignore_errors=None): on_error.do(cb2, 5, a2=2, ignore_errors=True, fwd_kwargs={'ignore_errors': 4}) raise Exception('err') - except Exception: + except Exception: # nosec - disable B110:try_except_pass check pass finally: 
self.assertTrue(success1) diff --git a/tests/test_validator.py b/tests/test_validator.py index 2d0bd47e7877..0e30a3a78e4c 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -4,21 +4,25 @@ from collections import Counter from unittest import TestCase + import numpy as np from datumaro.components.dataset import Dataset, DatasetItem -from datumaro.components.errors import (MissingLabelCategories, - MissingAnnotation, MultiLabelAnnotations, MissingAttribute, - UndefinedLabel, UndefinedAttribute, LabelDefinedButNotFound, - AttributeDefinedButNotFound, OnlyOneLabel, FewSamplesInLabel, - FewSamplesInAttribute, ImbalancedLabels, ImbalancedAttribute, - ImbalancedDistInLabel, ImbalancedDistInAttribute, - NegativeLength, InvalidValue, FarFromLabelMean, - FarFromAttrMean, OnlyOneAttributeValue) +from datumaro.components.errors import ( + AttributeDefinedButNotFound, FarFromAttrMean, FarFromLabelMean, + FewSamplesInAttribute, FewSamplesInLabel, ImbalancedAttribute, + ImbalancedDistInAttribute, ImbalancedDistInLabel, ImbalancedLabels, + InvalidValue, LabelDefinedButNotFound, MissingAnnotation, MissingAttribute, + MissingLabelCategories, MultiLabelAnnotations, NegativeLength, + OnlyOneAttributeValue, OnlyOneLabel, UndefinedAttribute, UndefinedLabel, +) from datumaro.components.extractor import Bbox, Label, Mask, Polygon -from datumaro.components.validator import (ClassificationValidator, - DetectionValidator, TaskType, validate_annotations, _Validator, - SegmentationValidator) +from datumaro.components.validator import TaskType +from datumaro.plugins.validators import ( + ClassificationValidator, DetectionValidator, SegmentationValidator, + _TaskValidator, +) + from .requirements import Requirements, mark_requirement @@ -114,7 +118,7 @@ def setUpClass(cls): class TestBaseValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = _Validator(task_type=TaskType.classification, + cls.validator = 
_TaskValidator(task_type=TaskType.classification, few_samples_thr=1, imbalance_ratio_thr=50, far_from_mean_thr=5.0, dominance_ratio_thr=0.8, topk_bins=0.1) @@ -721,8 +725,8 @@ class TestValidateAnnotations(TestValidatorTemplate): } @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_validate_annotations_classification(self): - actual_results = validate_annotations(self.dataset, 'classification', - **self.extra_args) + validator = ClassificationValidator(**self.extra_args) + actual_results = validator.validate(self.dataset) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -778,8 +782,8 @@ def test_validate_annotations_classification(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_validate_annotations_detection(self): - actual_results = validate_annotations(self.dataset, 'detection', - **self.extra_args) + validator = DetectionValidator(**self.extra_args) + actual_results = validator.validate(self.dataset) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -833,8 +837,8 @@ def test_validate_annotations_detection(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_validate_annotations_segmentation(self): - actual_results = validate_annotations(self.dataset, 'segmentation', - **self.extra_args) + validator = SegmentationValidator(**self.extra_args) + actual_results = validator.validate(self.dataset) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -888,11 +892,7 @@ def test_validate_annotations_segmentation(self): self.assertEqual(actual_summary, expected_summary) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_validate_annotations_invalid_task_type(self): - with self.assertRaises(ValueError): - validate_annotations(self.dataset, 'INVALID', **self.extra_args) - - @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_validate_annotations_invalid_dataset_type(self): + def 
test_validate_invalid_dataset_type(self): with self.assertRaises(TypeError): - validate_annotations(object(), 'classification', **self.extra_args) + validator = ClassificationValidator(**self.extra_args) + validator.validate(object()) diff --git a/tests/test_vgg_face2_format.py b/tests/test_vgg_face2_format.py index 9801f44d17bc..5608b1a57529 100644 --- a/tests/test_vgg_face2_format.py +++ b/tests/test_vgg_face2_format.py @@ -1,14 +1,18 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np + from datumaro.components.dataset import Dataset -from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem, - Label, LabelCategories, Points) -from datumaro.plugins.vgg_face2_format import (VggFace2Converter, - VggFace2Importer) +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Label, LabelCategories, Points, +) +from datumaro.plugins.vgg_face2_format import ( + VggFace2Converter, VggFace2Importer, +) from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement @@ -16,35 +20,35 @@ class VggFace2FormatTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load(self): source_dataset = Dataset.from_iterable([ - DatasetItem(id='1', subset='train', image=np.ones((8, 8, 3)), + DatasetItem(id='label_0/1', subset='train', image=np.ones((8, 8, 3)), annotations=[ Bbox(0, 2, 4, 2, label=0), Points([3.2, 3.12, 4.11, 3.2, 2.11, 2.5, 3.5, 2.11, 3.8, 2.13], label=0), ] ), - DatasetItem(id='2', subset='train', image=np.ones((10, 10, 3)), + DatasetItem(id='label_1/2', subset='train', image=np.ones((10, 10, 3)), annotations=[ Points([4.23, 4.32, 5.34, 4.45, 3.54, 3.56, 4.52, 3.51, 4.78, 3.34], label=1), ] ), - DatasetItem(id='3', subset='train', image=np.ones((8, 8, 3)), + DatasetItem(id='label_2/3', subset='train', image=np.ones((8, 8, 3)), annotations=[Label(2)] ), - 
DatasetItem(id='4', subset='train', image=np.ones((10, 10, 3)), + DatasetItem(id='label_3/4', subset='train', image=np.ones((10, 10, 3)), annotations=[ Bbox(0, 2, 4, 2, label=3), Points([3.2, 3.12, 4.11, 3.2, 2.11, 2.5, 3.5, 2.11, 3.8, 2.13], label=3), ] ), - DatasetItem(id='a/5', subset='train', image=np.ones((8, 8, 3)), + DatasetItem(id='no_label/a/5', subset='train', image=np.ones((8, 8, 3)), annotations=[ Bbox(2, 2, 2, 2), ] ), - DatasetItem(id='label_0', subset='train', image=np.ones((8, 8, 3)), + DatasetItem(id='no_label/label_0', subset='train', image=np.ones((8, 8, 3)), ), ], categories={ AnnotationType.label: LabelCategories.from_iterable( @@ -60,7 +64,7 @@ def test_can_save_and_load(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_no_subsets(self): source_dataset = Dataset.from_iterable([ - DatasetItem(id='b/1', image=np.ones((8, 8, 3)), + DatasetItem(id='a/b/1', image=np.ones((8, 8, 3)), annotations=[ Bbox(0, 2, 4, 2, label=0), Points([4.23, 4.32, 5.34, 4.45, 3.54, @@ -78,7 +82,7 @@ def test_can_save_dataset_with_no_subsets(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): source_dataset = Dataset.from_iterable([ - DatasetItem(id='кириллица с пробелом', image=np.ones((8, 8, 3)), + DatasetItem(id='a/кириллица с пробелом', image=np.ones((8, 8, 3)), annotations=[ Points([4.23, 4.32, 5.34, 4.45, 3.54, 3.56, 4.52, 3.51, 4.78, 3.34], label=0), @@ -96,7 +100,7 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_no_save_images(self): source_dataset = Dataset.from_iterable([ - DatasetItem(id='1', image=np.ones((8, 8, 3)), + DatasetItem(id='label_0/1', image=np.ones((8, 8, 3)), annotations=[ Bbox(0, 2, 4, 2, label=0), Points([4.23, 4.32, 5.34, 4.45, 3.54, @@ -114,14 +118,14 @@ def test_can_save_dataset_with_no_save_images(self): 
@mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_no_labels(self): source_dataset = Dataset.from_iterable([ - DatasetItem(id='1', image=np.ones((8, 8, 3)), + DatasetItem(id='no_label/1', image=np.ones((8, 8, 3)), annotations=[ Bbox(0, 2, 4, 2), Points([4.23, 4.32, 5.34, 4.45, 3.54, 3.56, 4.52, 3.51, 4.78, 3.34]), ] ), - DatasetItem(id='2', image=np.ones((8, 8, 3)), + DatasetItem(id='no_label/2', image=np.ones((8, 8, 3)), annotations=[ Bbox(2, 2, 4, 2), ] @@ -137,7 +141,7 @@ def test_can_save_dataset_with_no_labels(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_wrong_number_of_points(self): source_dataset = Dataset.from_iterable([ - DatasetItem(id='1', image=np.ones((8, 8, 3)), + DatasetItem(id='no_label/1', image=np.ones((8, 8, 3)), annotations=[ Points([4.23, 4.32, 5.34, 3.51, 4.78, 3.34]), ] @@ -145,7 +149,7 @@ def test_can_save_dataset_with_wrong_number_of_points(self): ], categories=[]) target_dataset = Dataset.from_iterable([ - DatasetItem(id='1', image=np.ones((8, 8, 3)), + DatasetItem(id='no_label/1', image=np.ones((8, 8, 3)), annotations=[] ), ], categories=[]) @@ -159,7 +163,7 @@ def test_can_save_dataset_with_wrong_number_of_points(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_image_with_arbitrary_extension(self): dataset = Dataset.from_iterable([ - DatasetItem('q/1', image=Image(path='q/1.JPEG', + DatasetItem('no_label/q/1', image=Image(path='q/1.JPEG', data=np.zeros((4, 3, 3)))), DatasetItem('a/b/c/2', image=Image(path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))), @@ -187,7 +191,7 @@ def test_can_detect(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_import(self): expected_dataset = Dataset.from_iterable([ - DatasetItem(id='0001_01', subset='train', + DatasetItem(id='n000001/0001_01', subset='train', image=np.ones((10, 15, 3)), annotations=[ Bbox(2, 2, 1, 2, label=0), @@ -195,7 +199,15 @@ def test_can_import(self): 2.456, 2.81, 
2.32, 2.89, 2.3], label=0), ] ), - DatasetItem(id='0002_01', subset='train', + DatasetItem(id='n000002/0001_01', subset='train', + image=np.ones((10, 15, 3)), + annotations=[ + Bbox(2, 4, 2, 2, label=1), + Points([2.3, 4.9, 2.9, 4.93, 2.62, + 4.745, 2.54, 4.45, 2.76, 4.43], label=1) + ] + ), + DatasetItem(id='n000002/0002_01', subset='train', image=np.ones((10, 15, 3)), annotations=[ Bbox(1, 3, 1, 1, label=1), @@ -203,11 +215,39 @@ def test_can_import(self): 3.634, 1.43, 3.34, 1.65, 3.32], label=1) ] ), + DatasetItem(id='n000003/0003_01', subset='test', + image=np.ones((10, 15, 3)), + annotations=[ + Bbox(1, 1, 1, 1, label=2), + Points([0.2, 2.8, 0.8, 2.9, 0.5, + 2.6, 0.4, 2.3, 0.6, 2.3], label=2) + ] + ) ], categories={ AnnotationType.label: LabelCategories.from_iterable( - [('n000001', 'car'), ('n000002', 'person')]), + [('n000001', 'Karl'), ('n000002', 'Jay'), ('n000003', 'Pol')]), }) dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'vgg_face2') compare_datasets(self, expected_dataset, dataset) + + def test_can_import_specific_subset(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='n000003/0003_01', subset='test', + image=np.ones((10, 15, 3)), + annotations=[ + Bbox(1, 1, 1, 1, label=2), + Points([0.2, 2.8, 0.8, 2.9, 0.5, + 2.6, 0.4, 2.3, 0.6, 2.3], label=2) + ] + ) + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + [('n000001', 'Karl'), ('n000002', 'Jay'), ('n000003', 'Pol')]), + }) + + specific_subset = osp.join(DUMMY_DATASET_DIR, 'bb_landmark', 'loose_bb_test.csv') + dataset = Dataset.import_from(specific_subset, 'vgg_face2') + + compare_datasets(self, expected_dataset, dataset) diff --git a/tests/test_voc_format.py b/tests/test_voc_format.py index f8b7df67199d..269e3def3b80 100644 --- a/tests/test_voc_format.py +++ b/tests/test_voc_format.py @@ -1,29 +1,31 @@ from collections import OrderedDict from functools import partial -import numpy as np +from unittest import TestCase import os import os.path as osp -from 
unittest import TestCase +import numpy as np -from datumaro.components.extractor import (Extractor, DatasetItem, - AnnotationType, Label, Bbox, Mask, LabelCategories, +from datumaro.components.dataset import Dataset +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, Extractor, Label, LabelCategories, Mask, + MaskCategories, ) -import datumaro.plugins.voc_format.format as VOC from datumaro.plugins.voc_format.converter import ( - VocConverter, - VocClassificationConverter, - VocDetectionConverter, - VocLayoutConverter, - VocActionConverter, - VocSegmentationConverter, + VocActionConverter, VocClassificationConverter, VocConverter, + VocDetectionConverter, VocLayoutConverter, VocSegmentationConverter, +) +from datumaro.plugins.voc_format.importer import ( + VocActionImporter, VocClassificationImporter, VocDetectionImporter, + VocImporter, VocLayoutImporter, VocSegmentationImporter, ) -from datumaro.plugins.voc_format.importer import VocImporter -from datumaro.components.dataset import Dataset from datumaro.util.image import Image from datumaro.util.mask_tools import load_mask -from datumaro.util.test_utils import (TestDir, compare_datasets, - test_save_and_load) +from datumaro.util.test_utils import ( + TestDir, compare_datasets, test_save_and_load, +) +import datumaro.plugins.voc_format.format as VOC + from .requirements import Requirements, mark_requirement @@ -79,7 +81,8 @@ def categories(self): return VOC.make_voc_categories() -DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'voc_dataset', 'voc_dataset1') +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'voc_dataset', + 'voc_dataset1') class VocImportTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) @@ -88,7 +91,7 @@ class DstExtractor(TestExtractorBase): def __iter__(self): return iter([ DatasetItem(id='2007_000001', subset='train', - image=Image(path='2007_000001.jpg', size=(10, 20)), + image=np.ones((10, 20, 3)), annotations=[ 
Label(self._label(l.name)) for l in VOC.VocLabel if l.value % 2 == 1 @@ -102,6 +105,12 @@ def __iter__(self): }, id=1, group=1, ), + # Only main boxes denote instances (have ids) + Mask(image=np.ones([10, 20]), + label=self._label(VOC.VocLabel(2).name), + group=1, + ), + Bbox(4, 5, 2, 2, label=self._label('person'), attributes={ 'truncated': False, @@ -114,16 +123,14 @@ def __iter__(self): }, id=2, group=2, ), - Bbox(5.5, 6, 2, 2, label=self._label( - VOC.VocBodyPart(1).name), + # Only main boxes denote instances (have ids) + Bbox(5.5, 6, 2, 2, + label=self._label(VOC.VocBodyPart(1).name), group=2 ), - Mask(image=np.ones([5, 10]), - label=self._label(VOC.VocLabel(2).name), - group=1, - ), ] ), + DatasetItem(id='2007_000002', subset='test', image=np.ones((10, 20, 3))), ]) @@ -132,9 +139,232 @@ def __iter__(self): compare_datasets(self, DstExtractor(), dataset) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_voc_classification_dataset(self): + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), + annotations=[ + Label(self._label(l.name)) + for l in VOC.VocLabel if l.value % 2 == 1 + ]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ]) + expected_dataset = DstExtractor() + + rpath = osp.join('ImageSets', 'Main', 'train.txt') + matrix = [ + ('voc_classification', '', ''), + ('voc_classification', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + actual = Dataset.import_from(osp.join(DUMMY_DATASET_DIR, path), + format) + + compare_datasets(self, expected, actual, require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_voc_layout_dataset(self): + expected_dataset = 
Dataset.from_iterable([ + DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), + annotations=[ + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=2, group=2, + attributes={ + 'difficult': False, + 'truncated': False, + 'occluded': False, + **{ + a.name : a.value % 2 == 1 + for a in VOC.VocAction + } + } + ), + Bbox(5.5, 6.0, 2.0, 2.0, label=22, group=2), + ]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) + + rpath = osp.join('ImageSets', 'Layout', 'train.txt') + matrix = [ + ('voc_layout', '', ''), + ('voc_layout', 'train', rpath), + ('voc', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + actual = Dataset.import_from(osp.join(DUMMY_DATASET_DIR, path), + format) + + compare_datasets(self, expected, actual, require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_voc_detection_dataset(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), + annotations=[ + Bbox(1.0, 2.0, 2.0, 2.0, label=8, id=1, group=1, + attributes={ + 'difficult': False, + 'truncated': True, + 'occluded': False, + 'pose': 'Unspecified' + } + ), + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=2, group=2, + attributes={ + 'difficult': False, + 'truncated': False, + 'occluded': False, + **{ + a.name : a.value % 2 == 1 + for a in VOC.VocAction + } + } + ), + ]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) + + rpath = osp.join('ImageSets', 'Main', 'train.txt') + matrix = [ + ('voc_detection', '', ''), + ('voc_detection', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if 
subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + actual = Dataset.import_from(osp.join(DUMMY_DATASET_DIR, path), + format) + + compare_datasets(self, expected, actual, require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_voc_segmentation_dataset(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), + annotations=[ + Mask(image=np.ones([10, 20]), label=2, group=1) + ]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) + + rpath = osp.join('ImageSets', 'Segmentation', 'train.txt') + matrix = [ + ('voc_segmentation', '', ''), + ('voc_segmentation', 'train', rpath), + ('voc', 'train', rpath), + ] + for format, subset, path in matrix: + with self.subTest(format=format, subset=subset, path=path): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + actual = Dataset.import_from(osp.join(DUMMY_DATASET_DIR, path), + format) + + compare_datasets(self, expected, actual, require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_save_and_load_voc_action_dataset(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='2007_000001', subset='train', + image=np.ones((10, 20, 3)), + annotations=[ + Bbox(4.0, 5.0, 2.0, 2.0, label=15, id=2, group=2, + attributes={ + 'difficult': False, + 'truncated': False, + 'occluded': False, + **{ + a.name : a.value % 2 == 1 + for a in VOC.VocAction + } + } + ) + ]), + + DatasetItem(id='2007_000002', subset='test', + image=np.ones((10, 20, 3))), + ], categories=VOC.make_voc_categories()) + + rpath = osp.join('ImageSets', 'Action', 'train.txt') + matrix = [ + ('voc_action', '', ''), + ('voc_action', 'train', rpath), + ('voc', 'train', rpath), + ] + for format, subset, path in matrix: + with 
self.subTest(format=format, subset=subset, path=path): + if subset: + expected = expected_dataset.get_subset(subset) + else: + expected = expected_dataset + + actual = Dataset.import_from(osp.join(DUMMY_DATASET_DIR, path), + format) + + compare_datasets(self, expected, actual, require_images=True) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_detect_voc(self): - self.assertTrue(VocImporter.detect(DUMMY_DATASET_DIR)) + matrix = [ + # Whole dataset + (DUMMY_DATASET_DIR, VocImporter), + + # Subformats + (DUMMY_DATASET_DIR, VocClassificationImporter), + (DUMMY_DATASET_DIR, VocDetectionImporter), + (DUMMY_DATASET_DIR, VocSegmentationImporter), + (DUMMY_DATASET_DIR, VocLayoutImporter), + (DUMMY_DATASET_DIR, VocActionImporter), + + # Subsets of subformats + (osp.join(DUMMY_DATASET_DIR, 'ImageSets', 'Main', 'train.txt'), + VocClassificationImporter), + (osp.join(DUMMY_DATASET_DIR, 'ImageSets', 'Main', 'train.txt'), + VocDetectionImporter), + (osp.join(DUMMY_DATASET_DIR, 'ImageSets', 'Segmentation', 'train.txt'), + VocSegmentationImporter), + (osp.join(DUMMY_DATASET_DIR, 'ImageSets', 'Layout', 'train.txt'), + VocLayoutImporter), + (osp.join(DUMMY_DATASET_DIR, 'ImageSets', 'Action', 'train.txt'), + VocActionImporter), + ] + + for path, subtask in matrix: + with self.subTest(path=path, task=subtask): + self.assertTrue(subtask.detect(path)) + class VocConverterTest(TestCase): def _test_save_and_load(self, source_dataset, converter, test_dir, @@ -750,45 +980,126 @@ def __iter__(self): target_dataset=DstExtractor()) @mark_requirement(Requirements.DATUM_GENERAL_REQ) - def test_inplace_save_writes_only_updated_data(self): + def test_inplace_save_writes_only_updated_data_with_direct_changes(self): + expected = Dataset.from_iterable([ + DatasetItem(1, subset='a', image=np.ones((1, 2, 3)), + annotations=[ + # Bbox(0, 0, 0, 0, label=1) # won't find removed anns + ]), + + DatasetItem(2, subset='b', image=np.ones((3, 2, 3)), + annotations=[ + Bbox(0, 0, 0, 0, label=4, 
id=1, group=1, attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + }) + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + ['background', 'a', 'b', 'c', 'd']), + AnnotationType.mask: MaskCategories( + colormap=VOC.generate_colormap(5)), + }) + + dataset = Dataset.from_iterable([ + DatasetItem(1, subset='a', image=np.ones((1, 2, 3)), + annotations=[Bbox(0, 0, 0, 0, label=1)]), + DatasetItem(2, subset='b', + annotations=[Bbox(0, 0, 0, 0, label=2)]), + DatasetItem(3, subset='c', image=np.ones((2, 2, 3)), + annotations=[ + Bbox(0, 0, 0, 0, label=3), + Mask(np.ones((2, 2)), label=1) + ]), + ], categories=['a', 'b', 'c', 'd']) + with TestDir() as path: - # generate initial dataset - dataset = Dataset.from_iterable([ - DatasetItem(1, subset='a', - annotations=[Bbox(0, 0, 0, 0, label=1)]), - DatasetItem(2, subset='b', - annotations=[Bbox(0, 0, 0, 0, label=2)]), - DatasetItem(3, subset='c', image=np.ones((2, 2, 3)), - annotations=[ - Bbox(0, 0, 0, 0, label=3), - Mask(np.ones((2, 2)), label=1) - ]), - ], categories=['a', 'b', 'c', 'd']) dataset.export(path, 'voc', save_images=True) os.unlink(osp.join(path, 'Annotations', '1.xml')) os.unlink(osp.join(path, 'Annotations', '2.xml')) os.unlink(osp.join(path, 'Annotations', '3.xml')) - self.assertFalse(osp.isfile(osp.join(path, 'JPEGImages', '2.jpg'))) - self.assertTrue(osp.isfile(osp.join(path, 'JPEGImages', '3.jpg'))) - self.assertTrue(osp.isfile( - osp.join(path, 'SegmentationObject', '3.png'))) - self.assertTrue(osp.isfile( - osp.join(path, 'SegmentationClass', '3.png'))) dataset.put(DatasetItem(2, subset='b', image=np.ones((3, 2, 3)), annotations=[Bbox(0, 0, 0, 0, label=3)])) dataset.remove(3, 'c') dataset.save(save_images=True) - self.assertFalse(osp.isfile(osp.join(path, 'Annotations', '1.xml'))) - self.assertTrue(osp.isfile(osp.join(path, 'Annotations', '2.xml'))) - self.assertFalse(osp.isfile(osp.join(path, 'Annotations', '3.xml'))) - 
self.assertTrue(osp.isfile(osp.join(path, 'JPEGImages', '2.jpg'))) - self.assertFalse(osp.isfile(osp.join(path, 'JPEGImages', '3.jpg'))) - self.assertFalse(osp.isfile( - osp.join(path, 'SegmentationObject', '3.png'))) - self.assertFalse(osp.isfile( - osp.join(path, 'SegmentationClass', '3.png'))) + self.assertEqual({'2.xml'}, # '1.xml' won't be touched + set(os.listdir(osp.join(path, 'Annotations')))) + self.assertEqual({'1.jpg', '2.jpg'}, + set(os.listdir(osp.join(path, 'JPEGImages')))) + self.assertEqual({'a.txt', 'b.txt'}, + set(os.listdir(osp.join(path, 'ImageSets', 'Main')))) + compare_datasets(self, expected, Dataset.import_from(path, 'voc'), + require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_inplace_save_writes_only_updated_data_with_transforms(self): + expected = Dataset.from_iterable([ + DatasetItem(3, subset='test', image=np.ones((2, 3, 3)), + annotations=[ + Bbox(0, 1, 0, 0, label=4, id=1, group=1, attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + }) + ]), + DatasetItem(4, subset='train', image=np.ones((2, 4, 3)), + annotations=[ + Bbox(1, 0, 0, 0, label=4, id=1, group=1, attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + }), + Mask(np.ones((2, 2)), label=2, group=1), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + ['background', 'a', 'b', 'c', 'd']), + AnnotationType.mask: MaskCategories( + colormap=VOC.generate_colormap(5)), + }) + + dataset = Dataset.from_iterable([ + DatasetItem(1, subset='a', image=np.ones((2, 1, 3)), + annotations=[ Bbox(0, 0, 0, 1, label=1) ]), + DatasetItem(2, subset='b', image=np.ones((2, 2, 3)), + annotations=[ + Bbox(0, 0, 1, 0, label=2), + Mask(np.ones((2, 2)), label=1), + ]), + DatasetItem(3, subset='b', image=np.ones((2, 3, 3)), + annotations=[ Bbox(0, 1, 0, 0, label=3) ]), + DatasetItem(4, subset='c', image=np.ones((2, 4, 3)), + annotations=[ + Bbox(1, 0, 0, 0, label=3), + Mask(np.ones((2, 2)), 
label=1) + ]), + ], categories=['a', 'b', 'c', 'd']) + + with TestDir() as path: + dataset.export(path, 'voc', save_images=True) + + dataset.filter('/item[id >= 3]') + dataset.transform('random_split', (('train', 0.5), ('test', 0.5)), + seed=42) + dataset.save(save_images=True) + + self.assertEqual({'3.xml', '4.xml'}, + set(os.listdir(osp.join(path, 'Annotations')))) + self.assertEqual({'3.jpg', '4.jpg'}, + set(os.listdir(osp.join(path, 'JPEGImages')))) + self.assertEqual({'4.png'}, + set(os.listdir(osp.join(path, 'SegmentationClass')))) + self.assertEqual({'4.png'}, + set(os.listdir(osp.join(path, 'SegmentationObject')))) + self.assertEqual({'train.txt', 'test.txt'}, + set(os.listdir(osp.join(path, 'ImageSets', 'Main')))) + self.assertEqual({'train.txt'}, + set(os.listdir(osp.join(path, 'ImageSets', 'Segmentation')))) + compare_datasets(self, expected, Dataset.import_from(path, 'voc'), + require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_no_data_images(self): diff --git a/tests/test_widerface_format.py b/tests/test_widerface_format.py index a6b4ab3ccf48..79996aa1c3fb 100644 --- a/tests/test_widerface_format.py +++ b/tests/test_widerface_format.py @@ -1,13 +1,16 @@ -import os.path as osp from unittest import TestCase +import os.path as osp import numpy as np -from datumaro.components.extractor import (AnnotationType, Bbox, DatasetItem, - Label, LabelCategories) + from datumaro.components.dataset import Dataset -from datumaro.plugins.widerface_format import WiderFaceConverter, WiderFaceImporter +from datumaro.components.extractor import Bbox, DatasetItem, Label +from datumaro.plugins.widerface_format import ( + WiderFaceConverter, WiderFaceImporter, +) from datumaro.util.image import Image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement diff --git a/tests/test_yolo_format.py b/tests/test_yolo_format.py index a537e8d736d0..3b34a7d53457 100644 
--- a/tests/test_yolo_format.py +++ b/tests/test_yolo_format.py @@ -1,17 +1,18 @@ -import numpy as np +from unittest import TestCase import os import os.path as osp -from unittest import TestCase +import numpy as np -from datumaro.components.extractor import (DatasetItem, - AnnotationType, Bbox, LabelCategories, -) from datumaro.components.dataset import Dataset -from datumaro.plugins.yolo_format.extractor import YoloImporter +from datumaro.components.extractor import ( + AnnotationType, Bbox, DatasetItem, LabelCategories, +) from datumaro.plugins.yolo_format.converter import YoloConverter +from datumaro.plugins.yolo_format.extractor import YoloImporter from datumaro.util.image import Image, save_image from datumaro.util.test_utils import TestDir, compare_datasets + from .requirements import Requirements, mark_requirement