diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..225e54305 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,39 @@ +{ + "name": "Existing Docker Compose (Extend)", + "initializeCommand": "docker buildx install", + "dockerComposeFile": [ + "../docker-compose.yml", + "docker-compose.extend.yml" + ], + "service": "backend", + "workspaceFolder": "/workspace", + "settings": { + "editor.formatOnSave": true, + "git.inputValidation": "always", + "files.insertFinalNewline": true, + "[python]": { + "editor.codeActionsOnSave": { + "source.organizeImports": true + } + }, + "python.formatting.provider": "black", + "python.formatting.blackArgs": ["--line-length", "120"], + "python.languageServer": "Pylance", + "python.linting.flake8Enabled": true, + "python.linting.pylintEnabled": true, + "python.linting.enabled": true, + "python.pythonPath": "/usr/local/bin/python", + "python.linting.mypyEnabled": true, + "python.linting.mypyArgs": [ + "--ignore-missing-imports", + "--follow-imports=silent", + "--show-column-numbers", + "--strict", + "--exclude tests" + ], + }, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + ], +} diff --git a/.devcontainer/docker-compose.extend.yml b/.devcontainer/docker-compose.extend.yml new file mode 100644 index 000000000..81937afac --- /dev/null +++ b/.devcontainer/docker-compose.extend.yml @@ -0,0 +1,9 @@ +version: '3' +services: + backend: + build: + context: . + target: local + volumes: + - .:/workspace:cached + command: /bin/sh -c "while sleep 1000; do :; done" diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..fe819d6cc --- /dev/null +++ b/.env.example @@ -0,0 +1,39 @@ +# authentication +AAI_CLIENT_SECRET=secret_must_be_long +AAI_CLIENT_ID=aud2 +OIDC_URL=http://mockauth:8000 +# change to http://mockauth:8000 if tests are run from container +OIDC_URL_TEST=http://localhost:8000 + + +# app urls +BASE_URL=http://localhost:5430 +# change to http://frontend:3000 if started using docker-compose for frontend +# should be commented out when running integration tests +# REDIRECT_URL=http://localhost:3000 + +# logging +LOG_LEVEL=DEBUG + +# database +MONGO_HOST=database:27017 +MONGO_DATABASE=default +MONGO_AUTHDB=admin +MONGO_INITDB_ROOT_PASSWORD=admin +MONGO_INITDB_ROOT_USERNAME=admin +MONGO_SSL=true +MONGO_SSL_CA=/tls/cacert +MONGO_SSL_CLIENT_KEY=/tls/key +MONGO_SSL_CLIENT_CERT=/tls/cert + +# doi +DOI_API=http://mockdoi:8001/dois +DOI_PREFIX=10.xxxx +DOI_USER=user +DOI_KEY=key +DISCOVERY_URL=https://etsin.demo.fairdata.fi/dataset/ + +# metax +METAX_USER=sd +METAX_PASS=demo_pass +METAX_URL=http://mockmetax:8002 diff --git a/.github/config/.spellcheck.yml b/.github/config/.spellcheck.yml new file mode 100644 index 000000000..b44ec0607 --- /dev/null +++ b/.github/config/.spellcheck.yml @@ -0,0 +1,49 @@ +matrix: +- name: Markdown + aspell: + lang: en + ignore-case: true + dictionary: + wordlists: + - .github/config/.wordlist.txt + encoding: utf-8 + pipeline: + - pyspelling.filters.markdown: + - pyspelling.filters.context: + context_visible_first: true + escapes: '\\[\\`~]' + delimiters: + # Ignore text between inline back ticks as this is code or hightlight words + - open: '(?P`+)' + close: '(?P=open)' + # Ignore surrounded in <> as in RST it is link + - open: '<(https?://[^\\s/$.?#].[^\\s]+|[A-Za-z0-9-_:.]+)' + close: '>' + sources: + - 'docs/*.rst' + - '**/*.md' + default_encoding: utf-8 + +- name: JSON schemas + aspell: + lang: en + ignore-case: true + 
dictionary: + wordlists: + - .github/config/.wordlist.txt + pipeline: + - pyspelling.filters.javascript: + jsdocs: true + line_comments: false + block_comments: false + strings: true + - pyspelling.filters.context: + context_visible_first: true + escapes: '\\[\\`~]' + delimiters: + # Ignore Pāli word as it cannot be added to dictionary + - open: '(Pāli)' + close: '-' + sources: + - metadata_backend/helpers/schemas/*.json + default_encoding: utf-8 diff --git a/.github/config/.wordlist.txt b/.github/config/.wordlist.txt new file mode 100644 index 000000000..fb308f3d8 --- /dev/null +++ b/.github/config/.wordlist.txt @@ -0,0 +1,665 @@ +aai +abkhaz +accessionid +additionalproperties +addobjecttodrafts +addobjecttofolder +affiliationidentifier +affiliationidentifierscheme +agp +ajv +akan +allof +alternateidentifier +alternateidentifiers +alternateidentifiertype +amd +amplicon +amr +amrantibiogram +analysisattribute +analysisattributes +analysiscenter +analysisdate +analysislinks +analysisref +analysistype +annotinfo +anonymized +antibiogram +api +api's +apisauce +aragonese +arxiv +assemblyannotation +async +atac +auth +authdb +authlib +automodule +autosummary +avaric +avestan +awardnumber +awardtitle +awarduri +backend +bai +bam +bambara +barcode +basecall +basecoord +baseurl +bashkir +bcf +bgiseq +bibcode +bihari +biocollections +biome +biomes +bionano +bioproject +biosample +biosystems +bislama +bisulfite +blastdbinfo +bokmål +bookchapter +boolean +bugfix +bugfixes +buildkit +buildx +cdd +cdna +centername +centerprojectname +certreqs +chamorro +changelog +checksummethod +chia +chichewa +chip +chmod +cli +clinvar +cloneend +codeql +commonname +computationalnotebook +conf +conferencepaper +conferenceproceeding +config +configs +const +contributortype +covid +cp +cpg +crai +createfromjson +createfromxml +createnewdraftfolder +createslice +crna +crossref +csc +cscfi +cscusername +csi +csrf +csv +ctrl +cts +curation +currentsubmissiontype +customfields +dac +dacattribute +dacattributes +daclinks +dacref +datacite +datadescription +datapaper +dataset +datasetattribute +datasetattributes +datasetidentifiers +datasetlinks +datasets +datasettype +datauses +datausetype +datecreated +dateinformation +datepublished +datetype +dbprobe +dbvar +ddi +ddialliance +de +decodings +defaultlength +defaultmember +demultiplexed +demultiplexing +demux +dependabot +descriptiontype +designdescription +destructure +dev +devcontainer +divehi +dnase +dockerfile +docstrings +doi +doiinfo +dois +dt +dzongkha +ean +eastboundlongitude +ebi +ebispot +edirect +ega +eissn +ena +enasequence +entrez +entrezdb +entrezid +entrezlink +enum +env +envs +epigenetics +eppn +eslint +eslintrc +exome +expectedbasecalltable +experimentattribute +experimentattributes +experimentlinks +experimentref +experimenttype +externalid +extrainfo +fairdata +faire +familyname +faroese +fasta +fastq +fbtr +filename +filetype +flatfile +flx +flybase +folderid +followsreadindex +formdata +fos +fractionation +frontend +fula +funder +funderidentifier +funderidentifiertype +fundername +fundingreferences +galician +ganda +gapplus +gds +genbank +genestudio +genexus +genindex +genomemap +genomic +genotyping +geolocation +geolocationbox +geolocationplace +geolocationpoint +geolocationpolygon +geolocations +geoprofiles +gff +gh +github +githubusercontent +givenname +gridion +groupedbyschema +gtr +guaraní +gunicorn +helicos +heliscope +hiri +hiscansq +hiseq +histone +hmpr +homologene +hostname +hotfix +hq +html +http +https +identifiertype +identitypython +ido +idp 
+ietf +igbo +igsn +iix +illumina +ini +insdc +interactiveresource +interlingua +interlingue +inupiaq +ipg +isni +issn +istc +journalarticle +js +json +jsoncontent +jwk +jwtconnect +kalaallisut +kallisto +kanuri +kashmiri +katanga +keyfile +kinyarwanda +kirundi +koalaman +komi +kubernetes +kwanyama +kyrgyz +lang +leaveaspool +lexically +libraryconstructionprotocol +librarydescriptor +librarylayout +libraryname +libraryselection +librarysource +librarystrategy +librarytype +lifecycle +limburgish +lims +lingala +linux +lissn +localhost +locusname +lsid +lt +luba +luxembourgish +maincontact +makestyles +marshallese +matchedge +matk +maxcontains +maxdepth +maxitems +maxmismatch +mbd +mda +medgen +medip +membername +metadataobjects +metagenome +metagenomic +metagenomics +metatranscriptome +metatranscriptomic +metax +metaxidentifier +metaxservicehandler +methylation +methylcytidine +mf +mgiseq +middleware +middlewares +mimetype +mingaplength +minion +miniseq +minitems +minlength +minmatch +mirna +miseq +mkdir +mnase +mockauth +modindex +mol +moltype +mongo +mongodb +motu +mpeg +mre +msll +mypy +nameidentifier +nameidentifiers +nameidentifierscheme +namespace +nametype +nano +nav +ncbi +ncbisearch +ncrna +ndebele +ndonga +neic +newdraft +nextseq +nlmcatalog +noindex +nominallength +nominalsdev +northboundlatitude +novaseq +npm +npx +nuccore +nuosu +nynorsk +objectdetails +objectinsidefolder +objectinsidefolderwithtags +objectstatus +objectsubmissiontypes +objecttags +objecttype +objecttypes +occitan +oecd +oidc +oidcrp +ojibwe +ol +oligo +omim +oneof +ontologies +openapi +openid +orcid +orgtrack +oromo +oss +ossetian +outputmanagementplan +ownedby +pacbio +paleo +panjabi +pashto +pathogenanalysis +pcassay +pccompound +pcr +pcsubstance +pdf +peerreview +pgm +phenome +physicalobject +pipesection +pmc +pmid +pointlatitude +pointlongitude +policyattribute +policyattributes +policylinks +policyref +policytext +policyurl +polya +poolclone +poolingstrategy +poolmembertype +popset +pre +precedesreadindex +prefill +preprint +prettierrc +prevstepindex +primaryid +probeset +processedreads +processingtype +projectId +projectNumber +promethion +proteinclusters +protfam +providermetadata +publicationyear +pubmed +py +pycqa +pymongo +pyspelling +quickstart +randompriming +rbcl +readclass +readgrouptag +readindex +readlabel +readme +readspec +readthedocs +readtype +redux +refcenter +referencealignment +referencesequence +referer +refname +relatedidentifier +relatedidentifiers +relatedidentifiertype +relatedmetadatascheme +relationtype +relativeorder +remoteuseridentifier +reqs +resequencing +resourcetypegeneral +rnaseq +rojopolis +rootreducer +ror +rrna +rst +runattribute +runattributes +runcenter +rundate +runlinks +runref +runtime +runtype +sami +sampleattribute +sampleattributes +sampledata +sampledemuxdirective +sampledescriptor +samplelinks +samplename +samplephenotype +sampleref +sango +sardinian +schemaorg +schemas +schemetype +schemeuri +scientificname +sda +sdev +sdSubmitProjects +se +secondaryid +sectionname +selex +seqannot +sequenceable +sequenceannotation +sequenceassembly +sequenceflatfile +sequencetype +sequencevariation +sff +sha +shellcheck +shona +sinhala +sllversion +snp +solid +sotho +southboundlatitude +spotdescriptor +spotlength +sra +src +srf +ssl +ssrna +stepindex +studyabstract +studyattribute +studyattributes +studydescription +studyidentifier +studylinks +studyref +studytitle +studytype +subjectscheme +submissionfolder +submissionfolderslice +submissiontype +submitter's 
+submitterdemultiplexed +submitterid +submitters +svg +swati +tabix +tajik +targetloci +taxonid +taxonomicreferenceset +taxonomysystem +taxonomysystemversion +telephonenumber +templateId +tigrinya +tls +toctree +tox +tpa +transcriptome +transcriptomeassembly +transcriptomic +transcriptomics +tsonga +turkmen +twi +txt +ui +ujson +umi +uncomment +unencryptedchecksum +uniqueitems +unlocalised +uri +url +urllink +usedispatch +userid +userinfo +useselector +uuid +uuids +uyghur +validator +vcf +venda +volapük +vscode +wcs +westboundlongitude +wga +wget +wgs +withstyles +wizardcomponents +wizardobject +wizardobjectindex +wizardsavedobjectslist +wizardsavedobjectslistprops +wizardshowsummarystep +wizardsteps +wizardsubmissionfolderslice +wolof +wordlist +wxs +xl +xml +xmlfile +xmlschema +xref +xrefdb +xrefid +xreflink +xsd +yaml +yml +za +zhuang diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 16d77d6e0..65d90dc53 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -25,7 +25,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f7999a430..d421d356e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,10 +12,11 @@ jobs: runs-on: ${{ matrix.os }} steps: - - name: Spell check install - run: curl -L https://git.io/misspell | bash - - name: Spell check docs - run: bin/misspell -error docs/* + - uses: actions/checkout@v3 + - uses: rojopolis/spellcheck-github-actions@0.22.1 + name: Spellcheck + with: + config_path: .github/config/.spellcheck.yml code_docs: strategy: @@ -27,9 +28,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/int.yml b/.github/workflows/int.yml index 7742bcf25..2e5d316f0 100644 --- a/.github/workflows/int.yml +++ b/.github/workflows/int.yml @@ -15,9 +15,9 @@ jobs: name: Integration Tests steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install requirements @@ -26,21 +26,31 @@ jobs: - name: Start Services run: | - docker-compose up -d --build + docker-compose --env-file .env.example up -d --build sleep 30 - - name: Run Integration test + - name: Clear database run: | python tests/integration/clean_db.py - python tests/integration/run_tests.py + env: + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin + - name: Run Integration test + run: | + python tests/integration/run_tests.py + env: + BASE_URL: http://localhost:5430 + OIDC_URL: http://localhost:8000 + - name: Collect logs from docker if: ${{ failure() }} run: docker-compose logs --no-color -t > tests/dockerlogs || true - name: Persist log files if: ${{ failure() }} - uses: actions/upload-artifact@v2.2.4 + uses: actions/upload-artifact@v3 with: name: test_debugging_help path: tests @@ -55,13 +65,25 @@ jobs: - name: Start services with TLS enabled run: | - docker-compose -f docker-compose-tls.yml up -d + docker-compose -f 
docker-compose-tls.yml --env-file .env.example up -d sleep 30 - - name: Run Integration test + - name: Clear database run: | python tests/integration/clean_db.py --tls + env: + MONGO_HOST: localhost:27017 + MONGO_DATABASE: default + MONGO_AUTHDB: admin + MONGO_SSL: True + + - name: Run Integration test + run: | python tests/integration/run_tests.py + env: + BASE_URL: http://localhost:5430 + OIDC_URL: http://localhost:8000 + MONGO_SSL: True - name: Collect logs from docker if: ${{ failure() }} @@ -69,7 +91,7 @@ jobs: - name: Persist log files if: ${{ failure() }} - uses: actions/upload-artifact@v2.2.4 + uses: actions/upload-artifact@v3 with: name: test_debugging_help path: tests diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index cc70326fb..b8d097ff7 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -23,7 +23,7 @@ jobs: - 5000:5000 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 name: Get sources - name: Set up Docker Buildx diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 44dad60c4..5936add9e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Prepare id: prep diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 9a0287bcf..c92937042 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -13,9 +13,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index a1dd9597d..b784c5869 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -13,9 +13,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install libcurl-devel @@ -25,9 +25,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox tox-gh-actions + pip install tox tox-gh-actions coveragepy-lcov - name: Run unit tests for python 3.8 - if: ${{ matrix.python-version == '3.8' }} - env: - COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} - run: tox -e py38 + run: | + tox -e py38 + coveragepy-lcov --output_file_path lcov.info + - name: Send coverage to coveralls + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.github_token }} + path-to-lcov: lcov.info diff --git a/.gitignore b/.gitignore index eee6ebf7d..e482d5ce5 100644 --- a/.gitignore +++ b/.gitignore @@ -93,6 +93,7 @@ venv/ ENV/ env.bak/ venv.bak/ +.env # Spyder project settings .spyderproject @@ -119,3 +120,9 @@ venv.bak/ metadata_backend/frontend/* config/* + +# oidcrp generated directories that store JWKs +private +static +# ignore pyspelling dictionary +*.dic diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..d2febe843 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,9 @@ +repos: +- repo: local + hooks: + - id: custom-script-file + name: custom-script-file + entry: 
./scripts/pre-commit.sh + language: script + pass_filenames: false + verbose: true diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..7853366d3 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,18 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "module": "metadata_backend", + "console": "integratedTerminal", + "autoReload": { + "enable": true + }, + "restart": true, + "remoteRoot": "/usr/local/lib/python3.8/site-packages/metadata_backend", + "localRoot": "${workspaceFolder}/metadata_backend/" + } + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json index 12ff2fdb0..805c1c378 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,3 @@ { - "restructuredtext.confPath": "${workspaceFolder}/docs" -} \ No newline at end of file + "restructuredtext.confPath": "${workspaceFolder}/docs", +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..7dfb8c269 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,340 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.13.0] - 2022-04-07 + +### Added + +- Submission endpoint update #371 + - Adds mandatory query parameter `folder` for submit endpoint POST + - On actions add and modify object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename + - Adds metax integration to submit endpoint +- Integration with Metax service #356 #387 + - Adds new local container for testing against mocked Metax API + - Introduces new env vars: METAX_USER, METAX_PASS, METAX_URL for connection to Metax service + - Introduces new env var DISCOVERY_URL for creating link to dataset inside Fairdata SD catalog + - Adds new key metaxIdentifier to Study and Dataset collections containing metax id returned from Metax API + - Adds new handler MetaxServiceHandler to take care of mapping Submitter metadata to Metax metadata and to connect to Metax API + - Adds new mapper class to adjust incoming metadata to Metax schema +- Add patching of folders after object save and update operations #354 + - Adds mandatory query parameter `folder` for objects endpoint POST + - Object is added or updated to folder(submission) where it belongs with it's accession ID, schema, submission type, title and filename in the case of CSV and XML upload + - Adds configuration for mypy linting to VScode devcontainer setup +- Templates API #256 + - use `ujson` as default json library +- Creating draft Datacite DOI for folders #257 #332 + - created a mock web app, which would act similarly to DataCite REST API + - altered `publish_folder` endpoint so that `extraInfo` containing the DOI data is added upon publishing + - added `datePublished` key to folders which takes in the date/time, when folder is published +- DOI Publishing and deletion to Datacite #332 #369 + - create draft DOIs for both Study and Datasets and add them to the folder `extraInfo` when published + - delete draft DOIs on object delete + - update DOI info at Datacite when folder is published +- VScode Dev environment #287 + - Add VS Code development container + - Update docker for development +- Docker-compose and docker-compose-tls files changed to use variables from .env 
file. #301 +- Add folder querying by name #305 + - Add indexing on database initialization + - Add new field text_name to folder collection + - Python scripts for database operations. `mongo_indexes.py` for collections and indexes creation to be run if the database is destroyed and `clean_db.py` script with new functionality to only delete documents from collections + - update github actions +- Add folder querying by date #308 +- Add description to JSON schemas #323 + - add JSON schema spelling checker to pyspelling github action + - optimise wordlist by adding regex ignore patterns + - added pyspelling to pre-commit hooks (fixed syntax for scripts according to https://github.com/koalaman/shellcheck ) + - enum are sorted alphabetically, with the exception of other and unspecified values which are left at the end of the list + - allow for accession key in `referenceAlignment` & `process sequence` as array, previously all accession keys were converted to `accessionId` which is not correct + - add default `gender` as `unknown` +- Project ownership #346 + - added new collection `project` + - added new key `projects` to `user` + - added new key `projectId` to `folder` and `template-*` collections + - new mandatory `/userinfo` value from AAI at login time `sdSubmitProjects` + - user is redirected to an info page by AAI if key is missing + - new mandatory query parameter `projectId` in `GET /folders` + - new mandatory JSON key `projectId` in `POST /folders` and `POST /templates` + - new endpoint `GET /templates` to replace `GET /users/current` `{"templates":[...]}` + - new JSON keys `index` and `tags` to `PATCH /templates/schema/templateId`, same values as were previously used in `PATCH /user` which is now removed + - WARNING: breaking change that requires fresh database, because "project" is new information that did not exist before, and it can't be migrated to existing user-owned hierarchy +- Multilevel add patch objects to support `/extraInfo/datasetIdentifiers/-` which needs dot notation for mongodb to work e.g. `extraInfo.datasetIdentifiers` #332 + +### Changed + +- Refactor auth.py package by removing custom OIDC code and replacing it with https://github.com/IdentityPython/JWTConnect-Python-OidcRP. #315 + - New mandatory ENV `OIDC_URL` + - New optional ENVs `OIDC_SCOPE`, `AUTH_METHOD` + - Added oidcrp dependency +- Use node 16+ #345 +- VScode Dev environment #287 + - Adds requirements-dev.in/txt files. Now pip dependencies can be managed with pip-tools + - README updated with tox command, development build instructions, and prettify Dockerfile. 
+- Update ENA XML and JSON schemas #299 +- Github actions changed the use of https://git.io/misspell to rojopolis/spellcheck-github-actions #316 +- Separated most of the handlers to own files inside the handlers folder #319 +- allow inserting only one study in folder #332 +- JSON schemas #332 + - introduce `keywords` required for Metax in `doiInfo` + - dataset `description` and study `studyAbstract` are now mandatory +- `keywords` will be comma separated values, that will require splitting when adding to Metax API + +### Fixed + +- Coveralls report #267 +- Typos for functions and tests #279 +- Fix spelling mistakes for JSON schemas #323 +- Oidcrp does not allow empty values, prefill them in mockauth so front-end can start #333 +- Fix development environment #336 + - Add env vars OIDC_URL and OIDC_URL_TEST to mock auth container + - Adds logging configs for mock auth + - Updates mock auth api's token endpoint with expiration configs + - Adds config .pre-commit-config.yaml file required by pre-commit library + - Redirect url in docker-compose is now default + - Adds logging for doi mock api + +### Removed + +- Removed `Authlib` dependency #315 +- Project ownership #346 + - deprecated `folders` and `templates` keys from `GET /users/current` + - as a side effect, deprecated `items` query parameter from the same endpoint + - deprecated `PATCH /user` + +### Deprecated + +- Deprecated ENVs `ISS_URL`, `AUTH_URL`, `AUTH_REFERER`, `JWK_URL` #315 + +## [0.11.0] - 2021-08-31 + +### Changed + +- Package updates + +### Added + +- Feature/sort folders #249 +- Include DOI information in the folder schema #246 + + +## [0.10.0] - 2021-08-12 + +### Added + +- Add integration tests for misses in dataset, experiment, policy + +### Changed + +- Package updates +- EGA XML schemas version:1.8.0 +- Refactor analysis and experiment schemas to adhere to XML schema + +### Fixed + +- Fix misses for DAC, experiment and policy processing of XML +- Fix misses in JSON Schema + +## [0.9.0] - 2021-03-22 + +### Added + +- Use dependabot +- Support simultaneous sessions + +### Changed + +- Refactor JSON schema Links +- Refactor handlers to be more streamlined +- Validate patch requests for JSON content +- Switch to python 3.8 + +## [0.8.1] - 2021-02-15 + +### Fixed + +- Bugfix for error pages #202 + +## [0.8.0] - 2021-02-12 + +### Added + +- TLS support +- Use `sub` as alternative to `eppn` to identify users +- `PATCH` for objects and `PUT` for XML objects enabled +- Delete folders and objects associated to user on user delete + +### Changed + +- Redirect to error pages +- Extended integration tests + +### Fixed + +- Fix replace on json patch +- General bug and fixes + +## [0.7.1] - 2021-01-19 + +### Fixed + +- Hotfix release #176 + - added check_object_exists to check object exists and fail early with 404 before checking it belongs to user + - refactor and added more check_folder_exists to check folder exists before doing anything + - integration test to check objects are deleted properly + +### Changes + +- Check objects and folders exist before any operation +- Integration check to see if deleted object or folder are still registered in db + +## [0.7.0] - 2021-01-06 + +### Added + +- CodeQL github action #162 +- `/health` endpoint #173 + +- Map `users` to `folders` with `_handle_check_ownedby_user` #158 + - querying for objects is restricted to only the objects that belong to user + - return folders owned by user or published + - added a few db operators some used (aggregate, remove) + - process json patch to mongo query 
so that there is addition and replace instead of full rewrite of the document causing race condition + - standardise raises description and general improvements and fixes of logs and descriptions + +### Changed +- Verify `/publish` endpoint #163 +- Restrict endpoints to logged in users #151 +- Updated documentation #165 +- Switch to using uuids for accession ids #168 +- Integration tests and increase unit test coverage #166 + +### Fixed + +- Fixes for idp and location headers redirects #150 +- Fix race condition in db operations #158 +- Fix handling of draft deletion by removing redundant deletion #164, #169 and #172 + +## [0.6.1] - 2020-11-23 + +### Added + +- CSRF session #142 + +### Changed + +- Refactor draft `/folder` #144 +- Refactor gh actions #140 +- Patch publish #141 + +### Fixed + +- Bugfixes for login redirect #139 + +## [0.6.0] - 2020-10-08 + +### Added + +- Authentication with OIDC #133 +- Only 3.7 support going further #134 +- More submission actions `ADD` and `MODIFY` #137 + + +## [0.5.3] - 2020-08-21 + +### Changed + +- Updated OpenAPI specifications #127 +- Python modules, project description and instructions to documentation sources #128 +- Added integration tests #129 +- Updated documentation #130 + + +## [0.5.2] - 2020-08-14 + +### Fixes + +- Fix mimetype for SVG image and package data + +## [0.5.1] - 2020-08-14 + +### Added + +- Add folder POST JSON schema +- Added `/user` endpoint with support for GET, PATCH and DELETE + +### Fixes + +- Dockerfile build fixes #115 +- Fix JSON Schema details #117 +- Missing env from github actions #119 +- Typo fixes #120 +- Await responses #122 + + +## [0.5.0] - 2020-08-06 + +### Added + +- Centralized status message handler #83 +- Alert dialog component #81 +- `/folders` endpoint +- `/drafts` endpoint +- JSON validation +- XML better parsing +- Auth middleware +- Pagination + +### Changed + +- Improved current naming conventions #82 +- Login flow with new routes for Home & Login #76, #79, #80 +- Change from pymongo to motor + +## [0.2.0] - 2020-07-01 + +### Added + +- Added integration tests +- Switched to github actions +- Added base docs folder +- Added more refined XML parsing +- Integration tests added +- Refactor unit tests + +### Changed + +- Refactor API endpoints and responses + - error using https://tools.ietf.org/html/rfc7807 + - `objects` and `schemas` endpoints added + +## [0.1.0] - 2020-06-08 + +### Added + +- RESTful API for metadata XML files, making it possible to Submit, List and Query files +- Files are also validated during submission process. 
+ + +[unreleased]: https://github.com/CSCfi/metadata-submitter/compare/v0.13.0...HEAD +[0.13.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.10.0...v0.13.0 +[0.11.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.10.0...v0.11.0 +[0.10.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.9.0...v0.10.0 +[0.9.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.8.1...v0.9.0 +[0.8.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.8.0...v0.8.1 +[0.8.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.7.1...v0.8.0 +[0.7.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.7.0...v0.7.1 +[0.7.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.6.1...v0.7.0 +[0.6.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.6.0...v0.6.1 +[0.6.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.3...v0.6.0 +[0.5.3]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.2...v0.5.3 +[0.5.2]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.1...v0.5.2 +[0.5.1]: https://github.com/CSCfi/metadata-submitter/compare/v0.5.0...v0.5.1 +[0.5.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.2.0...v0.5.0 +[0.3.0]: https://github.com/CSCfi/metadata-submitter/compare/v0.1.0...v0.2.0 +[0.1.0]: https://github.com/CSCfi/metadata-submitter/releases/tag/v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d3ca9a1b7..8f2e46346 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,7 +23,7 @@ Once submitted, the Pull Request will go through a review process, meaning we wi #### Git Branches -We use `develop` branch as the main developopment branch and `master` as the releases branch. +We use `develop` branch as the main development branch and `master` as the releases branch. All Pull Requests related to features should be done against `develop` branch, releases Pull Requests should be done against `master` branch. Give your branch a short descriptive name (like the names between the `<>` below) and prefix the name with something representative for that branch: @@ -42,12 +42,12 @@ We do optimize for readability, and it would be awesome if you go through the co - Indentation should be 4 *spaces* - 120 character limit is almost strict, but can be broken in documentation when hyperlinks go over the limits -- We use [black](https://github.com/psf/black) code formatter and also check for [pep8](https://www.python.org/dev/peps/pep-0008/) and [pep257](https://www.python.org/dev/peps/pep-0257/) with some small exceptions. You can see the stated exceptions in `tox.ini` configuration file +- We use [black](https://github.com/psf/black) for code format and also check for [pep8](https://www.python.org/dev/peps/pep-0008/) and [pep257](https://www.python.org/dev/peps/pep-0257/) with some small exceptions. You can see the stated exceptions in `tox.ini` configuration file - We like to keep things simple, so when possible avoid importing any big libraries. - Tools to help you: - - Tox is configured to run bunch of tests: black, flake8, docstrings, missing type hints, mypy. - - Tox is also ran in our CI, so please run tox before each push to this repo - - If you like things to happen automagically, you can add pre-commit hook to your git workflow! Hook can be found from [scripts-folder](scripts) and it includes settings for tox and [misspell](https://github.com/client9/misspell) (which is there just for, well, spelling errors). 
+ - Tox is configured to run bunch of tests: black, flake8, docstrings, missing type hints, mypy; + - Tox is also ran in our CI, so please run tox before each push to this repository; + - If you like things to happen in an automated manner, you can add pre-commit hook to your git workflow! Hook can be found from [scripts-folder](scripts) and it includes settings for tox and [pyspelling](https://facelessuser.github.io/pyspelling/) (which is there just for, well, spelling errors). Thanks, CSC developers diff --git a/Dockerfile b/Dockerfile index b317c8c81..af227f0ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,6 @@ -FROM node:14-alpine as BUILD-FRONTEND +#======================= +FROM node:16-alpine as BUILD-FRONTEND +#======================= RUN apk add --update \ && apk add --no-cache git\ @@ -9,16 +11,17 @@ ARG BRANCH=master RUN git clone -b ${BRANCH} https://github.com/CSCfi/metadata-submitter-frontend.git WORKDIR /metadata-submitter-frontend -RUN npm install -g npm@7.21.0 \ - && npx --quiet pinst --disable \ +RUN npx --quiet pinst --disable \ && npm install --production \ && npm run build --production -FROM python:3.8-alpine3.13 as BUILD-BACKEND +#======================= +FROM python:3.8-alpine3.15 as BUILD-BACKEND +#======================= RUN apk add --update \ && apk add --no-cache build-base curl-dev linux-headers bash git musl-dev libffi-dev \ - && apk add --no-cache python3-dev openssl-dev rust cargo \ + && apk add --no-cache python3-dev openssl-dev rust cargo libstdc++ \ && rm -rf /var/cache/apk/* COPY requirements.txt /root/submitter/requirements.txt @@ -32,9 +35,11 @@ RUN pip install --upgrade pip && \ pip install -r /root/submitter/requirements.txt && \ pip install /root/submitter -FROM python:3.8-alpine3.13 +#======================= +FROM python:3.8-alpine3.15 +#======================= -RUN apk add --no-cache --update bash +RUN apk add --no-cache --update libstdc++ LABEL maintainer="CSC Developers" LABEL org.label-schema.schema-version="1.0" diff --git a/Dockerfile-dev b/Dockerfile-dev index f28c2657a..66aada08d 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -1,18 +1,35 @@ -FROM python:3.8-slim +#======================= +FROM cscfi/python:3.8-slim-git as appbase +#======================= RUN apt-get install ca-certificates WORKDIR /usr/src/app -RUN pip install --upgrade pip \ - && pip install certifi +RUN pip install --upgrade pip COPY setup.py . COPY requirements.txt . COPY metadata_backend/ ./metadata_backend -RUN pip install . +RUN pip install . +RUN pip install authlib # required for mockauth (integration test) EXPOSE 5430 +#======================= +FROM appbase as develop +#======================= + CMD ["metadata_submitter"] + +#======================= +FROM appbase as local +#======================= + +COPY requirements-dev.txt . + +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements-dev.txt + +ENV PYTHONUNBUFFERED=1 diff --git a/README.md b/README.md index b39fc2a13..96fbbfd5b 100644 --- a/README.md +++ b/README.md @@ -11,19 +11,23 @@ Service also validates submitted metadata objects against EGA XSD metadata model ## Install and run -Requirements: +### Requirements: - Python 3.8+ - MongoDB - Docker + docker-compose -For quick testing, launch both server and database with Docker by running `docker-compose up --build` (add `-d` flag to run containers in background). Server can then be found from `http://localhost:5430`. 
+### For quick testing: +- copy the contents of .env.example file to .env file +- launch both server and database with Docker by running `docker-compose up --build` (add `-d` flag to run containers in background). -For more detailed setup, do following: +Server can then be found from `http://localhost:5430`. + +### For more detailed setup, do following: - Install project by running: `pip install .` in project root - Setup mongodb and env variables via desired way, details: - Server expects to find mongodb instance running, specified with following environment variables: - - `MONGO_USERNAME`, username for connecting to mondogdb instance - - `MONGO_PASSWORD`, password for connecting to mondogdb instance + - `MONGO_USERNAME`, username for connecting to mongodb instance + - `MONGO_PASSWORD`, password for connecting to mongodb instance - `MONGO_HOST`, host and port for mongodb instance (e.g. `localhost:27017`) - `MONGO_DATABASE`, If a specific database is to be used, set the name here. - `MONGO_AUTHDB`, if `MONGO_DATABASE` is set and the user doesn't exists in the database, set this to the database where the user exists (e.g. `admin`) @@ -31,11 +35,54 @@ For more detailed setup, do following: - Suitable mongodb instance can be launched with Docker by running `docker-compose up database` - After installing and setting up database, server can be launched with `metadata_submitter` -If you also need frontend for development, check out [frontend repository](https://github.com/CSCfi/metadata-submitter-frontend/). +If you also need frontend for development, check out [frontend repository](https://github.com/CSCfi/metadata-submitter-frontend/). You will also need to uncomment `REDIRECT_URL` environment variable from .env file. ## Tests -Tests can be run with tox automation: just run `tox` on project root (remember to install it first with `pip install tox`). +Tests can be run with tox automation: just run `tox -p auto` on project root (remember to install it first with `pip install tox`). + +## Developing + +Docker is utilizing the Buildkit builder toolkit. To activate it you might need to update your docker configurations with `{ "features": { "buildkit": true } }` inside the /etc/docker/daemon.json. + +If the above is not enough, try: +``` +$ wget https://github.com/docker/buildx/releases/download/v0.7.0/buildx-v0.7.0.linux-amd64 +$ mkdir -p ~/.docker/cli-plugins +$ cp ~/Downloads/buildx-v0.7.0.linux-amd64 ~/.docker/cli-plugins/docker-buildx +$ chmod +x ~/.docker/cli-plugins/docker-buildx +``` +and add `{ "experimental": "enabled" }` inside the /etc/docker/daemon.json. + +### Developing with VS Code + +VS Code provides functionality to develop inside the docker container. This mitigates the need to install a development environment and difficulties to make things work with different OSs. Also developing inside a container gives you the ability to see code changes on the fly. + +To start using the VS Code devcontainer: +- install extension Remote - Containers +- with CTRL+SHIFT P choose Remote-Container: Reopen in Container +- to run application and debug F5 + +Git hooks are activated inside the local development environment which will run tox tests before pushing. To ignore them for fast updates use the flag `--no-verify`. + +### Keeping Python requirements up to date + +1. Install `pip-tools`: + * `pip install pip-tools` + * if using docker-compose pip-tools are installed automatically + +2. Add new packages to `requirements.in` or `requirements-dev.in` + +3. 
Update `.txt` file for the changed requirements file: + * `pip-compile requirements.in` + * `pip-compile requirements-dev.in` + +4. If you want to update all dependencies to their newest versions, run: + * `pip-compile --upgrade requirements.in` + +5. To install Python requirements run: + * `pip-sync requirements.txt` + ## Build and deploy @@ -51,6 +98,6 @@ Frontend is built and added as static files to backend while building. Metadata submission interface is released under `MIT`, see [LICENSE](LICENSE). -## Contibuting +## Contributing If you want to contribute to a project and make it better, your help is very welcome. For more info about how to contribute, see [CONTRIBUTING](CONTRIBUTING.md). diff --git a/docker-compose-tls.yml b/docker-compose-tls.yml index b5e8cffce..ee076acd2 100644 --- a/docker-compose-tls.yml +++ b/docker-compose-tls.yml @@ -4,6 +4,7 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev container_name: "metadata_submitter_backend_dev" volumes: @@ -13,35 +14,44 @@ services: depends_on: - database - mockauth + - mockdoi + - mockmetax restart: on-failure environment: - - "MONGO_HOST=database:27017" - - "MONGO_SSL=true" - - "MONGO_SSL_CA=/tls/cacert" - - "MONGO_SSL_CLIENT_KEY=/tls/key" - - "MONGO_SSL_CLIENT_CERT=/tls/cert" - - "AAI_CLIENT_SECRET=secret" - - "AAI_CLIENT_ID=aud2" - - "ISS_URL=http://mockauth:8000" - - "AUTH_URL=http://localhost:8000/authorize" - - "OIDC_URL=http://mockauth:8000" - # Enable this for working with front-end on localhost - # or change to http://frontend:3000 if started using docker-compose - # - "REDIRECT_URL=http://localhost:3000" - - "AUTH_REFERER=http://mockauth:8000" - - "JWK_URL=http://mockauth:8000/keyset" - - "LOG_LEVEL=DEBUG" + - "MONGO_HOST=${MONGO_HOST}" + - "MONGO_SSL=${MONGO_SSL}" + - "MONGO_SSL_CA=${MONGO_SSL_CA}" + - "MONGO_SSL_CLIENT_KEY=${MONGO_SSL_CLIENT_KEY}" + - "MONGO_SSL_CLIENT_CERT=${MONGO_SSL_CLIENT_CERT}" + - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" + - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" + - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" + - "BASE_URL=${BASE_URL}" + - "REDIRECT_URL=${REDIRECT_URL}" + - "LOG_LEVEL=${LOG_LEVEL}" + - "MONGO_DATABASE=${MONGO_DATABASE}" + - "MONGO_AUTHDB=${MONGO_AUTHDB}" + - "DOI_API=${DOI_API}" + - "DOI_PREFIX=${DOI_PREFIX}" + - "DOI_USER=${DOI_USER}" + - "DOI_KEY=${DOI_KEY}" + - "DISCOVERY_URL=${DISCOVERY_URL}" + - "METAX_USER=${METAX_USER}" + - "METAX_PASS=${METAX_PASS}" + - "METAX_URL=${METAX_URL}" database: image: "mongo" container_name: "metadata_submitter_database_dev" command: "mongod --tlsMode=requireTLS --tlsCertificateKeyFile=/tls/combined2 --tlsCAFile=/tls/cacert" restart: on-failure environment: - - "MONGO_INITDB_ROOT_USERNAME=admin" - - "MONGO_INITDB_ROOT_PASSWORD=admin" + - "MONGO_INITDB_ROOT_USERNAME=${MONGO_INITDB_ROOT_USERNAME}" + - "MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}" volumes: - data:/data/db - ./config:/tls + - ./scripts/init_mongo.js:/docker-entrypoint-initdb.d/init_mongo.js:ro expose: - 27017 ports: @@ -50,7 +60,12 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev + environment: + - "LOG_LEVEL=${LOG_LEVEL}" + - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" hostname: mockauth expose: - 8000 @@ -59,5 +74,33 @@ services: volumes: - ./tests/integration/mock_auth.py:/mock_auth.py entrypoint: ["python", "/mock_auth.py", "0.0.0.0", "8000"] + mockdoi: + build: + dockerfile: Dockerfile-dev + context: . 
+ target: develop + image: cscfi/metadata-submitter-dev + hostname: mockdoi + expose: + - 8001 + ports: + - 8001:8001 + volumes: + - ./tests/integration/mock_doi_api.py:/mock_doi_api.py + entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] + mockmetax: + build: + dockerfile: Dockerfile-dev + context: . + target: develop + image: cscfi/metadata-submitter-dev + hostname: mockmetax + expose: + - 8002 + ports: + - 8002:8002 + volumes: + - ./tests/integration/mock_metax_api.py:/mock_metax_api.py + entrypoint: ["python", "/mock_metax_api.py", "0.0.0.0", "8002"] volumes: - data: + data: diff --git a/docker-compose.yml b/docker-compose.yml index 77678bc3b..8d42e5104 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev container_name: "metadata_submitter_backend_dev" ports: @@ -11,31 +12,38 @@ services: depends_on: - database - mockauth + - mockdoi + - mockmetax restart: on-failure environment: - - "MONGO_HOST=database:27017" - - "AAI_CLIENT_SECRET=secret" - - "AAI_CLIENT_ID=aud2" - - "ISS_URL=http://mockauth:8000" - - "AUTH_URL=http://localhost:8000/authorize" - - "OIDC_URL=http://mockauth:8000" - # Enable this for working with front-end on localhost - # or change to http://frontend:3000 if started using docker-compose - # - "REDIRECT_URL=http://localhost:3000" - - "AUTH_REFERER=http://mockauth:8000" - - "JWK_URL=http://mockauth:8000/keyset" - - "LOG_LEVEL=DEBUG" - - "MONGO_DATABASE=default" - - "MONGO_AUTHDB=admin" + - "MONGO_HOST=${MONGO_HOST}" + - "AAI_CLIENT_SECRET=${AAI_CLIENT_SECRET}" + - "AAI_CLIENT_ID=${AAI_CLIENT_ID}" + - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" + - "BASE_URL=${BASE_URL}" + - "REDIRECT_URL=${REDIRECT_URL}" + - "LOG_LEVEL=${LOG_LEVEL}" + - "MONGO_DATABASE=${MONGO_DATABASE}" + - "MONGO_AUTHDB=${MONGO_AUTHDB}" + - "DOI_API=${DOI_API}" + - "DOI_PREFIX=${DOI_PREFIX}" + - "DOI_USER=${DOI_USER}" + - "DOI_KEY=${DOI_KEY}" + - "DISCOVERY_URL=${DISCOVERY_URL}" + - "METAX_USER=${METAX_USER}" + - "METAX_PASS=${METAX_PASS}" + - "METAX_URL=${METAX_URL}" database: image: "mongo" container_name: "metadata_submitter_database_dev" restart: on-failure environment: - - "MONGO_INITDB_ROOT_USERNAME=admin" - - "MONGO_INITDB_ROOT_PASSWORD=admin" + - "MONGO_INITDB_ROOT_USERNAME=${MONGO_INITDB_ROOT_USERNAME}" + - "MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}" volumes: - data:/data/db + - ./scripts/init_mongo.js:/docker-entrypoint-initdb.d/init_mongo.js:ro expose: - 27017 ports: @@ -44,7 +52,12 @@ services: build: dockerfile: Dockerfile-dev context: . + target: develop image: cscfi/metadata-submitter-dev + environment: + - "LOG_LEVEL=${LOG_LEVEL}" + - "OIDC_URL=${OIDC_URL}" + - "OIDC_URL_TEST=${OIDC_URL_TEST}" hostname: mockauth expose: - 8000 @@ -53,5 +66,33 @@ services: volumes: - ./tests/integration/mock_auth.py:/mock_auth.py entrypoint: ["python", "/mock_auth.py", "0.0.0.0", "8000"] + mockdoi: + build: + dockerfile: Dockerfile-dev + context: . + target: develop + image: cscfi/metadata-submitter-dev + hostname: mockdoi + expose: + - 8001 + ports: + - 8001:8001 + volumes: + - ./tests/integration/mock_doi_api.py:/mock_doi_api.py + entrypoint: ["python", "/mock_doi_api.py", "0.0.0.0", "8001"] + mockmetax: + build: + dockerfile: Dockerfile-dev + context: . 
+ target: develop + image: cscfi/metadata-submitter-dev + hostname: mockmetax + expose: + - 8002 + ports: + - 8002:8002 + volumes: + - ./tests/integration/mock_metax_api.py:/mock_metax_api.py + entrypoint: ["python", "/mock_metax_api.py", "0.0.0.0", "8002"] volumes: - data: + data: diff --git a/docs/conf.py b/docs/conf.py index af2536b80..b0cf682cb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,7 +11,7 @@ author = "CSC Developers" # The full version, including alpha/beta/rc tags -release = "0.11.0" +release = "0.13.0" # -- General configuration --------------------------------------------------- diff --git a/docs/frontend.rst b/docs/frontend.rst index d1d84da5f..9a730db45 100644 --- a/docs/frontend.rst +++ b/docs/frontend.rst @@ -5,7 +5,7 @@ Metadata Submitter Frontend .. note:: Requirements: - * Node 14+ + * Node 16+ Environment Setup ----------------- diff --git a/docs/index.rst b/docs/index.rst index bb12b6853..23edca2a6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,7 +12,7 @@ Single Page Application frontend. Metadata Submitter is divided intro :ref:`backend` and :ref:`frontend`, both of them coming together in a Single Page Application that aims to streamline working with metadata and providing a submission process through which researchers can submit and publish metadata. -The application's inteded use is with `NeIC SDA (Sensitive Data Archive) `_ stand-alone version, and it +The application's intended use is with `NeIC SDA (Sensitive Data Archive) `_ stand-alone version, and it consists out of the box includes the `ENA (European Nucleotide Archive) `_ metadata model, model which is used also by the `European Genome-phenome Archive (EGA) `_. diff --git a/docs/metadata.rst b/docs/metadata.rst index 3651c5a9e..8126ce978 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -30,7 +30,7 @@ Relationships between objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each of the objects are connected between each other by references, usually in the form of an ``accessionId``. -Some of the relationships are illustrated in the Metdata ENA Model figure, however in more detail they are connected as follows: +Some of the relationships are illustrated in the Metadata ENA Model figure, however in more detail they are connected as follows: - ``Study`` - usually other objects point to it, as it represents one of the main objects of a ``Submission``; - ``Analysis`` - contains references to: diff --git a/docs/specification.yml b/docs/specification.yml index f054db993..88b0ce820 100644 --- a/docs/specification.yml +++ b/docs/specification.yml @@ -21,6 +21,14 @@ paths: tags: - Submission summary: XML submission endpoint, will also trigger validation. + parameters: + - name: folder + in: query + schema: + type: string + description: The folder ID where object belongs to. + required: true + example: "folder=12345" requestBody: content: multipart/form-data: @@ -295,10 +303,17 @@ paths: parameters: - name: schema in: path - description: Title of the XML schema. + description: Name of the Metadata schema. + schema: + type: string + required: true + - name: folder + in: query schema: type: string + description: The folder ID where object belongs to. 
required: true + example: "folder=12345" requestBody: content: multipart/form-data:: @@ -317,7 +332,9 @@ paths: content: application/json: schema: - $ref: "#/components/schemas/ObjectCreated" + oneOf: + - $ref: "#/components/schemas/ObjectCreated" + - $ref: "#/components/schemas/ObjectsCreated" 400: description: Bad Request content: @@ -529,10 +546,17 @@ paths: parameters: - name: schema in: path - description: Title of the XML schema. + description: Name of the Metadata schema. schema: type: string required: true + - name: folder + in: query + schema: + type: string + description: The folder ID where object belongs to. + required: true + example: "folder=12345" requestBody: content: multipart/form-data:: @@ -749,12 +773,177 @@ paths: application/json: schema: $ref: "#/components/schemas/403Forbidden" + /templates: + get: + tags: + - Query + summary: Get templates from selected project + parameters: + - name: projectId + in: query + description: project internal ID + schema: + type: string + required: true + responses: + 200: + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Templates" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + /templates/{schema}: + post: + tags: + - Submission + summary: Submit data to a specific schema + parameters: + - name: schema + in: path + description: Title of the template schema. + schema: + type: string + required: true + requestBody: + content: + application/json: + schema: + type: object + responses: + 201: + description: Created + content: + application/json: + schema: + $ref: "#/components/schemas/ObjectCreated" + 400: + description: Bad Request + content: + application/json: + schema: + $ref: "#/components/schemas/400BadRequest" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + /templates/{schema}/{accessionId}: + get: + tags: + - Query + summary: List of object by accession ID. + parameters: + - name: schema + in: path + description: Unique id of the targeted service. + schema: + type: string + required: true + - name: accessionId + in: path + description: filter objects in schema using accession ID + schema: + type: string + required: true + responses: + 200: + description: OK + content: + application/json: + schema: + $ref: "#/components/schemas/Object" + text/xml: + schema: + type: string + format: binary + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + 404: + description: Not Found + content: + application/json: + schema: + $ref: "#/components/schemas/404NotFound" + delete: + tags: + - Manage + summary: Delete object from a schema with a specified accession ID + parameters: + - name: schema + in: path + description: Unique id of the targeted service. 
+ schema: + type: string + required: true + - name: accessionId + in: path + description: filter objects in schema using accession ID + schema: + type: string + required: true + responses: + 204: + description: No Content + 400: + description: Bad Request + content: + application/json: + schema: + $ref: "#/components/schemas/400BadRequest" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/401Unauthorized" + 403: + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/403Forbidden" + /folders: get: tags: - Query summary: List of folders available for the user. parameters: + - in: query + name: projectId + schema: + type: string + description: ID of the project the folder belongs to + required: true - in: query name: page schema: @@ -769,7 +958,30 @@ paths: name: published schema: type: string - description: Return folders based on the folder published value + example: true + description: Return folders based on the folder published value. Should be 'true' or 'false' + - in: query + name: name + schema: + type: string + example: test folder + description: Return folders containing filtered string[s] in their name + - in: query + name: date_created_start + schema: + type: string + example: "2015-01-01" + description: | + Returns folders created between provided dates. + MUST be used with parameter 'date_created_end'. + - in: query + name: date_created_end + schema: + type: string + example: "2015-12-31" + description: | + Returns folders created between provided dates. + MUST be used with parameter 'date_created_start'. responses: 200: description: OK @@ -801,11 +1013,14 @@ paths: required: - name - description + - projectId properties: name: type: string description: type: string + projectId: + type: string responses: 201: description: OK @@ -1000,11 +1215,6 @@ paths: schema: type: string description: Results per page - - in: query - name: items - schema: - type: string - description: Item type name responses: 200: description: OK @@ -1177,6 +1387,20 @@ components: type: string description: URL pointing to the schema source example: https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.sample.xsd + Templates: + type: array + items: + type: object + properties: + accessionId: + type: string + description: internal ID of template + displayTitle: + type: string + description: name of template to be displayed in UI + schema: + type: string + description: database collection name template belongs to Object: type: object required: @@ -1285,6 +1509,7 @@ components: type: object required: - folderId + - projectId - name - description - published @@ -1297,6 +1522,9 @@ components: folderId: type: string description: Folder id + projectId: + type: string + description: Project ID this folder belongs to name: type: string description: Folder name @@ -1321,7 +1549,7 @@ components: name: type: string description: Full name - example: "Last name, First name" + example: "Family name, First name" nameType: type: string description: Type of name @@ -1332,8 +1560,8 @@ components: example: "First name" familyName: type: string - description: Official last name - example: "Last name" + description: Official Family name + example: "Family name" nameIdentifiers: type: array items: @@ -1464,8 +1692,7 @@ components: required: - userId - name - - drafts - - folders + - projects additionalProperties: false properties: userId: @@ -1474,35 +1701,21 @@ components: name: type: string 
description: User's Name - drafts: + projects: type: array items: type: object required: - - accessionId - - schema + - projectId + - projectNumber additionalProperties: false properties: - accessionId: + projectId: type: string - description: Accession id generated to identify an object - schema: + description: Internal accession ID for project + projectNumber: type: string - description: type of schema this Accession ID relates to and was added in submit - tags: - type: object - description: Different tags to describe the object. - additionalProperties: true - properties: - submissionType: - type: string - description: Type of submission - enum: ["XML", "Form"] - folders: - type: array - items: - type: string - description: Folder Id + description: Human friendly project number received from AAI UserUpdated: type: object required: diff --git a/docs/submitter.rst b/docs/submitter.rst index 153d0072a..f4cf2ad1e 100644 --- a/docs/submitter.rst +++ b/docs/submitter.rst @@ -29,7 +29,7 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``MONGO_PASSWORD`` | ``admin`` | Admin password for MongoDB. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``MONGO_SSL`` | ``-`` | Set to True to enable MONGO TLS connection url. | No | +| ``MONGO_SSL`` | ``-`` | Set to True to enable MongoDB TLS connection url. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``MONGO_SSL_CA`` | ``-`` | Path to CA file, required if ``MONGO_SSL`` enabled. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ @@ -41,22 +41,18 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``AAI_CLIENT_ID`` | ``secret`` | OIDC client ID. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_REFERER`` | ``-`` | OIDC Provider url that redirects the request to the application. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``BASE_URL`` | ``http://localhost:5430`` | base URL of the metadata submitter. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``ISS_URL`` | ``-`` | OIDC claim issuer URL. | Yes | +| ``AUTH_METHOD`` | ``code`` | OIDC Authentication method to use. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_URL`` | ``-`` | Set if a special OIDC authorize URL is required, | No | -| | | otherwise use ``"OIDC_URL"/authorize``. 
| | +| ``OIDC_URL`` | ``-`` | OIDC URL base URL, MUST resolve to configuration endpoint when appended with | Yes | +| | | /.well-known/openid-configuration | | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``OIDC_URL`` | ``-`` | OIDC base URL for constructing OIDC provider endpoint calls. | Yes | +| ``OIDC_SCOPE`` | ``openid profile email`` | Claims to request from AAI | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``REDIRECT_URL`` | ``-`` | Required only for testing with front-end on ``localhost`` or change to | No | | | | ``http://frontend:3000`` if started using ``docker-compose`` (see :ref:`deploy`). | | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``JWK_URL`` | ``-`` | JWK OIDC URL for retrieving key for validating ID token. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``LOG_LEVEL`` | ``INFO`` | Set logging level, uppercase. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``SERVE_KEY`` | ``-`` | Keyfile used for TLS. | No | @@ -79,8 +75,8 @@ the table below. +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -.. note:: If just ``MONGO_DATABASE`` is specified it will autenticate the user against it. - If just ``MONGO_AUTHDB`` is specified it will autenticate the user against it. +.. note:: If just ``MONGO_DATABASE`` is specified it will authenticate the user against it. + If just ``MONGO_AUTHDB`` is specified it will authenticate the user against it. If both ``MONGO_DATABASE`` and ``MONGO_AUTHDB`` are specified, the client will attempt to authenticate the specified user to the MONGO_AUTHDB database. If both ``MONGO_DATABASE`` and ``MONGO_AUTHDB`` are unspecified, the client will attempt to authenticate the specified user to the admin database. @@ -96,10 +92,10 @@ For installing ``metadata-submitter`` backend do the following: .. hint:: Before running the application have MongoDB running. - MongoDB Server expects to find MongoDB instance running, spesified with following environmental variables: + MongoDB Server expects to find MongoDB instance running, specified with following environmental variables: - - ``MONGO_INITDB_ROOT_USERNAME`` (username for admin user to mondogdb instance) - - ``MONGO_INITDB_ROOT_PASSWORD`` (password for admin user to mondogdb instance) + - ``MONGO_INITDB_ROOT_USERNAME`` (username for admin user to mongodb instance) + - ``MONGO_INITDB_ROOT_PASSWORD`` (password for admin user to mongodb instance) - ``MONGO_HOST`` (host and port for MongoDB instance, e.g. `localhost:27017`) To run the backend from command line set the environment variables required and use: @@ -118,13 +114,11 @@ The Authentication follows the `OIDC Specification `_ metadata @@ -137,18 +131,14 @@ endpoint ``https:///.well-known/openid-configuration``. 
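As a reference for how ``OIDC_URL``, ``AUTH_METHOD`` and ``OIDC_SCOPE`` are consumed, here is a minimal sketch mirroring the ``AccessHandler.__init__`` changes later in this PR. The import path, the callback URL and the environment-variable lookups are assumptions made for illustration only.

```python
import os

from oidcrp import RPHandler  # assumed import path for the oidcrp RPHandler

# OIDC_URL must resolve to the provider metadata when the well-known path is appended
oidc_url = os.environ["OIDC_URL"].rstrip("/") + "/.well-known/openid-configuration"

client_configs = {
    "aai": {
        "issuer": os.environ["OIDC_URL"],
        "client_id": os.environ["AAI_CLIENT_ID"],
        "client_secret": os.environ["AAI_CLIENT_SECRET"],
        # how the callback URL is built is not shown in this diff; placeholder only
        "redirect_uris": ["http://localhost:5430/callback"],
        "behaviour": {
            "response_types": os.environ.get("AUTH_METHOD", "code").split(" "),
            "scope": os.environ.get("OIDC_SCOPE", "openid profile email").split(" "),
        },
    },
}

rph = RPHandler(oidc_url, client_configs=client_configs)
# rph.begin("aai") later returns the authorization URL that the login endpoint redirects to
```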
+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``AAI_CLIENT_ID`` | ``secret`` | OIDC client ID. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_REFERER`` | ``-`` | OIDC Provider url that redirects the request to the application. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ | ``BASE_URL`` | ``http://localhost:5430`` | base URL of the metadata submitter. | Yes | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``ISS_URL`` | ``-`` | OIDC claim issuer URL. | Yes | -+--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``AUTH_URL`` | ``-`` | Set if a special OIDC authorize URL is required, | No | -| | | otherwise use ``"OIDC_URL"/authorize``. | | +| ``AUTH_METHOD`` | ``code`` | OIDC Authentication method to use. | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``OIDC_URL`` | ``-`` | OIDC base URL for constructing OIDC provider endpoint calls. | Yes | +| ``OIDC_URL`` | ``-`` | OIDC URL base URL, MUST resolve to configuration endpoint when appended with | Yes | +| | | /.well-known/openid-configuration | | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ -| ``JWK_URL`` | ``-`` | JWK OIDC URL for retrieving key for validating ID token. | Yes | +| ``OIDC_SCOPE`` | ``openid profile email`` | Claims to request from AAI | No | +--------------------------------+-------------------------------+-----------------------------------------------------------------------------------+-----------+ REST API diff --git a/docs/test.rst b/docs/test.rst index 0ffe20134..cf1692901 100644 --- a/docs/test.rst +++ b/docs/test.rst @@ -21,7 +21,7 @@ and `flake8 `_ (coding style guide) $ tox -p auto -To run environments seprately use: +To run environments separately use: .. code-block:: console @@ -42,8 +42,10 @@ Integration tests required a running backend, follow the instructions in :ref:`d After the backend has been successfully set up, run the following in the backend repository root directory: ``python tests/integration/run_tests.py``. This command will run a series of integration tests. -To clean db before or after each integration tests run: ``python tests/integration/clean_db.py`` (``--tls`` -argument can be added if Mongodb is started via ``docker-compose-tls.yml``. +To clean db before or after each integration tests run: ``python tests/integration/clean_db.py`` (``--tls`` argument +can be added if MongoDB is started via ``docker-compose-tls.yml``). Script clean_db.py will delete all documents in all collections in the database. +To erase the database run: ``python tests/integration/clean_db.py --purge``. After that indexes need to be recreated. 
+To do that run: ``python tests/integration/mongo_indexes.py`` (``--tls`` argument can be added if MongoDB is started via ``docker-compose-tls.yml``). Performance Testing @@ -62,7 +64,7 @@ running the following commands in the repository root directory will run differe The configuration values for running performance tests are predefined in the ``locust.conf`` file in the repository root directory. All configuration options (`as defined here `_) -can be overriden and new options can be added by either editing the current ``locust.conf`` file or running the test with additional tags, e.g.: +can be overridden and new options can be added by either editing the current ``locust.conf`` file or running the test with additional tags, e.g.: .. code-block:: console @@ -77,7 +79,7 @@ Run Jest-based tests with ``npm test``. Check code formatting and style errors w Respectively for formatting errors in ``json/yaml/css/md`` -files, use ``npm run format:check`` or ``npm run format``. Possible type errors can be checked with ``npm run flow``. -We're following recommended settings from ``eslint``, ``react`` and ``prettier`` - packages witha a couple of exceptions, +We're following recommended settings from ``eslint``, ``react`` and ``prettier`` - packages with a couple of exceptions, which can be found in ``.eslintrc`` and ``.prettierrc``. Linting, formatting and testing are also configured for you as a git pre-commit, which is recommended to use to avoid fails on CI pipeline. diff --git a/docs/validator.rst b/docs/validator.rst index 0528ea204..0c171b00e 100644 --- a/docs/validator.rst +++ b/docs/validator.rst @@ -9,7 +9,7 @@ The tool can be found and installed from `metadata-submitter-tools repository None: self.client_id = aai["client_id"] self.client_secret = aai["client_secret"] self.callback_url = aai["callback_url"] - self.auth_url = aai["auth_url"] - self.token_url = aai["token_url"] - self.revoke_url = aai["revoke_url"] + self.oidc_url = aai["oidc_url"].rstrip("/") + "/.well-known/openid-configuration" + self.iss = aai["oidc_url"] self.scope = aai["scope"] - self.jwk = aai["jwk_server"] - self.iss = aai["iss"] - self.user_info = aai["user_info"] - self.nonce = secrets.token_hex() + self.auth_method = aai["auth_method"] + + self.oidc_conf = { + "aai": { + "issuer": self.iss, + "client_id": self.client_id, + "client_secret": self.client_secret, + "redirect_uris": [self.callback_url], + "behaviour": { + "response_types": self.auth_method.split(" "), + "scope": self.scope.split(" "), + }, + }, + } + self.rph = RPHandler(self.oidc_url, client_configs=self.oidc_conf) async def login(self, req: Request) -> Response: """Redirect user to AAI login. 
- :param req: A HTTP request instance + :param req: A HTTP request instance (unused) :raises: HTTPSeeOther redirect to login AAI + :raises: HTTPInternalServerError if OIDC configuration init failed """ - # Generate a state for callback and save it to session storage - state = secrets.token_hex() - req.app["OIDC_State"].add(state) - LOG.debug("Start login") - # Parameters for authorisation request - params = { - "client_id": self.client_id, - "response_type": "code", - "state": state, - "redirect_uri": self.callback_url, - "scope": self.scope, - "nonce": self.nonce, - } - # Prepare response - url = f"{self.auth_url}?{urllib.parse.urlencode(params)}" - response = web.HTTPSeeOther(url) - response.headers["Location"] = url + # Generate authentication payload + session = None + try: + session = self.rph.begin("aai") + except Exception as e: + # This can be caused if config is improperly configured, and + # oidcrp is unable to fetch oidc configuration from the given URL + LOG.error(f"OIDC authorization request failed: {e}") + raise web.HTTPInternalServerError(reason="OIDC authorization request failed.") + + # Redirect user to AAI + response = web.HTTPSeeOther(session["url"]) + response.headers["Location"] = session["url"] raise response async def callback(self, req: Request) -> Response: @@ -79,6 +81,7 @@ async def callback(self, req: Request) -> Response: :param req: A HTTP request instance with callback parameters :returns: HTTPSeeOther redirect to home page """ + # Response from AAI must have the query params `state` and `code` if "state" in req.query and "code" in req.query: LOG.debug("AAI response contained the correct params.") @@ -88,34 +91,31 @@ async def callback(self, req: Request) -> Response: LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - # Verify, that state is pending - if not params["state"] in req.app["OIDC_State"]: + # Verify oidc_state and retrieve auth session + session = None + try: + session = self.rph.get_session_information(params["state"]) + except KeyError as e: + # This exception is raised if the RPHandler doesn't have the supplied "state" + LOG.error(f"Session not initialised: {e}") raise web.HTTPForbidden(reason="Bad user session.") - auth = BasicAuth(login=self.client_id, password=self.client_secret) - data = {"grant_type": "authorization_code", "code": params["code"], "redirect_uri": self.callback_url} - - # Set up client authentication for request - async with ClientSession(auth=auth) as sess: - # Send request to AAI - async with sess.post(f"{self.token_url}", data=data) as resp: - LOG.debug(f"AAI response status: {resp.status}.") - # Validate response from AAI - if resp.status == 200: - result = await resp.json() - if all(x in result for x in ["id_token", "access_token"]): - LOG.debug("Both ID and Access tokens received.") - access_token = result["access_token"] - id_token = result["id_token"] - await self._validate_jwt(id_token) - else: - reason = "AAI response did not contain access and id tokens." 
- LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - else: - reason = f"Token request to AAI failed: {resp}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) + # Place authorization_code to session for finalize step + session["auth_request"]["code"] = params["code"] + + # finalize requests id_token and access_token with code, validates them and requests userinfo data + try: + session = self.rph.finalize(session["iss"], session["auth_request"]) + except KeyError as e: + LOG.error(f"Issuer {session['iss']} not found: {e}.") + raise web.HTTPBadRequest(reason="Token issuer not found.") + except OidcServiceError as e: + # This exception is raised if RPHandler encounters an error due to: + # 1. "code" is wrong, so token request failed + # 2. token validation failed + # 3. userinfo request failed + LOG.error(f"OIDC Callback failed with: {e}") + raise web.HTTPBadRequest(reason="Invalid OIDC callback.") response = web.HTTPSeeOther(f"{self.redirect}/home") @@ -126,7 +126,7 @@ async def callback(self, req: Request) -> Response: hashlib.sha256((cookie["id"] + cookie["referer"] + req.app["Salt"]).encode("utf-8")) ).hexdigest() - cookie_crypted = req.app["Crypt"].encrypt(json.dumps(cookie).encode("utf-8")).decode("utf-8") + cookie_crypted = req.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8") response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" response.headers["Pragma"] = "no-Cache" @@ -146,11 +146,34 @@ async def callback(self, req: Request) -> Response: session_id = cookie["id"] - req.app["Session"][session_id] = {"oidc_state": params["state"], "access_token": access_token} + req.app["Session"][session_id] = {"oidc_state": params["state"], "access_token": session["token"]} req.app["Cookies"].add(session_id) - req.app["OIDC_State"].remove(params["state"]) - await self._set_user(req, session_id, access_token) + # User data is read from AAI /userinfo and is used to create the user model in database + user_data = { + "user_id": "", + "real_name": f"{session['userinfo']['given_name']} {session['userinfo']['family_name']}", + # projects come from AAI in this form: "project1 project2 project3" + # if user is not affiliated to any projects the `sdSubmitProjects` key will be missing + "projects": session["userinfo"]["sdSubmitProjects"].split(" "), + } + if "CSCUserName" in session["userinfo"]: + user_data["user_id"] = session["userinfo"]["CSCUserName"] + elif "remoteUserIdentifier" in session["userinfo"]: + user_data["user_id"] = session["userinfo"]["remoteUserIdentifier"] + elif "sub" in session["userinfo"]: + user_data["user_id"] = session["userinfo"]["sub"] + else: + LOG.error( + "User was authenticated, but they are missing mandatory claim CSCUserName, remoteUserIdentifier or sub." + ) + raise web.HTTPBadRequest( + reason="Could not set user, missing claim CSCUserName, remoteUserIdentifier or sub." 
+ ) + + # Process project external IDs into the database and return accession IDs back to user_data + user_data["projects"] = await self._process_projects(req, user_data["projects"]) + await self._set_user(req, session_id, user_data) # done like this otherwise it will not redirect properly response.headers["Location"] = "/home" if self.redirect == self.domain else f"{self.redirect}/home" @@ -166,6 +189,7 @@ async def logout(self, req: Request) -> Response: :returns: HTTPSeeOther redirect to login page """ # Revoke token at AAI + # Implement, when revocation_endpoint is supported by AAI try: cookie = decrypt_cookie(req) @@ -185,91 +209,57 @@ async def logout(self, req: Request) -> Response: raise response - async def _set_user(self, req: Request, session_id: str, token: str) -> None: + async def _process_projects(self, req: Request, projects: List[str]) -> List[Dict[str, str]]: + """Process project external IDs to internal accession IDs by getting IDs\ + from database and creating projects that are missing. + + :raises: HTTPBadRequest in failed to add project to database + :param req: A HTTP request instance + :param projects: A list of project external IDs + :returns: A list of objects containing project accession IDs and project numbers + """ + projects.sort() # sort project numbers to be increasing in order + new_project_ids: List[Dict[str, str]] = [] + + db_client = req.app["db_client"] + operator = ProjectOperator(db_client) + for project in projects: + project_id = await operator.create_project(project) + project_data = { + "projectId": project_id, # internal ID + "projectNumber": project, # human friendly + } + new_project_ids.append(project_data) + + return new_project_ids + + async def _set_user( + self, req: Request, session_id: str, user_data: Dict[str, Union[List[Dict[str, str]], str]] + ) -> None: """Set user in current session and return user id based on result of create_user. :raises: HTTPBadRequest in could not get user info from AAI OIDC :param req: A HTTP request instance - :param token: access token from AAI + :param user_data: user id and given name """ - user_data: Tuple[str, str] - try: - headers = CIMultiDict({"Authorization": f"Bearer {token}"}) - async with ClientSession(headers=headers) as sess: - async with sess.get(f"{self.user_info}") as resp: - result = await resp.json() - if "eppn" in result: - user_data = result["eppn"], f"{result['given_name']} {result['family_name']}" - elif "sub" in result: - user_data = result["sub"], f"{result['given_name']} {result['family_name']}" - else: - LOG.error("Could not set user, missing claim eppn or sub.") - raise web.HTTPBadRequest(reason="Could not set user, missing claim eppn or sub.") - except Exception as e: - LOG.error(f"Could not get information from AAI UserInfo endpoint because of: {e}") - raise web.HTTPBadRequest(reason="Could not get information from AAI UserInfo endpoint.") + LOG.debug("Create and set user to database") db_client = req.app["db_client"] operator = UserOperator(db_client) + + # Create user user_id = await operator.create_user(user_data) - req.app["Session"][session_id]["user_info"] = user_id - async def _get_key(self) -> dict: - """Get OAuth2 public key and transform it to usable pem key. 
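To make the new callback logic easier to follow, here is a stand-alone sketch of how the AAI ``/userinfo`` claims are turned into the ``user_data`` model described above. The helper function name is hypothetical and exists only for this example.

```python
from typing import Any, Dict


def parse_userinfo(userinfo: Dict[str, Any]) -> Dict[str, Any]:
    """Hypothetical helper mirroring the claim handling in AccessHandler.callback."""
    user_data: Dict[str, Any] = {
        "user_id": "",
        "real_name": f"{userinfo['given_name']} {userinfo['family_name']}",
        # AAI sends project numbers as one space-separated string, e.g. "project1 project2"
        "projects": userinfo["sdSubmitProjects"].split(" "),
    }
    # Accept the first identifier claim found, in order of preference
    for claim in ("CSCUserName", "remoteUserIdentifier", "sub"):
        if claim in userinfo:
            user_data["user_id"] = userinfo[claim]
            break
    else:
        raise ValueError("Missing mandatory claim: CSCUserName, remoteUserIdentifier or sub")
    return user_data
```

In the handler itself, the resulting ``projects`` list is then passed through ``_process_projects``, which maps each external project number to an internal ``projectId`` before the user document is created or updated.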
+ # Check if user's projects have changed + old_user = await operator.read_user(user_id) + if old_user["projects"] != user_data["projects"]: + update_operation = [ + { + "op": "replace", + "path": "/projects", + "value": user_data["projects"], + } + ] + user_id = await operator.update_user(user_id, update_operation) - :raises: HTTPUnauthorized in case JWK could not be retrieved - :returns: dictionary with JWK (JSON Web Keys) - """ - try: - async with ClientSession() as session: - async with session.get(self.jwk) as r: - # This can be a single key or a list of JWK - return await r.json() - except Exception: - raise web.HTTPUnauthorized(reason="JWK cannot be retrieved") - - async def _validate_jwt(self, token: str) -> None: - """Validate id token from AAI according to OIDC specs. - - :raises: HTTPUnauthorized in case token is missing claim, has expired signature or invalid - :raises: HTTPForbidden does not provide access to the token received - :param token: id token received from AAI - """ - key = await self._get_key() # JWK used to decode token with - claims_options = { - "iss": { - "essential": True, - "values": self.iss, - }, - "aud": {"essential": True, "value": self.client_id}, - "exp": {"essential": True}, - "iat": {"essential": True}, - } - claims_params = { - "auth_time": {"essential": True}, - "acr": { - "essential": True, - "values": f"{self.iss}/LoginHaka,{self.iss}/LoginCSC", - }, - "nonce": self.nonce, - } - try: - LOG.debug("Validate ID Token") - - decoded_data = jwt.decode( - token, key, claims_options=claims_options, claims_params=claims_params, claims_cls=CodeIDToken - ) # decode the token - decoded_data.validate() # validate the token contents - # Testing the exceptions is done in integration tests - except MissingClaimError as e: - raise web.HTTPUnauthorized(reason=f"Missing claim(s): {e}") - except ExpiredTokenError as e: - raise web.HTTPUnauthorized(reason=f"Expired signature: {e}") - except InvalidClaimError as e: - raise web.HTTPForbidden(reason=f"Token info not corresponding with claim: {e}") - except InvalidTokenError as e: - raise web.HTTPUnauthorized(reason=f"Invalid authorization token: {e}") - except DecodeError as e: - raise web.HTTPUnauthorized(reason=f"Invalid JWT format: {e}") - except Exception: - raise web.HTTPForbidden(reason="No access") + req.app["Session"][session_id]["user_info"] = user_id diff --git a/metadata_backend/api/handlers.py b/metadata_backend/api/handlers.py deleted file mode 100644 index a0ce381f0..000000000 --- a/metadata_backend/api/handlers.py +++ /dev/null @@ -1,1164 +0,0 @@ -"""Handle HTTP methods for server.""" -import json -import re -import mimetypes -from collections import Counter -from math import ceil -from pathlib import Path -from typing import Dict, List, Tuple, Union, cast, AsyncGenerator, Any - -from aiohttp import BodyPartReader, web -from aiohttp.web import Request, Response -from multidict import CIMultiDict -from motor.motor_asyncio import AsyncIOMotorClient -from multidict import MultiDict, MultiDictProxy -from xmlschema import XMLSchemaException -from distutils.util import strtobool - -from .middlewares import decrypt_cookie, get_session - -from ..conf.conf import schema_types -from ..helpers.logger import LOG -from ..helpers.parser import XMLToJSONParser -from ..helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException, XMLSchemaLoader -from ..helpers.validator import JSONValidator, XMLValidator -from .operators import FolderOperator, Operator, XMLOperator, UserOperator - -from ..conf.conf import 
aai_config - - -class RESTAPIHandler: - """Handler for REST API methods.""" - - def _check_schema_exists(self, schema_type: str) -> None: - """Check if schema type exists. - - :param schema_type: schema type. - :raises: HTTPNotFound if schema does not exist. - """ - if schema_type not in schema_types.keys(): - reason = f"Specified schema {schema_type} was not found." - LOG.error(reason) - raise web.HTTPNotFound(reason=reason) - - def _get_page_param(self, req: Request, name: str, default: int) -> int: - """Handle page parameter value extracting. - - :param req: GET Request - :param param_name: Name of the parameter - :param default: Default value in case parameter not specified in request - :returns: Page parameter value - """ - try: - param = int(req.query.get(name, default)) - except ValueError: - reason = f"{name} parameter must be a number, now it is {req.query.get(name)}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if param < 1: - reason = f"{name} parameter must be over 0" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - return param - - async def _handle_check_ownedby_user(self, req: Request, collection: str, accession_id: str) -> bool: - """Check if object belongs to user. - - For this we need to check the object is in exactly 1 folder and we need to check - that folder belongs to a user. If the folder is published that means it can be - browsed by other users as well. - - :param req: HTTP request - :param collection: collection or schema of document - :param doc_id: document accession id - :raises: HTTPUnauthorized if accession id does not belong to user - :returns: bool - """ - db_client = req.app["db_client"] - current_user = get_session(req)["user_info"] - user_op = UserOperator(db_client) - _check = False - - if collection != "folders": - - folder_op = FolderOperator(db_client) - check, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) - - if published: - _check = True - elif check: - # if the draft object is found in folder we just need to check if the folder belongs to user - _check = await user_op.check_user_has_doc("folders", current_user, folder_id) - elif collection.startswith("draft"): - # if collection is draft but not found in a folder we also check if object is in drafts of the user - # they will be here if they will not be deleted after publish - _check = await user_op.check_user_has_doc(collection, current_user, accession_id) - else: - _check = False - else: - _check = await user_op.check_user_has_doc(collection, current_user, accession_id) - - if not _check: - reason = f"The ID: {accession_id} does not belong to current user." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - - return _check - - async def _get_collection_objects( - self, folder_op: AsyncIOMotorClient, collection: str, seq: List - ) -> AsyncGenerator: - """Get objects ids based on folder and collection. - - Considering that many objects will be returned good to have a generator. - - :param req: HTTP request - :param collection: collection or schema of document - :param seq: list of folders - :returns: AsyncGenerator - """ - for el in seq: - result = await folder_op.get_collection_objects(el, collection) - - yield result - - async def _handle_user_objects_collection(self, req: Request, collection: str) -> List: - """Retrieve list of objects accession ids belonging to user in collection. 
- - :param req: HTTP request - :param collection: collection or schema of document - :returns: List - """ - db_client = req.app["db_client"] - current_user = get_session(req)["user_info"] - user_op = UserOperator(db_client) - folder_op = FolderOperator(db_client) - - user = await user_op.read_user(current_user) - res = self._get_collection_objects(folder_op, collection, user["folders"]) - - dt = [] - async for r in res: - dt.extend(r) - - return dt - - async def _filter_by_user(self, req: Request, collection: str, seq: List) -> AsyncGenerator: - """For a list of objects check if these are owned by a user. - - This can be called using a partial from functools. - - :param req: HTTP request - :param collection: collection or schema of document - :param seq: list of folders - :returns: AsyncGenerator - """ - for el in seq: - if await self._handle_check_ownedby_user(req, collection, el["accessionId"]): - yield el - - async def _get_data(self, req: Request) -> Dict: - """Get the data content from a request. - - :param req: POST/PUT/PATCH request - :raises: HTTPBadRequest if request does not have proper JSON data - :returns: JSON content of the request - """ - try: - content = await req.json() - return content - except json.decoder.JSONDecodeError as e: - reason = "JSON is not correctly formatted." f" See: {e}" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def get_schema_types(self, req: Request) -> Response: - """Get all possible metadata schema types from database. - - Basically returns which objects user can submit and query for. - :param req: GET Request - :returns: JSON list of schema types - """ - types_json = json.dumps([x["description"] for x in schema_types.values()]) - LOG.info(f"GET schema types. Retrieved {len(schema_types)} schemas.") - return web.Response(body=types_json, status=200, content_type="application/json") - - async def get_json_schema(self, req: Request) -> Response: - """Get all JSON Schema for a specific schema type. - - Basically returns which objects user can submit and query for. - :param req: GET Request - :raises: HTTPBadRequest if request does not find the schema - :returns: JSON list of schema types - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - - try: - schema = JSONSchemaLoader().get_schema(schema_type) - LOG.info(f"{schema_type} schema loaded.") - return web.Response(body=json.dumps(schema), status=200, content_type="application/json") - - except SchemaNotFoundException as error: - reason = f"{error} ({schema_type})" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def _header_links(self, url: str, page: int, size: int, total_objects: int) -> CIMultiDict[str]: - """Create link header for pagination. 
- - :param url: base url for request - :param page: current page - :param size: results per page - :param total_objects: total objects to compute the total pages - :returns: JSON with query results - """ - total_pages = ceil(total_objects / size) - prev_link = f'<{url}?page={page-1}&per_page={size}>; rel="prev", ' if page > 1 else "" - next_link = f'<{url}?page={page+1}&per_page={size}>; rel="next", ' if page < total_pages else "" - last_link = f'<{url}?page={total_pages}&per_page={size}>; rel="last"' if page < total_pages else "" - comma = ", " if page > 1 and page < total_pages else "" - first_link = f'<{url}?page=1&per_page={size}>; rel="first"{comma}' if page > 1 else "" - links = f"{prev_link}{next_link}{first_link}{last_link}" - link_headers = CIMultiDict(Link=f"{links}") - LOG.debug("Link headers created") - return link_headers - - -class ObjectAPIHandler(RESTAPIHandler): - """API Handler for Objects.""" - - async def _handle_query(self, req: Request) -> Response: - """Handle query results. - - :param req: GET request with query parameters - :returns: JSON with query results - """ - collection = req.match_info["schema"] - req_format = req.query.get("format", "json").lower() - if req_format == "xml": - reason = "xml-formatted query results are not supported" - raise web.HTTPBadRequest(reason=reason) - - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 10) - db_client = req.app["db_client"] - - filter_list = await self._handle_user_objects_collection(req, collection) - data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( - collection, req.query, page, per_page, filter_list - ) - - result = json.dumps( - { - "page": { - "page": page_num, - "size": page_size, - "totalPages": ceil(total_objects / per_page), - "totalObjects": total_objects, - }, - "objects": data, - } - ) - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page_num, per_page, total_objects) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for objects in {collection} resulted in {total_objects} objects ") - return web.Response( - body=result, - status=200, - headers=link_headers, - content_type="application/json", - ) - - async def get_object(self, req: Request) -> Response: - """Get one metadata object by its accession id. - - Returns original XML object from backup if format query parameter is - set, otherwise json. 
- - :param req: GET request - :returns: JSON or XML response containing metadata object - """ - accession_id = req.match_info["accessionId"] - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - req_format = req.query.get("format", "json").lower() - db_client = req.app["db_client"] - operator = XMLOperator(db_client) if req_format == "xml" else Operator(db_client) - type_collection = f"xml-{collection}" if req_format == "xml" else collection - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - data, content_type = await operator.read_metadata_object(type_collection, accession_id) - - data = data if req_format == "xml" else json.dumps(data) - LOG.info(f"GET object with accesssion ID {accession_id} from schema {collection}.") - return web.Response(body=data, status=200, content_type=content_type) - - async def post_object(self, req: Request) -> Response: - """Save metadata object to database. - - For JSON request body we validate it is consistent with the - associated JSON schema. - - :param req: POST request - :returns: JSON response containing accessionId for submitted object - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - db_client = req.app["db_client"] - content: Union[Dict, str] - operator: Union[Operator, XMLOperator] - if req.content_type == "multipart/form-data": - files = await _extract_xml_upload(req, extract_one=True) - content, _ = files[0] - operator = XMLOperator(db_client) - else: - content = await self._get_data(req) - if not req.path.startswith("/drafts"): - JSONValidator(content, schema_type).validate - operator = Operator(db_client) - - accession_id = await operator.create_metadata_object(collection, content) - - body = json.dumps({"accessionId": accession_id}) - url = f"{req.scheme}://{req.host}{req.path}" - location_headers = CIMultiDict(Location=f"{url}{accession_id}") - LOG.info(f"POST object with accesssion ID {accession_id} in schema {collection} was successful.") - return web.Response( - body=body, - status=201, - headers=location_headers, - content_type="application/json", - ) - - async def query_objects(self, req: Request) -> Response: - """Query metadata objects from database. - - :param req: GET request with query parameters (can be empty). - :returns: Query results as JSON - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - return await self._handle_query(req) - - async def delete_object(self, req: Request) -> Response: - """Delete metadata object from database. 
- - :param req: DELETE request - :raises: HTTPUnauthorized if folder published - :raises: HTTPUnprocessableEntity if object does not belong to current user - :returns: HTTPNoContent response - """ - schema_type = req.match_info["schema"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - accession_id = req.match_info["accessionId"] - db_client = req.app["db_client"] - - await Operator(db_client).check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - folder_op = FolderOperator(db_client) - exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) - if exists: - if published: - reason = "published objects cannot be deleted." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - await folder_op.remove_object(folder_id, collection, accession_id) - else: - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - check_user = await user_op.check_user_has_doc(collection, current_user, accession_id) - if check_user: - await user_op.remove_objects(current_user, "drafts", [accession_id]) - else: - reason = "This object does not seem to belong to any user." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) - - accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) - - LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(status=204) - - async def put_object(self, req: Request) -> Response: - """Replace metadata object in database. - - For JSON request we don't allow replacing in the DB. - - :param req: PUT request - :raises: HTTPUnsupportedMediaType if JSON replace is attempted - :returns: JSON response containing accessionId for submitted object - """ - schema_type = req.match_info["schema"] - accession_id = req.match_info["accessionId"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - db_client = req.app["db_client"] - content: Union[Dict, str] - operator: Union[Operator, XMLOperator] - if req.content_type == "multipart/form-data": - files = await _extract_xml_upload(req, extract_one=True) - content, _ = files[0] - operator = XMLOperator(db_client) - else: - content = await self._get_data(req) - if not req.path.startswith("/drafts"): - reason = "Replacing objects only allowed for XML." - LOG.error(reason) - raise web.HTTPUnsupportedMediaType(reason=reason) - operator = Operator(db_client) - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - accession_id = await operator.replace_metadata_object(collection, accession_id, content) - - body = json.dumps({"accessionId": accession_id}) - LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def patch_object(self, req: Request) -> Response: - """Update metadata object in database. - - We do not support patch for XML. 
- - :param req: PATCH request - :raises: HTTPUnauthorized if object is in published folder - :returns: JSON response containing accessionId for submitted object - """ - schema_type = req.match_info["schema"] - accession_id = req.match_info["accessionId"] - self._check_schema_exists(schema_type) - collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type - - db_client = req.app["db_client"] - operator: Union[Operator, XMLOperator] - if req.content_type == "multipart/form-data": - reason = "XML patching is not possible." - raise web.HTTPUnsupportedMediaType(reason=reason) - else: - content = await self._get_data(req) - operator = Operator(db_client) - - await operator.check_exists(collection, accession_id) - - await self._handle_check_ownedby_user(req, collection, accession_id) - - folder_op = FolderOperator(db_client) - exists, _, published = await folder_op.check_object_in_folder(collection, accession_id) - if exists: - if published: - reason = "Published objects cannot be updated." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - - accession_id = await operator.update_metadata_object(collection, accession_id, content) - - body = json.dumps({"accessionId": accession_id}) - LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - -class FolderAPIHandler(RESTAPIHandler): - """API Handler for folders.""" - - def _check_patch_folder(self, patch_ops: Any) -> None: - """Check patch operations in request are valid. - - We check that ``metadataObjects`` and ``drafts`` have ``_required_values``. - For tags we check that the ``submissionType`` takes either ``XML`` or - ``Form`` as values. - :param patch_ops: JSON patch request - :raises: HTTPBadRequest if request does not fullfil one of requirements - :raises: HTTPUnauthorized if request tries to do anything else than add or replace - :returns: None - """ - _required_paths = ["/name", "/description"] - _required_values = ["schema", "accessionId"] - _arrays = ["/metadataObjects/-", "/drafts/-", "/doiInfo"] - _tags = re.compile("^/(metadataObjects|drafts)/[0-9]*/(tags)$") - - for op in patch_ops: - if _tags.match(op["path"]): - LOG.info(f"{op['op']} on tags in folder") - if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]: - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - pass - else: - if all(i not in op["path"] for i in _required_paths + _arrays): - reason = f"Request contains '{op['path']}' key that cannot be updated to folders." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["op"] in ["remove", "copy", "test", "move"]: - reason = f"{op['op']} on {op['path']} is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["op"] == "replace" and op["path"] in _arrays: - reason = f"{op['op']} on {op['path']}; replacing all objects is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["path"] in _arrays and op["path"] != "/doiInfo": - _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] - for item in _ops: - if not all(key in item.keys() for key in _required_values): - reason = "accessionId and schema are required fields." 
- LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if ( - "tags" in item - and "submissionType" in item["tags"] - and item["tags"]["submissionType"] not in ["XML", "Form"] - ): - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def get_folders(self, req: Request) -> Response: - """Get a set of folders owned by the user with pagination values. - - :param req: GET Request - :returns: JSON list of folders available for the user - """ - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 5) - db_client = req.app["db_client"] - - user_operator = UserOperator(db_client) - current_user = get_session(req)["user_info"] - user = await user_operator.read_user(current_user) - - folder_query = {"folderId": {"$in": user["folders"]}} - # Check if only published or draft folders are requestsed - if "published" in req.query: - pub_param = req.query.get("published", "").title() - if pub_param in ["True", "False"]: - folder_query["published"] = {"$eq": bool(strtobool(pub_param))} - else: - reason = "'published' parameter must be either 'true' or 'false'" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - folder_operator = FolderOperator(db_client) - folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page) - - result = json.dumps( - { - "page": { - "page": page, - "size": per_page, - "totalPages": ceil(total_folders / per_page), - "totalFolders": total_folders, - }, - "folders": folders, - } - ) - - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page, per_page, total_folders) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for user's folders resulted in {total_folders} folders") - return web.Response( - body=result, - status=200, - headers=link_headers, - content_type="application/json", - ) - - async def post_folder(self, req: Request) -> Response: - """Save object folder to database. - - Also assigns the folder to the current user. - - :param req: POST request - :returns: JSON response containing folder ID for submitted folder - """ - db_client = req.app["db_client"] - content = await self._get_data(req) - JSONValidator(content, "folders").validate - - operator = FolderOperator(db_client) - folder = await operator.create_folder(content) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - await user_op.assign_objects(current_user, "folders", [folder]) - - body = json.dumps({"folderId": folder}) - - url = f"{req.scheme}://{req.host}{req.path}" - location_headers = CIMultiDict(Location=f"{url}/{folder}") - LOG.info(f"POST new folder with ID {folder} was successful.") - return web.Response(body=body, status=201, headers=location_headers, content_type="application/json") - - async def get_folder(self, req: Request) -> Response: - """Get one object folder by its folder id. 
- - :param req: GET request - :raises: HTTPNotFound if folder not owned by user - :returns: JSON response containing object folder - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - folder = await operator.read_folder(folder_id) - - LOG.info(f"GET folder with ID {folder_id} was successful.") - return web.Response(body=json.dumps(folder), status=200, content_type="application/json") - - async def patch_folder(self, req: Request) -> Response: - """Update object folder with a specific folder id. - - :param req: PATCH request - :returns: JSON response containing folder ID for updated folder - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - - # Check patch operations in request are valid - patch_ops = await self._get_data(req) - self._check_patch_folder(patch_ops) - - # Validate against folders schema if DOI is being added - for op in patch_ops: - if op["path"] == "/doiInfo": - curr_folder = await operator.read_folder(folder_id) - curr_folder["doiInfo"] = op["value"] - JSONValidator(curr_folder, "folders").validate - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - upd_folder = await operator.update_folder(folder_id, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - - body = json.dumps({"folderId": upd_folder}) - LOG.info(f"PATCH folder with ID {upd_folder} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def publish_folder(self, req: Request) -> Response: - """Update object folder specifically into published state. - - :param req: PATCH request - :returns: JSON response containing folder ID for updated folder - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - folder = await operator.read_folder(folder_id) - - obj_ops = Operator(db_client) - - for obj in folder["drafts"]: - await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) - - # Patch the folder into a published state - patch = [ - {"op": "replace", "path": "/published", "value": True}, - {"op": "replace", "path": "/drafts", "value": []}, - ] - new_folder = await operator.update_folder(folder_id, patch) - - body = json.dumps({"folderId": new_folder}) - LOG.info(f"Patching folder with ID {new_folder} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def delete_folder(self, req: Request) -> Response: - """Delete object folder from database. 
- - :param req: DELETE request - :returns: HTTP No Content response - """ - folder_id = req.match_info["folderId"] - db_client = req.app["db_client"] - operator = FolderOperator(db_client) - - await operator.check_folder_exists(folder_id) - await operator.check_folder_published(folder_id) - - await self._handle_check_ownedby_user(req, "folders", folder_id) - - obj_ops = Operator(db_client) - - folder = await operator.read_folder(folder_id) - - for obj in folder["drafts"] + folder["metadataObjects"]: - await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) - - _folder_id = await operator.delete_folder(folder_id) - - user_op = UserOperator(db_client) - current_user = get_session(req)["user_info"] - await user_op.remove_objects(current_user, "folders", [folder_id]) - - LOG.info(f"DELETE folder with ID {_folder_id} was successful.") - return web.Response(status=204) - - -class UserAPIHandler(RESTAPIHandler): - """API Handler for users.""" - - def _check_patch_user(self, patch_ops: Any) -> None: - """Check patch operations in request are valid. - - We check that ``folders`` have string values (one or a list) - and ``drafts`` have ``_required_values``. - For tags we check that the ``submissionType`` takes either ``XML`` or - ``Form`` as values. - :param patch_ops: JSON patch request - :raises: HTTPBadRequest if request does not fullfil one of requirements - :raises: HTTPUnauthorized if request tries to do anything else than add or replace - :returns: None - """ - _arrays = ["/drafts/-", "/folders/-"] - _required_values = ["schema", "accessionId"] - _tags = re.compile("^/(drafts)/[0-9]*/(tags)$") - for op in patch_ops: - if _tags.match(op["path"]): - LOG.info(f"{op['op']} on tags in folder") - if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in ["XML", "Form"]: - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - pass - else: - if all(i not in op["path"] for i in _arrays): - reason = f"Request contains '{op['path']}' key that cannot be updated to user object" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["op"] in ["remove", "copy", "test", "move", "replace"]: - reason = f"{op['op']} on {op['path']} is not allowed." - LOG.error(reason) - raise web.HTTPUnauthorized(reason=reason) - if op["path"] == "/folders/-": - if not (isinstance(op["value"], str) or isinstance(op["value"], list)): - reason = "We only accept string folder IDs." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if op["path"] == "/drafts/-": - _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] - for item in _ops: - if not all(key in item.keys() for key in _required_values): - reason = "accessionId and schema are required fields." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if ( - "tags" in item - and "submissionType" in item["tags"] - and item["tags"]["submissionType"] not in ["XML", "Form"] - ): - reason = "submissionType is restricted to either 'XML' or 'Form' values." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def get_user(self, req: Request) -> Response: - """Get one user by its user ID. 
- - :param req: GET request - :raises: HTTPUnauthorized if not current user - :returns: JSON response containing user object or list of user drafts or user folders by id - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} was requested") - raise web.HTTPUnauthorized(reason="Only current user retrieval is allowed") - - current_user = get_session(req)["user_info"] - - item_type = req.query.get("items", "").lower() - if item_type: - # Return only list of drafts or list of folder IDs owned by the user - result, link_headers = await self._get_user_items(req, current_user, item_type) - return web.Response( - body=json.dumps(result), - status=200, - headers=link_headers, - content_type="application/json", - ) - else: - # Return whole user object if drafts or folders are not specified in query - db_client = req.app["db_client"] - operator = UserOperator(db_client) - user = await operator.read_user(current_user) - LOG.info(f"GET user with ID {user_id} was successful.") - return web.Response(body=json.dumps(user), status=200, content_type="application/json") - - async def patch_user(self, req: Request) -> Response: - """Update user object with a specific user ID. - - :param req: PATCH request - :raises: HTTPUnauthorized if not current user - :returns: JSON response containing user ID for updated user object - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} patch was requested") - raise web.HTTPUnauthorized(reason="Only current user operations are allowed") - db_client = req.app["db_client"] - - patch_ops = await self._get_data(req) - self._check_patch_user(patch_ops) - - operator = UserOperator(db_client) - - current_user = get_session(req)["user_info"] - user = await operator.update_user(current_user, patch_ops if isinstance(patch_ops, list) else [patch_ops]) - - body = json.dumps({"userId": user}) - LOG.info(f"PATCH user with ID {user} was successful.") - return web.Response(body=body, status=200, content_type="application/json") - - async def delete_user(self, req: Request) -> Response: - """Delete user from database. 
- - :param req: DELETE request - :raises: HTTPUnauthorized if not current user - :returns: HTTPNoContent response - """ - user_id = req.match_info["userId"] - if user_id != "current": - LOG.info(f"User ID {user_id} delete was requested") - raise web.HTTPUnauthorized(reason="Only current user deletion is allowed") - db_client = req.app["db_client"] - operator = UserOperator(db_client) - fold_ops = FolderOperator(db_client) - obj_ops = Operator(db_client) - - current_user = get_session(req)["user_info"] - user = await operator.read_user(current_user) - - for folder_id in user["folders"]: - _folder = await fold_ops.read_folder(folder_id) - if "published" in _folder and not _folder["published"]: - for obj in _folder["drafts"] + _folder["metadataObjects"]: - await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) - await fold_ops.delete_folder(folder_id) - - for tmpl in user["drafts"]: - await obj_ops.delete_metadata_object(tmpl["schema"], tmpl["accessionId"]) - - await operator.delete_user(current_user) - LOG.info(f"DELETE user with ID {current_user} was successful.") - - cookie = decrypt_cookie(req) - - try: - req.app["Session"].pop(cookie["id"]) - req.app["Cookies"].remove(cookie["id"]) - except KeyError: - pass - - response = web.HTTPSeeOther(f"{aai_config['redirect']}/") - response.headers["Location"] = ( - "/" if aai_config["redirect"] == aai_config["domain"] else f"{aai_config['redirect']}/" - ) - LOG.debug("Logged out user ") - raise response - - async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]: - """Get draft templates owned by the user with pagination values. - - :param req: GET request - :param user: User object - :param item_type: Name of the items ("drafts" or "folders") - :raises: HTTPUnauthorized if not current user - :returns: Paginated list of user draft templates and link header - """ - # Check item_type parameter is not faulty - if item_type not in ["drafts", "folders"]: - reason = f"{item_type} is a faulty item parameter. Should be either folders or drafts" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - page = self._get_page_param(req, "page", 1) - per_page = self._get_page_param(req, "per_page", 5) - - db_client = req.app["db_client"] - operator = UserOperator(db_client) - user_id = req.match_info["userId"] - - query = {"userId": user} - - items, total_items = await operator.filter_user(query, item_type, page, per_page) - LOG.info(f"GET user with ID {user_id} was successful.") - - result = { - "page": { - "page": page, - "size": per_page, - "totalPages": ceil(total_items / per_page), - "total" + item_type.title(): total_items, - }, - item_type: items, - } - - url = f"{req.scheme}://{req.host}{req.path}" - link_headers = await self._header_links(url, page, per_page, total_items) - LOG.debug(f"Pagination header links: {link_headers}") - LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") - return result, link_headers - - -class SubmissionAPIHandler: - """Handler for non-rest API methods.""" - - async def submit(self, req: Request) -> Response: - """Handle submission.xml containing submissions to server. - - First submission info is parsed and then for every action in submission - (add/modify/validate) corresponding operation is performed. - Finally submission info itself is added. 
- - :param req: Multipart POST request with submission.xml and files - :raises: HTTPBadRequest if request is missing some parameters or cannot be processed - :returns: XML-based receipt from submission - """ - files = await _extract_xml_upload(req) - schema_types = Counter(file[1] for file in files) - if "submission" not in schema_types: - reason = "There must be a submission.xml file in submission." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if schema_types["submission"] > 1: - reason = "You should submit only one submission.xml file." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - submission_xml = files[0][0] - submission_json = XMLToJSONParser().parse("submission", submission_xml) - - # Check what actions should be performed, collect them to dictionary - actions: Dict[str, List] = {} - for action_set in submission_json["actions"]["action"]: - for action, attr in action_set.items(): - if not attr: - reason = f"""You also need to provide necessary - information for submission action. - Now {action} was provided without any - extra information.""" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - LOG.debug(f"submission has action {action}") - if attr["schema"] in actions: - set = [] - set.append(actions[attr["schema"]]) - set.append(action) - actions[attr["schema"]] = set - else: - actions[attr["schema"]] = action - - # Go through parsed files and do the actual action - results: List[Dict] = [] - db_client = req.app["db_client"] - for file in files: - content_xml = file[0] - schema_type = file[1] - if schema_type == "submission": - LOG.debug("file has schema of submission type, continuing ...") - continue # No need to use submission xml - action = actions[schema_type] - if isinstance(action, List): - for item in action: - result = await self._execute_action(schema_type, content_xml, db_client, item) - results.append(result) - else: - result = await self._execute_action(schema_type, content_xml, db_client, action) - results.append(result) - - body = json.dumps(results) - LOG.info(f"Processed a submission of {len(results)} actions.") - return web.Response(body=body, status=200, content_type="application/json") - - async def validate(self, req: Request) -> Response: - """Handle validating an XML file sent to endpoint. - - :param req: Multipart POST request with submission.xml and files - :returns: JSON response indicating if validation was successful or not - """ - files = await _extract_xml_upload(req, extract_one=True) - xml_content, schema_type = files[0] - validator = await self._perform_validation(schema_type, xml_content) - return web.Response(body=validator.resp_body, content_type="application/json") - - async def _perform_validation(self, schema_type: str, xml_content: str) -> XMLValidator: - """Validate an xml. - - :param schema_type: Schema type of the object to validate. - :param xml_content: Metadata object - :raises: HTTPBadRequest if schema load fails - :returns: JSON response indicating if validation was successful or not - """ - try: - schema = XMLSchemaLoader().get_schema(schema_type) - LOG.info(f"{schema_type} schema loaded.") - return XMLValidator(schema, xml_content) - - except (SchemaNotFoundException, XMLSchemaException) as error: - reason = f"{error} ({schema_type})" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - async def _execute_action(self, schema: str, content: str, db_client: AsyncIOMotorClient, action: str) -> Dict: - """Complete the command in the action set of the submission file. 
- - Only "add/modify/validate" actions are supported. - - :param schema: Schema type of the object in question - :param content: Metadata object referred to in submission - :param db_client: Database client for database operations - :param action: Type of action to be done - :raises: HTTPBadRequest if an incorrect or non-supported action is called - :returns: Dict containing specific action that was completed - """ - if action == "add": - result = { - "accessionId": await XMLOperator(db_client).create_metadata_object(schema, content), - "schema": schema, - } - LOG.debug(f"added some content in {schema} ...") - return result - - elif action == "modify": - data_as_json = XMLToJSONParser().parse(schema, content) - if "accessionId" in data_as_json: - accession_id = data_as_json["accessionId"] - else: - alias = data_as_json["alias"] - query = MultiDictProxy(MultiDict([("alias", alias)])) - data, _, _, _ = await Operator(db_client).query_metadata_database(schema, query, 1, 1, []) - if len(data) > 1: - reason = "Alias in provided XML file corresponds with more than one existing metadata object." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - accession_id = data[0]["accessionId"] - data_as_json.pop("accessionId", None) - result = { - "accessionId": await Operator(db_client).update_metadata_object(schema, accession_id, data_as_json), - "schema": schema, - } - LOG.debug(f"modified some content in {schema} ...") - return result - - elif action == "validate": - validator = await self._perform_validation(schema, content) - return json.loads(validator.resp_body) - - else: - reason = f"Action {action} in XML is not supported." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - - -class StaticHandler: - """Handler for static routes, mostly frontend and 404.""" - - def __init__(self, frontend_static_files: Path) -> None: - """Initialize path to frontend static files folder.""" - self.path = frontend_static_files - - async def frontend(self, req: Request) -> Response: - """Serve requests related to frontend SPA. - - :param req: GET request - :returns: Response containing frontpage static file - """ - serve_path = self.path.joinpath("./" + req.path) - - if not serve_path.exists() or not serve_path.is_file(): - LOG.debug(f"{serve_path} was not found or is not a file - serving index.html") - serve_path = self.path.joinpath("./index.html") - - LOG.debug(f"Serve Frontend SPA {req.path} by {serve_path}.") - - mime_type = mimetypes.guess_type(serve_path.as_posix()) - - return Response(body=serve_path.read_bytes(), content_type=(mime_type[0] or "text/html")) - - def setup_static(self) -> Path: - """Set path for static js files and correct return mimetypes. - - :returns: Path to static js files folder - """ - mimetypes.init() - mimetypes.types_map[".js"] = "application/javascript" - mimetypes.types_map[".js.map"] = "application/json" - mimetypes.types_map[".svg"] = "image/svg+xml" - mimetypes.types_map[".css"] = "text/css" - mimetypes.types_map[".css.map"] = "application/json" - LOG.debug("static paths for SPA set.") - return self.path / "static" - - -# Private functions shared between handlers -async def _extract_xml_upload(req: Request, extract_one: bool = False) -> List[Tuple[str, str]]: - """Extract submitted xml-file(s) from multi-part request. - - Files are sorted to spesific order by their schema priorities (e.g. - submission should be processed before study). 
- - :param req: POST request containing "multipart/form-data" upload - :raises: HTTPBadRequest if request is not valid for multipart or multiple files sent. HTTPNotFound if - schema was not found. - :returns: content and schema type for each uploaded file, sorted by schema - type. - """ - files: List[Tuple[str, str]] = [] - try: - reader = await req.multipart() - except AssertionError: - reason = "Request does not have valid multipart/form content" - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - while True: - part = await reader.next() - # Following is probably error in aiohttp type hints, fixing so - # mypy doesn't complain about it. No runtime consequences. - part = cast(BodyPartReader, part) - if not part: - break - if extract_one and files: - reason = "Only one file can be sent to this endpoint at a time." - LOG.error(reason) - raise web.HTTPBadRequest(reason=reason) - if part.name: - schema_type = part.name.lower() - if schema_type not in schema_types: - reason = f"Specified schema {schema_type} was not found." - LOG.error(reason) - raise web.HTTPNotFound(reason=reason) - data = [] - while True: - chunk = await part.read_chunk() - if not chunk: - break - data.append(chunk) - xml_content = "".join(x.decode("UTF-8") for x in data) - files.append((xml_content, schema_type)) - LOG.debug(f"processed file in {schema_type}") - return sorted(files, key=lambda x: schema_types[x[1]]["priority"]) diff --git a/metadata_backend/api/handlers/__init__.py b/metadata_backend/api/handlers/__init__.py new file mode 100644 index 000000000..f3b5ffee8 --- /dev/null +++ b/metadata_backend/api/handlers/__init__.py @@ -0,0 +1 @@ +"""API handlers.""" diff --git a/metadata_backend/api/handlers/common.py b/metadata_backend/api/handlers/common.py new file mode 100644 index 000000000..00aa94df7 --- /dev/null +++ b/metadata_backend/api/handlers/common.py @@ -0,0 +1,162 @@ +"""Functions shared between handlers.""" +import csv +import string +from typing import Any, Dict, List, Tuple + +from aiohttp import BodyPartReader, MultipartReader, hdrs, web +from aiohttp.web import Request +from xmlschema import XMLResource +from xmlschema.etree import ElementTree + +from ...conf.conf import schema_types +from ...helpers.logger import LOG +from ...helpers.parser import CSVToJSONParser + + +async def multipart_content( + req: Request, extract_one: bool = False, expect_xml: bool = False +) -> Tuple[List[Tuple[Any, str, str]], str]: + """Get content(s) and schema type(s) of a multipart request (from either csv or xml format). + + Note: for multiple files support check: https://docs.aiohttp.org/en/stable/multipart.html#hacking-multipart + + :param req: POST request containing "multipart/form-data" upload + :param extract_one: boolean stating whether multiple files should be handled + :param expect_xml: boolean stating if file can be expected to be XML + :raises: HTTPBadRequest for multiple different reasons + :returns: content and schema type for each uploaded file and file type of the upload + """ + xml_files: List[Tuple[str, str, str]] = [] + csv_files: List[Tuple[Dict, str, str]] = [] + try: + reader = await req.multipart() + except AssertionError: + reason = "Request does not have valid multipart/form content" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + while True: + part = await reader.next() + # we expect a simple body part (BodyPartReader) instance here + # otherwise, it will be another MultipartReader instance for the nested multipart. 
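+                # (reader.next() returns a BodyPartReader for each ``multipart/form-data`` field and None once all parts are consumed, which the ``if not part`` check below relies on)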
+ # we don't need to cast the part BodyPartReader, we fail if we get anything else. + # MultipartReader is aimed at ``multiplart/mixed``, ``multipart/related`` content + # we will be working with ``multipart/form-data`` only. + if isinstance(part, MultipartReader): + reason = "We cannot work nested multipart content." + LOG.error(reason) + raise web.HTTPUnsupportedMediaType(reason=reason) + if not part: + break + filename = part.filename if part.filename else "" + if extract_one and (xml_files or csv_files): + reason = "Only one file can be sent to this endpoint at a time." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # we check the multipart request header to see file type + # or we expect XML file directly + # additionally we check that the content is XML or CSV to be accurate + if expect_xml or part.headers[hdrs.CONTENT_TYPE] == "text/xml": + content, schema_type = await _extract_upload(part) + _check_xml(content) + xml_files.append((content, schema_type, filename)) + elif part.headers[hdrs.CONTENT_TYPE] == "text/csv": + content, schema_type = await _extract_upload(part) + _check_csv(content) + csv_content = CSVToJSONParser().parse(schema_type, content) + for row in csv_content: + csv_files.append((row, schema_type, filename)) + else: + reason = "Submitted file was not proper XML nor CSV." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Return extracted content + return _get_content_with_type(xml_files, csv_files) + + +async def _extract_upload(part: BodyPartReader) -> Tuple[str, str]: + """Extract a submitted file from upload. + + :param part: Multipart reader for single body part + :raises: HTTPNotFound if schema was not found + :returns: content as text and schema type for uploaded file + """ + schema_type = part.name.lower() if part.name else "none" + if schema_type not in schema_types: + reason = f"Specified schema {schema_type} was not found." + LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + data = [] + while True: + chunk = await part.read_chunk() + if not chunk: + break + data.append(chunk) + xml_content = "".join(x.decode("UTF-8") for x in data) + LOG.debug(f"Processed file in {schema_type}") + return xml_content, schema_type + + +def _check_csv(content: str) -> bool: + """Check if content is in CSV format. + + :param content: Text of file content + :raises: HTTPBadRequest if both CSV validation fails + :returns: true if file is CSV + """ + try: + # Check for non-printable characters which should not be in CSV files + if not all([c in string.printable or c.isprintable() for c in content]): + raise csv.Error + csv.Sniffer().sniff(content) + # No errors indicates validity of CSV + LOG.info("Valid CSV content was extracted.") + return True + except csv.Error: + reason = "Submitted file was not proper formatted as CSV." + LOG.error(reason) + return False + + +def _check_xml(content: str) -> bool: + """Check if content is in XML format. + + :param content: Text of file content + :raises: HTTPBadRequest if both XML validation fails + :returns: name of file type + """ + try: + XMLResource(content, allow="local", defuse="always") + LOG.info("Valid XML content was extracted.") + return True + except ElementTree.ParseError as err: + reason = f"Submitted file was not proper XML. 
Error: {err}" + LOG.error(reason) + return False + + +def _get_content_with_type( + xml_files: List[Tuple[str, str, str]], csv_files: List[Tuple[Dict, str, str]] +) -> Tuple[List[Tuple[Any, str, str]], str]: + """Return either list of XML or CSV files with the file type info. + + :param xml_files: List of xml contents with schema types + :param csv_files: List of csv contents with schema types + :raises: HTTPBadRequest if both lists are populated or empty + :returns: List of xml or csv files with string stating which file type + """ + if xml_files and csv_files: + reason = "Request contained both xml and csv file types. Only one file type can be processed in this endpoint." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + elif xml_files: + # Files are sorted into a specific order by their schema priorities + # (e.g. submission should be processed before study). + return sorted(xml_files, key=lambda x: schema_types[x[1]]["priority"]), "xml" + elif csv_files: + return csv_files, "csv" + else: + reason = "Request data seems empty." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) diff --git a/metadata_backend/api/handlers/folder.py b/metadata_backend/api/handlers/folder.py new file mode 100644 index 000000000..5dd2838bc --- /dev/null +++ b/metadata_backend/api/handlers/folder.py @@ -0,0 +1,603 @@ +"""Handle HTTP methods for server.""" +import re +from datetime import date, datetime +from distutils.util import strtobool +from math import ceil +from typing import Any, Dict, List, Tuple, Union + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from multidict import CIMultiDict + +from ...conf.conf import doi_config +from ...helpers.doi import DOIHandler +from ...helpers.logger import LOG +from ...helpers.metax_api_handler import MetaxServiceHandler +from ...helpers.validator import JSONValidator +from ..middlewares import get_session +from ..operators import FolderOperator, Operator, ProjectOperator, UserOperator +from .restapi import RESTAPIHandler + + +class FolderAPIHandler(RESTAPIHandler): + """API Handler for folders.""" + + def _prepare_published_study(self, study_data: Dict, general_info: Dict) -> Dict: + """Prepare Study object for publishing.
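+        The study is mapped to a Datacite DOI payload with ``Collection`` resource type, and the folder's ``doiInfo`` captured in the front-end is merged into its attributes.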
+ + :param study_data: Study Object read from the database + :param general_info: General information that is captured in front-end and set in ``doiInfo`` key + :returns: Study Object ready to publish to Datacite + """ + + study = { + "attributes": { + "publisher": doi_config["publisher"], + "publicationYear": date.today().year, + "event": "publish", + "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + "doi": study_data["doi"], + "prefix": study_data["doi"].split("/")[0], + "suffix": study_data["doi"].split("/")[1], + "types": { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + }, + "url": f"{doi_config['discovery_url']}{study_data['metaxIdentifier']}", + "identifiers": [ + { + "identifierType": "DOI", + "doi": study_data["doi"], + } + ], + "descriptions": [], + "titles": [], + }, + "id": study_data["doi"], + "type": "dois", + } + + study["attributes"]["titles"].append( + {"lang": None, "title": study_data["descriptor"]["studyTitle"], "titleType": None}, + ) + + study["attributes"]["descriptions"].append( + { + "lang": None, + "description": study_data["descriptor"]["studyAbstract"], + "descriptionType": "Abstract", + } + ) + + if "studyDescription" in study_data: + study["attributes"]["descriptions"].append( + {"lang": None, "description": study_data["studyDescription"], "descriptionType": "Other"} + ) + + study["attributes"].update(general_info) + LOG.debug(f"prepared study info: {study}") + + return study + + def _prepare_published_dataset(self, study_doi: str, dataset_data: Dict, general_info: Dict) -> Dict: + """Prepare Dataset object for publishing. + + :param study_doi: Study DOI to link dataset to study at Datacite + :param dataset_data: Dataset Object read from the database + :param general_info: General information that is captured in front-end and set in `doiInfo` key + :returns: Dataset Object ready to publish to Datacite + """ + + dataset = { + "attributes": { + "publisher": doi_config["publisher"], + "publicationYear": date.today().year, + "event": "publish", + "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + "doi": dataset_data["doi"], + "prefix": dataset_data["doi"].split("/")[0], + "suffix": dataset_data["doi"].split("/")[1], + "types": { + "ris": "DATA", + "bibtex": "misc", + "citeproc": "dataset", + "schemaOrg": "Dataset", + "resourceTypeGeneral": "Dataset", + }, + "url": f"{doi_config['discovery_url']}{dataset_data['metaxIdentifier']}", + "identifiers": [ + { + "identifierType": "DOI", + "doi": dataset_data["doi"], + } + ], + "descriptions": [], + "titles": [], + }, + "id": dataset_data["doi"], + "type": "dois", + } + + dataset["attributes"]["titles"].append( + {"lang": None, "title": dataset_data["title"], "titleType": None}, + ) + + dataset["attributes"]["descriptions"].append( + { + "lang": None, + "description": dataset_data["description"], + "descriptionType": "Other", + } + ) + + # A Dataset is described by a Study + if "relatedIdentifiers" not in dataset["attributes"]: + dataset["attributes"]["relatedIdentifiers"] = [] + + dataset["attributes"]["relatedIdentifiers"].append( + { + "relationType": "IsDescribedBy", + "relatedIdentifier": study_doi, + "resourceTypeGeneral": "Collection", + "relatedIdentifierType": "DOI", + } + ) + + dataset["attributes"].update(general_info) + LOG.debug(f"prepared dataset info: {dataset}") + + return dataset + + async def _prepare_doi_update(self, obj_op: Operator, folder: Dict) -> Tuple[Dict, List, List]: + """Prepare dictionary with 
values for the Datacite DOI update. + + We need to prepare data for Study and Datasets, publish doi for each, + and create links (relatedIdentifiers) between Study and Datasets. + All the required information should be in the folder ``doiInfo``, + as well as ``extraInfo`` which contains the draft DOIs created for the Study + and each Dataset. + + :param obj_op: Operator for reading objects from database. + :param folder: Folder data + :returns: Tuple with the Study and list of Datasets and list of identifiers for publishing to Metax + """ + + metax_ids = [] + study = {} + datasets: List = [] + + # we need to re-format these for Datacite, as in the JSON schemas + # we split the words so that front-end will display them nicely + _info = folder["doiInfo"] + if "relatedIdentifiers" in _info: + for d in _info["relatedIdentifiers"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k in {"resourceTypeGeneral", "relationType"}) + + if "contributors" in _info: + for d in _info["contributors"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k == "contributorType") + + if "descriptions" in _info: + for d in _info["descriptions"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k == "descriptionType") + + if "fundingReferences" in _info: + for d in _info["fundingReferences"]: + d.update((k, "".join(v.split())) for k, v in d.items() if k == "funderIdentifierType") + + try: + # keywords are only required for Metax integration + # thus we remove them + _info.pop("keywords", None) + + _study_doi = "" + + for _obj in folder["metadataObjects"]: + + if _obj["schema"] == "study": + + # we need the study for the title, abstract and description + study_data, _ = await obj_op.read_metadata_object("study", _obj["accessionId"]) + + if isinstance(study_data, dict): + + study = self._prepare_published_study(study_data, _info) + + _study_doi = study_data["doi"] + + metax_ids.append({"doi": study_data["doi"], "metaxIdentifier": study_data["metaxIdentifier"]}) + + # there are cases where datasets are added first + if len(datasets) > 0: + LOG.info(datasets) + for ds in datasets: + if "relatedIdentifiers" not in study["attributes"]: + study["attributes"]["relatedIdentifiers"] = [] + + study["attributes"]["relatedIdentifiers"].append( + { + "relationType": "Describes", + "relatedIdentifier": ds["attributes"]["doi"], + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI", + } + ) + + elif _obj["schema"] == "dataset": + + # we need the dataset title and description + ds_data, _ = await obj_op.read_metadata_object("dataset", _obj["accessionId"]) + + if isinstance(ds_data, dict): + dataset = self._prepare_published_dataset(_study_doi, ds_data, _info) + + datasets.append(dataset) + metax_ids.append({"doi": ds_data["doi"], "metaxIdentifier": ds_data["metaxIdentifier"]}) + + # A Study describes a Dataset + # there are cases where datasets are added first + if "attributes" in study: + if "relatedIdentifiers" not in study["attributes"]: + study["attributes"]["relatedIdentifiers"] = [] + + study["attributes"]["relatedIdentifiers"].append( + { + "relationType": "Describes", + "relatedIdentifier": ds_data["doi"], + "resourceTypeGeneral": "Dataset", + "relatedIdentifierType": "DOI", + } + ) + else: + pass + # we catch all errors, if we missed even a key, that means some information is not + # properly recorded + except Exception as e: + reason = f"Could not construct DOI data, reason: {e}" + LOG.error(reason) + raise web.HTTPInternalServerError(reason=reason) + + return (study, 
datasets, metax_ids) + + def _check_patch_folder(self, patch_ops: Any) -> None: + """Check patch operations in request are valid. + + We check that ``metadataObjects`` and ``drafts`` have ``_required_values``. + For tags we check that the ``submissionType`` takes either ``CSV``, ``XML`` or + ``Form`` as values. + + :param patch_ops: JSON patch request + :raises: HTTPBadRequest if request does not fullfil one of requirements + :raises: HTTPUnauthorized if request tries to do anything else than add or replace + :returns: None + """ + _required_paths = {"/name", "/description"} + _required_values = {"schema", "accessionId"} + _arrays = {"/metadataObjects/-", "/drafts/-", "/doiInfo"} + _tags = re.compile("^/(metadataObjects|drafts)/[0-9]*/(tags)$") + + for op in patch_ops: + if _tags.match(op["path"]): + LOG.info(f"{op['op']} on tags in folder") + if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in { + "XML", + "CSV", + "Form", + }: + reason = "submissionType is restricted to either 'CSV', 'XML' or 'Form' values." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + pass + else: + if all(i not in op["path"] for i in set.union(_required_paths, _arrays)): + reason = f"Request contains '{op['path']}' key that cannot be updated to folders." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if op["op"] in {"remove", "copy", "test", "move"}: + reason = f"{op['op']} on {op['path']} is not allowed." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + if op["op"] == "replace" and op["path"] in _arrays: + reason = f"{op['op']} on {op['path']}; replacing all objects is not allowed." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + if op["path"] in _arrays and op["path"] != "/doiInfo": + _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] + for item in _ops: + if not all(key in item.keys() for key in _required_values): + reason = "accessionId and schema are required fields." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if ( + "tags" in item + and "submissionType" in item["tags"] + and item["tags"]["submissionType"] not in {"XML", "CSV", "Form"} + ): + reason = "submissionType is restricted to either 'XML' or 'Form' values." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def get_folders(self, req: Request) -> Response: + """Get a set of folders owned by the project with pagination values. 
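+        Results are paginated and can be narrowed down with the ``published``, ``name`` and ``date_created_start``/``date_created_end`` query parameters.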
+ + :param req: GET Request + :returns: JSON list of folders available for the user + """ + page = self._get_page_param(req, "page", 1) + per_page = self._get_page_param(req, "per_page", 5) + project_id = self._get_param(req, "projectId") + sort = {"date": True, "score": False} + db_client = req.app["db_client"] + + user_operator = UserOperator(db_client) + current_user = get_session(req)["user_info"] + user = await user_operator.read_user(current_user) + user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {project_id}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + folder_query: Dict[str, Union[str, Dict[str, Union[str, bool, float]]]] = {"projectId": project_id} + # Check if only published or draft folders are requestsed + if "published" in req.query: + pub_param = req.query.get("published", "").title() + if pub_param in {"True", "False"}: + folder_query["published"] = {"$eq": bool(strtobool(pub_param))} + else: + reason = "'published' parameter must be either 'true' or 'false'" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if "name" in req.query: + name_param = req.query.get("name", "") + if name_param: + folder_query["$text"] = {"$search": name_param} + sort["score"] = True + sort["date"] = False + + format_incoming = "%Y-%m-%d" + format_query = "%Y-%m-%d %H:%M:%S" + if "date_created_start" in req.query and "date_created_end" in req.query: + date_param_start = req.query.get("date_created_start", "") + date_param_end = req.query.get("date_created_end", "") + + if datetime.strptime(date_param_start, format_incoming) and datetime.strptime( + date_param_end, format_incoming + ): + query_start = datetime.strptime(date_param_start + " 00:00:00", format_query).timestamp() + query_end = datetime.strptime(date_param_end + " 23:59:59", format_query).timestamp() + folder_query["dateCreated"] = {"$gte": query_start, "$lte": query_end} + else: + reason = f"'date_created_start' and 'date_created_end' parameters must be formated as {format_incoming}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if "name" in req.query and "date_created_start" in req.query: + sort["score"] = True + sort["date"] = True + + folder_operator = FolderOperator(db_client) + folders, total_folders = await folder_operator.query_folders(folder_query, page, per_page, sort) + + result = ujson.dumps( + { + "page": { + "page": page, + "size": per_page, + "totalPages": ceil(total_folders / per_page), + "totalFolders": total_folders, + }, + "folders": folders, + }, + escape_forward_slashes=False, + ) + + url = f"{req.scheme}://{req.host}{req.path}" + link_headers = self._header_links(url, page, per_page, total_folders) + LOG.debug(f"Pagination header links: {link_headers}") + LOG.info(f"Querying for project={project_id} folders resulted in {total_folders} folders") + return web.Response( + body=result, + status=200, + headers=link_headers, + content_type="application/json", + ) + + async def post_folder(self, req: Request) -> Response: + """Save object folder to database. 
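+        The payload is validated against the ``folders`` JSON schema, and the user must be affiliated with the project given in ``projectId``.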
+ + :param req: POST request + :returns: JSON response containing folder ID for submitted folder + """ + db_client = req.app["db_client"] + content = await self._get_data(req) + + JSONValidator(content, "folders").validate + + # Check that project exists + project_op = ProjectOperator(db_client) + await project_op._check_project_exists(content["projectId"]) + + # Check that user is affiliated with project + user_op = UserOperator(db_client) + current_user = get_session(req)["user_info"] + user = await user_op.read_user(current_user) + user_has_project = await user_op.check_user_has_project(content["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {content['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + operator = FolderOperator(db_client) + folder = await operator.create_folder(content) + + body = ujson.dumps({"folderId": folder}, escape_forward_slashes=False) + + url = f"{req.scheme}://{req.host}{req.path}" + location_headers = CIMultiDict(Location=f"{url}/{folder}") + LOG.info(f"POST new folder with ID {folder} was successful.") + return web.Response(body=body, status=201, headers=location_headers, content_type="application/json") + + async def get_folder(self, req: Request) -> Response: + """Get one object folder by its folder id. + + :param req: GET request + :raises: HTTPNotFound if folder not owned by user + :returns: JSON response containing object folder + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + + await self._handle_check_ownership(req, "folders", folder_id) + + folder = await operator.read_folder(folder_id) + + LOG.info(f"GET folder with ID {folder_id} was successful.") + return web.Response( + body=ujson.dumps(folder, escape_forward_slashes=False), status=200, content_type="application/json" + ) + + async def patch_folder(self, req: Request) -> Response: + """Update object folder with a specific folder id. + + :param req: PATCH request + :returns: JSON response containing folder ID for updated folder + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + + # Check patch operations in request are valid + patch_ops = await self._get_data(req) + self._check_patch_folder(patch_ops) + + # Validate against folders schema if DOI is being added + for op in patch_ops: + if op["path"] == "/doiInfo": + curr_folder = await operator.read_folder(folder_id) + curr_folder["doiInfo"] = op["value"] + JSONValidator(curr_folder, "folders").validate + + await self._handle_check_ownership(req, "folders", folder_id) + + upd_folder = await operator.update_folder(folder_id, patch_ops if isinstance(patch_ops, list) else [patch_ops]) + + body = ujson.dumps({"folderId": upd_folder}, escape_forward_slashes=False) + LOG.info(f"PATCH folder with ID {upd_folder} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def publish_folder(self, req: Request) -> Response: + """Update object folder specifically into published state. 
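+        Publishing registers the study and dataset DOIs at Datacite, publishes the metadata to Metax, deletes the remaining draft objects and patches the folder into a published state.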
+ + :param req: PATCH request + :returns: JSON response containing folder ID for updated folder + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + + await self._handle_check_ownership(req, "folders", folder_id) + + folder = await operator.read_folder(folder_id) + + # we first try to publish the DOI before actually publishing the folder + obj_ops = Operator(db_client) + study, datasets, metax_ids = await self._prepare_doi_update(obj_ops, folder) + + doi_ops = DOIHandler() + + datasets_patch = [] + + await doi_ops.set_state(study) + + for ds in datasets: + await doi_ops.set_state(ds) + patch_ds = { + "op": "add", + "path": "/extraInfo/datasetIdentifiers/-", + "value": { + "identifier": { + "identifierType": "DOI", + "doi": ds["id"], + }, + "url": ds["attributes"]["url"], + "types": ds["attributes"]["types"], + }, + } + datasets_patch.append(patch_ds) + + # Create draft DOI and delete draft objects from the folder + + for obj in folder["drafts"]: + await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) + + # update study to metax with data comming from doi info + metax_handler = MetaxServiceHandler(req) + await metax_handler.update_dataset_with_doi_info(folder["doiInfo"], metax_ids) + await metax_handler.publish_dataset(metax_ids) + + # Patch the folder into a published state + patch = [ + {"op": "replace", "path": "/published", "value": True}, + {"op": "replace", "path": "/drafts", "value": []}, + {"op": "add", "path": "/datePublished", "value": int(datetime.now().timestamp())}, + {"op": "add", "path": "/extraInfo/publisher", "value": doi_config["publisher"]}, + {"op": "add", "path": "/extraInfo/publicationYear", "value": date.today().year}, + { + "op": "add", + "path": "/extraInfo/studyIdentifier", + "value": { + "identifier": { + "identifierType": "DOI", + "doi": study["id"], + }, + "url": study["attributes"]["url"], + "types": study["attributes"]["types"], + }, + }, + ] + patch.extend(datasets_patch) + new_folder = await operator.update_folder(folder_id, patch) + + body = ujson.dumps({"folderId": new_folder}, escape_forward_slashes=False) + LOG.info(f"Patching folder with ID {new_folder} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def delete_folder(self, req: Request) -> Response: + """Delete object folder from database. 
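+        The folder must not be published; deletion also removes every draft and metadata object the folder contains.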
+ + :param req: DELETE request + :returns: HTTP No Content response + """ + folder_id = req.match_info["folderId"] + db_client = req.app["db_client"] + operator = FolderOperator(db_client) + + await operator.check_folder_exists(folder_id) + await operator.check_folder_published(folder_id) + + await self._handle_check_ownership(req, "folders", folder_id) + + obj_ops = Operator(db_client) + + folder = await operator.read_folder(folder_id) + + for obj in folder["drafts"] + folder["metadataObjects"]: + await obj_ops.delete_metadata_object(obj["schema"], obj["accessionId"]) + + _folder_id = await operator.delete_folder(folder_id) + + LOG.info(f"DELETE folder with ID {_folder_id} was successful.") + return web.Response(status=204) diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py new file mode 100644 index 000000000..8cd4734a7 --- /dev/null +++ b/metadata_backend/api/handlers/object.py @@ -0,0 +1,505 @@ +"""Handle HTTP methods for server.""" +from math import ceil +from typing import Any, Dict, List, Tuple, Union + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from multidict import CIMultiDict + +from ...helpers.doi import DOIHandler +from ...helpers.logger import LOG +from ...helpers.metax_api_handler import MetaxServiceHandler +from ...helpers.validator import JSONValidator +from ..operators import FolderOperator, Operator, XMLOperator +from .common import multipart_content +from .restapi import RESTAPIHandler + + +class ObjectAPIHandler(RESTAPIHandler): + """API Handler for Objects.""" + + async def _handle_query(self, req: Request) -> Response: + """Handle query results. + + :param req: GET request with query parameters + :returns: JSON with query results + """ + collection = req.match_info["schema"] + req_format = req.query.get("format", "json").lower() + if req_format == "xml": + reason = "xml-formatted query results are not supported" + raise web.HTTPBadRequest(reason=reason) + + page = self._get_page_param(req, "page", 1) + per_page = self._get_page_param(req, "per_page", 10) + db_client = req.app["db_client"] + + filter_list: List = [] # DEPRECATED, users don't own folders anymore + data, page_num, page_size, total_objects = await Operator(db_client).query_metadata_database( + collection, req.query, page, per_page, filter_list + ) + + result = ujson.dumps( + { + "page": { + "page": page_num, + "size": page_size, + "totalPages": ceil(total_objects / per_page), + "totalObjects": total_objects, + }, + "objects": data, + }, + escape_forward_slashes=False, + ) + url = f"{req.scheme}://{req.host}{req.path}" + link_headers = self._header_links(url, page_num, per_page, total_objects) + LOG.debug(f"Pagination header links: {link_headers}") + LOG.info(f"Querying for objects in {collection} resulted in {total_objects} objects ") + return web.Response( + body=result, + status=200, + headers=link_headers, + content_type="application/json", + ) + + async def get_object(self, req: Request) -> Response: + """Get one metadata object by its accession id. + + Returns original XML object from backup if format query parameter is + set, otherwise JSON. 
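+        When the request includes ``?format=xml`` the stored XML copy is read with the XMLOperator instead of the JSON document.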
+ + :param req: GET request + :returns: JSON or XML response containing metadata object + """ + accession_id = req.match_info["accessionId"] + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + req_format = req.query.get("format", "json").lower() + db_client = req.app["db_client"] + operator = XMLOperator(db_client) if req_format == "xml" else Operator(db_client) + type_collection = f"xml-{collection}" if req_format == "xml" else collection + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownership(req, collection, accession_id) + + data, content_type = await operator.read_metadata_object(type_collection, accession_id) + + data = data if req_format == "xml" else ujson.dumps(data, escape_forward_slashes=False) + LOG.info(f"GET object with accesssion ID {accession_id} from schema {collection}.") + return web.Response(body=data, status=200, content_type=content_type) + + async def post_object(self, req: Request) -> Response: + """Save metadata object to database. + + For JSON request body we validate it is consistent with the associated JSON schema. + For CSV upload we allow it for a select number objects, currently: ``sample``. + + :param req: POST request + :returns: JSON response containing accessionId for submitted object + """ + _allowed_csv = {"sample"} + _allowed_doi = {"study", "dataset"} + schema_type = req.match_info["schema"] + LOG.debug(f"Creating {schema_type} object") + filename = "" + cont_type = "" + + folder_id = req.query.get("folder", "") + if not folder_id: + reason = "Folder is required query parameter. Please provide folder id where object is added to." + raise web.HTTPBadRequest(reason=reason) + + await self._handle_check_ownership(req, "folders", folder_id) + + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + + # we need to check if there is already a study in a folder + # we only allow one study per folder + # this is not enough to catch duplicate entries if updates happen in parallel + # that is why we check in db_service.update_study + if not req.path.startswith("/drafts") and schema_type == "study": + _ids = await folder_op.get_collection_objects(folder_id, collection) + if len(_ids) == 1: + reason = "Only one study is allowed per submission." + raise web.HTTPBadRequest(reason=reason) + + content: Union[Dict[str, Any], str, List[Tuple[Any, str, str]]] + operator: Union[Operator, XMLOperator] + if req.content_type == "multipart/form-data": + _only_xml = False if schema_type in _allowed_csv else True + files, cont_type = await multipart_content(req, extract_one=True, expect_xml=_only_xml) + if cont_type == "xml": + # from this tuple we only care about the content + # files should be of form (content, schema) + content, _, filename = files[0] + else: + # for CSV files we need to treat this as a list of tuples (content, schema) + content = files + # If multipart request contains XML, XML operator is used. + # Else the multipart request is expected to contain CSV file(s) which are converted into JSON. 
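+            # (CSV rows were already parsed into JSON dictionaries by CSVToJSONParser in multipart_content, so the plain Operator can store them directly)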
+ operator = XMLOperator(db_client) if cont_type == "xml" else Operator(db_client) + else: + content = await self._get_data(req) + if not req.path.startswith("/drafts"): + JSONValidator(content, schema_type).validate + operator = Operator(db_client) + + # Add a new metadata object or multiple objects if multiple were extracted + url = f"{req.scheme}://{req.host}{req.path}" + data: Union[List[Dict[str, str]], Dict[str, str]] + if isinstance(content, List): + LOG.debug(f"Inserting multiple objects for {schema_type}.") + objects: List[Tuple[Dict[str, Any], str]] = [] + for item in content: + json_data = await operator.create_metadata_object(collection, item[0]) + filename = item[2] + objects.append((json_data, filename)) + LOG.info( + f"POST object with accesssion ID {json_data['accessionId']} in schema {collection} was successful." + ) + + # we format like this to make it consistent with the response from /submit endpoint + data = [dict({"accessionId": item["accessionId"]}, **{"schema": schema_type}) for item, _ in objects] + # we take the first result if we get multiple + location_headers = CIMultiDict(Location=f"{url}/{data[0]['accessionId']}") + else: + json_data = await operator.create_metadata_object(collection, content) + data = {"accessionId": json_data["accessionId"]} + location_headers = CIMultiDict(Location=f"{url}/{json_data['accessionId']}") + LOG.info( + f"POST object with accesssion ID {json_data['accessionId']} in schema {collection} was successful." + ) + objects = [(json_data, filename)] + + # Gathering data for object to be added to folder + patch = self._prepare_folder_patch_new_object(collection, objects, cont_type) + await folder_op.update_folder(folder_id, patch) + + # Create draft dataset to Metax catalog + if collection in _allowed_doi: + [await self.create_metax_dataset(req, collection, item) for item, _ in objects] + + body = ujson.dumps(data, escape_forward_slashes=False) + + return web.Response( + body=body, + status=201, + headers=location_headers, + content_type="application/json", + ) + + async def query_objects(self, req: Request) -> Response: + """Query metadata objects from database. + + :param req: GET request with query parameters (can be empty). + :returns: Query results as JSON + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + return await self._handle_query(req) + + async def delete_object(self, req: Request) -> Response: + """Delete metadata object from database. + + :param req: DELETE request + :raises: HTTPUnauthorized if folder published + :raises: HTTPUnprocessableEntity if object does not belong to current user + :returns: HTTPNoContent response + """ + schema_type = req.match_info["schema"] + accession_id = req.match_info["accessionId"] + LOG.debug(f"Deleting object {schema_type} {accession_id}") + _allowed_doi = {"study", "dataset"} + + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + db_client = req.app["db_client"] + + operator = Operator(db_client) + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownership(req, collection, accession_id) + + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "published objects cannot be deleted." 
+ LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + await folder_op.remove_object(folder_id, collection, accession_id) + else: + reason = "This object does not seem to belong to any user." + LOG.error(reason) + raise web.HTTPUnprocessableEntity(reason=reason) + + metax_id: str = "" + doi_id: str = "" + if collection in _allowed_doi: + try: + object_data, _ = await operator.read_metadata_object(collection, accession_id) + # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict + if isinstance(object_data, dict): + metax_id = object_data["metaxIdentifier"] + doi_id = object_data["doi"] + except KeyError: + LOG.warning(f"MetadataObject {collection} {accession_id} was never added to Metax service.") + + accession_id = await operator.delete_metadata_object(collection, accession_id) + + # Delete draft dataset from Metax catalog + if collection in _allowed_doi: + await MetaxServiceHandler(req).delete_draft_dataset(metax_id) + doi_service = DOIHandler() + await doi_service.delete(doi_id) + + LOG.info(f"DELETE object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(status=204) + + async def put_object(self, req: Request) -> Response: + """Replace metadata object in database. + + For JSON request we don't allow replacing in the DB. + For CSV upload we don't allow replace, as it is problematic to identify fields. + + :param req: PUT request + :raises: HTTPUnsupportedMediaType if JSON replace is attempted + :returns: JSON response containing accessionId for submitted object + """ + schema_type = req.match_info["schema"] + accession_id = req.match_info["accessionId"] + LOG.debug(f"Replacing object {schema_type} {accession_id}") + _allowed_doi = {"study", "dataset"} + + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + db_client = req.app["db_client"] + content: Union[Dict, str] + operator: Union[Operator, XMLOperator] + filename = "" + if req.content_type == "multipart/form-data": + files, _ = await multipart_content(req, extract_one=True, expect_xml=True) + content, _, _ = files[0] + operator = XMLOperator(db_client) + else: + content = await self._get_data(req) + if not req.path.startswith("/drafts"): + reason = "Replacing objects only allowed for XML." + LOG.error(reason) + raise web.HTTPUnsupportedMediaType(reason=reason) + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownership(req, collection, accession_id) + + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "Published objects cannot be updated." 
+ LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + data = await operator.replace_metadata_object(collection, accession_id, content) + patch = self._prepare_folder_patch_update_object(collection, data, filename) + await folder_op.update_folder(folder_id, patch) + + # Update draft dataset to Metax catalog + if collection in _allowed_doi: + await MetaxServiceHandler(req).update_draft_dataset(collection, data) + + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) + LOG.info(f"PUT object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def patch_object(self, req: Request) -> Response: + """Update metadata object in database. + + We do not support patch for XML. + + :param req: PATCH request + :raises: HTTPUnauthorized if object is in published folder + :returns: JSON response containing accessionId for submitted object + """ + schema_type = req.match_info["schema"] + accession_id = req.match_info["accessionId"] + LOG.debug(f"Patching object {schema_type} {accession_id}") + + self._check_schema_exists(schema_type) + collection = f"draft-{schema_type}" if req.path.startswith("/drafts") else schema_type + + db_client = req.app["db_client"] + operator: Union[Operator, XMLOperator] + if req.content_type == "multipart/form-data": + reason = "XML patching is not possible." + raise web.HTTPUnsupportedMediaType(reason=reason) + else: + content = await self._get_data(req) + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownership(req, collection, accession_id) + + folder_op = FolderOperator(db_client) + exists, folder_id, published = await folder_op.check_object_in_folder(collection, accession_id) + if exists: + if published: + reason = "Published objects cannot be updated." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + accession_id = await operator.update_metadata_object(collection, accession_id, content) + + # If there's changed title it will be updated to folder + try: + _ = content["descriptor"]["studyTitle"] if collection == "study" else content["title"] + patch = self._prepare_folder_patch_update_object(collection, content) + await folder_op.update_folder(folder_id, patch) + except (TypeError, KeyError): + pass + + # Update draft dataset to Metax catalog + if collection in {"study", "dataset"}: + object_data, _ = await operator.read_metadata_object(collection, accession_id) + # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict + if isinstance(object_data, Dict): + await MetaxServiceHandler(req).update_draft_dataset(collection, object_data) + else: + raise ValueError("Object's data must be dictionary") + + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) + LOG.info(f"PATCH object with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + def _prepare_folder_patch_new_object(self, schema: str, objects: List, cont_type: str) -> List: + """Prepare patch operations list for adding an object or objects to a folder. 
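+        Each object is appended to ``/drafts/-`` or ``/metadataObjects/-`` depending on the schema, with ``submissionType`` and ``displayTitle`` tags and a ``fileName`` tag for file uploads.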
+ + :param schema: schema of objects to be added to the folder + :param objects: metadata objects + :param cont_type: content type of the upload, used to set the submissionType tag + :returns: list of patch operations + """ + LOG.info("Preparing folder patch for new objects") + if not cont_type: + submission_type = "Form" + else: + submission_type = cont_type.upper() + + if schema.startswith("draft"): + path = "/drafts/-" + else: + path = "/metadataObjects/-" + + patch = [] + patch_ops: Dict[str, Any] = {} + for object, filename in objects: + try: + title = object["descriptor"]["studyTitle"] if schema in ["study", "draft-study"] else object["title"] + except (TypeError, KeyError): + title = "" + + patch_ops = { + "op": "add", + "path": path, + "value": { + "accessionId": object["accessionId"], + "schema": schema, + "tags": { + "submissionType": submission_type, + "displayTitle": title, + }, + }, + } + if submission_type != "Form": + patch_ops["value"]["tags"]["fileName"] = filename + patch.append(patch_ops) + return patch + + def _prepare_folder_patch_update_object(self, schema: str, data: Dict, filename: str = "") -> List: + """Prepare patch operation for updating object's title in a folder. + + :param schema: schema of object to be updated + :param data: metadata object data, containing accessionId and title + :param filename: name of the uploaded file, set only for XML submissions + :returns: list with one patch operation + """ + LOG.info("Preparing folder patch for existing objects") + if schema.startswith("draft"): + path = "/drafts" + else: + path = "/metadataObjects" + + patch_op = { + "op": "replace", + "match": {path.replace("/", ""): {"$elemMatch": {"schema": schema, "accessionId": data["accessionId"]}}}, + } + try: + title = data["descriptor"]["studyTitle"] if schema in ["study", "draft-study"] else data["title"] + except (TypeError, KeyError): + title = "" + + if not filename: + patch_op.update( + { + "path": f"{path}/$/tags/displayTitle", + "value": title, + } + ) + else: + patch_op.update( + { + "path": f"{path}/$/tags", + "value": {"submissionType": "XML", "fileName": filename, "displayTitle": title}, + } + ) + return [patch_op] + + async def create_metax_dataset(self, req: Request, collection: str, object: Dict) -> str: + """Handle connection to Metax api handler for dataset creation. + + Dataset or Study object is assigned with DOI + and its data is sent to Metax api handler. + Object database entry is updated with metax ID returned by Metax service. + + :param req: HTTP request + :param collection: object's schema + :param object: metadata object + :returns: Metax ID + """ + LOG.info("Creating draft dataset to Metax.") + operator = Operator(req.app["db_client"]) + object["doi"] = await self._draft_doi(collection) + metax_id = await MetaxServiceHandler(req).post_dataset_as_draft(collection, object) + + new_info = {"doi": object["doi"], "metaxIdentifier": metax_id} + await operator.create_metax_info(collection, object["accessionId"], new_info) + + return metax_id + + async def _draft_doi(self, schema_type: str) -> str: + """Create draft DOI for study and dataset. + + The Draft DOI will be created only on POST and the data added to the + folder. Any update of this should not be possible. + + :param schema_type: schema can be either study or dataset + :returns: full DOI of the study or dataset as a string.
+ """ + doi_ops = DOIHandler() + _doi_data = await doi_ops.create_draft(prefix=schema_type) + + LOG.debug(f"doi created with doi: {_doi_data['fullDOI']}") + + return _doi_data["fullDOI"] diff --git a/metadata_backend/api/handlers/restapi.py b/metadata_backend/api/handlers/restapi.py new file mode 100644 index 000000000..96c578467 --- /dev/null +++ b/metadata_backend/api/handlers/restapi.py @@ -0,0 +1,195 @@ +"""Handle HTTP methods for server.""" +import json +from math import ceil +from typing import AsyncGenerator, Dict, List, Tuple + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from motor.motor_asyncio import AsyncIOMotorClient +from multidict import CIMultiDict + +from ...conf.conf import schema_types +from ...helpers.logger import LOG +from ...helpers.schema_loader import JSONSchemaLoader, SchemaNotFoundException +from ..middlewares import get_session +from ..operators import FolderOperator, UserOperator + + +class RESTAPIHandler: + """Handler for REST API methods.""" + + def _check_schema_exists(self, schema_type: str) -> None: + """Check if schema type exists. + + :param schema_type: schema type. + :raises: HTTPNotFound if schema does not exist. + """ + if schema_type not in set(schema_types.keys()): + reason = f"Specified schema {schema_type} was not found." + LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + + def _get_page_param(self, req: Request, name: str, default: int) -> int: + """Handle page parameter value extracting. + + :param req: GET Request + :param param_name: Name of the parameter + :param default: Default value in case parameter not specified in request + :returns: Page parameter value + """ + try: + param = int(req.query.get(name, default)) + except ValueError: + reason = f"{name} parameter must be a number, now it is {req.query.get(name)}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if param < 1: + reason = f"{name} parameter must be over 0" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + return param + + def _get_param(self, req: Request, name: str) -> str: + """Extract mandatory query parameter from URL. + + :param req: GET Request + :param name: name of query param to get + :returns: project ID parameter value + """ + param = req.query.get(name, "") + if param == "": + reason = f"mandatory query parameter {name} is not set" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + return param + + async def _handle_check_ownership(self, req: Request, collection: str, accession_id: str) -> Tuple[bool, str]: + """Check if object belongs to project. + + For this we need to check the object is in exactly 1 folder and we need to check + that folder belongs to a project. 
+ + :param req: HTTP request + :param collection: collection or schema of document + :param doc_id: document accession id + :raises: HTTPUnauthorized if accession id does not belong to user + :returns: bool and possible project id + """ + db_client = req.app["db_client"] + current_user = get_session(req)["user_info"] + user_op = UserOperator(db_client) + _check = False + + project_id = "" + if collection != "folders": + + folder_op = FolderOperator(db_client) + check, folder_id, _ = await folder_op.check_object_in_folder(collection, accession_id) + # if published: + # _check = True + if check: + # if the draft object is found in folder we just need to check if the folder belongs to user + _check, project_id = await user_op.check_user_has_doc(req, "folders", current_user, folder_id) + elif collection.startswith("template"): + # if collection is template but not found in a folder + # we also check if object is in templates of the user + # they will be here if they will not be deleted after publish + _check, project_id = await user_op.check_user_has_doc(req, collection, current_user, accession_id) + else: + _check = False + else: + _check, project_id = await user_op.check_user_has_doc(req, collection, current_user, accession_id) + + if not _check: + reason = f"{collection} {accession_id}." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + return _check, project_id + + async def _get_collection_objects( + self, folder_op: AsyncIOMotorClient, collection: str, seq: List + ) -> AsyncGenerator: + """Get objects ids based on folder and collection. + + Considering that many objects will be returned good to have a generator. + + :param req: HTTP request + :param collection: collection or schema of document + :param seq: list of folders + :returns: AsyncGenerator + """ + for el in seq: + result = await folder_op.get_collection_objects(el, collection) + + yield result + + async def _get_data(self, req: Request) -> Dict: + """Get the data content from a request. + + :param req: POST/PUT/PATCH request + :raises: HTTPBadRequest if request does not have proper JSON data + :returns: JSON content of the request + """ + try: + content = await req.json() + return content + except json.decoder.JSONDecodeError as e: + reason = "JSON is not correctly formatted." f" See: {e}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def get_schema_types(self, req: Request) -> Response: + """Get all possible metadata schema types from database. + + Basically returns which objects user can submit and query for. + :param req: GET Request + :returns: JSON list of schema types + """ + types_json = ujson.dumps([x["description"] for x in schema_types.values()], escape_forward_slashes=False) + LOG.info(f"GET schema types. Retrieved {len(schema_types)} schemas.") + return web.Response(body=types_json, status=200, content_type="application/json") + + async def get_json_schema(self, req: Request) -> Response: + """Get all JSON Schema for a specific schema type. + + Basically returns which objects user can submit and query for. 
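+        The schema itself is loaded with JSONSchemaLoader and returned as JSON.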
+ :param req: GET Request + :raises: HTTPBadRequest if request does not find the schema + :returns: JSON list of schema types + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + + try: + schema = JSONSchemaLoader().get_schema(schema_type) + LOG.info(f"{schema_type} schema loaded.") + return web.Response( + body=ujson.dumps(schema, escape_forward_slashes=False), status=200, content_type="application/json" + ) + + except SchemaNotFoundException as error: + reason = f"{error} ({schema_type})" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + def _header_links(self, url: str, page: int, size: int, total_objects: int) -> CIMultiDict[str]: + """Create link header for pagination. + + :param url: base url for request + :param page: current page + :param size: results per page + :param total_objects: total objects to compute the total pages + :returns: JSON with query results + """ + total_pages = ceil(total_objects / size) + prev_link = f'<{url}?page={page-1}&per_page={size}>; rel="prev", ' if page > 1 else "" + next_link = f'<{url}?page={page+1}&per_page={size}>; rel="next", ' if page < total_pages else "" + last_link = f'<{url}?page={total_pages}&per_page={size}>; rel="last"' if page < total_pages else "" + comma = ", " if page > 1 and page < total_pages else "" + first_link = f'<{url}?page=1&per_page={size}>; rel="first"{comma}' if page > 1 else "" + links = f"{prev_link}{next_link}{first_link}{last_link}" + link_headers = CIMultiDict(Link=f"{links}") + LOG.debug("Link headers created") + return link_headers diff --git a/metadata_backend/api/handlers/static.py b/metadata_backend/api/handlers/static.py new file mode 100644 index 000000000..5f93d6fa2 --- /dev/null +++ b/metadata_backend/api/handlers/static.py @@ -0,0 +1,47 @@ +"""Handle HTTP methods for server.""" +import mimetypes +from pathlib import Path + +from aiohttp.web import Request, Response + +from ...helpers.logger import LOG + + +class StaticHandler: + """Handler for static routes, mostly frontend and 404.""" + + def __init__(self, frontend_static_files: Path) -> None: + """Initialize path to frontend static files folder.""" + self.path = frontend_static_files + + async def frontend(self, req: Request) -> Response: + """Serve requests related to frontend SPA. + + :param req: GET request + :returns: Response containing frontpage static file + """ + serve_path = self.path.joinpath("./" + req.path) + + if not serve_path.exists() or not serve_path.is_file(): + LOG.debug(f"{serve_path} was not found or is not a file - serving index.html") + serve_path = self.path.joinpath("./index.html") + + LOG.debug(f"Serve Frontend SPA {req.path} by {serve_path}.") + + mime_type = mimetypes.guess_type(serve_path.as_posix()) + + return Response(body=serve_path.read_bytes(), content_type=(mime_type[0] or "text/html")) + + def setup_static(self) -> Path: + """Set path for static js files and correct return mimetypes. 
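+        Explicit MIME types are registered for ``.js``, ``.svg`` and ``.css`` files (and their
+        source maps) so that SPA assets are served with the correct Content-Type.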
+ + :returns: Path to static js files folder + """ + mimetypes.init() + mimetypes.types_map[".js"] = "application/javascript" + mimetypes.types_map[".js.map"] = "application/json" + mimetypes.types_map[".svg"] = "image/svg+xml" + mimetypes.types_map[".css"] = "text/css" + mimetypes.types_map[".css.map"] = "application/json" + LOG.debug("static paths for SPA set.") + return self.path / "static" diff --git a/metadata_backend/api/handlers/submission.py b/metadata_backend/api/handlers/submission.py new file mode 100644 index 000000000..c0a935d15 --- /dev/null +++ b/metadata_backend/api/handlers/submission.py @@ -0,0 +1,253 @@ +"""Handle HTTP methods for server.""" +from collections import Counter +from typing import Dict, List + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from multidict import MultiDict, MultiDictProxy +from xmlschema import XMLSchemaException + +from ...helpers.logger import LOG +from ...helpers.metax_api_handler import MetaxServiceHandler +from ...helpers.parser import XMLToJSONParser +from ...helpers.schema_loader import SchemaNotFoundException, XMLSchemaLoader +from ...helpers.validator import XMLValidator +from ..operators import FolderOperator, Operator, XMLOperator +from .common import multipart_content +from .object import ObjectAPIHandler + + +class SubmissionAPIHandler(ObjectAPIHandler): + """Handler for non-rest API methods.""" + + async def submit(self, req: Request) -> Response: + """Handle submission.xml containing submissions to server. + + First submission info is parsed and then for every action in submission + (add/modify/validate) corresponding operation is performed. + Finally submission info itself is added. + + :param req: Multipart POST request with submission.xml and files + :raises: HTTPBadRequest if request is missing some parameters or cannot be processed + :returns: XML-based receipt from submission + """ + files, _ = await multipart_content(req, expect_xml=True) + schema_types = Counter(file[1] for file in files) + if "submission" not in schema_types: + reason = "There must be a submission.xml file in submission." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if schema_types["submission"] > 1: + reason = "You should submit only one submission.xml file." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + submission_xml = files[0][0] + submission_json = XMLToJSONParser().parse("submission", submission_xml) + + # Check what actions should be performed, collect them to dictionary + actions: Dict[str, List] = {} + for action_set in submission_json["actions"]["action"]: + for action, attr in action_set.items(): + if not attr: + reason = f"""You also need to provide necessary + information for submission action. 
+ Now {action} was provided without any + extra information.""" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + LOG.debug(f"submission has action {action}") + if attr["schema"] in actions: + set = [] + set.append(actions[attr["schema"]]) + set.append(action) + actions[attr["schema"]] = set + else: + actions[attr["schema"]] = action + + # Go through parsed files and do the actual action + results: List[Dict] = [] + for file in files: + content_xml = file[0] + schema_type = file[1] + filename = file[2] + if schema_type == "submission": + LOG.debug("file has schema of submission type, continuing ...") + continue # No need to use submission xml + action = actions[schema_type] + if isinstance(action, List): + for item in action: + result = await self._execute_action(req, schema_type, content_xml, item, filename) + results.append(result) + else: + result = await self._execute_action(req, schema_type, content_xml, action, filename) + results.append(result) + + body = ujson.dumps(results, escape_forward_slashes=False) + LOG.info(f"Processed a submission of {len(results)} actions.") + return web.Response(body=body, status=200, content_type="application/json") + + async def validate(self, req: Request) -> Response: + """Handle validating an XML file sent to endpoint. + + :param req: Multipart POST request with submission.xml and files + :returns: JSON response indicating if validation was successful or not + """ + files, _ = await multipart_content(req, extract_one=True, expect_xml=True) + xml_content, schema_type, _ = files[0] + validator = await self._perform_validation(schema_type, xml_content) + return web.Response(body=validator.resp_body, content_type="application/json") + + async def _perform_validation(self, schema_type: str, xml_content: str) -> XMLValidator: + """Validate an xml. + + :param schema_type: Schema type of the object to validate. + :param xml_content: Metadata object + :raises: HTTPBadRequest if schema load fails + :returns: JSON response indicating if validation was successful or not + """ + try: + schema = XMLSchemaLoader().get_schema(schema_type) + LOG.info(f"{schema_type} schema loaded.") + return XMLValidator(schema, xml_content) + + except (SchemaNotFoundException, XMLSchemaException) as error: + reason = f"{error} ({schema_type})" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def _execute_action(self, req: Request, schema: str, content: str, action: str, filename: str) -> Dict: + """Complete the command in the action set of the submission file. + + Only "add/modify/validate" actions are supported. + + :param req: Multipart POST request + :param schema: Schema type of the object in question + :param content: Metadata object referred to in submission + :param action: Type of action to be done + :param filename: Name of file being processed + :raises: HTTPBadRequest if an incorrect or non-supported action is called + :returns: Dict containing specific action that was completed + """ + if action == "add": + return await self._execute_action_add(req, schema, content, filename) + + elif action == "modify": + return await self._execute_action_modify(req, schema, content, filename) + + elif action == "validate": + validator = await self._perform_validation(schema, content) + return ujson.loads(validator.resp_body) + + else: + reason = f"Action {action} in XML is not supported." 
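+            # only add/modify/validate are mapped above; any other action declared in the submission XML is rejected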
+ LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def _execute_action_add(self, req: Request, schema: str, content: str, filename: str) -> Dict: + """Complete the add action. + + :param req: Multipart POST request + :param schema: Schema type of the object in question + :param content: Metadata object referred to in submission + :param filename: Name of file being processed + :raises: HTTPBadRequest if an incorrect or non-supported action is called + :returns: Dict containing specific action that was completed + """ + _allowed_doi = {"study", "dataset"} + db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + + folder_id = req.query.get("folder", "") + if not folder_id: + reason = "Folder is required query parameter. Please provide folder id where object is added to." + raise web.HTTPBadRequest(reason=reason) + + # we need to check if there is already a study in a folder + # we only allow one study per folder + # this is not enough to catch duplicate entries if updates happen in parallel + # that is why we check in db_service.update_study + if not req.path.startswith("/drafts") and schema == "study": + _ids = await folder_op.get_collection_objects(folder_id, schema) + if len(_ids) == 1: + reason = "Only one study is allowed per submission." + raise web.HTTPBadRequest(reason=reason) + + json_data = await XMLOperator(db_client).create_metadata_object(schema, content) + + result = { + "accessionId": json_data["accessionId"], + "schema": schema, + } + LOG.debug(f"added some content in {schema} ...") + + # Gathering data for object to be added to folder + patch = self._prepare_folder_patch_new_object(schema, [(json_data, filename)], "xml") + await folder_op.update_folder(folder_id, patch) + + # Create draft dataset to Metax catalog + if schema in _allowed_doi: + await self.create_metax_dataset(req, schema, json_data) + + return result + + async def _execute_action_modify(self, req: Request, schema: str, content: str, filename: str) -> Dict: + """Complete the modify action. + + :param req: Multipart POST request + :param schema: Schema type of the object in question + :param content: Metadata object referred to in submission + :param filename: Name of file being processed + :raises: HTTPBadRequest if an incorrect or non-supported action is called + :returns: Dict containing specific action that was completed + """ + _allowed_doi = {"study", "dataset"} + db_client = req.app["db_client"] + folder_op = FolderOperator(db_client) + operator = Operator(db_client) + data_as_json = XMLToJSONParser().parse(schema, content) + if "accessionId" in data_as_json: + accession_id = data_as_json["accessionId"] + else: + alias = data_as_json["alias"] + query = MultiDictProxy(MultiDict([("alias", alias)])) + data, _, _, _ = await operator.query_metadata_database(schema, query, 1, 1, []) + if len(data) > 1: + reason = "Alias in provided XML file corresponds with more than one existing metadata object." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + accession_id = data[0]["accessionId"] + data_as_json.pop("accessionId", None) + result = { + # should here be replace_metadata_object ?? + "accessionId": await operator.update_metadata_object(schema, accession_id, data_as_json), + "schema": schema, + } + + exists, folder_id, published = await folder_op.check_object_in_folder(schema, result["accessionId"]) + if exists: + if published: + reason = "Published objects cannot be updated." 
+ LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + # If there's changed title it will be updated to folder + try: + _ = data_as_json["descriptor"]["studyTitle"] if schema == "study" else data_as_json["title"] + # should we overwrite filename as it is the name of file with partial update data + patch = self._prepare_folder_patch_update_object(schema, data_as_json, filename) + await folder_op.update_folder(folder_id, patch) + except (TypeError, KeyError): + pass + + # Update draft dataset to Metax catalog + if schema in _allowed_doi: + object_data, _ = await operator.read_metadata_object(schema, accession_id) + # MYPY related if statement, Operator (when not XMLOperator) always returns object_data as dict + if isinstance(object_data, Dict): + await MetaxServiceHandler(req).update_draft_dataset(schema, object_data) + else: + raise ValueError("Object's data must be dictionary") + + LOG.debug(f"modified some content in {schema} ...") + return result diff --git a/metadata_backend/api/handlers/template.py b/metadata_backend/api/handlers/template.py new file mode 100644 index 000000000..42aadc468 --- /dev/null +++ b/metadata_backend/api/handlers/template.py @@ -0,0 +1,248 @@ +"""Handle HTTP methods for server.""" +from typing import Union + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from multidict import CIMultiDict + +from ...helpers.logger import LOG +from ..middlewares import get_session +from ..operators import Operator, ProjectOperator, UserOperator, XMLOperator +from .restapi import RESTAPIHandler + + +class TemplatesAPIHandler(RESTAPIHandler): + """API Handler for Templates.""" + + async def get_templates(self, req: Request) -> Response: + """Get a set of templates owned by the project. + + :param req: GET Request + :returns: JSON list of templates available for the user + """ + project_id = self._get_param(req, "projectId") + db_client = req.app["db_client"] + + user_operator = UserOperator(db_client) + current_user = get_session(req)["user_info"] + user = await user_operator.read_user(current_user) + user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {project_id}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + operator = Operator(db_client) + templates = await operator.query_templates_by_project(project_id) + + result = ujson.dumps( + templates, + escape_forward_slashes=False, + ) + + LOG.info(f"Querying for project={project_id} templates resulted in {len(templates)} templates") + return web.Response( + body=result, + status=200, + content_type="application/json", + ) + + async def get_template(self, req: Request) -> Response: + """Get one metadata template by its accession id. + + Returns JSON. 
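+        The template is read from the ``template-{schema}`` collection and ownership is verified
+        against the caller's project before the document is returned.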
+ + :param req: GET request + :returns: JSON response containing template + """ + accession_id = req.match_info["accessionId"] + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + + db_client = req.app["db_client"] + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + await self._handle_check_ownership(req, collection, accession_id) + + data, content_type = await operator.read_metadata_object(collection, accession_id) + + data = ujson.dumps(data, escape_forward_slashes=False) + LOG.info(f"GET template with accesssion ID {accession_id} from schema {collection}.") + return web.Response(body=data, status=200, content_type=content_type) + + async def post_template(self, req: Request) -> Response: + """Save metadata template to database. + + For JSON request body we validate it is consistent with the + associated JSON schema. + + :param req: POST request + :returns: JSON response containing accessionId for submitted template + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + + db_client = req.app["db_client"] + content = await self._get_data(req) + + # Operators + project_op = ProjectOperator(db_client) + user_op = UserOperator(db_client) + operator = Operator(db_client) + + if isinstance(content, list): + tmpl_list = [] + for num, tmpl in enumerate(content): + if "template" not in tmpl: + reason = f"template key is missing from request body for element: {num}." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # No schema validation, so must check that project is set + if "projectId" not in tmpl: + reason = "projectId is a mandatory POST key" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Check that project exists and user is affiliated with it + await project_op._check_project_exists(tmpl["projectId"]) + current_user = get_session(req)["user_info"] + user = await user_op.read_user(current_user) + user_has_project = await user_op.check_user_has_project(tmpl["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {tmpl['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + # Process template + # Move projectId to template structure, so that it is saved in mongo + tmpl["template"]["projectId"] = tmpl["projectId"] + json_data = await operator.create_metadata_object(collection, tmpl["template"]) + data = [{"accessionId": json_data["accessionId"], "schema": collection}] + if "tags" in tmpl: + data[0]["tags"] = tmpl["tags"] + await project_op.assign_templates(tmpl["projectId"], data) + tmpl_list.append({"accessionId": json_data["accessionId"]}) + + body = ujson.dumps(tmpl_list, escape_forward_slashes=False) + else: + if "template" not in content: + reason = "template key is missing from request body." 
+ LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # No schema validation, so must check that project is set + if "projectId" not in content: + reason = "projectId is a mandatory POST key" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + # Check that project exists and user is affiliated with it + await project_op._check_project_exists(content["projectId"]) + current_user = get_session(req)["user_info"] + user = await user_op.read_user(current_user) + user_has_project = await user_op.check_user_has_project(content["projectId"], user["userId"]) + if not user_has_project: + reason = f"user {user['userId']} is not affiliated with project {content['projectId']}" + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + + # Process template + # Move projectId to template structure, so that it is saved in mongo + content["template"]["projectId"] = content["projectId"] + json_data = await operator.create_metadata_object(collection, content["template"]) + data = [{"accessionId": json_data["accessionId"], "schema": collection}] + if "tags" in content: + data[0]["tags"] = content["tags"] + await project_op.assign_templates(content["projectId"], data) + + body = ujson.dumps({"accessionId": json_data["accessionId"]}, escape_forward_slashes=False) + + url = f"{req.scheme}://{req.host}{req.path}" + location_headers = CIMultiDict(Location=f"{url}/{json_data['accessionId']}") + LOG.info(f"POST template with accesssion ID {json_data['accessionId']} in schema {collection} was successful.") + return web.Response( + body=body, + status=201, + headers=location_headers, + content_type="application/json", + ) + + async def patch_template(self, req: Request) -> Response: + """Update metadata template in database. + + :param req: PATCH request + :raises: HTTPUnauthorized if template is in published folder + :returns: JSON response containing accessionId for submitted template + """ + schema_type = req.match_info["schema"] + accession_id = req.match_info["accessionId"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + + db_client = req.app["db_client"] + operator: Union[Operator, XMLOperator] + + content = await self._get_data(req) + operator = Operator(db_client) + + await operator.check_exists(collection, accession_id) + + _, project_id = await self._handle_check_ownership(req, collection, accession_id) + + # Update the templates-list in project-collection + if "index" in content and "tags" in content: + LOG.debug("update template-list tags") + index = content.pop("index") + tags = content.pop("tags") + update_operation = [ + { + "op": "replace", + "path": f"/templates/{index}/tags", + "value": tags, + } + ] + project_operator = ProjectOperator(db_client) + await project_operator.update_project(project_id, update_operation) + + # Update the actual template data in template-collection + accession_id = await operator.update_metadata_object(collection, accession_id, content) + + body = ujson.dumps({"accessionId": accession_id}, escape_forward_slashes=False) + LOG.info(f"PATCH template with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(body=body, status=200, content_type="application/json") + + async def delete_template(self, req: Request) -> Response: + """Delete metadata template from database. 
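+        The template reference is first removed from the owning project, after which the document itself is deleted.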
+ + :param req: DELETE request + :raises: HTTPUnauthorized if folder published + :raises: HTTPUnprocessableEntity if template does not belong to current user + :returns: HTTPNoContent response + """ + schema_type = req.match_info["schema"] + self._check_schema_exists(schema_type) + collection = f"template-{schema_type}" + accession_id = req.match_info["accessionId"] + db_client = req.app["db_client"] + + await Operator(db_client).check_exists(collection, accession_id) + project_operator = ProjectOperator(db_client) + + project_ok, project_id = await self._handle_check_ownership(req, collection, accession_id) + if project_ok: + await project_operator.remove_templates(project_id, [accession_id]) + else: + reason = "This template does not belong to this project." + LOG.error(reason) + raise web.HTTPUnprocessableEntity(reason=reason) + + accession_id = await Operator(db_client).delete_metadata_object(collection, accession_id) + + LOG.info(f"DELETE template with accession ID {accession_id} in schema {collection} was successful.") + return web.Response(status=204) diff --git a/metadata_backend/api/handlers/user.py b/metadata_backend/api/handlers/user.py new file mode 100644 index 000000000..747dadfc1 --- /dev/null +++ b/metadata_backend/api/handlers/user.py @@ -0,0 +1,177 @@ +"""Handle HTTP methods for server.""" + +import re +from math import ceil +from typing import Any, Dict, Tuple + +import ujson +from aiohttp import web +from aiohttp.web import Request, Response +from multidict import CIMultiDict + +from ...conf.conf import aai_config +from ...helpers.logger import LOG +from ..middlewares import decrypt_cookie, get_session +from ..operators import UserOperator +from .restapi import RESTAPIHandler + + +class UserAPIHandler(RESTAPIHandler): + """API Handler for users.""" + + def _check_patch_user(self, patch_ops: Any) -> None: + """Check patch operations in request are valid. + + We check that ``folders`` have string values (one or a list) + and ``drafts`` have ``_required_values``. + For tags we check that the ``submissionType`` takes either ``XML`` or + ``Form`` as values. + :param patch_ops: JSON patch request + :raises: HTTPBadRequest if request does not fullfil one of requirements + :raises: HTTPUnauthorized if request tries to do anything else than add or replace + :returns: None + """ + _arrays = {"/templates/-", "/folders/-"} + _required_values = {"schema", "accessionId"} + _tags = re.compile("^/(templates)/[0-9]*/(tags)$") + for op in patch_ops: + if _tags.match(op["path"]): + LOG.info(f"{op['op']} on tags in folder") + if "submissionType" in op["value"].keys() and op["value"]["submissionType"] not in { + "XML", + "CSV", + "Form", + }: + reason = "submissionType is restricted to either 'XML' or 'Form' values." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + pass + else: + if all(i not in op["path"] for i in _arrays): + reason = f"Request contains '{op['path']}' key that cannot be updated to user object" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if op["op"] in {"remove", "copy", "test", "move", "replace"}: + reason = f"{op['op']} on {op['path']} is not allowed." + LOG.error(reason) + raise web.HTTPUnauthorized(reason=reason) + if op["path"] == "/folders/-": + if not (isinstance(op["value"], str) or isinstance(op["value"], list)): + reason = "We only accept string folder IDs." 
+ LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if op["path"] == "/templates/-": + _ops = op["value"] if isinstance(op["value"], list) else [op["value"]] + for item in _ops: + if not all(key in item.keys() for key in _required_values): + reason = "accessionId and schema are required fields." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if ( + "tags" in item + and "submissionType" in item["tags"] + and item["tags"]["submissionType"] not in {"XML", "CSV", "Form"} + ): + reason = "submissionType is restricted to either 'XML' or 'Form' values." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def get_user(self, req: Request) -> Response: + """Get one user by its user ID. + + :param req: GET request + :raises: HTTPUnauthorized if not current user + :returns: JSON response containing user object or list of user templates or user folders by id + """ + user_id = req.match_info["userId"] + if user_id != "current": + LOG.info(f"User ID {user_id} was requested") + raise web.HTTPUnauthorized(reason="Only current user retrieval is allowed") + + current_user = get_session(req)["user_info"] + + # Return whole user object if templates or folders are not specified in query + db_client = req.app["db_client"] + operator = UserOperator(db_client) + user = await operator.read_user(current_user) + LOG.info(f"GET user with ID {user_id} was successful.") + return web.Response( + body=ujson.dumps(user, escape_forward_slashes=False), status=200, content_type="application/json" + ) + + async def delete_user(self, req: Request) -> Response: + """Delete user from database. + + :param req: DELETE request + :raises: HTTPUnauthorized if not current user + :returns: HTTPNoContent response + """ + user_id = req.match_info["userId"] + if user_id != "current": + LOG.info(f"User ID {user_id} delete was requested") + raise web.HTTPUnauthorized(reason="Only current user deletion is allowed") + db_client = req.app["db_client"] + operator = UserOperator(db_client) + + current_user = get_session(req)["user_info"] + + await operator.delete_user(current_user) + LOG.info(f"DELETE user with ID {current_user} was successful.") + + cookie = decrypt_cookie(req) + + try: + req.app["Session"].pop(cookie["id"]) + req.app["Cookies"].remove(cookie["id"]) + except KeyError: + pass + + response = web.HTTPSeeOther(f"{aai_config['redirect']}/") + response.headers["Location"] = ( + "/" if aai_config["redirect"] == aai_config["domain"] else f"{aai_config['redirect']}/" + ) + LOG.debug("Logged out user ") + raise response + + async def _get_user_items(self, req: Request, user: Dict, item_type: str) -> Tuple[Dict, CIMultiDict[str]]: + """Get draft templates owned by the user with pagination values. + + :param req: GET request + :param user: User object + :param item_type: Name of the items ("templates" or "folders") + :raises: HTTPUnauthorized if not current user + :returns: Paginated list of user draft templates and link header + """ + # Check item_type parameter is not faulty + if item_type not in {"templates", "folders"}: + reason = f"{item_type} is a faulty item parameter. 
Should be either folders or templates" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + page = self._get_page_param(req, "page", 1) + per_page = self._get_page_param(req, "per_page", 5) + + db_client = req.app["db_client"] + operator = UserOperator(db_client) + user_id = req.match_info["userId"] + + query = {"userId": user} + + items, total_items = await operator.filter_user(query, item_type, page, per_page) + LOG.info(f"GET user with ID {user_id} was successful.") + + result = { + "page": { + "page": page, + "size": per_page, + "totalPages": ceil(total_items / per_page), + "total" + item_type.title(): total_items, + }, + item_type: items, + } + + url = f"{req.scheme}://{req.host}{req.path}" + link_headers = self._header_links(url, page, per_page, total_items) + LOG.debug(f"Pagination header links: {link_headers}") + LOG.info(f"Querying for user's {item_type} resulted in {total_items} {item_type}") + return result, link_headers diff --git a/metadata_backend/api/health.py b/metadata_backend/api/health.py index bc56100a0..8e5a0e617 100644 --- a/metadata_backend/api/health.py +++ b/metadata_backend/api/health.py @@ -1,5 +1,5 @@ """Handle health check endpoint.""" -import json +import ujson import time from typing import Dict, Union, Any @@ -35,7 +35,9 @@ async def get_health_status(self, req: Request) -> Response: full_status["services"] = services LOG.info("Health status collected.") - return web.Response(body=json.dumps(full_status), status=200, content_type="application/json") + return web.Response( + body=ujson.dumps(full_status, escape_forward_slashes=False), status=200, content_type="application/json" + ) async def create_test_db_client(self) -> AsyncIOMotorClient: """Initialize a new database client to test Mongo connection. diff --git a/metadata_backend/api/middlewares.py b/metadata_backend/api/middlewares.py index 0b04cf7b1..b70f78336 100644 --- a/metadata_backend/api/middlewares.py +++ b/metadata_backend/api/middlewares.py @@ -1,5 +1,5 @@ """Middleware methods for server.""" -import json +import ujson from http import HTTPStatus from typing import Callable, Tuple from cryptography.fernet import InvalidToken @@ -18,7 +18,7 @@ def _check_error_page_requested(req: Request, error_code: int) -> web.Response: # type:ignore """Return the correct error page with correct status code.""" if "Accept" in req.headers and req.headers["Accept"]: - if req.headers["Accept"].split(",")[0] in ["text/html", "application/xhtml+xml"]: + if req.headers["Accept"].split(",")[0] in {"text/html", "application/xhtml+xml"}: raise web.HTTPSeeOther( f"/error{str(error_code)}", headers={ @@ -68,7 +68,7 @@ async def http_error_handler(req: Request, handler: Callable) -> Response: raise web.HTTPUnprocessableEntity(text=details, content_type=c_type) else: _check_error_page_requested(req, 500) - raise web.HTTPServerError() + raise web.HTTPInternalServerError(text=details, content_type=c_type) @middleware @@ -84,6 +84,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: controlled_paths = [ "/schemas", "/drafts", + "/templates", "/validate", "/publish", "/submit", @@ -114,7 +115,7 @@ async def check_login(request: Request, handler: Callable) -> StreamResponse: if request.path.startswith(tuple(controlled_paths)) and "OIDC_URL" in os.environ and bool(os.getenv("OIDC_URL")): cookie = decrypt_cookie(request) session = request.app["Session"].setdefault(cookie["id"], {}) - if not all(x in ["access_token", "user_info", "oidc_state"] for x in session): + if not all(x in 
{"access_token", "user_info", "oidc_state"} for x in session): LOG.debug("checked session parameter") response = web.HTTPSeeOther(f"{aai_config['domain']}/aai") response.headers["Location"] = "/aai" @@ -161,7 +162,7 @@ def generate_cookie(request: Request) -> Tuple[dict, str]: } # Return a tuple of the session as an encrypted JSON string, and the # cookie itself - return (cookie, request.app["Crypt"].encrypt(json.dumps(cookie).encode("utf-8")).decode("utf-8")) + return (cookie, request.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8")) def decrypt_cookie(request: web.Request) -> dict: @@ -176,7 +177,7 @@ def decrypt_cookie(request: web.Request) -> dict: raise web.HTTPUnauthorized() try: cookie_json = request.app["Crypt"].decrypt(request.cookies["MTD_SESSION"].encode("utf-8")).decode("utf-8") - cookie = json.loads(cookie_json) + cookie = ujson.loads(cookie_json) LOG.debug(f"Decrypted cookie: {cookie}") return cookie except InvalidToken: @@ -198,7 +199,7 @@ def _check_csrf(request: web.Request) -> bool: if "redirect" in aai_config and request.headers["Referer"].startswith(aai_config["redirect"]): LOG.info("Skipping Referer check due to request coming from frontend.") return True - if "auth_referer" in aai_config and request.headers["Referer"].startswith(aai_config["auth_referer"]): + if "oidc_url" in aai_config and request.headers["Referer"].startswith(aai_config["oidc_url"]): LOG.info("Skipping Referer check due to request coming from OIDC.") return True if cookie["referer"] not in request.headers["Referer"]: @@ -229,7 +230,7 @@ def _json_exception(status: int, exception: web.HTTPException, url: URL) -> str: :param url: Request URL that caused the exception :returns: Problem detail JSON object as a string """ - body = json.dumps( + body = ujson.dumps( { "type": "about:blank", # Replace type value above with an URL to @@ -237,6 +238,7 @@ def _json_exception(status: int, exception: web.HTTPException, url: URL) -> str: "title": HTTPStatus(status).phrase, "detail": exception.reason, "instance": url.path, # optional - } + }, + escape_forward_slashes=False, ) return body diff --git a/metadata_backend/api/operators.py b/metadata_backend/api/operators.py index 9d5924b3a..d04663978 100644 --- a/metadata_backend/api/operators.py +++ b/metadata_backend/api/operators.py @@ -1,10 +1,10 @@ """Operators for handling database-related operations.""" import re +import time from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union from uuid import uuid4 -import time from aiohttp import web from dateutil.relativedelta import relativedelta @@ -12,11 +12,12 @@ from multidict import MultiDictProxy from pymongo.errors import ConnectionFailure, OperationFailure -from ..conf.conf import query_map, mongo_database +from ..conf.conf import mongo_database, query_map from ..database.db_service import DBService, auto_reconnect from ..helpers.logger import LOG from ..helpers.parser import XMLToJSONParser from ..helpers.validator import JSONValidator +from .middlewares import get_session class BaseOperator(ABC): @@ -39,7 +40,7 @@ def __init__(self, db_name: str, content_type: str, db_client: AsyncIOMotorClien self.db_service = DBService(db_name, db_client) self.content_type = content_type - async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> str: + async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) -> Dict: """Create new 
metadata object to database. Data formatting and addition step for JSON or XML must be implemented @@ -49,11 +50,13 @@ async def create_metadata_object(self, schema_type: str, data: Union[Dict, str]) :param data: Data to be saved to database. :returns: Accession id for the object inserted to database """ - accession_id = await self._format_data_to_create_and_add_to_db(schema_type, data) - LOG.info(f"Inserting object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id + data = await self._format_data_to_create_and_add_to_db(schema_type, data) + LOG.info( + f"Inserting object with schema {schema_type} to database succeeded with accession id: {data['accessionId']}" + ) + return data - async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: + async def replace_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> Dict: """Replace metadata object from database. Data formatting and addition step for JSON or XML must be implemented @@ -64,9 +67,9 @@ async def replace_metadata_object(self, schema_type: str, accession_id: str, dat :param data: Data to be saved to database. :returns: Accession id for the object replaced to database """ - await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) + data = await self._format_data_to_replace_and_add_to_db(schema_type, accession_id, data) LOG.info(f"Replacing object with schema {schema_type} to database succeeded with accession id: {accession_id}") - return accession_id + return data async def update_metadata_object(self, schema_type: str, accession_id: str, data: Union[Dict, str]) -> str: """Update metadata object from database. @@ -97,11 +100,11 @@ async def read_metadata_object(self, schema_type: str, accession_id: str) -> Tup try: data_raw = await self.db_service.read(schema_type, accession_id) if not data_raw: - LOG.error(f"Object with {accession_id} not found.") + LOG.error(f"Object with {accession_id} not found in schema: {schema_type}.") raise web.HTTPNotFound() data = await self._format_read_data(schema_type, data_raw) except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting object: {error}" + reason = f"Error happened while reading object: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) return data, self.content_type @@ -127,7 +130,7 @@ async def delete_metadata_object(self, schema_type: str, accession_id: str) -> s LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> str: + async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> bool: """Insert formatted metadata object to database. :param schema_type: Schema type of the object to insert. @@ -141,14 +144,14 @@ async def _insert_formatted_object_to_db(self, schema_type: str, data: Dict) -> reason = f"Error happened while getting object: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if insert_success: - return data["accessionId"] - else: + + if not insert_success: reason = "Inserting object to database failed for some reason." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + return True - async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> str: + async def _replace_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> bool: """Replace formatted metadata object in database. 
:param schema_type: Schema type of the object to replace. @@ -168,12 +171,11 @@ async def _replace_object_from_db(self, schema_type: str, accession_id: str, dat reason = f"Error happened while getting object: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if replace_success: - return accession_id - else: + if not replace_success: reason = "Replacing object to database failed for some reason." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + return True async def _update_object_from_db(self, schema_type: str, accession_id: str, data: Dict) -> str: """Update formatted metadata object in database. @@ -249,14 +251,14 @@ async def check_exists(self, schema_type: str, accession_id: str) -> None: raise web.HTTPNotFound(reason=reason) @abstractmethod - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Any) -> Dict: """Format and add data to database. Must be implemented by subclass. """ @abstractmethod - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> str: + async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> Dict: """Format and replace data in database. Must be implemented by subclass. @@ -292,6 +294,55 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "application/json", db_client) + async def query_templates_by_project(self, project_id: str) -> List[Dict[str, Union[Dict[str, str], str]]]: + """Get templates list from given project ID. + + :param project_id: project internal ID that owns templates + :returns: list of templates in project + """ + try: + templates_cursor = self.db_service.query( + "project", {"projectId": project_id}, custom_projection={"_id": 0, "templates": 1} + ) + templates = [template async for template in templates_cursor] + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting templates from project {project_id}: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if len(templates) == 1: + return templates[0]["templates"] + else: + return [] + + async def get_object_project(self, collection: str, accession_id: str) -> str: + """Get the project ID the object is associated to. 
+ + :param collection: database table to look into + :param accession_id: internal accession ID of object + :returns: project ID object is associated to + """ + try: + object_cursor = self.db_service.query(collection, {"accessionId": accession_id}) + objects = [object async for object in object_cursor] + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting object from {collection}: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if len(objects) == 1: + try: + return objects[0]["projectId"] + except KeyError as error: + # This should not be possible and should never happen, if the object was created properly + reason = f"{collection} {accession_id} does not have an associated project, err={error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + reason = f"{collection} {accession_id} not found" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + async def query_metadata_database( self, schema_type: str, que: MultiDictProxy, page_num: int, page_size: int, filter_objects: List ) -> Tuple[List, int, int, int]: @@ -380,7 +431,32 @@ async def query_metadata_database( ) return data, page_num, page_size, total_objects[0]["total"] - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> str: + async def create_metax_info(self, schema_type: str, accession_id: str, data: Dict) -> bool: + """Update study or dataset object with metax info. + + :param schema_type: Schema type of the object to update. + :param accession_id: Identifier of object to update. + :param data: Metadata object + :returns: True on successful database update + """ + if schema_type not in {"study", "dataset"}: + LOG.error("Object schema type must be either study or dataset") + return False + try: + create_success = await self.db_service.update(schema_type, accession_id, data) + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while updating object: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + if not create_success: + reason = "Updating object to database failed for some reason." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + LOG.info(f"Object {schema_type} with id {accession_id} updated with metax info.") + return True + + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dict) -> Dict: """Format JSON metadata object and add it to db. Adds necessary additional information to object before adding to db. @@ -400,24 +476,26 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: Dic if schema_type == "study": data["publishDate"] = datetime.utcnow() + relativedelta(months=2) LOG.debug(f"Operator formatted data for {schema_type} to add to DB.") - return await self._insert_formatted_object_to_db(schema_type, data) + await self._insert_formatted_object_to_db(schema_type, data) + return data - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Dict) -> str: + async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: Dict) -> Dict: """Format JSON metadata object and replace it in db. Replace information in object before adding to db. - We will not replace accessionId, publishDate or dateCreated, + We will not replace ``accessionId``, ``publishDate`` or ``dateCreated``, as these are generated when created. - - We will keep also publisDate and dateCreated from old object.
+ Will not replace ``metaxIdentifier`` and ``doi`` for ``study`` and ``dataset`` + as it is generated when created. + We will keep also ``publisDate`` and ``dateCreated`` from old object. :param schema_type: Schema type of the object to replace. :param accession_id: Identifier of object to replace. :param data: Metadata object :returns: Accession Id for object inserted to database """ - forbidden_keys = ["accessionId", "publishDate", "dateCreated"] + forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier", "doi"} if any(i in data for i in forbidden_keys): reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed." LOG.error(reason) @@ -425,23 +503,28 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio data["accessionId"] = accession_id data["dateModified"] = datetime.utcnow() LOG.debug(f"Operator formatted data for {schema_type} to add to DB") - return await self._replace_object_from_db(schema_type, accession_id, data) + await self._replace_object_from_db(schema_type, accession_id, data) + return data async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession_id: str, data: Any) -> str: """Format and update data in database. + Will not allow to update ``metaxIdentifier`` and ``doi`` for ``study`` and ``dataset`` + as it is generated when created. + :param schema_type: Schema type of the object to replace. :param accession_id: Identifier of object to replace. :param data: Metadata object :returns: Accession Id for object inserted to database """ - forbidden_keys = ["accessionId", "publishDate", "dateCreated"] + forbidden_keys = {"accessionId", "publishDate", "dateCreated", "metaxIdentifier", "doi"} if any(i in data for i in forbidden_keys): reason = f"Some items (e.g: {', '.join(forbidden_keys)}) cannot be changed." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) data["accessionId"] = accession_id data["dateModified"] = datetime.utcnow() + LOG.debug(f"Operator formatted data for {schema_type} to add to DB") return await self._update_object_from_db(schema_type, accession_id, data) @@ -468,7 +551,7 @@ async def _format_read_data( :param schema_type: Schema type of the object to read. :param data_raw: Data from mongodb query, can contain multiple results - :returns: Mongodb query result, formatted to readable dicts + :returns: MongoDB query result, formatted to readable dicts """ if isinstance(data_raw, dict): return self._format_single_dict(schema_type, data_raw) @@ -513,7 +596,7 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ super().__init__(mongo_database, "text/xml", db_client) - async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> str: + async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str) -> Dict: """Format XML metadata object and add it to db. XML is validated, then parsed to JSON, which is added to database. 
@@ -524,16 +607,19 @@ async def _format_data_to_create_and_add_to_db(self, schema_type: str, data: str :returns: Accession Id for object inserted to database """ db_client = self.db_service.db_client - # remove `drafs-` from schema type + # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) - accession_id = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) + data_with_id = await Operator(db_client)._format_data_to_create_and_add_to_db(schema_type, data_as_json) LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB") - return await self._insert_formatted_object_to_db( - f"xml-{schema_type}", {"accessionId": accession_id, "content": data} + + await self._insert_formatted_object_to_db( + f"xml-{schema_type}", {"accessionId": data_with_id["accessionId"], "content": data} ) - async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str: + return data_with_id + + async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> Dict: """Format XML metadata object and add it to db. XML is validated, then parsed to JSON, which is added to database. @@ -548,13 +634,14 @@ async def _format_data_to_replace_and_add_to_db(self, schema_type: str, accessio # remove `draft-` from schema type schema = schema_type[6:] if schema_type.startswith("draft") else schema_type data_as_json = XMLToJSONParser().parse(schema, data) - accession_id = await Operator(db_client)._format_data_to_replace_and_add_to_db( + data_with_id = await Operator(db_client)._format_data_to_replace_and_add_to_db( schema_type, accession_id, data_as_json ) LOG.debug(f"XMLOperator formatted data for xml-{schema_type} to add to DB") - return await self._replace_object_from_db( + await self._replace_object_from_db( f"xml-{schema_type}", accession_id, {"accessionId": accession_id, "content": data} ) + return data_with_id async def _format_data_to_update_and_add_to_db(self, schema_type: str, accession_id: str, data: str) -> str: """Raise not implemented. @@ -594,6 +681,33 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ self.db_service = DBService(mongo_database, db_client) + async def get_folder_project(self, folder_id: str) -> str: + """Get the project ID the folder is associated to. + + :param folder_id: internal accession ID of folder + :returns: project ID folder is associated to + """ + try: + folder_cursor = self.db_service.query("folder", {"folderId": folder_id}) + folders = [folder async for folder in folder_cursor] + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting folder: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if len(folders) == 1: + try: + return folders[0]["projectId"] + except KeyError as error: + # This should not be possible and should never happen, if the folder was created properly + reason = f"folder {folder_id} does not have an associated project, err={error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + reason = f"folder {folder_id} not found" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + async def check_object_in_folder(self, collection: str, accession_id: str) -> Tuple[bool, str, bool]: """Check a object/draft is in a folder. 
@@ -610,7 +724,7 @@ async def check_object_in_folder(self, collection: str, accession_id: str) -> Tu ) folder_check = [folder async for folder in folder_cursor] except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while inserting user: {error}" + reason = f"Error happened while checking object in folder: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) @@ -630,7 +744,7 @@ async def get_collection_objects(self, folder_id: str, collection: str) -> List: """List objects ids per collection. :param collection: collection it belongs to, it would be used as path - :returns: count of objects + :returns: List of objects """ try: folder_path = "drafts" if collection.startswith("draft") else "metadataObjects" @@ -640,7 +754,7 @@ async def get_collection_objects(self, folder_id: str, collection: str) -> List: ) folders = [folder async for folder in folder_cursor] except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while inserting user: {error}" + reason = f"Error happened while getting collection objects: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) @@ -658,6 +772,7 @@ async def create_folder(self, data: Dict) -> str: """ folder_id = self._generate_folder_id() data["folderId"] = folder_id + data["text_name"] = " ".join(re.split("[\\W_]", data["name"])) data["published"] = False data["dateCreated"] = int(time.time()) data["metadataObjects"] = data["metadataObjects"] if "metadataObjects" in data else [] @@ -677,21 +792,34 @@ async def create_folder(self, data: Dict) -> str: LOG.info(f"Inserting folder with id {folder_id} to database succeeded.") return folder_id - async def query_folders(self, query: Dict, page_num: int, page_size: int) -> Tuple[List, int]: + async def query_folders( + self, query: Dict, page_num: int, page_size: int, sort_param: Optional[dict] = None + ) -> Tuple[List, int]: """Query database based on url query parameters. :param query: Dict containing query information :param page_num: Page number :param page_size: Results per page + :param sort_param: Sorting options. :returns: Paginated query result """ skips = page_size * (page_num - 1) + + if not sort_param: + sort = {"dateCreated": -1} + elif sort_param["score"] and not sort_param["date"]: + sort = {"score": {"$meta": "textScore"}, "dateCreated": -1} # type: ignore + elif sort_param["score"] and sort_param["date"]: + sort = {"dateCreated": -1, "score": {"$meta": "textScore"}} # type: ignore + else: + sort = {"dateCreated": -1} + _query = [ {"$match": query}, - {"$sort": {"dateCreated": -1}}, + {"$sort": sort}, {"$skip": skips}, {"$limit": page_size}, - {"$project": {"_id": 0}}, + {"$project": {"_id": 0, "text_name": 0}}, ] data_raw = await self.db_service.do_aggregate("folder", _query) @@ -723,7 +851,7 @@ async def read_folder(self, folder_id: str) -> Dict: raise web.HTTPBadRequest(reason=reason) return folder - async def update_folder(self, folder_id: str, patch: List) -> str: + async def update_folder(self, folder_id: str, patch: List, schema: str = "") -> str: """Update object folder from database. 
Utilizes JSON Patch operations specified at: http://jsonpatch.com/ @@ -734,14 +862,20 @@ :returns: ID of the folder updated to database """ try: - update_success = await self.db_service.patch("folder", folder_id, patch) + if schema == "study": + update_success = await self.db_service.update_study("folder", folder_id, patch) + else: + update_success = await self.db_service.patch("folder", folder_id, patch) except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting folder: {error}" + reason = f"Error happened while updating folder: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) if not update_success: - reason = "Updating folder to database failed for some reason." + if schema == "study": + reason = "Either there was a request to add another study to a folder or another error occurred." + else: + reason = "Updating folder to database failed for some reason." LOG.error(reason) raise web.HTTPBadRequest(reason=reason) else: @@ -762,7 +896,7 @@ async def remove_object(self, folder_id: str, collection: str, accession_id: str upd_content = {folder_path: {"accessionId": accession_id}} await self.db_service.remove("folder", folder_id, upd_content) except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting user: {error}" + reason = f"Error happened while removing object from folder: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) @@ -842,39 +976,64 @@ def __init__(self, db_client: AsyncIOMotorClient) -> None: """ self.db_service = DBService(mongo_database, db_client) - async def check_user_has_doc(self, collection: str, user_id: str, accession_id: str) -> bool: - """Check a folder/draft belongs to user. + async def check_user_has_doc( + self, req: web.Request, collection: str, user_id: str, accession_id: str + ) -> Tuple[bool, str]: + """Check that a folder/template belongs to the same project the user is in. :param collection: collection it belongs to, it would be used as path :param user_id: user_id from session :param accession_id: document by accession_id :raises: HTTPUnprocessableEntity if more users seem to have same folder - :returns: True if accession_id belongs to user + :returns: True and project_id if accession_id belongs to user, False otherwise + """ + LOG.debug(f"check that user {user_id} belongs to same project as {collection} {accession_id}") + + db_client = req.app["db_client"] + user_operator = UserOperator(db_client) + + project_id = "" + if collection.startswith("template"): + object_operator = Operator(db_client) + project_id = await object_operator.get_object_project(collection, accession_id) + elif collection == "folders": + folder_operator = FolderOperator(db_client) + project_id = await folder_operator.get_folder_project(accession_id) + else: + reason = f"collection must be folders or template, received {collection}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + current_user = get_session(req)["user_info"] + user = await user_operator.read_user(current_user) + user_has_project = await user_operator.check_user_has_project(project_id, user["userId"]) + return user_has_project, project_id + + async def check_user_has_project(self, project_id: str, user_id: str) -> bool: + """Check that user has project affiliation.
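+        The user document is queried with ``$elemMatch`` on its ``projects`` array together with the ``userId``.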
+ + :param project_id: internal project ID + :param user_id: internal user ID + :raises HTTPBadRequest: on database error + :returns: True if user has project, False if user does not have project """ try: - if collection.startswith("draft"): - user_query = {"drafts": {"$elemMatch": {"accessionId": accession_id}}, "userId": user_id} - else: - user_query = {"folders": {"$elemMatch": {"$eq": accession_id}}, "userId": user_id} + user_query = {"projects": {"$elemMatch": {"projectId": project_id}}, "userId": user_id} user_cursor = self.db_service.query("user", user_query) user_check = [user async for user in user_cursor] + if user_check: + LOG.debug(f"user {user_id} has project {project_id} affiliation") + return True + else: + reason = f"user {user_id} does not have project {project_id} affiliation" + LOG.debug(reason) + return False except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while inserting user: {error}" + reason = f"Error happened while reading user project affiliation: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - if len(user_check) == 0: - LOG.info(f"doc {accession_id} belongs to no user something is off") - return False - elif len(user_check) > 1: - reason = "There seem to be more users with same ID and/or same folders." - LOG.error(reason) - raise web.HTTPUnprocessableEntity(reason=reason) - else: - LOG.info(f"found doc {accession_id} at current user") - return True - - async def create_user(self, data: Tuple) -> str: + async def create_user(self, data: Dict[str, Union[list, str]]) -> str: """Create new user object to database. :param data: User Data to identify user @@ -883,19 +1042,16 @@ async def create_user(self, data: Tuple) -> str: """ user_data: Dict[str, Union[list, str]] = dict() - external_id = data[0] # this also can be sub key - name = data[1] try: - existing_user_id = await self.db_service.exists_user_by_external_id(external_id, name) + existing_user_id = await self.db_service.exists_user_by_external_id(data["user_id"], data["real_name"]) if existing_user_id: - LOG.info(f"User with identifier: {external_id} exists, no need to create.") + LOG.info(f"User with identifier: {data['user_id']} exists, no need to create.") return existing_user_id else: - user_data["drafts"] = [] - user_data["folders"] = [] + user_data["projects"] = data["projects"] user_data["userId"] = user_id = self._generate_user_id() - user_data["name"] = name - user_data["externalId"] = external_id + user_data["name"] = data["real_name"] + user_data["externalId"] = data["user_id"] JSONValidator(user_data, "users") insert_success = await self.db_service.create("user", user_data) if not insert_success: @@ -976,7 +1132,7 @@ async def update_user(self, user_id: str, patch: List) -> str: await self._check_user_exists(user_id) update_success = await self.db_service.patch("user", user_id, patch) except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting user: {error}" + reason = f"Error happened while updating user: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) @@ -991,12 +1147,12 @@ async def update_user(self, user_id: str, patch: List) -> str: async def assign_objects(self, user_id: str, collection: str, object_ids: List) -> None: """Assing object to user. - An object can be folder(s) or draft(s). + An object can be folder(s) or templates(s). 
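An illustrative payload for the reworked dict-based create_user above; the values are invented, and the project list shape follows the projectId key that check_user_has_project queries on.

new_user = {
    "user_id": "user@example.org",            # external identifier from AAI
    "real_name": "Example User",
    "projects": [{"projectId": "internal-project-uuid"}],
}
# user_id = await user_operator.create_user(new_user)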
:param user_id: ID of user to update :param collection: collection where to remove the id from :param object_ids: ID or list of IDs of folder(s) to assign - :raises: HTTPBadRequest if assigning drafts/folders to user was not successful + :raises: HTTPBadRequest if assigning templates/folders to user was not successful returns: None """ try: @@ -1005,7 +1161,7 @@ async def assign_objects(self, user_id: str, collection: str, object_ids: List) "user", user_id, {collection: {"$each": object_ids, "$position": 0}} ) except (ConnectionFailure, OperationFailure) as error: - reason = f"Error happened while getting user: {error}" + reason = f"Error happened while assigning objects to user: {error}" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) @@ -1019,7 +1175,7 @@ async def assign_objects(self, user_id: str, collection: str, object_ids: List) async def remove_objects(self, user_id: str, collection: str, object_ids: List) -> None: """Remove object from user. - An object can be folder(s) or draft(s). + An object can be folder(s) or template(s). :param user_id: ID of user to update :param collection: collection where to remove the id from @@ -1031,8 +1187,8 @@ async def remove_objects(self, user_id: str, collection: str, object_ids: List) try: await self._check_user_exists(user_id) for obj in object_ids: - if collection == "drafts": - remove_content = {"drafts": {"accessionId": obj}} + if collection == "templates": + remove_content = {"templates": {"accessionId": obj}} else: remove_content = {"folders": obj} await self.db_service.remove("user", user_id, remove_content) @@ -1086,3 +1242,142 @@ def _generate_user_id(self) -> str: sequence = uuid4().hex LOG.debug("Generated user ID.") return sequence + + +class ProjectOperator: + """Operator class for handling database operations of project groups. + + Operations are implemented with JSON format. + """ + + def __init__(self, db_client: AsyncIOMotorClient) -> None: + """Init db_service. + + :param db_client: Motor client used for database connections. Should be + running on same loop with aiohttp, so needs to be passed from aiohttp + Application. + """ + self.db_service = DBService(mongo_database, db_client) + + async def create_project(self, project_number: str) -> str: + """Create new object project to database. + + :param project_numer: project external ID received from AAI + :raises: HTTPBadRequest if error occurs during the process of insert + :returns: Project id for the project inserted to database + """ + project_data: Dict[str, Union[str, List[str]]] = dict() + + try: + existing_project_id = await self.db_service.exists_project_by_external_id(project_number) + if existing_project_id: + LOG.info(f"Project with external ID: {project_number} exists, no need to create.") + return existing_project_id + else: + project_id = self._generate_project_id() + project_data["templates"] = [] + project_data["projectId"] = project_id + project_data["externalId"] = project_number + insert_success = await self.db_service.create("project", project_data) + if not insert_success: + reason = "Inserting project to database failed for some reason." 
+ LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + LOG.info(f"Inserting project with id {project_id} to database succeeded.") + return project_id + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while inserting project: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + async def _check_project_exists(self, project_id: str) -> None: + """Check the existence of a project by its id in the database. + + :param project_id: Identifier of project to find. + :raises: HTTPNotFound if project does not exist + :returns: None + """ + exists = await self.db_service.exists("project", project_id) + if not exists: + reason = f"Project with id {project_id} was not found." + LOG.error(reason) + raise web.HTTPNotFound(reason=reason) + + async def assign_templates(self, project_id: str, object_ids: List) -> None: + """Assing templates to project. + + :param project_id: ID of project to update + :param object_ids: ID or list of IDs of template(s) to assign + :raises: HTTPBadRequest if assigning templates to project was not successful + returns: None + """ + try: + await self._check_project_exists(project_id) + assign_success = await self.db_service.append( + "project", project_id, {"templates": {"$each": object_ids, "$position": 0}} + ) + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting project: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if not assign_success: + reason = "Assigning templates to project failed." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + LOG.info(f"Assigning templates={object_ids} to project={project_id} succeeded.") + + async def remove_templates(self, project_id: str, object_ids: List) -> None: + """Remove templates from project. + + :param project_id: ID of project to update + :param object_ids: ID or list of IDs of template(s) to remove + :raises: HTTPBadRequest if db connection fails + returns: None + """ + remove_content: Dict + try: + await self._check_project_exists(project_id) + for obj in object_ids: + remove_content = {"templates": {"accessionId": obj}} + await self.db_service.remove("project", project_id, remove_content) + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while removing templates from project: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + LOG.info(f"Removing templates={object_ids} from project={project_id} succeeded.") + + async def update_project(self, project_id: str, patch: List) -> str: + """Update project object in database. + + :param project_id: ID of project to update + :param patch: Patch operations determined in the request + :returns: ID of the project updated to database + """ + try: + await self._check_project_exists(project_id) + update_success = await self.db_service.patch("project", project_id, patch) + except (ConnectionFailure, OperationFailure) as error: + reason = f"Error happened while getting project: {error}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + if not update_success: + reason = "Updating project in database failed for some reason." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + else: + LOG.info(f"Updating project={project_id} to database succeeded.") + return project_id + + def _generate_project_id(self) -> str: + """Generate random project id. 
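A rough sketch (assumed wiring, not part of this hunk) of how the login callback could use the new ProjectOperator before creating a user: resolve each external project number from AAI into an internal project document, then hand the internal IDs to UserOperator.create_user. The projectNumber key kept alongside projectId is an assumption.

async def _resolve_projects(db_client, external_project_numbers: List[str]) -> List[Dict]:
    project_op = ProjectOperator(db_client)
    projects = []
    for number in external_project_numbers:
        # create_project returns the existing projectId when the external ID is already known
        project_id = await project_op.create_project(number)
        projects.append({"projectId": project_id, "projectNumber": number})
    return projects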
+ + :returns: str with project id + """ + sequence = uuid4().hex + LOG.debug("Generated project ID.") + return sequence diff --git a/metadata_backend/conf/conf.py b/metadata_backend/conf/conf.py index ebe4e0b35..45de435f8 100644 --- a/metadata_backend/conf/conf.py +++ b/metadata_backend/conf/conf.py @@ -7,7 +7,7 @@ - ``MONGO_USERNAME`` - Username for mongodb - ``MONGO_PASSWORD`` - Password for mongodb -- ``MONGO_HOST`` - Mongodb server hostname, with port specified +- ``MONGO_HOST`` - MongoDB server hostname, with port specified Admin access is needed in order to create new databases during runtime. Default values are the same that are used in docker-compose file @@ -22,7 +22,7 @@ Schema types (such as ``"submission"``, ``"study"``, ``"sample"``) are needed in different parts of the application. -3) Mongodb query mappings +3) MongoDB query mappings Mappings are needed to turn incoming REST api queries into mongodb queries. Change these if database structure changes. @@ -33,10 +33,11 @@ import json import os -from pathlib import Path from distutils.util import strtobool -from typing import Tuple +from pathlib import Path +from typing import Dict, Tuple +import ujson from motor.motor_asyncio import AsyncIOMotorClient from ..helpers.logger import LOG @@ -105,9 +106,9 @@ def create_db_client() -> AsyncIOMotorClient: # 2) Load schema types and descriptions from json # Default schemas will be ENA schemas -path_to_schema_file = Path(__file__).parent / "ena_schemas.json" +path_to_schema_file = Path(__file__).parent / "schemas.json" with open(path_to_schema_file) as schema_file: - schema_types = json.load(schema_file) + schema_types = ujson.load(schema_file) # 3) Define mapping between url query parameters and mongodb queries @@ -144,15 +145,36 @@ def create_db_client() -> AsyncIOMotorClient: "redirect": f'{os.getenv("REDIRECT_URL")}' if bool(os.getenv("REDIRECT_URL")) else os.getenv("BASE_URL", "http://localhost:5430"), - "scope": "openid profile email", - "iss": os.getenv("ISS_URL", ""), + "scope": os.getenv("OIDC_SCOPE", "openid profile email"), "callback_url": f'{os.getenv("BASE_URL", "http://localhost:5430").rstrip("/")}/callback', - "auth_url": f'{os.getenv("AUTH_URL", "")}' - if bool(os.getenv("AUTH_URL")) - else f'{os.getenv("OIDC_URL", "").rstrip("/")}/authorize', - "token_url": f'{os.getenv("OIDC_URL", "").rstrip("/")}/token', - "user_info": f'{os.getenv("OIDC_URL", "").rstrip("/")}/userinfo', - "revoke_url": f'{os.getenv("OIDC_URL", "").rstrip("/")}/revoke', - "jwk_server": f'{os.getenv("JWK_URL", "")}', - "auth_referer": f'{os.getenv("AUTH_REFERER", "")}', + "oidc_url": os.getenv("OIDC_URL", ""), + "auth_method": os.getenv("AUTH_METHOD", "code"), } + + +# 6) Set the DataCite REST API values + +doi_config = { + "api": os.getenv("DOI_API", ""), + "prefix": os.getenv("DOI_PREFIX", ""), + "user": os.getenv("DOI_USER", ""), + "key": os.getenv("DOI_KEY", ""), + "url": os.getenv("DATACITE_URL", "https://doi.org"), + "publisher": "CSC - IT Center for Science", + "discovery_url": os.getenv("DISCOVERY_URL", "https://etsin.fairdata.fi/dataset/"), +} + +metax_config = { + "username": os.getenv("METAX_USER", "sd"), + "password": os.getenv("METAX_PASS", "test"), + "url": os.getenv("METAX_URL", "http://mockmetax:8002"), + "rest_route": "/rest/v2/datasets", + "publish_route": "/rpc/v2/datasets/publish_dataset", + "catalog_pid": "urn:nbn:fi:att:data-catalog-sd", +} + +metax_reference_data: Dict = {"identifier_types": {}} +with open(Path(__file__).parent.parent / 
"conf/metax_references/identifier_types.json", "r") as codes: + codes_list = json.load(codes)["codes"] + for code in codes_list: + metax_reference_data["identifier_types"][code["codeValue"].lower()] = code["uri"] diff --git a/metadata_backend/conf/metax_references/identifier_types.json b/metadata_backend/conf/metax_references/identifier_types.json new file mode 100644 index 000000000..687ba58b9 --- /dev/null +++ b/metadata_backend/conf/metax_references/identifier_types.json @@ -0,0 +1,328 @@ +{ + "id": "973426a0-a333-4c70-90db-caea89f4e164", + "codeValue": "identifier_type", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type", + "codesUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/", + "extensionsUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/extensions/", + "codes": [ + { + "id": "7196a8a3-7e12-4fdd-81ef-63de795d1c4f", + "codeValue": "ark", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/ark", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ark", + "status": "DRAFT", + "order": 61, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.852Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "Archival Resource Key (ARK)" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ark/members/" + }, + { + "id": "3a484add-3b4e-444d-b24c-c40a97ef0267", + "codeValue": "doi", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/doi", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/doi", + "status": "DRAFT", + "order": 62, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.879Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "Digital Object Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/doi/members/" + }, + { + "id": "d4172012-04e3-4a5f-bccc-357f1863b5f5", + "codeValue": "arxiv", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/arxiv", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/arxiv", + "status": "DRAFT", + "order": 63, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.898Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "arXiv identifer" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/arxiv/members/" + }, + { + "id": "fdcd5509-cb9e-4a6a-8e52-4ee14b27ca2f", + "codeValue": "bibcode", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/bibcode", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/bibcode", + "status": "DRAFT", + "order": 64, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.917Z", + "modified": "2018-11-12T09:47:41.115Z", + "prefLabel": { + "en": "Astrophysics Data System Code" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/bibcode/members/" + }, + { + "id": "42c04d32-ee29-4cb4-b3d0-fd588ecd5f7f", + 
"codeValue": "ean13", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/ean13", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ean13", + "status": "DRAFT", + "order": 65, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.936Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "European Article Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/ean13/members/" + }, + { + "id": "f83ccfcc-9f63-416f-b56f-f77e69aa7b92", + "codeValue": "eissn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/eissn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/eissn", + "status": "DRAFT", + "order": 66, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.955Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Electronic International Standard Serial Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/eissn/members/" + }, + { + "id": "15b482c5-9a58-4700-8fb3-8ec5a3f630c7", + "codeValue": "handle", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/handle", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/handle", + "status": "DRAFT", + "order": 67, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.973Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Handle - an abstact reference to a resource" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/handle/members/" + }, + { + "id": "6785fe94-32e9-45b6-8d14-8b63cc70f6f8", + "codeValue": "igsn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/igsn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/igsn", + "status": "DRAFT", + "order": 68, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:39.994Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Geo Sample Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/igsn/members/" + }, + { + "id": "b59b4231-a483-49da-ba3f-122394c63d5d", + "codeValue": "isbn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/isbn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/isbn", + "status": "DRAFT", + "order": 69, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.014Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Standard Book Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/isbn/members/" + }, + { + "id": "9fd8d7cb-aae9-435b-967e-5e949468774b", + "codeValue": "issn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/issn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/issn", + "status": "DRAFT", + "order": 70, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.035Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Standard 
Serial Number" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/issn/members/" + }, + { + "id": "4abf013c-40a8-4b86-a8ba-b77376bdf42f", + "codeValue": "istc", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/istc", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/istc", + "status": "DRAFT", + "order": 71, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.055Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "International Standard Text Code" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/istc/members/" + }, + { + "id": "eab5a081-0de4-4bcc-b7cf-da3baf527ff7", + "codeValue": "lissn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/lissn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lissn", + "status": "DRAFT", + "order": 72, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.075Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "The linking ISSN" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lissn/members/" + }, + { + "id": "3f02f4e2-602d-446c-b3b6-b116074b27ac", + "codeValue": "lsid", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/lsid", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lsid", + "status": "DRAFT", + "order": 73, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.095Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Life Science Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/lsid/members/" + }, + { + "id": "64caad08-8e10-4159-916a-8abdb31c5d4b", + "codeValue": "pmid", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/pmid", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/pmid", + "status": "DRAFT", + "order": 74, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.115Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "PubMed Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/pmid/members/" + }, + { + "id": "ff5bd61a-e1b1-49a2-8f55-768c9e0ef35d", + "codeValue": "purl", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/purl", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/purl", + "status": "DRAFT", + "order": 75, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.135Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Persistent Uniform Resource Locator" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/purl/members/" + }, + { + "id": "f2ab3578-ba82-4fd6-ab19-8acf297d0b3d", + "codeValue": "upc", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/upc", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/upc", + "status": "DRAFT", + "order": 76, + 
"hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.161Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Universal Product Code" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/upc/members/" + }, + { + "id": "f75192b8-8c42-4a6d-bb2c-927963a4ebb0", + "codeValue": "url", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/url", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/url", + "status": "DRAFT", + "order": 77, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.185Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Universal Resource Locator" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/url/members/" + }, + { + "id": "2914a977-931d-43e1-b5df-e470f1e71378", + "codeValue": "uri", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/uri", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/uri", + "status": "DRAFT", + "order": 78, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.206Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Universal Resource Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/uri/members/" + }, + { + "id": "8c9ca578-c4cd-4efe-be79-124f3aca9e44", + "codeValue": "urn", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/urn", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/urn", + "status": "DRAFT", + "order": 79, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.226Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Uniform Resource Name" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/urn/members/" + }, + { + "id": "07dbe700-dd43-4660-8759-1850e33622ba", + "codeValue": "orcid", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/orcid", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/orcid", + "status": "DRAFT", + "order": 80, + "hierarchyLevel": 1, + "created": "2018-09-28T10:54:40.247Z", + "modified": "2018-11-12T09:47:41.116Z", + "prefLabel": { + "en": "Open Researcher and Contributor Identifier" + }, + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/orcid/members/" + }, + { + "id": "1e985250-971b-4955-9992-0201e30e2d79", + "codeValue": "virta", + "uri": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/virta", + "url": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/virta", + "status": "DRAFT", + "order": 81, + "hierarchyLevel": 1, + "created": "2020-11-05T07:53:40.581Z", + "modified": "2020-11-05T07:53:40.581Z", + "statusModified": "2020-11-05T07:53:40.581Z", + "prefLabel": { + "en": "VIRTA ID for publication", + "fi": "VIRTA ID julkaisulle" + }, + "shortName": "VIRTA-ID", + "membersUrl": "https://koodistot.suomi.fi/codelist-api/api/v1/coderegistries/fairdata/codeschemes/identifier_type/codes/virta/members/" + } + ] +} diff --git a/metadata_backend/conf/ena_schemas.json 
b/metadata_backend/conf/schemas.json similarity index 68% rename from metadata_backend/conf/ena_schemas.json rename to metadata_backend/conf/schemas.json index 273b020b8..e2882f633 100644 --- a/metadata_backend/conf/ena_schemas.json +++ b/metadata_backend/conf/schemas.json @@ -1,81 +1,101 @@ -{"submission": - { "priority": 1, +{ + "submission": { + "priority": 1, "description": { "title": "Submission", - "detail": "A submission contains submission actions to be performed by the archive. A submission can add more objects to the archive, update already submitted objects or make objects publicly available.", + "detail": "A submission contains submission actions to be performed by the storage. A submission can add more objects to the storage, update already submitted objects or make objects publicly available.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.submission.xsd" - } + }, + "provider": "ENA" }, - "study": - { "priority": 2, + "study": { + "priority": 2, "description": { "title": "Study", - "detail": "A study groups together data submitted to the archive. A study accession is typically used when citing data submitted to ENA. Note that all associated data and other objects are made public when the study is released.", + "detail": "A study groups together data submitted to the storage. A study accession is typically used when citing data submitted to ENA. Note that all associated data and other objects are made public when the study is released.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.study.xsd" - } + }, + "provider": "ENA" }, - "project": - { "priority": 3, + "project": { + "priority": 3, "description": { "title": "Project", - "detail": "A project groups together data submitted to the archive. A project accession is typically used when citing data submitted to ENA. Note that all associated data and other objects are made public when the project is released.", + "detail": "A project groups together data submitted to the storage. A project accession is typically used when citing data submitted to ENA. Note that all associated data and other objects are made public when the project is released.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/ENA.project.xsd" - } + }, + "provider": "ENA" }, - "sample": - { "priority": 4, + "sample": { + "priority": 4, "description": { "title": "Sample", "detail": "A sample contains information about the sequenced source material. 
Samples are typically associated with checklists, which define the fields used to annotate the samples.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.sample.xsd" - } + }, + "provider": "ENA" }, - "experiment": - { "priority": 5, + "experiment": { + "priority": 5, "description": { "title": "Experiment", "detail": "An experiment contain information about a sequencing experiment including library and instrument details.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.experiment.xsd" - } + }, + "provider": "ENA" }, - "run": - { "priority": 6, + "run": { + "priority": 6, "description": { "title": "Run", "detail": "A run is part of an experiment and refers to data files containing sequence reads.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.run.xsd" - } + }, + "provider": "ENA" }, - "analysis": - { "priority": 7, + "analysis": { + "priority": 7, "description": { "title": "Analysis", "detail": "An analysis contains secondary analysis results derived from sequence reads (e.g. a genome assembly),", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/SRA.analysis.xsd" - } + }, + "provider": "ENA" }, - "dac": - { "priority": 8, + "dac": { + "priority": 8, "description": { "title": "DAC", "detail": "An European Genome-phenome Archive (EGA) data access committee (DAC) is required for authorized access submissions.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/EGA.dac.xsd" - } + }, + "provider": "ENA" }, - "policy": - { "priority": 9, + "policy": { + "priority": 9, "description": { "title": "Policy", "detail": "An European Genome-phenome Archive (EGA) data access policy is required for authorized access submissions.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/EGA.policy.xsd" - } + }, + "provider": "ENA" }, - "dataset": - { "priority": 10, + "dataset": { + "priority": 10, "description": { "title": "Dataset", "detail": "An European Genome-phenome Archive (EGA) data set is required for authorized access submissions.", "url": "https://github.com/enasequence/schema/blob/master/src/main/resources/uk/ac/ebi/ena/sra/schema/EGA.dataset.xsd" - } + }, + "provider": "ENA" + }, + "datacite": { + "priority": 11, + "description": { + "title": "Datacite DOI schema", + "detail": "Derived from the DataCite Metadata Schema whihc is a list of core metadata properties chosen for an accurate and consistent identification of a resource for citation and retrieval purposes, along with recommended use instructions. We only work with a subset of them for this schema.", + "url": "http://schema.datacite.org/" + }, + "provider": "Datacite" } -} +} \ No newline at end of file diff --git a/metadata_backend/database/db_service.py b/metadata_backend/database/db_service.py index 7e0f9977b..9a119cca1 100644 --- a/metadata_backend/database/db_service.py +++ b/metadata_backend/database/db_service.py @@ -1,11 +1,10 @@ """Services that handle database connections. 
Implemented with MongoDB.""" from functools import wraps -from typing import Any, Callable, Dict, Union, List +from typing import Any, Callable, Dict, List, Union from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorCursor -from pymongo.errors import AutoReconnect, ConnectionFailure from pymongo import ReturnDocument -from pymongo.errors import BulkWriteError +from pymongo.errors import AutoReconnect, BulkWriteError, ConnectionFailure from ..conf.conf import serverTimeout from ..helpers.logger import LOG @@ -35,8 +34,8 @@ async def retry(*args: Any, **kwargs: Any) -> Any: message = f"Connection to database failed after {attempt} tries" raise ConnectionFailure(message=message) LOG.error( - "Connection not successful, trying to reconnect." - f"Reconnection attempt number {attempt}, waiting for {default_timeout} seconds." + "Connection not successful, trying to reconnect. " + + f"Reconnection attempt number {attempt}, waiting for {default_timeout} seconds." ) continue @@ -84,19 +83,31 @@ async def exists(self, collection: str, accession_id: str) -> bool: :param accession_id: ID of the object/folder/user to be searched :returns: True if exists and False if it does not """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in ["folder", "user", "project"]) else "accessionId" projection = {"_id": False, "externalId": False} if collection == "user" else {"_id": False} find_by_id = {id_key: accession_id} exists = await self.database[collection].find_one(find_by_id, projection) LOG.debug(f"DB check exists for {accession_id} in collection {collection}.") return True if exists else False + @auto_reconnect + async def exists_project_by_external_id(self, external_id: str) -> Union[None, str]: + """Check project exists by its external id. + + :param external_id: project external id + :returns: Id if exists and None if it does not + """ + find_by_id = {"externalId": external_id} + project = await self.database["project"].find_one(find_by_id, {"_id": False, "externalId": False}) + LOG.debug(f"DB check project exists for {external_id} returned {project}.") + return project["projectId"] if project else None + @auto_reconnect async def exists_user_by_external_id(self, external_id: str, name: str) -> Union[None, str]: """Check user exists by its eppn. :param eppn: eduPersonPrincipalName to be searched - :returns: True if exists and False if it does not + :returns: Id if exists and None if it does not """ find_by_id = {"externalId": external_id, "name": name} user = await self.database["user"].find_one(find_by_id, {"_id": False, "externalId": False}) @@ -124,7 +135,7 @@ async def read(self, collection: str, accession_id: str) -> Dict: :param accession_id: ID of the object/folder/user to be searched :returns: First document matching the accession_id """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in {"folder", "user"}) else "accessionId" projection = {"_id": False, "eppn": False} if collection == "user" else {"_id": False} find_by_id = {id_key: accession_id} LOG.debug(f"DB doc in {collection} read for {accession_id}.") @@ -150,6 +161,30 @@ async def patch(self, collection: str, accession_id: str, patch_data: List[Dict] LOG.error(bwe.details) return False + @auto_reconnect + async def update_study(self, collection: str, accession_id: str, patch_data: Any) -> bool: + """Update and avoid duplicates for study object. 
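An illustrative JSON Patch payload (accession ID and exact value shape are invented for the example) of the kind FolderOperator.update_folder forwards to patch()/update_study(); jsonpatch_mongo() translates each operation into a Mongo update request.

patch_ops = [
    {
        "op": "add",
        "path": "/metadataObjects/-",
        "value": {"accessionId": "EDAG1234567890123456", "schema": "study"},
    }
]
# ok = await db_service.update_study("folder", folder_id, patch_ops)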
+ + Currently we don't allow duplicate studies in the same folder, + thus we need to check before inserting. Regular Bulkwrite cannot prevent race condition. + + :param collection: Collection where document should be searched from + :param accession_id: ID of the object/folder/user to be updated + :param patch_data: JSON representing the data that should be + updated to object it will update fields. + :returns: True if operation was successful + """ + find_by_id = {f"{collection}Id": accession_id, "metadataObjects.schema": {"$ne": "study"}} + requests = jsonpatch_mongo(find_by_id, patch_data) + for req in requests: + result = await self.database[collection].find_one_and_update( + find_by_id, req._doc, projection={"_id": False}, return_document=ReturnDocument.AFTER + ) + LOG.debug(f"DB doc in {collection} with data: {patch_data} modified for {accession_id}.") + if not result: + return False + return True + @auto_reconnect async def update(self, collection: str, accession_id: str, data_to_be_updated: Dict) -> bool: """Update some elements of object by its accessionId. @@ -160,7 +195,7 @@ async def update(self, collection: str, accession_id: str, data_to_be_updated: D updated to object, can replace previous fields and add new ones. :returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in {"folder", "user"}) else "accessionId" find_by_id = {id_key: accession_id} update_op = {"$set": data_to_be_updated} result = await self.database[collection].update_one(find_by_id, update_op) @@ -177,7 +212,7 @@ async def remove(self, collection: str, accession_id: str, data_to_be_removed: A updated to removed. :returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in ["folder", "user", "project"]) else "accessionId" find_by_id = {id_key: accession_id} remove_op = {"$pull": data_to_be_removed} result = await self.database[collection].find_one_and_update( @@ -196,7 +231,7 @@ async def append(self, collection: str, accession_id: str, data_to_be_addded: An updated to removed. 
:returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in ["folder", "user", "project"]) else "accessionId" find_by_id = {id_key: accession_id} # push vs addtoSet # push allows us to specify the postion but it does not check the items are unique @@ -227,6 +262,9 @@ async def replace(self, collection: str, accession_id: str, new_data: Dict) -> b old_data = await self.database[collection].find_one(find_by_id) if not (len(new_data) == 2 and new_data["content"].startswith("<")): new_data["dateCreated"] = old_data["dateCreated"] + if collection in {"study", "dataset"}: + new_data["metaxIdentifier"] = old_data["metaxIdentifier"] + new_data["doi"] = old_data["doi"] if "publishDate" in old_data: new_data["publishDate"] = old_data["publishDate"] result = await self.database[collection].replace_one(find_by_id, new_data) @@ -241,13 +279,13 @@ async def delete(self, collection: str, accession_id: str) -> bool: :param accession_id: ID for object/folder/user to be deleted :returns: True if operation was successful """ - id_key = f"{collection}Id" if (collection in ["folder", "user"]) else "accessionId" + id_key = f"{collection}Id" if (collection in {"folder", "user"}) else "accessionId" find_by_id = {id_key: accession_id} result = await self.database[collection].delete_one(find_by_id) LOG.debug(f"DB doc in {collection} deleted for {accession_id}.") return result.acknowledged - def query(self, collection: str, query: Dict) -> AsyncIOMotorCursor: + def query(self, collection: str, query: Dict, custom_projection: Dict = {}) -> AsyncIOMotorCursor: """Query database with given query. Find() does no I/O and does not require an await expression, hence @@ -255,10 +293,13 @@ def query(self, collection: str, query: Dict) -> AsyncIOMotorCursor: :param collection: Collection where document should be searched from :param query: query to be used + :param custom_projection: overwrites default projection :returns: Async cursor instance which should be awaited when iterating """ LOG.debug(f"DB doc query performed in {collection}.") projection = {"_id": False, "eppn": False} if collection == "user" else {"_id": False} + if custom_projection: + projection = custom_projection return self.database[collection].find(query, projection) @auto_reconnect diff --git a/metadata_backend/helpers/doi.py b/metadata_backend/helpers/doi.py new file mode 100644 index 000000000..e99c6972a --- /dev/null +++ b/metadata_backend/helpers/doi.py @@ -0,0 +1,99 @@ +"""Tool for registering DOI at DataCite. 
+ +The DOI handler from SDA orchestration was used as reference: +https://github.com/neicnordic/sda-orchestration/blob/master/sda_orchestrator/utils/id_ops.py +""" +from typing import Dict, Union +from uuid import uuid4 + +from aiohttp import web, ClientSession, BasicAuth, ClientTimeout + +from ..helpers.logger import LOG +from ..conf.conf import doi_config + + +class DOIHandler: + """DOI registration methods.""" + + def __init__(self) -> None: + """Get DOI credentials from config.""" + self.doi_api = doi_config["api"] + self.doi_prefix = doi_config["prefix"] + self.doi_user = doi_config["user"] + self.doi_key = doi_config["key"] + self.doi_url = f"{doi_config['url'].rstrip('/')}/{self.doi_prefix}" + self.timeout = ClientTimeout(total=2 * 60) # 2 minutes timeout + self.headers = {"Content-Type": "application/vnd.api+json"} + + async def create_draft(self, prefix: Union[str, None] = None) -> Dict: + """Generate random suffix and POST request a draft DOI to DataCite DOI API. + + :param prefix: Custom prefix to add to the DOI e.g. study/dataset + :raises: HTTPInternalServerError if we the Datacite DOI draft registration fails + :returns: Dictionary with DOI and URL + """ + suffix = uuid4().hex[:10] + doi_suffix = f"{prefix}.{suffix[:4]}-{suffix[4:]}" if prefix else f"{suffix[:4]}-{suffix[4:]}" + # this payload is sufficient to get a draft DOI + doi_payload = {"data": {"type": "dois", "attributes": {"doi": f"{self.doi_prefix}/{doi_suffix}"}}} + + auth = BasicAuth(login=self.doi_user, password=self.doi_key) + async with ClientSession(headers=self.headers, auth=auth) as session: + async with session.post(self.doi_api, json=doi_payload) as response: + if response.status == 201: + draft_resp = await response.json() + full_doi = draft_resp["data"]["attributes"]["doi"] + returned_suffix = draft_resp["data"]["attributes"]["suffix"] + LOG.info(f"DOI draft created with doi: {full_doi}.") + doi_data = { + "fullDOI": full_doi, + "dataset": f"{self.doi_url}/{returned_suffix.lower()}", + } + else: + reason = f"DOI API draft creation request failed with code: {response.status}" + LOG.error(reason) + raise web.HTTPInternalServerError(reason=reason) + + return doi_data + + async def set_state(self, doi_payload: Dict) -> None: + """Set DOI and associated metadata. + + We will only support publish event type, and we expect the data to be + prepared for the update. + Partial updates are possible. + + :param doi_payload: Dictionary with payload to send to Datacite + :raises: HTTPInternalServerError if the Datacite DOI update fails + :returns: None + """ + auth = BasicAuth(login=self.doi_user, password=self.doi_key) + async with ClientSession(headers=self.headers, auth=auth) as session: + async with session.put(f"{self.doi_api}/{doi_payload['id']}", json=doi_payload) as response: + if response.status == 200: + _resp = await response.json() + LOG.info(f"Datacite doi {doi_payload['id']} updated ") + LOG.debug(f"Datacite doi {doi_payload['id']} updated, response: {_resp}") + else: + reason = f"DOI API set state request failed with code: {response.status}" + LOG.error(reason) + raise web.HTTPInternalServerError(reason=reason) + + async def delete(self, doi: str) -> None: + """Delete DOI and associated metadata. + + Datacite only support deleting draft DOIs. 
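A minimal usage sketch of DOIHandler, assuming it is driven from the folder publish/delete flow; the function name and example values below are illustrative.

async def _mint_draft_doi() -> str:
    doi_handler = DOIHandler()
    draft = await doi_handler.create_draft(prefix="study")
    # draft resembles {"fullDOI": "10.xxxx/study.ab12-cd34", "dataset": "https://doi.org/10.xxxx/study.ab12-cd34"}
    return draft["fullDOI"]
# A draft that is never published can later be removed with: await doi_handler.delete(draft["fullDOI"])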
+ + :param doi: identifier to be utilized for deleting draft DOI + :raises: HTTPInternalServerError if we the Datacite draft DOI delete fails + :returns: None + """ + auth = BasicAuth(login=self.doi_user, password=self.doi_key) + async with ClientSession(headers=self.headers, auth=auth) as session: + async with session.delete(f"{self.doi_api}/{doi}") as response: + if response.status == 204: + LOG.info(f"Datacite doi {doi} deleted.") + else: + reason = f"DOI API delete request failed with code: {response.status}" + LOG.error(reason) + raise web.HTTPInternalServerError(reason=reason) diff --git a/metadata_backend/helpers/logger.py b/metadata_backend/helpers/logger.py index ffb86ed51..797aa58db 100644 --- a/metadata_backend/helpers/logger.py +++ b/metadata_backend/helpers/logger.py @@ -1,12 +1,12 @@ """Logging formatting and functions for debugging.""" -import json +import ujson import logging from typing import Any, Dict import os FORMAT = ( - "[%(asctime)s][%(name)s][%(process)d %(processName)s]" "[%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" + "[%(asctime)s][%(name)s][%(process)d %(processName)s] [%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" ) logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") @@ -31,4 +31,4 @@ def pprint_json(content: Dict) -> None: :param content: JSON-formatted content to be printed """ - LOG.info(json.dumps(content, indent=4)) + LOG.info(ujson.dumps(content, indent=4, escape_forward_slashes=False)) diff --git a/metadata_backend/helpers/metax_api_handler.py b/metadata_backend/helpers/metax_api_handler.py new file mode 100644 index 000000000..2ccbaea81 --- /dev/null +++ b/metadata_backend/helpers/metax_api_handler.py @@ -0,0 +1,340 @@ +"""Class for handling calls to METAX API.""" +from typing import Any, Dict, List + +from aiohttp import BasicAuth, ClientSession +from aiohttp.web import HTTPBadRequest, HTTPError, HTTPForbidden, HTTPNotFound, Request + +from ..api.middlewares import get_session +from ..api.operators import UserOperator +from ..conf.conf import metax_config +from .logger import LOG +from .metax_mapper import MetaDataMapper + + +class MetaxServiceHandler: + """API handler for uploading submitter's metadata to METAX service.""" + + def __init__(self, req: Request) -> None: + """Define variables and paths. + + Define variables and paths used for connecting to Metax API and + default inputs for Metax Dataset creation. 
+ + :param req: HTTP request from calling service + """ + self.req = req + self.db_client = self.req.app["db_client"] + self.auth = BasicAuth(metax_config["username"], metax_config["password"]) + self.metax_url = metax_config["url"] + self.rest_route = metax_config["rest_route"] + self.publish_route = metax_config["publish_route"] + catalog_pid = metax_config["catalog_pid"] + + self.minimal_dataset_template: Dict[Any, Any] = { + "data_catalog": catalog_pid, + "metadata_provider_org": "csc.fi", + "research_dataset": { + # submitter given DOI + "preferred_identifier": "", + "title": {"en": ""}, + # study abstract or dataset description + "description": {"en": ""}, + # default + "access_rights": { + "access_type": { + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type", + "identifier": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted", + } + }, + # default + "publisher": { + "name": { + "en": "CSC Sensitive Data Services for Research", + "fi": "CSC:n Arkaluonteisen datan palveluiden aineistokatalogi", + }, + "@type": "Organization", + }, + }, + } + + async def get_metadata_provider_user(self) -> str: + """Get current user's external id. + + returns: current users external ID + """ + current_user = get_session(self.req)["user_info"] + user_op = UserOperator(self.db_client) + user = await user_op.read_user(current_user) + metadata_provider_user = user["externalId"] + return metadata_provider_user + + async def post_dataset_as_draft(self, collection: str, data: Dict) -> str: + """Send draft dataset to Metax. + + Construct Metax dataset data from submitters' Study or Dataset and + send it as new draft dataset to Metax Dataset API. + + :param collection: schema of incomming submitters metadata + :param data: validated Study or Dataset data dict + :raises: HTTPError depending on returned error from Metax + :returns: Metax ID for dataset returned by Metax API + """ + LOG.debug( + f"Creating draft dataset to Metax service from Submitter {collection} with accession ID " + f"{data['accessionId']}" + ) + metax_dataset = self.minimal_dataset_template + metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() + if collection == "dataset": + dataset_data = self.create_metax_dataset_data_from_dataset(data) + else: + dataset_data = self.create_metax_dataset_data_from_study(data) + metax_dataset["research_dataset"] = dataset_data + async with ClientSession() as sess: + resp = await sess.post( + f"{self.metax_url}{self.rest_route}", + params="draft", + json=metax_dataset, + auth=self.auth, + ) + status = resp.status + if status == 201: + metax_data = await resp.json() + LOG.info(f"Created Metax draft dataset {metax_data['identifier']}") + LOG.debug(f"Created Metax draft dataset {metax_data['identifier']} with data: {metax_dataset}.") + metax_id = metax_data["identifier"] + else: + reason = await resp.text() + raise self.process_error(status, reason) + + # Metax service overwrites preferred id (DOI) with temporary id for draft datasets + # Patching dataset with full research_dataset data updates preferred id to the real one + async with ClientSession() as sess: + resp = await sess.patch( + f"{self.metax_url}{self.rest_route}/{metax_id}", + json={"research_dataset": dataset_data}, + auth=self.auth, + ) + status = resp.status + if status == 200: + metax_data = await resp.json() + LOG.info("Updated Metax draft dataset with permanent preferred identifier.") + LOG.debug( + f"Updated Metax draft dataset {metax_data['identifier']} with permanent preferred " + 
"identifier." + ) + return metax_id + else: + reason = await resp.text() + raise self.process_error(status, reason) + + async def update_draft_dataset(self, collection: str, data: Dict) -> str: + """Update draft dataset to Metax. + + Construct Metax draft dataset data from submitters' Study or Dataset and + send it to Metax Dataset API for update. + + :param collection: schema of incomming submitters metadata + :param data: validated Study or Dataset data dict + :raises: HTTPError depending on returned error from Metax + :returns: Metax ID for dataset returned by Metax API + """ + LOG.info(f"Updating Metax draft dataset {data['metaxIdentifier']}") + metax_dataset = self.minimal_dataset_template + metax_dataset["metadata_provider_user"] = await self.get_metadata_provider_user() + if collection == "dataset": + dataset_data = self.create_metax_dataset_data_from_dataset(data) + else: + dataset_data = self.create_metax_dataset_data_from_study(data) + metax_dataset["research_dataset"] = dataset_data + + async with ClientSession() as sess: + resp = await sess.put( + f'{self.metax_url}{self.rest_route}/{data["metaxIdentifier"]}', + params="draft", + json=metax_dataset, + auth=self.auth, + ) + status = resp.status + if status == 200: + metax_data = await resp.json() + LOG.debug(f"Updated Metax draft dataset with ID {metax_data['identifier']} with data: {metax_dataset}") + return metax_data["identifier"] + else: + reason = await resp.text() + raise self.process_error(status, reason) + + async def delete_draft_dataset(self, metax_id: str) -> None: + """Delete draft dataset from Metax service. + + :param metax_id: Identification string pointing to Metax dataset to be deleted + """ + LOG.info(f"Deleting Metax draft dataset {metax_id}") + async with ClientSession() as sess: + resp = await sess.delete( + f"{self.metax_url}{self.rest_route}/{metax_id}", + auth=self.auth, + ) + status = resp.status + if status == 204: + LOG.debug(f"Deleted draft dataset {metax_id} from Metax service") + else: + reason = await resp.text() + raise self.process_error(status, reason) + + async def update_dataset_with_doi_info(self, doi_info: Dict, _metax_ids: List) -> None: + """Update dataset for publishing. 
+ + :param doi_info: Dict containing info to complete metax dataset metadata + :param metax_id: Metax id of dataset to be updated + """ + LOG.info( + "Updating metadata with datacite info for Metax datasets: " + f"{','.join([id['metaxIdentifier'] for id in _metax_ids])}" + ) + bulk_data = [] + for id in _metax_ids: + async with ClientSession() as sess: + resp = await sess.get( + f"{self.metax_url}{self.rest_route}/{id['metaxIdentifier']}", + auth=self.auth, + ) + status = resp.status + if status == 200: + metax_data = await resp.json() + else: + reason = await resp.text() + raise self.process_error(status, reason) + + # Map fields from doi info to Metax schema + mapper = MetaDataMapper(metax_data["research_dataset"], doi_info) + # creator is required field + mapped_metax_data = mapper.map_metadata() + bulk_data.append({"identifier": id["metaxIdentifier"], "research_dataset": mapped_metax_data}) + + async with ClientSession() as sess: + resp = await sess.patch( + f"{self.metax_url}{self.rest_route}", + json=bulk_data, + auth=self.auth, + ) + if resp.status == 200: + LOG.debug("Objects metadata are updated to Metax for publishing") + return await resp.json() + else: + reason = await resp.text() + raise self.process_error(status, reason) + + async def publish_dataset(self, _metax_ids: List[Dict]) -> None: + """Publish draft dataset to Metax service. + + Iterate over the metax ids that need to be published. + + :param _metax_ids: List of metax IDs that include study and datasets + """ + LOG.info(f"Publishing Metax datasets {','.join([id['metaxIdentifier'] for id in _metax_ids])}") + for object in _metax_ids: + metax_id = object["metaxIdentifier"] + doi = object["doi"] + async with ClientSession() as sess: + resp = await sess.post( + f"{self.metax_url}{self.publish_route}", + params={"identifier": metax_id}, + auth=self.auth, + ) + status = resp.status + if status == 200: + preferred_id = await resp.json() + if doi != preferred_id["preferred_identifier"]: + LOG.warning( + f"Metax Preferred Identifier {preferred_id['preferred_identifier']} " + f"does not match object's DOI {doi}" + ) + LOG.debug( + f"Object with metax ID {object['metaxIdentifier']} and DOI {object['doi']} is " + "published to Metax service." + ) + else: + reason = await resp.text() + raise self.process_error(status, reason) + LOG.debug(f"Metax ID {object['metaxIdentifier']} was published to Metax service.") + + def create_metax_dataset_data_from_study(self, data: Dict) -> Dict: + """Construct Metax dataset's research dataset dictionary from Submitters Study. + + :param data: Study data + :returns: constructed research dataset + """ + research_dataset = self.minimal_dataset_template["research_dataset"] + research_dataset["preferred_identifier"] = data["doi"] + research_dataset["title"]["en"] = data["descriptor"]["studyTitle"] + research_dataset["description"]["en"] = data["descriptor"]["studyAbstract"] + LOG.debug(f"Created Metax dataset from Study with data: {research_dataset}") + return research_dataset + + def create_metax_dataset_data_from_dataset(self, data: Dict) -> Dict: + """Construct Metax dataset's research dataset dictionary from Submitters Dataset. 
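# For an (invented) accepted Study document like
#   {"doi": "10.xxxx/ab12-cd34", "descriptor": {"studyTitle": "Example study", "studyAbstract": "Short abstract."}}
# create_metax_dataset_data_from_study above fills the template roughly as:
#   research_dataset["preferred_identifier"] == "10.xxxx/ab12-cd34"
#   research_dataset["title"]["en"] == "Example study"
#   research_dataset["description"]["en"] == "Short abstract."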
+ + :param data: Dataset data + :returns: constructed research dataset + """ + research_dataset = self.minimal_dataset_template["research_dataset"] + research_dataset["preferred_identifier"] = data["doi"] + research_dataset["title"]["en"] = data["title"] + research_dataset["description"]["en"] = data["description"] + LOG.debug(f"Created Metax dataset from Dataset with data: {research_dataset}") + return research_dataset + + def map_creators(self, creators: List) -> List: + """Map creators. + + :param submitter_data: Data comming from metadata submitter + :returns: Constructed creator data for Metax + """ + + metax_creators = [] + for creator in creators: + metax_creator: Dict[str, Any] = { + "name": "", + "@type": "Person", + "member_of": {"name": {"en": ""}, "@type": "Organization"}, + "identifier": "", + } + metax_creator["name"] = creator["name"] + metax_creator["@type"] = "Person" + # Metax schema accepts only one affiliation per creator + # so we take first one + if creator.get("affiliation", None): + affiliation = creator["affiliation"][0] + metax_creator["member_of"]["name"]["en"] = affiliation["name"] + metax_creator["member_of"]["@type"] = "Organization" + if affiliation.get("affiliationIdentifier"): + metax_creator["member_of"]["identifier"] = affiliation["affiliationIdentifier"] + else: + metax_creator.pop("member_of") + # Metax schema accepts only one identifier per creator + # so we take first one + if creator.get("nameIdentifiers", None) and creator["nameIdentifiers"][0].get("nameIdentifier", None): + metax_creator["identifier"] = creator["nameIdentifiers"][0]["nameIdentifier"] + else: + metax_creator.pop("identifier") + metax_creators.append(metax_creator) + return metax_creators + + # we dont know exactly what is comming from Metax so we try it all + def process_error(self, status: int, resp_json: str) -> HTTPError: + """Construct Metax dataset's research dataset dictionary from Submitters Dataset. + + :param status: Status code of the HTTP exception + :param resp_json: Response mesage for returning exeption + :returns: HTTP error depending on incomming status + """ + LOG.error(resp_json) + if status == 400: + return HTTPBadRequest(reason=resp_json) + if status == 403: + return HTTPForbidden(reason=resp_json) + if status == 404: + return HTTPNotFound(reason=resp_json) + else: + return HTTPError(reason=resp_json) diff --git a/metadata_backend/helpers/metax_mapper.py b/metadata_backend/helpers/metax_mapper.py new file mode 100644 index 000000000..876a478a3 --- /dev/null +++ b/metadata_backend/helpers/metax_mapper.py @@ -0,0 +1,443 @@ +"""Class for mapping Submitter metadata to Metax metadata.""" +from copy import deepcopy +from datetime import datetime +from typing import Any, Dict, List + +from ..conf.conf import metax_reference_data +from .logger import LOG + + +class MetaDataMapper: + """Methods for mapping submitter's metadata to METAX service metadata. 
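An illustration (all values invented) of how map_creators in the Metax API handler above reshapes DataCite-style creators into the ResearchAgent form expected by Metax.

creators_in = [
    {
        "name": "Creator, Example",
        "affiliation": [{"name": "CSC", "affiliationIdentifier": "https://ror.org/04m8m1253"}],
        "nameIdentifiers": [{"nameIdentifier": "https://orcid.org/0000-0000-0000-0000"}],
    }
]
# map_creators(creators_in) yields approximately:
# [{"name": "Creator, Example", "@type": "Person",
#   "member_of": {"name": {"en": "CSC"}, "@type": "Organization", "identifier": "https://ror.org/04m8m1253"},
#   "identifier": "https://orcid.org/0000-0000-0000-0000"}]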
+ + This helpper class maps data from datacite, study and dataset schemas to Metax research_dataset + schema: + https://raw.githubusercontent.com/CSCfi/metax-api/master/src/metax_api/api/rest/v2/schemas/att_dataset_schema.json + """ + + { + "ResearchDataset": { + # DOI + "preferred_identifier": { + "type": "string", + "format": "uri", + }, + # dates - Modified (date-time+zone) + "modified": { + "type": "string", + "format": "date-time", + }, + # dates - Issued (date) + "issued": { + "type": "string", + "format": "date", + }, + # object - title + "title": { + "type": "object", + "$ref": "#/definitions/langString", + }, + # keywords + "keyword": { + "type": "array", + "items": {"minLength": 1, "type": "string"}, + }, + # object - description/abstract + "description": { + "type": "object", + "$ref": "#/definitions/langString", + }, + # alternateIdentifiers + "other_identifier": { + "type": "array", + "items": { + "notation": { + "description": "Literal value of the identifier", + "type": "string", + }, + "type": { + "description": "a type of the identifier", + "type": "object", + "items": { + "identifier": { + "description": "This is the IRI identifier for the concept", + "type": "string", + "format": "uri", + }, + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + }, + }, + }, + "required": ["notation"], + }, + # CSC / contributors - Distributor + "publisher": { + "type": "object", + "$ref": "#/definitions/ResearchAgent", + }, + # creators + "creator": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # contributors (vs rights_holder, curator) + "contributor": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # TODO: will be implemented later + # describes study from same folder/submission for mapped datasets + "is_output_of": { + "title": "Producer project", + "description": "A project that has caused the dataset to be created", + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Project"}, + }, + # contributor - Rights Holder + # TODO: This can be an organisation at some point + "rights_holder": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # study/dataset type + # not mappable as Metax requires identifier from preconfigured list + "theme": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Concept"}, + }, + # language + # TODO: cannot be mapped as is to Metax unless we take Lexvo schema in to use + "language": { + "type": "array", + "items": { + "type": "object", + "item": { + "title": { + "description": ( + "A name of the Linguistic System. Name is given as localized text from IETF language " + "codes. In case text cannot be localixed 'zxx' or 'und' language codes must be used." + ), + "type": "object", + "$ref": "#/definitions/langString", + }, + "identifier": { + "description": ( + "Recommended best practice is to identify the resource by means of a string conforming " + "to a formal identification system. \n\nAn unambiguous reference to the resource " + "within a given context." + ), + "type": "string", + "format": "uri", + }, + }, + }, + }, + # geoLocations, MUST be WGS84 coordinates, https://epsg.io/4326 + "spatial": { + "geographic_name": { + "description": ( + "A geographic name is a proper noun applied to a spatial object. 
Taking the example used in " + "the relevant INSPIRE data specification (page 18), the following are all valid geographic " + "names for the Greek capital:" + "- Αθήνα (the Greek endonym written in the Greek script)" + "- Athína (the standard Romanisation of the endonym)" + "- Athens (the English language exonym)" + "For INSPIRE-conformant data, provide the metadata for the geographic name using " + "a skos:Concept as a datatype." + ), + "type": "string", + }, + "as_wkt": { + "title": "Geometry", + "description": "Supported format for geometry is WKT string in WGS84 coordinate system.", + "type": "array", + "example": [ + "POLYGON((-122.358 47.653, -122.348 47.649, -122.348 47.658, -122.358 47.658, -122.358 47.653))" + ], + "items": {"minLength": 1, "type": "string"}, + }, + }, + # dates - Collected (date-time+zone) + "temporal": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/PeriodOfTime"}, + }, + # TODO: will be implemented later + # dataset from same folder/submission + "relation": { + "type": "array", + "items": { + "type": "object", + "required": ["relation_type", "entity"], + "item": { + "entity": { + "type": "object", + "item": { + "title": { + "description": "A name given to the resource.", + "type": "object", + "$ref": "#/definitions/langString", + }, + "description": { + "description": "An account of the resource.", + "type": "object", + "$ref": "#/definitions/langString", + }, + "identifier": { + "description": "Recommended best practice is to identify the resource by means of " + "a string conforming to a formal identification system. An unambiguous reference " + "to the resource within a given context.", + "type": "string", + "format": "uri", + }, + "type": { + "description": "Type of the entity, for example: API, Application, News article, " + "paper, post or visualization.", + "type": "object", + "$ref": "#/definitions/Concept", + }, + }, + }, + "relation_type": { + "description": "Role of the influence.", + "type": "object", + "$ref": "#/definitions/Concept", + }, + }, + }, + }, + # TODO: will be implemented later + "field_of_science": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/Concept"}, + }, + # TODO: Need clarification on necessarity of this field + "remote_resources": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/WebResource"}, + }, + # restricted + "access_rights": { + "type": "object", + "$ref": "#/definitions/RightsStatement", + }, + # contributors - Data Curator + "curator": { + "type": "array", + "items": {"type": "object", "$ref": "#/definitions/ResearchAgent"}, + }, + # TODO: will be implemented later + "total_remote_resources_byte_size": { + "type": "integer", + }, + } + } + + def __init__(self, metax_data: Dict, data: Dict) -> None: + """Set variables. + + :param metax_data: Metax research_dataset metadata + """ + self.research_dataset = metax_data + self.datacite_data = data + self.identifier_types = metax_reference_data["identifier_types"] + self.person: Dict[str, Any] = { + "name": "", + "@type": "Person", + "member_of": {"name": {"en": ""}, "@type": "Organization"}, + "identifier": "", + } + + def map_metadata(self) -> Dict[str, Any]: + """Public class for actual mapping of metadata fields. 
+ + :returns: Research dataset + """ + LOG.info("Mapping datasite data to Metax metadata") + LOG.debug(f"Data incomming for mapping: {self.datacite_data}") + for key, value in self.datacite_data.items(): + if key == "creators": + self._map_creators(value) + if key == "keywords": + self.research_dataset["keyword"] = value.split(",") + if key == "contributors": + self._map_contributors(value) + if key == "dates": + self._map_dates(value) + if key == "geoLocations": + self._map_spatial(value) + if key == "alternateIdentifiers": + self._map_other_identifier(value) + return self.research_dataset + + def _map_creators(self, creators: List) -> None: + """Map creators. + + :param creators: Creators data from datacite + """ + LOG.info("Mapping creator") + LOG.debug(creators) + self.research_dataset["creator"] = [] + for creator in creators: + metax_creator = deepcopy(self.person) + metax_creator["name"] = creator["name"] + # Metax schema accepts only one affiliation per creator + # so we take first one + if creator.get("affiliation", None): + affiliation = creator["affiliation"][0] + metax_creator["member_of"]["name"]["en"] = affiliation["name"] + if affiliation.get("affiliationIdentifier"): + metax_creator["member_of"]["identifier"] = affiliation["affiliationIdentifier"] + # Metax schema accepts only one identifier per creator + # so we take first one + else: + del metax_creator["member_of"] + if creator.get("nameIdentifiers", None) and creator["nameIdentifiers"][0].get("nameIdentifier", None): + metax_creator["identifier"] = creator["nameIdentifiers"][0]["nameIdentifier"] + else: + del metax_creator["identifier"] + self.research_dataset["creator"].append(metax_creator) + + def _map_contributors(self, contributors: List) -> None: + """Map contributors. + + :param contributors: Contributors data from + """ + LOG.info("Mapping contributors") + LOG.debug(contributors) + self.research_dataset["contributor"] = [] + self.research_dataset["rights_holder"] = [] + self.research_dataset["curator"] = [] + + for contributor in contributors: + metax_contributor = deepcopy(self.person) + metax_contributor["name"] = contributor["name"] + # Metax schema accepts only one affiliation per creator + # so we take first one + if contributor.get("affiliation", None): + affiliation = contributor["affiliation"][0] + metax_contributor["member_of"]["name"]["en"] = affiliation["name"] + if affiliation.get("affiliationIdentifier"): + metax_contributor["member_of"]["identifier"] = affiliation["affiliationIdentifier"] + else: + del metax_contributor["member_of"] + # Metax schema accepts only one identifier per creator + # so we take first one + if contributor.get("nameIdentifiers", None) and contributor["nameIdentifiers"][0].get( + "nameIdentifier", None + ): + metax_contributor["identifier"] = contributor["nameIdentifiers"][0]["nameIdentifier"] + else: + del metax_contributor["identifier"] + + if contributor.get("contributorType", None): + if contributor["contributorType"] == "DataCurator": + self.research_dataset["curator"].append(metax_contributor) + elif contributor["contributorType"] == "RightsHolder": + self.research_dataset["rights_holder"].append(metax_contributor) + else: + self.research_dataset["contributor"].append(metax_contributor) + + if not self.research_dataset["rights_holder"]: + del self.research_dataset["rights_holder"] + if not self.research_dataset["curator"]: + del self.research_dataset["curator"] + + def _map_dates(self, dates: List) -> None: + """Map dates. 
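A hedged usage sketch of the dispatch above, assuming the package configuration can be imported; metax_data would normally be fetched from Metax and doi_info comes from the submitter's DataCite form data. Keywords are split on commas and a DataCurator contributor is routed to the curator list.

    from metadata_backend.helpers.metax_mapper import MetaDataMapper

    # Hypothetical inputs for illustration only.
    metax_data = {"preferred_identifier": "10.xxxx/abc123", "title": {"en": "Example dataset"}}
    doi_info = {
        "keywords": "genomics,sequencing",
        "contributors": [{"name": "Doe, John", "contributorType": "DataCurator"}],
    }

    research_dataset = MetaDataMapper(metax_data, doi_info).map_metadata()
    # research_dataset["keyword"] == ["genomics", "sequencing"]
    # research_dataset["curator"] holds the DataCurator entry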
+
+        :param dates: Dates data from datacite
+        """
+        LOG.info("Mapping dates")
+        LOG.debug(dates)
+        self.research_dataset["temporal"] = []
+
+        # the date format must be normalised
+        for date in dates:
+            date_list: List = list(filter(None, date["date"].split("/")))
+            if date["dateType"] == "Issued":
+                if not self.research_dataset.get("issued", None) or datetime.strptime(
+                    self.research_dataset["issued"], "%Y-%m-%d"
+                ) > datetime.strptime(date_list[0], "%Y-%m-%d"):
+                    self.research_dataset["issued"] = date_list[0]
+            if date["dateType"] == "Updated":
+                if not self.research_dataset.get("modified", None) or datetime.strptime(
+                    self.research_dataset["modified"][:10], "%Y-%m-%d"
+                ) < datetime.strptime(date_list[0], "%Y-%m-%d"):
+                    self.research_dataset["modified"] = date_list[-1] + "T00:00:00+03:00"
+            if date["dateType"] == "Collected":
+                # build a fresh dict per collected date so appended entries do not share state
+                temporal_date = {
+                    "start_date": date_list[0] + "T00:00:00+03:00",
+                    "end_date": date_list[-1] + "T00:00:00+03:00",
+                }
+                self.research_dataset["temporal"].append(temporal_date)
+
+        if not self.research_dataset["temporal"]:
+            del self.research_dataset["temporal"]
+
+    def _map_spatial(self, locations: List) -> None:
+        """Map geoLocations.
+
+        If geoLocationPoint or geoLocationBox comes with the location data,
+        the lat/lon coordinates are mapped to a WKT geometric representation.
+        Inputs MUST be WGS84 degree coordinates, as the geographic coordinate system (GCS) is used here.
+
+        :param locations: GeoLocations data from datacite
+        """
+        LOG.info("Mapping locations")
+        LOG.debug(locations)
+
+        spatials = self.research_dataset["spatial"] = []
+        for location in locations:
+            spatial: Dict = {}
+            spatial["as_wkt"] = []
+            if location.get("geoLocationPlace", None):
+                spatial["geographic_name"] = location["geoLocationPlace"]
+            if location.get("geoLocationPoint", None):
+                lat = float(location["geoLocationPoint"]["pointLatitude"])
+                lon = float(location["geoLocationPoint"]["pointLongitude"])
+                spatial["as_wkt"].append(f"POINT({lon} {lat})")
+            if location.get("geoLocationBox", None):
+                west_lon = float(location["geoLocationBox"]["westBoundLongitude"])
+                east_lon = float(location["geoLocationBox"]["eastBoundLongitude"])
+                north_lat = float(location["geoLocationBox"]["northBoundLatitude"])
+                south_lat = float(location["geoLocationBox"]["southBoundLatitude"])
+                spatial["as_wkt"].append(
+                    f"POLYGON(({west_lon} {north_lat}, {east_lon} {north_lat}, "
+                    f"{east_lon} {south_lat}, {west_lon} {south_lat}, {west_lon} {north_lat}))"
+                )
+            if not spatial["as_wkt"]:
+                del spatial["as_wkt"]
+            spatials.append(spatial)
+
+    def _map_other_identifier(self, identifiers: List) -> None:
+        """Map alternateIdentifiers.
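A standalone sketch of the coordinate-to-WKT conversion used above, with hypothetical WGS84 decimal degree values; note that WKT lists longitude before latitude.

    # Hypothetical geoLocation values in WGS84 decimal degrees.
    point = {"pointLatitude": "60.2", "pointLongitude": "24.9"}
    box = {
        "westBoundLongitude": "24.0", "eastBoundLongitude": "25.0",
        "southBoundLatitude": "60.0", "northBoundLatitude": "61.0",
    }

    lat, lon = float(point["pointLatitude"]), float(point["pointLongitude"])
    print(f"POINT({lon} {lat})")  # POINT(24.9 60.2)

    w, e = float(box["westBoundLongitude"]), float(box["eastBoundLongitude"])
    s, n = float(box["southBoundLatitude"]), float(box["northBoundLatitude"])
    # Polygon ring traced clockwise and closed back at the starting corner.
    print(f"POLYGON(({w} {n}, {e} {n}, {e} {s}, {w} {s}, {w} {n}))")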
+ + :param identifiers: Alternate identifiers data from datacite + """ + LOG.info("Mapping alternate identifiers") + LOG.debug(identifiers) + self.research_dataset["other_identifier"] = [] + other_identifier: Dict[str, Any] = { + "type": { + "identifier": "", + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + }, + "notation": "", + } + for identifier in identifiers: + other_identifier["notation"] = identifier["alternateIdentifier"] + + type = self.identifier_types[identifier["alternateIdentifierType"].lower()] + + other_identifier["type"]["identifier"] = type + + self.research_dataset["other_identifier"].append(other_identifier) diff --git a/metadata_backend/helpers/parser.py b/metadata_backend/helpers/parser.py index ed8177412..aa66980c4 100644 --- a/metadata_backend/helpers/parser.py +++ b/metadata_backend/helpers/parser.py @@ -1,15 +1,17 @@ -"""Tool to parse XML files to JSON.""" +"""Tool to parse XML and CSV files to JSON.""" +import csv import re -from typing import Any, Dict, List, Union +from io import StringIO +from typing import Any, Dict, List, Optional, Type, Union from aiohttp import web +from pymongo import UpdateOne from xmlschema import XMLSchema, XMLSchemaConverter, XMLSchemaException, XsdElement, XsdType from .logger import LOG from .schema_loader import SchemaNotFoundException, XMLSchemaLoader from .validator import JSONValidator, XMLValidator -from pymongo import UpdateOne class MetadataXMLConverter(XMLSchemaConverter): @@ -21,7 +23,13 @@ class MetadataXMLConverter(XMLSchemaConverter): https://github.com/enasequence/schema/tree/master/src/main/resources/uk/ac/ebi/ena/sra/schema """ - def __init__(self, namespaces: Any = None, dict_class: dict = None, list_class: list = None, **kwargs: Any) -> None: + def __init__( + self, + namespaces: Any = None, + dict_class: Optional[Type[Dict[str, Any]]] = None, + list_class: Optional[Type[List[Any]]] = None, + **kwargs: Any, + ) -> None: """Initialize converter and settings. :param namespaces: Map from namespace prefixes to URI. @@ -44,7 +52,7 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: :param schema_type: XML data :returns: XML element flattened. 
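A hedged illustration of the alternate-identifier mapping above; the identifier_types lookup below is a hypothetical stand-in for the metax_reference_data entry loaded from configuration, keyed by the lower-cased DataCite identifier type.

    # Hypothetical subset of the reference data; real values come from conf.metax_reference_data.
    identifier_types = {"doi": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/doi"}

    identifier = {"alternateIdentifier": "10.xxxx/abc123", "alternateIdentifierType": "DOI"}

    other_identifier = {
        "notation": identifier["alternateIdentifier"],
        "type": {
            "identifier": identifier_types[identifier["alternateIdentifierType"].lower()],
            "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type",
        },
    }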
""" - links = [ + links = { "studyLinks", "sampleLinks", "runLinks", @@ -56,7 +64,7 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: "datasetLinks", "assemblyLinks", "submissionLinks", - ] + } attrs = [ "studyAttributes", @@ -74,14 +82,14 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: "dataUses", ] - refs = ["analysisRef", "sampleRef", "runRef", "experimentRef"] + refs = {"analysisRef", "sampleRef", "runRef", "experimentRef"} - children = self.dict() + children: Any = self.dict() for key, value, _ in self.map_content(data.content): key = self._to_camel(key.lower()) - if key in attrs and len(value) == 1: + if key in set(attrs) and len(value) == 1: attrs = list(value.values()) children[key] = attrs[0] if isinstance(attrs[0], list) else attrs continue @@ -98,12 +106,23 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: continue if "assembly" in key: - if next(iter(value)) in ["standard", "custom"]: + if next(iter(value)) in {"standard", "custom"}: children[key] = next(iter(value.values())) + if "accessionId" in children[key]: + children[key]["accession"] = children[key].pop("accessionId") else: children[key] = value continue + if key == "sequence": + if "sequence" not in children: + children[key] = list() + children[key].append(value) + for d in children[key]: + if "accessionId" in d: + d["accession"] = d.pop("accessionId") + continue + if "analysisType" in key: children[key] = value continue @@ -125,6 +144,21 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: children["files"] = value["files"] continue + if "processing" in key: + if not bool(value): + continue + + if "pipeSection" in key: + if "pipeSection" not in children: + children[key] = list() + children[key].append(value) + continue + + if "prevStepIndex" in key: + if not bool(value): + children[key] = None + continue + if "spotDescriptor" in key: children[key] = value["spotDecodeSpec"] continue @@ -147,6 +181,19 @@ def _flatten(self, data: Any) -> Union[Dict, List, str, None]: LOG.error(reason) raise web.HTTPBadRequest(reason=reason) + if "processing" in key: + if not bool(value): + continue + + if "pipeSection" in key: + children[key] = [value] + continue + + if "prevStepIndex" in key: + if not bool(value): + children[key] = None + continue + if key in links and len(value) == 1: grp = list() if isinstance(value[key[:-1]], dict): @@ -233,6 +280,11 @@ def element_decode( selected - analysisRef, sampleRef, runRef, experimentRef need to be an array - experimentRef in run is an array with maxitems 1 + - if processing is empty do not show it as it is not required + - processing pipeSection should be intepreted as an array + - processing pipeSection prevStepIndex can be None if not specified empty + - if sampleData does not exist (as it can only be added via forms) we will + add it with default gender unknown """ xsd_type = xsd_type or xsd_element.type @@ -245,8 +297,12 @@ def element_decode( if data.attributes: tmp = self.dict((self._to_camel(key.lower()), value) for key, value in self.map_attributes(data.attributes)) + # we add the bool(children) condition as for referenceAlignment + # this is to distinguish between the attributes if "accession" in tmp: tmp["accessionId"] = tmp.pop("accession") + if "sampleName" in tmp and "sampleData" not in tmp: + tmp["sampleData"] = {"gender": "unknown"} if children is not None: if isinstance(children, dict): for key, value in children.items(): @@ -280,12 +336,15 @@ def parse(self, schema_type: str, content: str) -> Dict: reason = 
"Current request could not be processed as the submitted file was not valid" LOG.error(reason) raise web.HTTPBadRequest(reason=reason) - result = schema.to_dict(content, converter=MetadataXMLConverter, decimal_type=float, dict_class=dict)[ - schema_type.lower() - ] - if schema_type.lower() != "submission": - JSONValidator(result, schema_type.lower()).validate - return result + # result is of type: + # Union[Any, List[Any], Tuple[None, List[XMLSchemaValidationError]], + # Tuple[Any, List[XMLSchemaValidationError]], Tuple[List[Any], List[XMLSchemaValidationError]]] + # however we expect any type as it is easier to work with + result: Any = schema.to_dict(content, converter=MetadataXMLConverter, decimal_type=float, dict_class=dict) + _schema_type: str = schema_type.lower() + if _schema_type != "submission": + JSONValidator(result[_schema_type], _schema_type).validate + return result[_schema_type] @staticmethod def _load_schema(schema_type: str) -> XMLSchema: @@ -305,6 +364,73 @@ def _load_schema(schema_type: str) -> XMLSchema: return schema +class CSVToJSONParser: + """Methods to parse and convert data from CSV files to JSON format.""" + + def parse(self, schema_type: str, content: str) -> List: + """Parse a CSV file, convert it to JSON and validate against JSON schema. + + :param schema_type: Schema type of the file to be parsed + :param content: CSV content to be parsed + :returns: CSV parsed to JSON + :raises: HTTPBadRequest if error was raised during parsing or validation + """ + csv_reader = csv.DictReader(StringIO(content), delimiter=",", quoting=csv.QUOTE_NONE) + + _sample_list = { + "title", + "alias", + "description", + "subjectId", + "bioSampleId", + "caseOrControl", + "gender", + "organismPart", + "cellLine", + "region", + "phenotype", + } + + if ( + csv_reader.fieldnames + and schema_type == "sample" + and all(elem in _sample_list for elem in csv_reader.fieldnames) + ): + LOG.debug("sample CSV file has the correct header") + else: + reason = f"{schema_type} does not contain the correct header fields: {_sample_list}" + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + rows = [row for row in csv_reader] + + if not rows: + reason = "CSV file appears to be incomplete. No rows of data were parsed." + LOG.error(reason) + raise web.HTTPBadRequest(reason=reason) + + _parsed = [] + for row in rows: + LOG.debug(f"current row: {row}") + _tmp: Dict[str, Any] = row + # This is required to pass validation against current sample schema + if schema_type == "sample" and "sampleName" not in row: + # Without TaxonID provided we assume the sample relates to + # Homo Sapien which has default TaxonID of 9606 + _tmp["sampleName"] = {"taxonId": 9606} + # if geneder exists we will format it accordingly + if not bool(_tmp["gender"]): + _tmp["sampleData"] = {"gender": "unknown"} + else: + _tmp["sampleData"] = {"gender": _tmp["gender"]} + _tmp.pop("gender") + JSONValidator(_tmp, schema_type.lower()).validate + _parsed.append(_tmp) + + LOG.info(f"CSV was successfully converted to {len(_parsed)} JSON object(s).") + return _parsed + + def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: """Convert JSONpatch object to mongo query. 
@@ -320,7 +446,7 @@ def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: identifier, { "$addToSet": { - op["path"][1:-2]: { + op["path"][1:-2].replace("/", "."): { "$each": op["value"] if isinstance(op["value"], list) else [op["value"]] }, }, @@ -331,6 +457,8 @@ def jsonpatch_mongo(identifier: Dict, json_patch: List[Dict[str, Any]]) -> List: queries.append(UpdateOne(identifier, {"$set": {op["path"][1:].replace("/", "."): op["value"]}})) elif op["op"] == "replace": path = op["path"][1:-2] if op["path"].endswith("/-") else op["path"][1:].replace("/", ".") + if op.get("match", None): + identifier.update(op["match"]) queries.append(UpdateOne(identifier, {"$set": {path: op["value"]}})) return queries diff --git a/metadata_backend/helpers/schema_loader.py b/metadata_backend/helpers/schema_loader.py index 991c6a202..714372ce1 100644 --- a/metadata_backend/helpers/schema_loader.py +++ b/metadata_backend/helpers/schema_loader.py @@ -4,7 +4,7 @@ probably be replaced with database searching in the future. """ -import json +import ujson from abc import ABC, abstractmethod from pathlib import Path from typing import Any @@ -39,9 +39,10 @@ def _identify_file(self, schema_type: str) -> Path: """ schema_type = schema_type.lower() schema_file = None - for file in [x for x in self.path.iterdir()]: + for file in set([x for x in self.path.iterdir()]): if schema_type in file.name and file.name.endswith(self.loader_type): schema_file = file + break if not schema_file: raise SchemaNotFoundException @@ -97,5 +98,5 @@ def get_schema(self, schema_type: str) -> dict: """ file = self._identify_file(schema_type) with file.open() as f: - schema_content = json.load(f) + schema_content = ujson.load(f) return schema_content diff --git a/metadata_backend/helpers/schemas/EGA.dac.xsd b/metadata_backend/helpers/schemas/EGA.dac.xsd index 9a21e580b..d8fe3514c 100644 --- a/metadata_backend/helpers/schemas/EGA.dac.xsd +++ b/metadata_backend/helpers/schemas/EGA.dac.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -23,7 +23,7 @@ - + Short text that can be used to call out DAC records in searches or in displays. diff --git a/metadata_backend/helpers/schemas/EGA.dataset.xsd b/metadata_backend/helpers/schemas/EGA.dataset.xsd index f8c98c0ea..2d2eb426f 100644 --- a/metadata_backend/helpers/schemas/EGA.dataset.xsd +++ b/metadata_backend/helpers/schemas/EGA.dataset.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/EGA.policy.xsd b/metadata_backend/helpers/schemas/EGA.policy.xsd index 04effbfd0..de0f3dc17 100644 --- a/metadata_backend/helpers/schemas/EGA.policy.xsd +++ b/metadata_backend/helpers/schemas/EGA.policy.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/ENA.assembly.xsd b/metadata_backend/helpers/schemas/ENA.assembly.xsd index e6891888d..8dd08f892 100644 --- a/metadata_backend/helpers/schemas/ENA.assembly.xsd +++ b/metadata_backend/helpers/schemas/ENA.assembly.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. 
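A hedged example of the JSON-patch-to-Mongo conversion after the change above, using hypothetical accession ids and field names; with the fix, slashes inside an "add" path are converted to Mongo dot notation before the $addToSet update is built.

    from metadata_backend.helpers.parser import jsonpatch_mongo

    # Hypothetical identifier and patch; the field names are for illustration only.
    identifier = {"accessionId": "EDAG0000000001"}
    json_patch = [
        {"op": "add", "path": "/extraInfo/datasetIdentifiers/-", "value": "10.xxxx/abc123"},
    ]

    queries = jsonpatch_mongo(identifier, json_patch)
    # queries[0] is expected to be an UpdateOne targeting the dotted field
    # "extraInfo.datasetIdentifiers" with {"$addToSet": {...: {"$each": ["10.xxxx/abc123"]}}}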
--> - + diff --git a/metadata_backend/helpers/schemas/ENA.checklist.xsd b/metadata_backend/helpers/schemas/ENA.checklist.xsd index 69838ba91..c9806149c 100644 --- a/metadata_backend/helpers/schemas/ENA.checklist.xsd +++ b/metadata_backend/helpers/schemas/ENA.checklist.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/ENA.project.xsd b/metadata_backend/helpers/schemas/ENA.project.xsd index c5c49a755..18294fa83 100644 --- a/metadata_backend/helpers/schemas/ENA.project.xsd +++ b/metadata_backend/helpers/schemas/ENA.project.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.analysis.xsd b/metadata_backend/helpers/schemas/SRA.analysis.xsd index 57fba41f7..09770c494 100644 --- a/metadata_backend/helpers/schemas/SRA.analysis.xsd +++ b/metadata_backend/helpers/schemas/SRA.analysis.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -349,6 +349,8 @@ + + diff --git a/metadata_backend/helpers/schemas/SRA.common.xsd b/metadata_backend/helpers/schemas/SRA.common.xsd index 14b5b0e82..754f5fa56 100644 --- a/metadata_backend/helpers/schemas/SRA.common.xsd +++ b/metadata_backend/helpers/schemas/SRA.common.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -928,6 +928,7 @@ + @@ -976,7 +977,9 @@ + + @@ -996,6 +999,10 @@ + + + + diff --git a/metadata_backend/helpers/schemas/SRA.experiment.xsd b/metadata_backend/helpers/schemas/SRA.experiment.xsd index 11fded126..e9ee8f5fd 100644 --- a/metadata_backend/helpers/schemas/SRA.experiment.xsd +++ b/metadata_backend/helpers/schemas/SRA.experiment.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.receipt.xsd b/metadata_backend/helpers/schemas/SRA.receipt.xsd index 05382dfd4..6a3a26fd8 100644 --- a/metadata_backend/helpers/schemas/SRA.receipt.xsd +++ b/metadata_backend/helpers/schemas/SRA.receipt.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + @@ -58,10 +58,14 @@ + + - + + + diff --git a/metadata_backend/helpers/schemas/SRA.run.xsd b/metadata_backend/helpers/schemas/SRA.run.xsd index 93a576a4b..bc65c3281 100644 --- a/metadata_backend/helpers/schemas/SRA.run.xsd +++ b/metadata_backend/helpers/schemas/SRA.run.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.sample.xsd b/metadata_backend/helpers/schemas/SRA.sample.xsd index 7866ef6ca..37d94e146 100644 --- a/metadata_backend/helpers/schemas/SRA.sample.xsd +++ b/metadata_backend/helpers/schemas/SRA.sample.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/SRA.study.xsd b/metadata_backend/helpers/schemas/SRA.study.xsd index 9e1725441..15fdce88d 100644 --- a/metadata_backend/helpers/schemas/SRA.study.xsd +++ b/metadata_backend/helpers/schemas/SRA.study.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. 
--> - + diff --git a/metadata_backend/helpers/schemas/SRA.submission.xsd b/metadata_backend/helpers/schemas/SRA.submission.xsd index 6c68cfa7f..9497871c8 100644 --- a/metadata_backend/helpers/schemas/SRA.submission.xsd +++ b/metadata_backend/helpers/schemas/SRA.submission.xsd @@ -10,7 +10,7 @@ ~ specific language governing permissions and limitations under the License. --> - + diff --git a/metadata_backend/helpers/schemas/datacite.json b/metadata_backend/helpers/schemas/datacite.json new file mode 100644 index 000000000..073a79b3e --- /dev/null +++ b/metadata_backend/helpers/schemas/datacite.json @@ -0,0 +1,869 @@ +{ + "type": "object", + "title": "Datacite DOI Registration Information", + "required": [ + "creators", + "subjects", + "keywords" + ], + "properties": { + "creators": { + "type": "array", + "title": "Creators", + "description": "The main researcher(s) involved in producing the data, or the author(s) of the publication.", + "items": { + "type": "object", + "title": "Main researcher(s) involved with data or the author(s) of the publication.", + "required": [ + "givenName", + "familyName" + ], + "properties": { + "givenName": { + "type": "string", + "title": "Given Name" + }, + "familyName": { + "type": "string", + "title": "Family Name" + }, + "name": { + "type": "string", + "description": "This field will be constructed based from Given Name and Family Name.", + "title": "Full name." + }, + "affiliation": { + "type": "array", + "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", + "items": { + "type": "object", + "title": "Affiliation Details", + "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, + "schemeUri": { + "type": "string", + "description": "Name identifier scheme. This will default to https://ror.org/ .", + "title": "URI (location) of the affiliation scheme" + }, + "affiliationIdentifier": { + "type": "string", + "description": "URI location based on the URI scheme of the name identifier this will be pre-filled based on https://ror.org/name.", + "title": "Location of affiliation identifier" + }, + "affiliationIdentifierScheme": { + "type": "string", + "description": "Identifier scheme name. This will default to ROR.", + "title": "Name of affiliation identifier scheme" + } + } + }, + "uniqueItems": true + }, + "nameIdentifiers": { + "type": "array", + "title": "Creator Identifiers", + "description": "Uniquely identifies the creator, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", + "items": { + "type": "object", + "title": "Creator Name Identifier", + "properties": { + "schemeUri": { + "type": "string", + "description": "Depending on the name identifier scheme e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/ .", + "title": "Scheme of the URI (location) of the name identifier", + "format": "uri" + }, + "nameIdentifier": { + "type": "string", + "description": "URI location based on the URI scheme of the name identifier e.g. https://orcid.org/nameIdentifier .", + "title": "URI (location) of name identifier. " + }, + "nameIdentifierScheme": { + "type": "string", + "description": "Identifier scheme name e.g. ORCID, ROR or ISNI .", + "title": "Name of identifier scheme." 
+ } + } + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "subjects": { + "type": "array", + "title": "Subjects", + "description": "Subject, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", + "items": { + "type": "object", + "title": "Subjects", + "required": [ + "subject" + ], + "properties": { + "subject": { + "type": "string", + "title": "FOS identifier", + "enum": [ + "FOS: Agricultural biotechnology", + "FOS: Agricultural sciences", + "FOS: Agriculture, forestry, and fisheries", + "FOS: Animal and dairy science", + "FOS: Arts (arts, history of arts, performing arts, music)", + "FOS: Basic medicine", + "FOS: Biological sciences", + "FOS: Chemical engineering", + "FOS: Chemical sciences", + "FOS: Civil engineering", + "FOS: Clinical medicine", + "FOS: Computer and information sciences", + "FOS: Earth and related environmental sciences", + "FOS: Economics and business", + "FOS: Educational sciences", + "FOS: Electrical engineering, electronic engineering, information engineering", + "FOS: Engineering and technology", + "FOS: Environmental biotechnology", + "FOS: Environmental engineering", + "FOS: Health sciences", + "FOS: History and archaeology", + "FOS: Humanities", + "FOS: Industrial biotechnology", + "FOS: Languages and literature", + "FOS: Law", + "FOS: Materials engineering", + "FOS: Mathematics", + "FOS: Mechanical engineering", + "FOS: Media and communications", + "FOS: Medical and health sciences", + "FOS: Medical biotechnology", + "FOS: Medical engineering", + "FOS: Nano-technology", + "FOS: Natural sciences", + "FOS: Other agricultural sciences", + "FOS: Other engineering and technologies", + "FOS: Other humanities", + "FOS: Other medical sciences", + "FOS: Other natural sciences", + "FOS: Other social sciences", + "FOS: Philosophy, ethics and religion", + "FOS: Physical sciences", + "FOS: Political science", + "FOS: Psychology", + "FOS: Social and economic geography", + "FOS: Social sciences", + "FOS: Sociology", + "FOS: Veterinary science" + ] + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "keywords": { + "type": "string", + "title": "Keywords", + "description": "A list of keywords or tags describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. Multiple keywords can be added, separating them by comma." + }, + "contributors": { + "type": "array", + "title": "Contributors", + "description": "The person(s) responsible for contributing to the development of the dataset.", + "items": { + "type": "object", + "title": "Contributor", + "required": [ + "givenName", + "familyName", + "contributorType" + ], + "properties": { + "givenName": { + "type": "string", + "title": "Given Name" + }, + "familyName": { + "type": "string", + "title": "Family Name" + }, + "name": { + "type": "string", + "description": "This field will be constructed based from Given Name and Family Name.", + "title": "Full name." 
+ }, + "contributorType": { + "type": "string", + "title": "Type of contributor", + "enum": [ + "Contact Person", + "Data Collector", + "Data Curator", + "Data Manager", + "Distributor", + "Editor", + "Producer", + "Project Leader", + "Project Manager", + "Project Member", + "Related Person", + "Research Group", + "Researcher", + "Rights Holder", + "Sponsor", + "Supervisor", + "Work Package Leader", + "Other" + ] + }, + "affiliation": { + "type": "array", + "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", + "items": { + "type": "object", + "title": "Affiliation Details", + "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, + "schemeUri": { + "type": "string", + "description": "Name identifier scheme. This will default to https://ror.org/ .", + "title": "URI (location) of the affiliation scheme" + }, + "affiliationIdentifier": { + "type": "string", + "description": "URI location based on the URI scheme of the name identifier this will be pre-filled based on https://ror.org/name.", + "title": "Location of affiliation identifier" + }, + "affiliationIdentifierScheme": { + "type": "string", + "description": "Identifier scheme name. This will default to ROR.", + "title": "Name of affiliation identifier scheme" + } + } + }, + "uniqueItems": true + }, + "nameIdentifiers": { + "type": "array", + "title": "Contributor Identifiers", + "description": "Uniquely identifies the contributor, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", + "items": { + "type": "object", + "title": "Contributor Name Identifier", + "properties": { + "schemeUri": { + "type": "string", + "description": "Depending on the name identifier scheme e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/ .", + "title": "Scheme of the URI (location) of the name identifier", + "format": "uri" + }, + "nameIdentifier": { + "type": "string", + "description": "URI location based on the URI scheme of the name identifier e.g. https://orcid.org/nameIdentifier .", + "title": "URI (location) of name identifier. " + }, + "nameIdentifierScheme": { + "type": "string", + "description": "Identifier scheme name e.g. ORCID, ROR or ISNI .", + "title": "Name of identifier scheme." + } + } + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "dates": { + "type": "array", + "title": "Dates", + "description": "List of relevant dates to publication. The type of date can vary and it is recommended to add information if appropriate to distinguish between dates.", + "items": { + "type": "object", + "title": "Date", + "required": [ + "date", + "dateType" + ], + "properties": { + "date": { + "type": "string", + "title": "Date", + "description": "A standard format for a date value." + }, + "dateType": { + "type": "string", + "title": "Date Type", + "description": "Relevance of the date for the resources being submitted.", + "enum": [ + "Accepted", + "Available", + "Collected", + "Copyrighted", + "Created", + "Issued", + "Submitted", + "Updated", + "Valid", + "Withdrawn", + "Other" + ] + }, + "dateInformation": { + "type": "string", + "title": "Date Information", + "description": "Specific information about the date, if appropriate." 
+ } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "descriptions": { + "type": "array", + "title": "Descriptions", + "description": "Additional information about the resources that does not fit in any of the other categories, or general descriptions that are supplemental to the descriptions added for Study or Datasets.", + "items": { + "type": "object", + "title": "description", + "properties": { + "description": { + "type": "string", + "title": "description" + }, + "descriptionType": { + "type": "string", + "title": "Description Type", + "enum": [ + "Abstract", + "Methods", + "Series Information", + "Table Of Contents", + "Technical Info", + "Other" + ] + }, + "lang": { + "type": "string", + "title": "Language" + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "geoLocations": { + "type": "array", + "title": "GeoLocations", + "description": "Spatial region or named place where the data was gathered or where the resources are focused.", + "items": { + "type": "object", + "title": "GeoLocation", + "properties": { + "geoLocationPlace": { + "type": "string", + "title": "Geolocation Place" + }, + "geoLocationPoint": { + "type": "object", + "title": "Geolocation Point", + "description": "A point location in space. A point contains a single longitude-latitude pair.", + "properties": { + "pointLongitude": { + "type": "string", + "title": "Longitude coordinate" + }, + "pointLatitude": { + "type": "string", + "title": "Latitude coordinate" + } + }, + "additionalProperties": false + }, + "geoLocationBox": { + "type": "object", + "title": "Geolocation Box", + "description": "The spatial limits of a box. A box is defined by two geographic points. Left low corner and right upper corner. Each point is defined by its longitude and latitude.", + "properties": { + "westBoundLongitude": { + "type": "string", + "title": "Longitude coordinate of west bound." + }, + "eastBoundLongitude": { + "type": "string", + "title": "Longitude coordinate of east bound." + }, + "southBoundLatitude": { + "type": "string", + "title": "Latitude coordinate of south bound." + }, + "northBoundLatitude": { + "type": "string", + "title": "Latitude coordinate of north bound." 
+ } + } + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "language": { + "type": "string", + "title": "Language", + "description": "Primary language of the submitted resources.", + "enum": [ + "Abkhaz", + "Afar", + "Afrikaans", + "Akan", + "Albanian", + "Amharic", + "Arabic", + "Aragonese", + "Armenian", + "Assamese", + "Avaric", + "Avestan", + "Aymara", + "Azerbaijani", + "Bambara", + "Bashkir", + "Basque", + "Belarusian", + "Bengali", + "Bihari", + "Bislama", + "Bosnian", + "Breton", + "Bulgarian", + "Burmese", + "Catalan", + "Chamorro", + "Chechen", + "Chichewa", + "Chinese", + "Chuvash", + "Cornish", + "Corsican", + "Cree", + "Croatian", + "Czech", + "Danish", + "Divehi", + "Dutch", + "Dzongkha", + "English", + "Esperanto", + "Estonian", + "Ewe", + "Faroese", + "Fijian", + "Finnish", + "French", + "Fula", + "Galician", + "Ganda", + "Georgian", + "German", + "Greek", + "Guaraní", + "Gujarati", + "Haitian", + "Hausa", + "Hebrew", + "Herero", + "Hindi", + "Hiri Motu", + "Hungarian", + "Icelandic", + "Ido", + "Igbo", + "Indonesian", + "Interlingua", + "Interlingue", + "Inuktitut", + "Inupiaq", + "Irish", + "Italian", + "Japanese", + "Javanese", + "Kalaallisut", + "Kannada", + "Kanuri", + "Kashmiri", + "Kazakh", + "Khmer", + "Kikuyu", + "Kinyarwanda", + "Kirundi", + "Komi", + "Kongo", + "Korean", + "Kurdish", + "Kwanyama", + "Kyrgyz", + "Lao", + "Latin", + "Latvian", + "Limburgish", + "Lingala", + "Lithuanian", + "Luba-Katanga", + "Luxembourgish", + "Macedonian", + "Malagasy", + "Malay", + "Malayalam", + "Maltese", + "Manx", + "Māori", + "Marathi", + "Marshallese", + "Mongolian", + "Nauru", + "Navajo", + "Ndonga", + "Nepali", + "Northern Ndebele", + "Northern Sami", + "Norwegian Bokmål", + "Norwegian Nynorsk", + "Norwegian", + "Nuosu", + "Occitan", + "Ojibwe", + "Old Church Slavonic", + "Oriya", + "Oromo", + "Ossetian", + "Pāli", + "Panjabi", + "Pashto", + "Persian", + "Polish", + "Portuguese", + "Quechua", + "Romanian", + "Romansh", + "Russian", + "Samoan", + "Sango", + "Sanskrit", + "Sardinian", + "Scottish Gaelic", + "Serbian", + "Shona", + "Sindhi", + "Sinhala", + "Slovak", + "Slovenian", + "Somali", + "Southern Ndebele", + "Southern Sotho", + "Spanish", + "Sundanese", + "Swahili", + "Swati", + "Swedish", + "Tagalog", + "Tahitian", + "Tajik", + "Tamil", + "Tatar", + "Telugu", + "Thai", + "Tibetan", + "Tigrinya", + "Tonga", + "Tsonga", + "Tswana", + "Turkish", + "Turkmen", + "Twi", + "Ukrainian", + "Urdu", + "Uyghur", + "Uzbek", + "Venda", + "Vietnamese", + "Volapük", + "Walloon", + "Welsh", + "Western Frisian", + "Wolof", + "Xhosa", + "Yiddish", + "Yoruba", + "Zhuang", + "Zulu" + ] + }, + "relatedIdentifiers": { + "type": "array", + "title": "Related Identifiers", + "description": "Information about a resource related to the one being registered, primarily used to provide series information or a text citation where the related resource does not have an identifier. It is also optional to provide such an identifier.", + "items": { + "type": "object", + "title": "Identifier of related resource", + "required": [ + "relatedIdentifier", + "relatedIdentifierType", + "relationType" + ], + "properties": { + "relatedIdentifier": { + "type": "string", + "title": "Identifier", + "description": "These must be globally unique identifiers and correspond to the type selected. e.g. 
10.2022/example.78m9865 for DOI identifier Type" + }, + "relatedIdentifierType": { + "type": "string", + "title": "Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] + }, + "relationType": { + "type": "string", + "title": "Relationship Type", + "enum": [ + "Cites", + "Is Cited By", + "Compiles", + "Is Compiled By", + "Continues", + "Is Continued By", + "Describes", + "Is Described By", + "Documents", + "Is Documented By", + "Is Derived From", + "Is Source Of", + "Has Metadata", + "Is Metadata For", + "Has Part", + "Is Part Of", + "Is Supplemented By", + "Is Supplement To", + "Obsoletes", + "Is Obsoleted By", + "References", + "Is Referenced By", + "Requires", + "Is Required By", + "Reviews", + "Is Reviewed By", + "Has Version", + "Is Version Of", + "Is New version Of", + "Is Previous Version Of", + "Is Published In", + "Is Variant Form Of", + "Is Original Form Of", + "Is Identical To" + ] + }, + "relatedMetadataScheme": { + "type": "string", + "title": "Related Metadata Scheme", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example DDI-L" + }, + "schemeUri": { + "type": "string", + "title": "Related Metadata Scheme URI", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" + }, + "schemeType": { + "type": "string", + "title": "Related Metadata Scheme Type", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" + }, + "resourceTypeGeneral": { + "type": "string", + "title": "Resource Type", + "enum": [ + "Audiovisual", + "Book", + "Book Chapter", + "Collection", + "Computational Notebook", + "Conference Paper", + "Conference Proceeding", + "Data Paper", + "Dataset", + "Dissertation", + "Event", + "Image", + "Interactive Resource", + "Journal", + "Journal Article", + "Model", + "Output Management Plan", + "Peer Review", + "Physical Object", + "Preprint", + "Report", + "Service", + "Software", + "Sound", + "Standard", + "Text", + "Workflow", + "Other" + ] + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "alternateIdentifiers": { + "type": "array", + "title": "Alternate Identifiers", + "description": "An identifier or identifiers other than the primary or related identifier applied to the resources being registered. EGA identifier(s) obtained, that can be used as an alternative to the current resources.", + "items": { + "type": "object", + "title": "Identifier(s) other than the primary identifier of the resources", + "required": [ + "alternateIdentifier", + "alternateIdentifierType" + ], + "properties": { + "alternateIdentifier": { + "type": "string", + "title": "Alternate Identifier" + }, + "alternateIdentifierType": { + "type": "string", + "title": "Alternate Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "sizes": { + "type": "array", + "title": "Sizes", + "description": "Size (e.g., bytes, pages, inches, etc.) or duration (extent), e.g., hours, minutes, days, etc., of a resources. 
Examples: '15 pages', '6 MB', '45 minutes'.", + "items": { + "type": "string", + "title": "Size" + } + }, + "formats": { + "type": "array", + "title": "Formats", + "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will pre-fill some of them based on what was filled in metadata.", + "items": { + "type": "string", + "title": "Format" + } + }, + "fundingReferences": { + "type": "array", + "title": "Funding References", + "description": "It is recommended to supply funding information when financial support has been received. The funder will be identified by their Research Organization Registry (ROR).", + "items": { + "type": "object", + "title": "Information about financial support for producing the resources", + "required": [ + "funderName", + "funderIdentifier", + "funderIdentifierType" + ], + "properties": { + "funderName": { + "type": "string", + "title": "Funder Name" + }, + "funderIdentifier": { + "type": "string", + "title": "Funder Identifier", + "description": "Unique identifier for funding entity" + }, + "funderIdentifierType": { + "type": "string", + "title": "Funder Identity Type", + "description": "The type of funder identifier, one of Crossref Funder ID, GRID, ISNI, or ROR.", + "enum": [ + "Crossref Funder ID", + "GRID", + "ISNI", + "ROR", + "Other" + ] + }, + "awardNumber": { + "type": "string", + "title": "Award Number", + "description": "The code assigned by the funder to a sponsored award." + }, + "awardTitle": { + "type": "string", + "title": "Award Title", + "description": "The human readable title of the award." + }, + "awardUri": { + "type": "string", + "title": "Award URI", + "description": "The URI leading to a page provided by the funder for more information about the award (grant)." + } + }, + "additionalProperties": false + }, + "uniqueItems": true + } + } +} diff --git a/metadata_backend/helpers/schemas/ena_analysis.json b/metadata_backend/helpers/schemas/ena_analysis.json index ac6222aac..087b69d16 100644 --- a/metadata_backend/helpers/schemas/ena_analysis.json +++ b/metadata_backend/helpers/schemas/ena_analysis.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. 
Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/analysisAttribute", "type": "object", "title": "Analysis Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -161,6 +161,8 @@ }, "sequenceType": { "$id": "#/definitions/sequenceType", + "title": "Reference Alignment", + "additionalProperties": true, "type": "object", "properties": { "assembly": { @@ -170,23 +172,27 @@ { "type": "object", "title": "Standard", + "description": "A standard genome assembly.", "required": [ - "accessionId" + "accession" ], "properties": { "refname": { "type": "string", + "description": "A recognized name for the genome assembly.", "title": "Reference name" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory.", + "title": "Accession.version" } } }, { "type": "object", "title": "Custom", + "description": "Other genome assembly.", "required": [ "description" ], @@ -213,20 +219,23 @@ "sequence": { "type": "array", "title": "Sequence", + "description": "Reference sequence details.", "items": { "type": "object", "required": [ - "accessionId" + "accession" ], "additionalProperties": true, "properties": { "label": { "type": "string", + "description": "This is how Reference Sequence is labeled in submission file(s). It is equivalent to SQ label in BAM. 
Optional when submitted file uses INSDC accession.version.", "title": "Label" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory", + "title": "Accession.version" } } } @@ -236,23 +245,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -267,6 +281,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -285,22 +300,21 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { + "accessionId": { "type": "string", - "title": "Label" + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -319,45 +333,49 @@ "properties": { "filename": { "type": "string", - "title": "Filename" + "description": "The name or relative path name of a run data file.", + "title": "File Name" }, "filetype": { "type": "string", - "title": "Filetype", + "description": "The Analysis data file model/type.", + "title": "File type", "enum": [ - "tab", - "bam", + "agp", "bai", - "cram", - "crai", - "vcf", - "vcf_aggregate", + "bam", "bcf", - "tabix", - "wig", "bed", - "gff", + "BioNano native", + "chromosome list", + "crai", + "cram", "fasta", "fastq", "flatfile", - "chromosome_list", - "sample_list", - "readme_file", - "phenotype_file", - "BioNano_native", - "Kallisto_native", - "agp", - "unlocalised_list", + "gff", "info", + "Kallisto native", "manifest", - "other", - "csi", - "bcf_aggregate" + "phenotype file", + "readme file", + "sample list", + "sff", + "sra", + "srf", + "tab", + "tabix", + "unlocalised list", + "vcf aggregate", + "vcf", + "wig", + "other" ] }, "checksumMethod": { "type": "string", "title": "Checksum Method", + "description": "Checksum method used MD5 or SHA-256.", "enum": [ "MD5", "SHA-256" @@ -365,7 +383,13 @@ }, "checksum": { "type": "string", + "description": "Checksum of uncompressed file.", "title": "Checksum" + }, + "unencryptedChecksum": { + "type": "string", + "description": "Checksum of un-encrypted file (used in conjunction with checksum of encrypted file).", + "title": "Un-encrypted Checksum" } } }, @@ -414,7 +438,7 @@ }, "minGapLength": { "type": "number", - "title": "MinGapLength" + "title": "Min Gap Length" }, "molType": { "type": "string", @@ 
-437,13 +461,13 @@ "type": "string", "title": "Type of Sequence Assembly", "enum": [ - "clone or isolate", - "primary metagenome", "binned metagenome", - "Metagenome-Assembled Genome (MAG)", - "Environmental Single-Cell Amplified Genome (SAG)", + "clinical isolate assembly", + "clone or isolate", "COVID-19 outbreak", - "clinical isolate assembly" + "Environmental Single-Cell Amplified Genome (SAG)", + "Metagenome-Assembled Genome (MAG)", + "primary metagenome" ] }, "tpa": { @@ -482,14 +506,14 @@ "type": "string", "title": "Experiment Type", "enum": [ - "Whole genome sequencing", - "Whole transcriptome sequencing", + "Curation", "Exome sequencing", "Genotyping by array", - "transcriptomics", - "Curation", "Genotyping by sequencing", - "Target sequencing" + "Target sequencing", + "transcriptomics", + "Whole genome sequencing", + "Whole transcriptome sequencing" ] }, "imputation": { @@ -504,20 +528,30 @@ { "type": "object", "title": "Standard", + "description": "A standard genome assembly.", + "required": [ + "accession" + ], "properties": { "refname": { "type": "string", + "description": "A recognized name for the genome assembly.", "title": "Reference name" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory.", + "title": "Accession.version" } } }, { "type": "object", "title": "Custom", + "description": "Other genome assembly.", + "required": [ + "description" + ], "properties": { "label": { "type": "string", @@ -541,20 +575,23 @@ "sequence": { "type": "array", "title": "Sequence", + "description": "Reference sequence details.", "items": { "type": "object", "required": [ - "accessionId" + "accession" ], "additionalProperties": true, "properties": { "label": { "type": "string", + "description": "This is how Reference Sequence is labeled in submission file(s). It is equivalent to SQ label in BAM. 
Optional when submitted file uses INSDC accession.version.", "title": "Label" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory", + "title": "Accession.version" } } } @@ -780,17 +817,17 @@ } } }, - "taxonomicRefrenceSet": { - "$id": "#/definitions/taxonomicRefrenceSet", + "taxonomicReferenceSet": { + "$id": "#/definitions/taxonomicReferenceSet", "type": "object", - "title": "Taxonomic Refrence Set", + "title": "Taxonomic Reference Set", "required": [ - "taxonomicRefrenceSet" + "taxonomicReferenceSet" ], "properties": { - "taxonomicRefrenceSet": { + "taxonomicReferenceSet": { "type": "object", - "title": "Taxonomic Refrence Set", + "title": "Taxonomic Reference Set", "properties": { "name": { "type": "string", @@ -892,7 +929,7 @@ "$ref": "#/definitions/transcriptomeAssembly" }, { - "$ref": "#/definitions/taxonomicRefrenceSet" + "$ref": "#/definitions/taxonomicReferenceSet" }, { "$ref": "#/definitions/assemblyAnnotation" @@ -917,20 +954,17 @@ }, "studyRef": { "title": "Study Reference", - "description": "Identifies the associated study.", + "description": "Identifies the associated parent study.", "$ref": "#/definitions/reference" }, "experimentRef": { "title": "Experiment Reference", "description": "Identifies the associated experiment.", - "type": "array", - "items": { - "$ref": "#/definitions/reference" - } + "$ref": "#/definitions/reference" }, "sampleRef": { "title": "Sample Reference", - "description": "Identifies the associated sample.", + "description": "Identifies the associated sample(s).", "type": "array", "items": { "$ref": "#/definitions/reference" @@ -962,6 +996,7 @@ "analysisLinks": { "type": "array", "title": "Analysis Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -969,9 +1004,10 @@ "analysisAttributes": { "type": "array", "title": "Analysis Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/analysisAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_dac.json b/metadata_backend/helpers/schemas/ena_dac.json index 1c6f65ed3..4afa76cc3 100644 --- a/metadata_backend/helpers/schemas/ena_dac.json +++ b/metadata_backend/helpers/schemas/ena_dac.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." 
}, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -152,36 +153,61 @@ "email": { "type": "string", "title": "Contact Email", - "description": "email of the person to contact.", + "description": "Email of the person to contact.", "format": "email" }, "telephoneNumber": { "type": "string", "title": "Contact Telephone Number", - "description": "telephone number of the person to contact.", + "description": "Telephone number of the person to contact.", "pattern": "^[-a-zA-Z0-9-()+ ]*" }, "organisation": { "type": "string", "title": "Organisation", - "description": "Center or institution name." + "description": "Center or institution name. We will use ROR to suggest an organisation." }, "mainContact": { "type": "boolean", "title": "Main Contact", - "description": "If true then this is the main contact." + "description": "If selected then this is the main contact for the DAC." + } + } + }, + "dacAttribute": { + "$id": "#/definitions/dacAttribute", + "type": "object", + "title": "DAC Attribute", + "required": [ + "tag", + "value" + ], + "properties": { + "tag": { + "type": "string", + "title": "Tag title" + }, + "value": { + "type": "string", + "title": "Description" + }, + "units": { + "type": "string", + "title": "Optional scientific units." } } } }, "type": "object", "required": [ - "contacts" + "contacts", + "title" ], "properties": { "contacts": { "type": "array", "title": "Contacts", + "description": "List of persons that ar part of the Data Access Committee. 
At least one main contact is required.", "items": { "$ref": "#/definitions/contact" }, @@ -192,7 +218,9 @@ "$ref": "#/definitions/contact" }, { - "required": ["mainContact"], + "required": [ + "mainContact" + ], "properties": { "mainContact": { "const": true @@ -204,17 +232,25 @@ "maxContains": 1 }, "title": { - "title": "DAC Description", - "description": "Short text that can be used to call out DAC records in searches or in displays.", - "type": "string", - "minLength": 10 + "title": "DAC Title", + "description": "Title of the Data Access Committee (DAC) that will approve applications to the datasets.", + "type": "string" }, "dacLinks": { "type": "array", "title": "DAC Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } + }, + "dacAttributes": { + "type": "array", + "title": "Study Attributes", + "description": "Properties and attributes of the DAC. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", + "items": { + "$ref": "#/definitions/dacAttribute" + } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_dataset.json b/metadata_backend/helpers/schemas/ena_dataset.json index 489e314a5..d781ce6be 100644 --- a/metadata_backend/helpers/schemas/ena_dataset.json +++ b/metadata_backend/helpers/schemas/ena_dataset.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. 
Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/datasetAttribute", "type": "object", "title": "Dataset Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -162,23 +162,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -193,6 +198,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -211,22 +217,21 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { + "accessionId": { "type": "string", - "title": "Label" + "description": "Identifies a record by its accession. 
The scope of resolution is the entire Storage.", + "title": "Accession Id" }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -235,10 +240,15 @@ }, "type": "object", "description": "Describes an object that contains data access policy information.", + "required": [ + "title", + "description" + ], + "additionalProperties": true, "properties": { "title": { "title": "Dataset Title", - "description": "Short text that can be used to call out data sets in searches or in displays.", + "description": "Title of the Dataset as would be used in a publication.", "type": "string" }, "datasetType": { @@ -247,53 +257,54 @@ "items": { "type": "string", "enum": [ - "Whole genome sequencing", + "Amplicon sequencing", + "Chip-Seq", + "Chromatin accessibility profiling by high-throughput sequencing", "Exome sequencing", + "Genomic variant calling", "Genotyping by array", - "Transcriptome profiling by high-throughput sequencing", - "Transcriptome profiling by array", - "Amplicon sequencing", + "Histone modification profiling by high-throughput sequencing", "Methylation binding domain sequencing", "Methylation profiling by high-throughput sequencing", "Phenotype information", "Study summary information", - "Genomic variant calling", - "Chromatin accessibility profiling by high-throughput sequencing", - "Histone modification profiling by high-throughput sequencing", - "Chip-Seq" + "Transcriptome profiling by array", + "Transcriptome profiling by high-throughput sequencing", + "Whole genome sequencing" ] }, "uniqueItems": true }, "description": { "title": "Dataset Description", - "description": "Free-form text describing the data sets.", + "description": "Free-form text describing the Dataset.", "type": "string" }, "policyRef": { "title": "Policy Reference", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Identifies the data access policy controlling this Dataset.", "$ref": "#/definitions/reference" }, "runRef": { "title": "Run Reference", - "description": "Identifies the runs which are part of this dataset.", + "description": "Identifies the Runs which are part of this Dataset.", "type": "array", "items": { "$ref": "#/definitions/reference" - } + } }, "analysisRef": { "title": "Analysis Reference", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Identifies the Analyses which are part of this Dataset.", "type": "array", "items": { "$ref": "#/definitions/reference" - } + } }, "datasetLinks": { "type": "array", "title": "Dataset Links", + "description": "Used to encode URL links, Entrez links, and xref DB links. These are links used to cross reference with other relevant resources.", "items": { "$ref": "#/definitions/Links" } @@ -301,9 +312,10 @@ "datasetAttributes": { "type": "array", "title": "Dataset Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. 
Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/datasetAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_experiment.json b/metadata_backend/helpers/schemas/ena_experiment.json index 9cf1fcebf..796adf01e 100644 --- a/metadata_backend/helpers/schemas/ena_experiment.json +++ b/metadata_backend/helpers/schemas/ena_experiment.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/experimentAttribute", "type": "object", "title": "Experiment Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -168,7 +168,7 @@ "pool": { "type": "object", "title": "Sample Members", - "description": "Identifies a list of group/pool/multiplex sample members. This implies that this sample record is a group, pool, or multiplex, but it continues to receive its own accession and can be referenced by an experiment. 
By default ifno match to any of the listed members can be determined, then the default sample reference is used.", + "description": "Identifies a list of group/pool/multiplex sample members. This implies that this sample record is a group, pool, or multiplex, but it continues to receive its own accession and can be referenced by an experiment. By default if no match to any of the listed members can be determined, then the default sample reference is used.", "properties": { "defaultMember": { "type": "object", @@ -180,7 +180,6 @@ "title": "Member", "$ref": "#/definitions/poolMemberType" } - } } } @@ -207,8 +206,12 @@ "libraryType": { "$id": "#/definitions/libraryType", "type": "object", - "title": "Library used for experiment design", - "required": ["designDescription", "sampleDescriptor", "libraryDescriptor"], + "title": "Library used for experiment design.", + "required": [ + "designDescription", + "sampleDescriptor", + "libraryDescriptor" + ], "properties": { "designDescription": { "title": "Design Description", @@ -227,10 +230,9 @@ "$ref": "#/definitions/reference" } ] - }, "libraryDescriptor": { - "description": "The LIBRARY_DESCRIPTOR specifies the origin of the material being sequenced and any treatments that the material might have undergone that affect the sequencing result. This specification is needed even if the platform does not require a library construction step per se.", + "description": "The Library Descriptor specifies the origin of the material being sequenced and any treatments that the material might have undergone that affect the sequencing result. This specification is needed even if the platform does not require a library construction step per se.", "title": "Library Descriptor", "type": "object", "required": [ @@ -245,16 +247,17 @@ }, "libraryStrategy": { "title": "Library Strategy", + "description": "Sequencing technique intended for this library.", "type": "string", "enum": [ "AMPLICON", "ATAC-seq", "Bisulfite-Seq", + "ChIA-PET", + "ChIP-Seq", "CLONE", "CLONEEND", "CTS", - "ChIA-PET", - "ChIP-Seq", "DNase-Hypersensitivity", "EST", "FAIRE-seq", @@ -262,15 +265,18 @@ "FL-cDNA", "Hi-C", "MBD-Seq", + "MeDIP-Seq", + "miRNA-Seq", "MNase-Seq", "MRE-Seq", - "MeDIP-Seq", + "ncRNA-Seq", "OTHER", "POOLCLONE", "RAD-Seq", "RIP-Seq", "RNA-Seq", "SELEX", + "ssRNA-seq", "Synthetic-Long-Read", "Targeted-Capture", "Tethered Chromatin Conformation Capture", @@ -279,66 +285,66 @@ "WCS", "WGA", "WGS", - "WXS", - "miRNA-Seq", - "ncRNA-Seq", - "ssRNA-seq" + "WXS" ] }, "librarySource": { "title": "Library Source", + "description": "The Library Source specifies the type of source material that is being sequenced.", "type": "string", "enum": [ - "GENOMIC", "GENOMIC SINGLE CELL", + "GENOMIC", "METAGENOMIC", "METATRANSCRIPTOMIC", "OTHER", "SYNTHETIC", - "TRANSCRIPTOMIC", "TRANSCRIPTOMIC SINGLE CELL", + "TRANSCRIPTOMIC", "VIRAL RNA" ] }, "librarySelection": { "title": "Library Selection", + "description": "Method used to enrich the target in the sequence library preparation.", "type": "string", "enum": [ "5-methylcytidine antibody", "CAGE", - "ChIP", + "cDNA_oligo_dT", + "cDNA_randomPriming", + "cDNA", "ChIP-Seq", + "ChIP", "DNase", "HMPR", "Hybrid Selection", - "Inverse rRNA", "Inverse rRNA selection", + "Inverse rRNA", "MBD2 protein methyl-CpG binding domain", "MDA", "MF", "MNase", "MSLL", "Oligo-dT", + "padlock probes capture method", "PCR", "PolyA", "RACE", - "RANDOM", "RANDOM PCR", - "RT-PCR", + "RANDOM", "Reduced Representation", - "Restriction Digest", - "cDNA", - "cDNA_oligo_dT", - 
"cDNA_randomPriming", - "other", - "padlock probes capture method", "repeat fractionation", + "Restriction Digest", + "RT-PCR", "size fractionation", + "other", "unspecified" ] }, "libraryLayout": { "title": "Library Layout", + "description": "Library Layout specifies whether to expect single, paired, or other configuration of reads. In the case of paired reads, information about the relative distance and orientation is specified.", "type": "string", "enum": [ "single", @@ -373,11 +379,11 @@ "16S rRNA", "18S rRNA", "28S rRNA", - "RBCL", - "matK", "COX1", - "ITS1-5.8S-ITS2", "exome", + "ITS1-5.8S-ITS2", + "matK", + "RBCL", "other" ] }, @@ -401,7 +407,7 @@ } }, "spotDescriptor": { - "description": "The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", + "description": "The spot descriptor specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", "title": "Spot Descriptor", "type": "object", "required": [ @@ -410,6 +416,7 @@ "properties": { "spotLength": { "title": "Spot Length", + "description": "Number of base/color calls, cycles, or flows per spot (raw sequence length or flow length including all application and technical tags and mate pairs, but not including gap lengths). This value will be platform dependent, library dependent, and possibly run dependent. Variable length platforms will still have a constant flow/cycle length.", "type": "number", "minimum": 0 }, @@ -419,11 +426,13 @@ "properties": { "readIndex": { "title": "Read Index", + "description": "Read Index starts at 0 and is incrementally increased for each sequential read_spec within a spot decode specification", "type": "number", "minimum": 0 }, "readLabel": { "title": "Read Label", + "description": "Read Label is a name for this tag, and can be used to on output to determine read name, for example F or R.", "type": "string" }, "readClass": { @@ -436,6 +445,7 @@ }, "readType": { "title": "Read Type", + "description": "", "type": "string", "enum": [ "Forward", @@ -443,21 +453,24 @@ "Adapter", "Primer", "Linker", - "Barcode", + "BarCode", "Other" ] }, "relativeOrder": { "type": "object", "title": "Relative Order", + "description": "The read is located beginning at the offset or cycle relative to another read. 
This choice is appropriate for example when specifying a read that follows a variable length expected sequence(s).", "properties": { "followsReadIndex": { - "title": "Read Index", + "title": "Follows Read Index", "type": "number", + "description": "Specify the read index that precedes this read.", "minimum": 0 }, "precedesReadIndex": { - "title": "Read Index", + "title": "Precedes Read Index", + "description": "Specify the read index that follows this read.", "type": "number", "minimum": 0 } @@ -465,45 +478,69 @@ }, "baseCoord": { "title": "Base Coordinate", + "description": "The location of the read start in terms of base count (1 is beginning of spot).", "type": "number" }, "expectedBaseCallTable": { "title": "Expected Base Call Table", + "description": " A set of choices of expected base calls for a current read. Read will be zero-length if none is found.", "type": "array", "items": { "type": "object", "properties": { "baseCall": { - "title": "Base Call", - "type": "string" + "type": "string", + "description": "Element's body contains a basecall, attribute provide description of this read meaning as well as matching rules.", + "title": "Base Call" }, "readGroupTag": { - "title": "Read group tag", - "type": "string" + "type": "string", + "description": "When match occurs, the read will be tagged with this group membership.", + "title": "Read Group Tag" }, "minMatch": { - "title": "Min match", "type": "number", - "minimum": 0 + "description": " Minimum number of matches to trigger identification.", + "minimum": 0, + "title": "Min Match" }, "maxMisMatch": { - "title": "Max mismatch", + "description": "Maximum number of mismatches.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Max MisMatch" }, "matchEdge": { - "title": "Match edge", - "type": "string" + "description": "Where the match should occur. Changes the rules on how min_match and max_mismatch are counted.", + "type": "string", + "title": "Match Edge" + }, + "full": { + "description": "Only @max_mismatch influences matching process.", + "type": "string", + "title": "Full" + }, + "start": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. When @min_match is reached - match is declared. ", + "type": "string", + "title": "Start" + }, + "end": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. 
When @min_match is reached - match is declared.", + "type": "string", + "title": "End" }, "defaultLength": { - "title": "Default length", + "description": "Specify whether the spot should have a default length for this tag if the expected base cannot be matched.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Default Length" }, "baseCoord": { - "title": "Base coordinate", + "description": "Specify an optional starting point for tag (base offset from 1).", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Base Coordinate" } } } @@ -518,13 +555,12 @@ "$id": "#/definitions/processingType", "type": "object", "title": "Processing Type", + "description": "Information about the processing type such as pipeline and sequencing directives.", "properties": { "pipeline": { "title": "Pipeline", + "description": "The Pipeline type identifies the sequence or tree of actions to process the sequencing data.", "type": "object", - "required": [ - "pipeSection" - ], "properties": { "pipeSection": { "type": "array", @@ -532,17 +568,21 @@ "items": { "type": "object", "required": [ - "prevStepIndex" + "stepIndex", + "prevStepIndex", + "program", + "version" ], "properties": { "stepIndex": { "type": "string", + "description": "Lexically ordered value that allows for the pipe section to be hierarchically ordered. The primitive data type is used to allow for pipe sections to be inserted later on.", "title": "Step Index" }, "prevStepIndex": { "oneOf": [ { - "title": "String value", + "title": "Float value", "type": "string" }, { @@ -550,22 +590,27 @@ "type": "null" } ], + "description": "stepIndex of the previous step in the workflow. Set Null if the first pipe section.", "title": "Prev Step Index" }, "program": { "type": "string", + "description": "Name of the program or process for primary analysis. This may include a test or condition that leads to branching in the workflow.", "title": "Program" }, "version": { "type": "string", + "description": "Version of the program or process for primary analysis. ", "title": "Version" }, "notes": { "type": "string", + "description": "Notes about the program or process for primary analysis. 
", "title": "Notes" }, "sectionName": { "type": "string", + "description": "If there are multiple sections specify the name of the processing pipeline section.", "title": "Section Name" } } @@ -576,22 +621,22 @@ "directives": { "type": "object", "title": "Directives", - "description": "Processing directives tell the Sequence Read Archive how to treat the input data, if any treatment is requested.", + "description": "Processing directives tell the Sequence Read Storage how to treat the input data, if any treatment is requested.", "properties": { "sampleDemuxDirective": { "type": "object", - "title": "Sample demux directive", - "description": "Tells the Archive who will execute the sample demultiplexing operation.", + "description": "Tells the Sequence Read Storage who will execute the sample demultiplexing operation.", + "title": "Sample Demux Directive", "properties": { "leaveAsPool": { "type": "string", - "description": "There shall be no sample de-multiplexing at the level of assiging individual reads to sample pool members.", - "title": "Leave as pool" + "description": "There shall be no sample de-multiplexing at the level of assigning individual reads to sample pool members.", + "title": "Leave As Pool" }, "submitterDemultiplexed": { "type": "string", "description": "The submitter has assigned individual reads to sample pool members by providing individual files containing reads with the same member assignment.", - "title": "Submitter demultiplexed" + "title": "Submitter Demultiplexed" } } } @@ -602,23 +647,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -633,6 +683,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -651,22 +702,21 @@ "additionalProperties": false, "type": "object", "properties": { - "label": { + "accessionId": { "type": "string", - "title": "Label" + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -683,7 +733,7 @@ "properties": { "title": { "title": "Experiment Title", - "description": "Short text that can be used to call out experiment records in searches or in displays. 
This element is technically optional but should be used for all new records.", + "description": "Short text that can be used to call out experiment records in searches or in displays.", "type": "string" }, "description": { @@ -691,21 +741,24 @@ "description": "Free-form text describing the data sets.", "type": "string" }, - "studyRef": { - "title": "Study Reference", - "description": "Identifies the associated study.", - "$ref": "#/definitions/reference" - }, - "design": { - "title": "Design", - "description": "The library design including library properties, layout, protocol, targeting information, and spot and gap descriptors. ", - "$ref": "#/definitions/libraryType" - }, "platform": { "title": "Platform / Instrument", - "description": " The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", + "description": "The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", "type": "string", "enum": [ + "454 GS 20", + "454 GS FLX Titanium", + "454 GS FLX", + "454 GS FLX+", + "454 GS Junior", + "454 GS", + "AB 310 Genetic Analyzer", + "AB 3130 Genetic Analyzer", + "AB 3130xL Genetic Analyzer", + "AB 3500 Genetic Analyzer", + "AB 3500xL Genetic Analyzer", + "AB 3730 Genetic Analyzer", + "AB 3730xL Genetic Analyzer", "AB 5500 Genetic Analyzer", "AB 5500xl Genetic Analyzer", "AB 5500xl-W Genetic Analysis System", @@ -713,23 +766,19 @@ "AB SOLiD 4 System", "AB SOLiD 4hq System", "AB SOLiD PI System", - "AB SOLiD System", "AB SOLiD System 2.0", "AB SOLiD System 3.0", - "AB 310 Genetic Analyzer", - "AB 3130 Genetic Analyzer", - "AB 3130xL Genetic Analyzer", - "AB 3500 Genetic Analyzer", - "AB 3500xL Genetic Analyzer", - "AB 3730 Genetic Analyzer", - "AB 3730xL Genetic Analyzer", + "AB SOLiD System", + "BGISEQ-50", + "BGISEQ-500", "Complete Genomics", + "GridION", "Helicos HeliScope", "HiSeq X Five", "HiSeq X Ten", - "Illumina Genome Analyzer", "Illumina Genome Analyzer II", "Illumina Genome Analyzer IIx", + "Illumina Genome Analyzer", "Illumina HiScanSQ", "Illumina HiSeq 1000", "Illumina HiSeq 1500", @@ -737,50 +786,48 @@ "Illumina HiSeq 2500", "Illumina HiSeq 3000", "Illumina HiSeq 4000", - "Illumina MiSeq", + "Illumina HiSeq X", "Illumina MiniSeq", + "Illumina MiSeq", "Illumina NovaSeq 6000", - "NextSeq 500", - "NextSeq 550", + "Ion GeneStudio S5 Plus", + "Ion GeneStudio S5 Prime", + "Ion GeneStudio S5", + "Ion Torrent Genexus", "Ion Torrent PGM", "Ion Torrent Proton", - "Ion Torrent S5", "Ion Torrent S5 XL", - "454 GS", - "454 GS 20", - "454 GS FLX", - "454 GS FLX Titanium", - "454 GS FLX+", - "454 GS Junior", - "GridION", + "Ion Torrent S5", + "MGISEQ-2000RS", "MinION", - "PromethION", - "PacBio RS", + "NextSeq 500", + "NextSeq 550", "PacBio RS II", + "PacBio RS", + "PromethION", + "Sequel II", "Sequel", "unspecified" ] }, + "studyRef": { + "title": "Study Reference", + "description": "Identifies the associated study.", + "$ref": "#/definitions/reference" + }, + "design": { + "title": "Design", + "description": "The library design including library properties, layout, protocol, targeting information, and spot and gap descriptors. 
", + "$ref": "#/definitions/libraryType" + }, "processing": { "title": "Processing", - "oneOf": [ - { - "title": "Single Processing", - "type": "string" - }, - { - "title": "Complex Processing", - "type": "array", - "items": { - "$ref": "#/definitions/processingType" - } - } - ] + "$ref": "#/definitions/processingType" }, "experimentLinks": { "type": "array", "title": "Experiment Links", - "description": " Links to resources related to this experiment or experiment set (publication, datasets, online databases).", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -788,9 +835,10 @@ "experimentAttributes": { "type": "array", "title": "Experiment Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/experimentAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_policy.json b/metadata_backend/helpers/schemas/ena_policy.json index 34310b7c6..639eea014 100644 --- a/metadata_backend/helpers/schemas/ena_policy.json +++ b/metadata_backend/helpers/schemas/ena_policy.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. 
Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,7 @@ "$id": "#/definitions/dataUseType", "type": "object", "title": "Data Use Type", - "description": "tag title and its associated value (description)", + "description": "Data Use ontology", "required": [ "ontology", "code", @@ -148,18 +149,19 @@ "properties": { "modifier": { "title": "List of Use Modifiers", + "description": "If Data Use Ontology used, see https://github.com/EBISPOT/DUO for examples.", "type": "array", "items": { "type": "object", "properties": { "modifier": { - "description": "Describes modifiers to the Data Use Restriction", + "description": "Describes modifiers to the Data Use Restriction.", "title": "Modifier", "$ref": "#/definitions/xrefLink" }, "url": { "type": "string", - "description": "Link to URL describing the Data Use" + "description": "Link to URL describing the Data Use." } } } @@ -171,10 +173,12 @@ }, "ontology": { "type": "string", - "title": "Ontology abbreviation, e.g. 
DUO for Data Use Ontology" + "description": "If the Data Use Ontology is used, use DUO.", + "title": "Ontology abbreviation" }, "code": { "type": "string", + "description": "Where the ontology can be found.", "title": "Code for the ontology" }, "version": { @@ -187,7 +191,6 @@ "$id": "#/definitions/policyAttribute", "type": "object", "title": "Policy Attribute", - "description": "tag title and its associated value (description)", "required": [ "tag", "value" @@ -210,23 +213,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -241,6 +249,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -259,22 +268,21 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { + "accessionId": { "type": "string", - "title": "Label" + "description": "Identifies a record by its accession. The scope of resolution is the entire Storage.", + "title": "Accession Id" }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -282,7 +290,7 @@ } }, "type": "object", - "description": "Describes an object that contains data access policy information.", + "description": "Describes an object that contains data access Policy information.", "required": [ "dacRef", "policy" @@ -290,21 +298,22 @@ "properties": { "title": { "title": "Policy Title", - "description": "Short text that can be used to call out data access policies in searches or in displays.", + "description": "Title of the Policy so it can easily be found.", "type": "string" }, "dacRef": { "title": "Data Access Committee Reference", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Identifies the data access committee to which this Policy pertains.", "$ref": "#/definitions/reference" }, "policy": { "title": "Policy", - "description": "Identifies the data access committee to which this policy pertains.", + "description": "Policies can be added either by providing the text of the Policy or by pointing to an existing URL.", "oneOf": [ { "type": "object", "title": "Policy Text", + "description": "Text containing the policy.", "properties": { "policyText": { "type": "string", @@ -318,6 +327,7 @@ { "type": "object", "title": "Policy URL", + "description": "Links to the Policy text / information.", "properties": { "policyUrl": { "type": "string", @@ -331,27 +341,29 @@ } ] }, - "policyLinks": { -
"type": "array", - "title": "Policy Links", - "items": { - "$ref": "#/definitions/Links" - } - }, "dataUses": { "type": "array", - "description": "Data use ontologies (DUO) related to the policy", + "description": "Data use ontologies (DUO) related to the Policy. More information at: https://github.com/EBISPOT/DUO .", "items": { "$ref": "#/definitions/dataUseType" }, "title": "Data Use Ontology" }, + "policyLinks": { + "type": "array", + "title": "Policy Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", + "items": { + "$ref": "#/definitions/Links" + } + }, "policyAttributes": { "type": "array", "title": "Policy Attributes", + "description": "Properties and attributes of the Policy. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/policyAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_run.json b/metadata_backend/helpers/schemas/ena_run.json index aeada1ae1..99336f620 100644 --- a/metadata_backend/helpers/schemas/ena_run.json +++ b/metadata_backend/helpers/schemas/ena_run.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. 
Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/runAttribute", "type": "object", "title": "Run Attribute", - "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "required": [ "tag", "value" @@ -163,13 +163,12 @@ "$id": "#/definitions/processingType", "type": "object", "title": "Processing Type", + "description": "Information about the processing type such as pipeline and sequencing directives.", "properties": { "pipeline": { "title": "Pipeline", + "description": "The Pipeline type identifies the sequence or tree of actions to process the sequencing data.", "type": "object", - "required": [ - "pipeSection" - ], "properties": { "pipeSection": { "type": "array", @@ -177,17 +176,21 @@ "items": { "type": "object", "required": [ - "prevStepIndex" + "stepIndex", + "prevStepIndex", + "program", + "version" ], "properties": { "stepIndex": { "type": "string", + "description": "Lexically ordered value that allows for the pipe section to be hierarchically ordered. The primitive data type is used to allow for pipe sections to be inserted later on.", "title": "Step Index" }, "prevStepIndex": { "oneOf": [ { - "title": "String value", + "title": "Float value", "type": "string" }, { @@ -195,22 +198,27 @@ "type": "null" } ], + "description": "stepIndex of the previous step in the workflow. Set Null if the first pipe section.", "title": "Prev Step Index" }, "program": { "type": "string", + "description": "Name of the program or process for primary analysis. This may include a test or condition that leads to branching in the workflow.", "title": "Program" }, "version": { "type": "string", + "description": "Version of the program or process for primary analysis. ", "title": "Version" }, "notes": { "type": "string", + "description": "Notes about the program or process for primary analysis. 
", "title": "Notes" }, "sectionName": { "type": "string", + "description": "If there are multiple sections specify the name of the processing pipeline section.", "title": "Section Name" } } @@ -221,16 +229,22 @@ "directives": { "type": "object", "title": "Directives", - "description": "Processing directives tell the Sequence Read Archive how to treat the input data, if any treatment is requested.", + "description": "Processing directives tell the Sequence Read Storage how to treat the input data, if any treatment is requested.", "properties": { "sampleDemuxDirective": { "type": "object", + "description": "Tells the Sequence Read Storage who will execute the sample demultiplexing operation.", + "title": "Sample Demux Directive", "properties": { "leaveAsPool": { - "type": "string" + "type": "string", + "description": "There shall be no sample de-multiplexing at the level of assigning individual reads to sample pool members.", + "title": "Leave As Pool" }, "submitterDemultiplexed": { - "type": "string" + "type": "string", + "description": "The submitter has assigned individual reads to sample pool members by providing individual files containing reads with the same member assignment.", + "title": "Submitter Demultiplexed" } } } @@ -241,23 +255,28 @@ "identifiers": { "$id": "#/definitions/identifiers", "title": "Identifiers", + "description": "Identifiers to be used in the International Nucleotide Sequence Database Collaboration (INSDC) namespace.", "type": "object", "properties": { "primaryId": { "type": "string", + "description": "A primary identifier in the INSDC namespace.", "title": "Primary Id" }, "secondaryId": { "type": "string", + "description": "A secondary identifier in the INSDC namespace.", "title": "Secondary Id" }, "uuid": { "type": "string", + "description": "A universally unique identifier that requires no namespace.", "title": "UUID" }, "externalId": { "type": "object", "title": "External Id", + "description": "An identifier from a public non-INSDC resource.", "properties": { "namespace": { "type": "string", @@ -272,6 +291,7 @@ "submitterId": { "type": "object", "title": "Submitter Id", + "description": "A submitter provided identifier.", "properties": { "namespace": { "type": "string", @@ -290,22 +310,21 @@ "additionalProperties": true, "type": "object", "properties": { - "label": { + "accessionId": { "type": "string", - "title": "Label" + "description": "Identifies a record by its accession. 
The scope of resolution is the entire Storage.", + "title": "Accession Id" }, "refname": { "type": "string", + "description": "Identifies an object by name within the namespace defined by attribute refcenter", "title": "Record name" }, "refcenter": { "type": "string", + "description": "The namespace of the attribute refname.", "title": "Center Namespace" }, - "accessionId": { - "type": "string", - "title": "Accession Id" - }, "identifiers": { "$ref": "#/definitions/identifiers" } @@ -324,46 +343,49 @@ "properties": { "filename": { "type": "string", - "title": "filename" + "description": "The name or relative path name of a run data file.", + "title": "File Name" }, "filetype": { "type": "string", - "title": "filetype", + "description": "The Run data file model/type.", + "title": "File Type", "enum": [ - "tab", - "bam", + "agp", "bai", - "cram", - "crai", - "vcf", - "vcf_aggregate", + "bam", "bcf", - "tabix", - "wig", - "sra", - "sff", - "srf", "bed", - "gff", + "BioNano native", + "chromosome list", + "crai", + "cram", "fasta", "fastq", "flatfile", - "chromosome_list", - "sample_list", - "readme_file", - "phenotype_file", - "BioNano_native", - "Kallisto_native", - "agp", - "unlocalised_list", + "gff", "info", + "Kallisto native", "manifest", + "phenotype file", + "readme file", + "sample list", + "sff", + "sra", + "srf", + "tab", + "tabix", + "unlocalised list", + "vcf aggregate", + "vcf", + "wig", "other" ] }, "checksumMethod": { "type": "string", - "title": "checksumMethod", + "title": "Checksum Method", + "description": "Checksum method used MD5 or SHA-256.", "enum": [ "MD5", "SHA-256" @@ -371,42 +393,50 @@ }, "readLabel": { "title": "Read Label", + "description": "The Read Label can associate a certain file to a certain read label defined in the spot descriptor.", "type": "string" }, "readType": { "title": "Read Type", + "description": "Identifies a record by its accession. 
The scope of resolution is the entire Storage.", "type": "string", "enum": [ - "single", - "paired", "cell_barcode", - "umi_barcode", "feature_barcode", + "paired", "sample_barcode", + "single", + "umi_barcode", "Other" ] }, "checksum": { "type": "string", - "title": "checksum" + "description": "Checksum of uncompressed file.", + "title": "Checksum" + }, + "unencryptedChecksum": { + "type": "string", + "description": "Checksum of un-encrypted file (used in conjunction with checksum of encrypted file).", + "title": "Un-encrypted Checksum" } } } }, "type": "object", - "description": "A run contains a group of reads generated for a particular experiment.", + "description": "A Run contains a group of reads generated for a particular experiment.", "required": [ "experimentRef" ], "properties": { "title": { "title": "Run Title", - "description": "Short text that can be used to define submissions in searches or in displays.", + "description": "Title of the Run as would be used to identify it in reference to the Study and Dataset.", "type": "string" }, "description": { "title": "Run Description", - "description": "Free-form text describing the data sets.", + "description": "Free-form text describing the Run and any relevant information.", "type": "string" }, "runType": { @@ -426,23 +456,27 @@ { "type": "object", "title": "Standard", + "description": "A standard genome assembly.", "required": [ - "accessionId" + "accession" ], "properties": { "refname": { "type": "string", + "description": "A recognized name for the genome assembly.", "title": "Reference name" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory.", + "title": "Accession.version" } } }, { "type": "object", "title": "Custom", + "description": "Other genome assembly.", "required": [ "description" ], @@ -469,20 +503,23 @@ "sequence": { "type": "array", "title": "Sequence", + "description": "Reference sequence details.", "items": { "type": "object", "required": [ - "accessionId" + "accession" ], "additionalProperties": true, "properties": { "label": { "type": "string", + "description": "This is how Reference Sequence is labeled in submission file(s). It is equivalent to SQ label in BAM. Optional when submitted file uses INSDC accession.version.", "title": "Label" }, - "accessionId": { + "accession": { "type": "string", - "title": "Accession Id" + "description": "Accession.version with version being mandatory", + "title": "Accession.version" } } } @@ -493,13 +530,13 @@ }, "runDate": { "title": "Run Date", - "description": "Date when the run took place", + "description": "Date when the Run took place.", "type": "string", "format": "date-time" }, "runCenter": { "title": "Run Center", - "description": "If applicable, the name of the contract sequencing center that executed the run.", + "description": "If applicable, the name of the contract sequencing center that executed the Run.", "type": "string" }, "experimentRef": { @@ -512,7 +549,7 @@ } }, "spotDescriptor": { - "description": "The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. 
It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", + "description": "The spot descriptor specifies how to decode the individual reads of interest from the monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and processing information. There will be two methods of specification: one will be an index into a table of typical decodings, the other being an exact specification. This construct is needed for loading data and for interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input files or from one input files).", "title": "Spot Descriptor", "type": "object", "required": [ @@ -521,6 +558,7 @@ "properties": { "spotLength": { "title": "Spot Length", + "description": "Number of base/color calls, cycles, or flows per spot (raw sequence length or flow length including all application and technical tags and mate pairs, but not including gap lengths). This value will be platform dependent, library dependent, and possibly run dependent. Variable length platforms will still have a constant flow/cycle length.", "type": "number", "minimum": 0 }, @@ -530,11 +568,13 @@ "properties": { "readIndex": { "title": "Read Index", + "description": "Read Index starts at 0 and is incrementally increased for each sequential read_spec within a spot decode specification", "type": "number", "minimum": 0 }, "readLabel": { "title": "Read Label", + "description": "Read Label is a name for this tag, and can be used to on output to determine read name, for example F or R.", "type": "string" }, "readClass": { @@ -547,28 +587,32 @@ }, "readType": { "title": "Read Type", + "description": "", "type": "string", "enum": [ - "single", - "paired", "cell_barcode", - "umi_barcode", "feature_barcode", + "paired", "sample_barcode", + "single", + "umi_barcode", "Other" ] }, "relativeOrder": { "type": "object", "title": "Relative Order", + "description": "The read is located beginning at the offset or cycle relative to another read. This choice is appropriate for example when specifying a read that follows a variable length expected sequence(s).", "properties": { "followsReadIndex": { - "title": "Read Index", + "title": "Follows Read Index", "type": "number", + "description": "Specify the read index that precedes this read.", "minimum": 0 }, "precedesReadIndex": { - "title": "Read Index", + "title": "Precedes Read Index", + "description": "Specify the read index that follows this read.", "type": "number", "minimum": 0 } @@ -576,38 +620,69 @@ }, "baseCoord": { "title": "Base Coordinate", + "description": "The location of the read start in terms of base count (1 is beginning of spot).", "type": "number" }, "expectedBaseCallTable": { "title": "Expected Base Call Table", + "description": " A set of choices of expected base calls for a current read. 
Read will be zero-length if none is found.", "type": "array", "items": { "type": "object", "properties": { "baseCall": { - "type": "string" + "type": "string", + "description": "Element's body contains a basecall, attribute provide description of this read meaning as well as matching rules.", + "title": "Base Call" }, "readGroupTag": { - "type": "string" + "type": "string", + "description": "When match occurs, the read will be tagged with this group membership.", + "title": "Read Group Tag" }, "minMatch": { "type": "number", - "minimum": 0 + "description": " Minimum number of matches to trigger identification.", + "minimum": 0, + "title": "Min Match" }, "maxMisMatch": { + "description": "Maximum number of mismatches.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Max MisMatch" }, "matchEdge": { - "type": "string" + "description": "Where the match should occur. Changes the rules on how min_match and max_mismatch are counted.", + "type": "string", + "title": "Match Edge" + }, + "full": { + "description": "Only @max_mismatch influences matching process.", + "type": "string", + "title": "Full" + }, + "start": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. When @min_match is reached - match is declared. ", + "type": "string", + "title": "Start" + }, + "end": { + "description": "Both matches and mismatches are counted. When @max_mismatch is exceeded - it is not a match. When @min_match is reached - match is declared.", + "type": "string", + "title": "End" }, "defaultLength": { + "description": "Specify whether the spot should have a default length for this tag if the expected base cannot be matched.", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Default Length" }, "baseCoord": { + "description": "Specify an optional starting point for tag (base offset from 1).", "type": "number", - "minimum": 0 + "minimum": 0, + "title": "Base Coordinate" } } } @@ -618,9 +693,22 @@ }, "platform": { "title": "Platform / Instrument", - "description": " The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. This will be determined by the Center.", + "description": "The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. 
This will be determined by the Center.", "type": "string", "enum": [ + "454 GS 20", + "454 GS FLX Titanium", + "454 GS FLX", + "454 GS FLX+", + "454 GS Junior", + "454 GS", + "AB 310 Genetic Analyzer", + "AB 3130 Genetic Analyzer", + "AB 3130xL Genetic Analyzer", + "AB 3500 Genetic Analyzer", + "AB 3500xL Genetic Analyzer", + "AB 3730 Genetic Analyzer", + "AB 3730xL Genetic Analyzer", "AB 5500 Genetic Analyzer", "AB 5500xl Genetic Analyzer", "AB 5500xl-W Genetic Analysis System", @@ -628,23 +716,19 @@ "AB SOLiD 4 System", "AB SOLiD 4hq System", "AB SOLiD PI System", - "AB SOLiD System", "AB SOLiD System 2.0", "AB SOLiD System 3.0", - "AB 310 Genetic Analyzer", - "AB 3130 Genetic Analyzer", - "AB 3130xL Genetic Analyzer", - "AB 3500 Genetic Analyzer", - "AB 3500xL Genetic Analyzer", - "AB 3730 Genetic Analyzer", - "AB 3730xL Genetic Analyzer", + "AB SOLiD System", + "BGISEQ-50", + "BGISEQ-500", "Complete Genomics", + "GridION", "Helicos HeliScope", "HiSeq X Five", "HiSeq X Ten", - "Illumina Genome Analyzer", "Illumina Genome Analyzer II", "Illumina Genome Analyzer IIx", + "Illumina Genome Analyzer", "Illumina HiScanSQ", "Illumina HiSeq 1000", "Illumina HiSeq 1500", @@ -652,49 +736,39 @@ "Illumina HiSeq 2500", "Illumina HiSeq 3000", "Illumina HiSeq 4000", - "Illumina MiSeq", + "Illumina HiSeq X", "Illumina MiniSeq", + "Illumina MiSeq", "Illumina NovaSeq 6000", - "NextSeq 500", - "NextSeq 550", + "Ion GeneStudio S5 Plus", + "Ion GeneStudio S5 Prime", + "Ion GeneStudio S5", + "Ion Torrent Genexus", "Ion Torrent PGM", "Ion Torrent Proton", - "Ion Torrent S5", "Ion Torrent S5 XL", - "454 GS", - "454 GS 20", - "454 GS FLX", - "454 GS FLX Titanium", - "454 GS FLX+", - "454 GS Junior", - "GridION", + "Ion Torrent S5", + "MGISEQ-2000RS", "MinION", - "PromethION", - "PacBio RS", + "NextSeq 500", + "NextSeq 550", "PacBio RS II", + "PacBio RS", + "PromethION", + "Sequel II", "Sequel", "unspecified" ] }, "processing": { "title": "Processing", - "oneOf": [ - { - "title": "Single processing", - "type": "string" - }, - { - "title": "Complex Processing", - "type": "array", - "items": { - "$ref": "#/definitions/processingType" - } - } - ] + "type": "object", + "$ref": "#/definitions/processingType" }, "files": { "type": "array", "title": "Files", + "description": "Data files associated with the Run.", "items": { "$ref": "#/definitions/file" } @@ -702,6 +776,7 @@ "runLinks": { "type": "array", "title": "Run Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -709,9 +784,10 @@ "runAttributes": { "type": "array", "title": "Run Attributes", + "description": "Properties and attributes of the data set. These can be entered as free-form tag-value pairs. Submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/runAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_sample.json b/metadata_backend/helpers/schemas/ena_sample.json index 2691b2ad1..d6a0b7fa0 100644 --- a/metadata_backend/helpers/schemas/ena_sample.json +++ b/metadata_backend/helpers/schemas/ena_sample.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. 
INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/sampleAttribute", "type": "object", "title": "Sample Attribute", - "description": "tag title and its associated value (description)", "required": [ "tag", "value" @@ -161,6 +161,7 @@ } }, "type": "object", + "description": "A Sample defines an isolate of sequenceable material upon which sequencing experiments can be based. The Sample object may be a surrogate for taxonomy accession or an anonymized individual identifier. Or, it may fully specify provenance and isolation method of the starting material.", "required": [ "sampleName" ], @@ -172,7 +173,7 @@ }, "sampleName": { "title": "Sample Names", - "description": "Short text that can be used to call out sample records in search results or in displays.", + "description": "Add relevant information to properly identify the Sample: common and scientific names, taxonomy identifier etc. - information can be retrieved from NCBI Taxonomy Browser.", "type": "object", "required": [ "taxonId" @@ -180,12 +181,12 @@ "properties": { "taxonId": { "type": "integer", - "description": "NCBI Taxonomy Identifier. This is appropriate for individual organisms and some environmental samples.", + "description": "NCBI Taxonomy Identifier, this is appropriate for individual organisms and some environmental samples.", "title": "Taxon ID" }, "scientificName": { "title": "Scientific Name", - "description": "Scientific name of sample that distinguishes its taxonomy. 
Please use a name or synonym that is tracked in the INSDC Taxonomy database. Also, this field can be used to confirm the TAXON_ID setting.", + "description": "Scientific name of Sample that distinguishes its taxonomy. Please use a name or synonym that is tracked in the INSDC Taxonomy database. Also, this field can be used to confirm the TAXON_ID setting.", "type": "string" }, "commonName": { @@ -197,11 +198,12 @@ }, "description": { "title": "Sample Description", - "description": "More extensive free-form description of the sample.", + "description": "More extensive free-form description of the Sample.", "type": "string" }, "sampleData": { "title": "Sample Data Type", + "description": "Specify if the Sample represents a human or non-human species. Not specifying the type we will consider it human, with unknown gender.", "oneOf": [ { "type": "object", @@ -210,7 +212,11 @@ "gender": { "type": "string", "title": "Gender", - "enum": ["male", "female", "unknown"] + "enum": [ + "male", + "female", + "unknown" + ] } }, "required": [ @@ -220,6 +226,7 @@ { "type": "object", "title": "Non Human Sample", + "description": "The non-human Sample requires a free-form description of the data e.g. species, gender if known and other relevant information.", "properties": { "dataDescription": { "type": "string", @@ -235,6 +242,7 @@ "sampleLinks": { "type": "array", "title": "Sample Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. ", "items": { "$ref": "#/definitions/Links" } @@ -242,9 +250,10 @@ "sampleAttributes": { "type": "array", "title": "Sample Attributes", + "description": "Properties and attributes of a Sample. These can be entered as free-form tag-value pairs. For certain studies, submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/sampleAttribute" } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/ena_study.json b/metadata_backend/helpers/schemas/ena_study.json index 1c5cfea70..da2c2e823 100644 --- a/metadata_backend/helpers/schemas/ena_study.json +++ b/metadata_backend/helpers/schemas/ena_study.json @@ -27,17 +27,18 @@ "properties": { "xrefDb": { "type": "string", - "title": "Database" + "title": "Database", + "description": "Name of the database or a link where the resource is located at. INSDC controlled vocabulary of permitted cross references.Please see http://www.insdc.org/db_xref.html . For example, FLYBASE." }, "xrefId": { "type": "string", "title": "Database ID", - "description": "Accession in the referenced database." + "description": "Accession ID or stable identifier, in the referenced database. For example, FBtr0080008 (in FLYBASE)" }, "label": { - "description": "Text label to display for the link.", + "type": "string", "title": "Label", - "type": "string" + "description": "A text label to identify the xrefLink by." } } }, @@ -53,7 +54,7 @@ "label": { "type": "string", "title": "Label", - "description": "Text label to display for the link." + "description": "A text label to identify the urlLink by." }, "url": { "type": "string", @@ -73,53 +74,53 @@ ], "properties": { "entrezDb": { - "description": "NCBI controlled vocabulary of permitted cross references", + "description": "Entrez Molecular Sequence Database System, NCBI controlled vocabulary of permitted cross references. 
Also known as Entrez Direct (EDirect) .", "title": "Database", "type": "string", "enum": [ - "pubmed", - "protein", - "nuccore", - "ipg", - "nucleotide", - "structure", - "genome", "annotinfo", "assembly", + "biocollections", "bioproject", "biosample", + "biosystems", "blastdbinfo", "books", "cdd", "clinvar", + "dbvar", "gap", "gapplus", - "grasp", - "dbvar", - "gene", "gds", + "gene", + "genome", "geoprofiles", + "grasp", + "gtr", "homologene", + "ipg", "medgen", "mesh", "ncbisearch", "nlmcatalog", + "nuccore", + "nucleotide", "omim", "orgtrack", + "pcassay", + "pccompound", + "pcsubstance", "pmc", "popset", + "protein", "proteinclusters", - "pcassay", "protfam", - "biosystems", - "pccompound", - "pcsubstance", + "pubmed", "seqannot", "snp", "sra", - "taxonomy", - "biocollections", - "gtr" + "structure", + "taxonomy" ] }, "entrezId": { @@ -129,7 +130,7 @@ "pattern": "^[a-zA-Z0-9]+" }, "label": { - "description": "Text label to display for the link.", + "description": "A text label to identify the url Entrez link by.", "title": "Label", "type": "string" } @@ -139,7 +140,6 @@ "$id": "#/definitions/studyAttribute", "type": "object", "title": "Study Attribute", - "description": "Tag title and its associated value", "required": [ "tag", "value" @@ -162,43 +162,46 @@ "studyType": { "$id": "#/definitions/studyType", "title": "Study Type", - "description": "The Study type presents a controlled vocabulary for expressing the overall purpose of the study.", + "description": "The Study type presents a controlled vocabulary for expressing the overall purpose of the Study.", "type": "string", "enum": [ - "Whole Genome Sequencing", - "Metagenomics", - "Transcriptome Analysis", - "Resequencing", + "Cancer Genomics", "Epigenetics", - "Synthetic Genomics", + "Exome Sequencing", "Forensic or Paleo-genomics", "Gene Regulation Study", - "Cancer Genomics", + "Metagenomics", + "Pooled Clone Sequencing", "Population Genomics", + "Resequencing", "RNASeq", - "Exome Sequencing", - "Pooled Clone Sequencing", + "Synthetic Genomics", + "Transcriptome Analysis", "Transcriptome Sequencing", + "Whole Genome Sequencing", "Other" ] } }, "type": "object", + "description": "A Study is a container for a sequencing investigation that may comprise multiple experiments. The Study has an overall goal, but is otherwise minimally defined with a descriptor, zero or more experiments, and zero or more analyses. The submitter may add to the Study web links and properties.", "required": [ "descriptor" ], + "additionalProperties": true, "properties": { "descriptor": { "type": "object", "title": "Study Description", "required": [ "studyTitle", - "studyType" + "studyType", + "studyAbstract" ], "properties": { "studyTitle": { "title": "Study Title", - "description": "Title of the study as would be used in a publication.", + "description": "Title of the Study as would be used in a publication.", "type": "string" }, "studyType": { @@ -219,12 +222,13 @@ }, "studyDescription": { "title": "Study Description", - "description": "More extensive free-form description of the study.", + "description": "More extensive free-form description of the Study.", "type": "string" }, "studyLinks": { "type": "array", "title": "Study Links", + "description": "Links to resources related to this experiment or experiment set (publication, datasets, online databases). Used to encode URL links, Entrez links, and xref DB links. 
", "items": { "$ref": "#/definitions/Links" } @@ -232,21 +236,22 @@ "studyAttributes": { "type": "array", "title": "Study Attributes", + "description": "Properties and attributes of the Study. These can be entered as free-form tag-value pairs. For certain studies, submitters may be asked to follow a community established ontology when describing the work.", "items": { "$ref": "#/definitions/studyAttribute" } }, "center": { "title": "Description for Center", - "description": "More for backwards compatibility, we might not need it.", + "description": "Description of the center is intended for backward tracking of the Study record to the submitter's LIMS.", "type": "object", "properties": { "centerProjectName": { "title": "Center Project Name", - "description": " Submitter defined project name. This field is intended for backward tracking of the study record to the submitter's LIMS.", + "description": "Submitter defined project name. This field is intended for backward tracking of the Study record to the submitter's LIMS.", "type": "string" } } } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/schemas/folders.json b/metadata_backend/helpers/schemas/folders.json index b1b4a9243..8e9237247 100644 --- a/metadata_backend/helpers/schemas/folders.json +++ b/metadata_backend/helpers/schemas/folders.json @@ -3,9 +3,14 @@ "title": "Folder schema containing submitted metadata objects", "required": [ "name", - "description" + "description", + "projectId" ], "properties": { + "projectId": { + "type": "string", + "title": "Owner project of the folder" + }, "folderId": { "type": "string", "title": "Folder Id" @@ -14,6 +19,10 @@ "type": "string", "title": "Folder Name" }, + "text_name": { + "type": "string", + "title": "Searchable Folder Name, used for indexing" + }, "description": { "type": "string", "title": "Folder Description" @@ -22,86 +31,107 @@ "type": "integer", "title": "Unix time stamp of creation, used for indexing" }, + "datePublished": { + "type": "integer", + "title": "Unix time stamp of publishing, used for indexing" + }, "published": { "type": "boolean", "title": "Published Folder" }, "doiInfo": { "type": "object", - "title": "The DOI info schema", + "title": "Datacite DOI Registration Information", "required": [ "creators", - "subjects" + "subjects", + "keywords" ], "properties": { "creators": { "type": "array", - "title": "List of creators", + "title": "Creators", + "description": "The main researcher(s) involved in producing the data, or the author(s) of the publication.", "items": { "type": "object", - "title": "Main researchers involved with data or the authors of the publication", + "title": "Main researcher(s) involved with data or the author(s) of the publication.", + "required": [ + "givenName", + "familyName" + ], "properties": { - "name": { + "givenName": { "type": "string", - "title": "Full name of creator (format: Family, Given)" + "title": "Given Name" }, - "nameType": { + "familyName": { "type": "string", - "title": "Type of name" + "title": "Family Name" }, - "givenName": { + "name": { "type": "string", - "title": "First name" + "description": "This field will be constructed based from Given Name and Family Name.", + "title": "Full name." 
}, - "familyName": { + "nameType": { "type": "string", - "title": "Last name" + "title": "Type of name", + "const": "Personal" }, - "nameIdentifiers": { + "affiliation": { "type": "array", - "title": "List of name identifiers", + "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", - "title": "Name identifier object", + "title": "Affiliation Details", "properties": { + "name": { + "type": "string", + "title": "Name of the place of affiliation" + }, "schemeUri": { - "type": ["string", "null"], - "title": "URI (location) of the name identifier scheme" + "type": "string", + "description": "Name identifier scheme. This will default to https://ror.org/ .", + "title": "URI (location) of the affiliation scheme" }, - "nameIdentifier": { - "type": ["string", "null"], - "title": "URI (location) of name identifier" + "affiliationIdentifier": { + "type": "string", + "description": "URI location based on the URI scheme of the name identifier this will be pre-filled based on https://ror.org/name.", + "title": "Location of affiliation identifier" }, - "nameIdentifierScheme": { - "type": ["string", "null"], - "title": "Name of name identifier scheme" + "affiliationIdentifierScheme": { + "type": "string", + "description": "Identifier scheme name. This will default to ROR.", + "title": "Name of affiliation identifier scheme" } } }, "uniqueItems": true }, - "affiliation": { + "nameIdentifiers": { "type": "array", - "title": "List of affiliations", + "title": "Creator Identifiers", + "description": "Uniquely identifies the creator, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", "items": { "type": "object", - "title": "Name affiliation object", + "title": "Creator Name Identifier", "properties": { - "name": { - "type": "string", - "title": "Name of the place of affiliation" - }, "schemeUri": { "type": "string", - "title": "URI (location) of the affiliation scheme" + "description": "Depending on the name identifier scheme e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/ .", + "title": "Scheme of the URI (location) of the name identifier", + "format": "uri" }, - "affiliationIdentifier": { + "nameIdentifier": { "type": "string", - "title": "Location of affiliation identifier" + "description": "URI location based on the URI scheme of the name identifier e.g. https://orcid.org/nameIdentifier .", + "title": "URI (location) of name identifier. " }, - "affiliationIdentifierScheme": { + "nameIdentifierScheme": { "type": "string", - "title": "Name of affiliation identifier scheme" + "description": "Identifier scheme name e.g. ORCID, ROR or ISNI .", + "title": "Name of identifier scheme." 
} } }, @@ -114,85 +144,145 @@ }, "subjects": { "type": "array", - "title": "List of subject identifiers specified by FOS", + "title": "Subjects", + "description": "Subject, classification code, or key phrase describing the resources specified by OECD Fields of Science and Technology (FOS)", "items": { "type": "object", - "title": "Subject objects", + "title": "Subjects", "required": [ "subject" ], "properties": { "subject": { "type": "string", - "title": "FOS identifier" + "title": "FOS identifier", + "enum": [ + "FOS: Agricultural biotechnology", + "FOS: Agricultural sciences", + "FOS: Agriculture, forestry, and fisheries", + "FOS: Animal and dairy science", + "FOS: Arts (arts, history of arts, performing arts, music)", + "FOS: Basic medicine", + "FOS: Biological sciences", + "FOS: Chemical engineering", + "FOS: Chemical sciences", + "FOS: Civil engineering", + "FOS: Clinical medicine", + "FOS: Computer and information sciences", + "FOS: Earth and related environmental sciences", + "FOS: Economics and business", + "FOS: Educational sciences", + "FOS: Electrical engineering, electronic engineering, information engineering", + "FOS: Engineering and technology", + "FOS: Environmental biotechnology", + "FOS: Environmental engineering", + "FOS: Health sciences", + "FOS: History and archaeology", + "FOS: Humanities", + "FOS: Industrial biotechnology", + "FOS: Languages and literature", + "FOS: Law", + "FOS: Materials engineering", + "FOS: Mathematics", + "FOS: Mechanical engineering", + "FOS: Media and communications", + "FOS: Medical and health sciences", + "FOS: Medical biotechnology", + "FOS: Medical engineering", + "FOS: Nano-technology", + "FOS: Natural sciences", + "FOS: Other agricultural sciences", + "FOS: Other engineering and technologies", + "FOS: Other humanities", + "FOS: Other medical sciences", + "FOS: Other natural sciences", + "FOS: Other social sciences", + "FOS: Philosophy, ethics and religion", + "FOS: Physical sciences", + "FOS: Political science", + "FOS: Psychology", + "FOS: Social and economic geography", + "FOS: Social sciences", + "FOS: Sociology", + "FOS: Veterinary science" + ] }, "subjectScheme": { - "type": "string", - "title": "Subject scheme name" + "title": "Fields of Science and Technology (FOS) scheme", + "type": "string" } }, - "additionalProperties": true + "additionalProperties": false }, "uniqueItems": true }, + "keywords": { + "type": "string", + "title": "Keywords", + "description": "A list of keywords or tags describing the resources. It is recommended to use a controlled vocabulary, ontology or classification when choosing keywords. Multiple keywords can be added, separating them by comma." + }, "contributors": { "type": "array", - "title": "List of contributors", + "title": "Contributors", + "description": "The person(s) responsible for contributing to the development of the dataset.", "items": { "type": "object", - "title": "The institution or person responsible for contributing to the developement of the dataset", + "title": "Contributor", "required": [ + "givenName", + "familyName", "contributorType" ], "properties": { - "name": { + "givenName": { "type": "string", - "title": "Full name of contributor (format: Family, Given)" + "title": "Given Name" }, - "nameType": { + "familyName": { "type": "string", - "title": "Type of name" + "title": "Family Name" }, - "givenName": { + "name": { "type": "string", - "title": "First name" + "description": "This field will be constructed based from Given Name and Family Name.", + "title": "Full name." 
}, - "familyName": { + "nameType": { "type": "string", - "title": "Last name" + "title": "Type of name", + "const": "Personal" }, "contributorType": { "type": "string", - "title": "Type of contributor" - }, - "nameIdentifiers": { - "type": "array", - "title": "List of name identifiers", - "items": { - "type": "object", - "title": "Name identifier object", - "properties": { - "schemeUri": { - "type": ["string", "null"], - "title": "URI (location) of the name identifier scheme" - }, - "nameIdentifier": { - "type": ["string", "null"], - "title": "Location of name identifier" - }, - "nameIdentifierScheme": { - "type": ["string", "null"], - "title": "Name of name identifier scheme" - } - } - } + "title": "Type of contributor", + "enum": [ + "Contact Person", + "Data Collector", + "Data Curator", + "Data Manager", + "Distributor", + "Editor", + "Producer", + "Project Leader", + "Project Manager", + "Project Member", + "Related Person", + "Research Group", + "Researcher", + "Rights Holder", + "Sponsor", + "Supervisor", + "Work Package Leader", + "Other" + ] }, "affiliation": { "type": "array", - "title": "List of affiliations", + "title": "Affiliations", + "description": "The organizational or institutional affiliation of the creator. Upon filling the form with the organization or institution suggestion will be made from Research Organization Registry (ROR) Community API.", "items": { "type": "object", - "title": "Name affiliation object", + "title": "Affiliation Details", "properties": { "name": { "type": "string", @@ -200,18 +290,50 @@ }, "schemeUri": { "type": "string", + "description": "Name identifier scheme. This will default to https://ror.org/ .", "title": "URI (location) of the affiliation scheme" }, "affiliationIdentifier": { "type": "string", + "description": "URI location based on the URI scheme of the name identifier this will be pre-filled based on https://ror.org/name.", "title": "Location of affiliation identifier" }, "affiliationIdentifierScheme": { "type": "string", + "description": "Identifier scheme name. This will default to ROR.", "title": "Name of affiliation identifier scheme" } } - } + }, + "uniqueItems": true + }, + "nameIdentifiers": { + "type": "array", + "title": "Contributor Identifiers", + "description": "Uniquely identifies the contributor, according to various schemas, e.g. ORCID, ROR or ISNI. Use name identifier expressed as URL.", + "items": { + "type": "object", + "title": "Contributor Name Identifier", + "properties": { + "schemeUri": { + "type": "string", + "description": "Depending on the name identifier scheme e.g. https://isni.org/ or https://orcid.org/ or https://ror.org/ or https://www.grid.ac/ .", + "title": "Scheme of the URI (location) of the name identifier", + "format": "uri" + }, + "nameIdentifier": { + "type": "string", + "description": "URI location based on the URI scheme of the name identifier e.g. https://orcid.org/nameIdentifier .", + "title": "URI (location) of name identifier. " + }, + "nameIdentifierScheme": { + "type": "string", + "description": "Identifier scheme name e.g. ORCID, ROR or ISNI .", + "title": "Name of identifier scheme." + } + } + }, + "uniqueItems": true } }, "additionalProperties": false @@ -220,10 +342,11 @@ }, "dates": { "type": "array", - "title": "List of relevant dates to publication", + "title": "Dates", + "description": "List of relevant dates to publication. 
The type of date can vary and it is recommended to add information if appropriate to distinguish between dates.", "items": { "type": "object", - "title": "Date object", + "title": "Date", "required": [ "date", "dateType" @@ -231,15 +354,31 @@ "properties": { "date": { "type": "string", - "title": "A standard format for a date value" + "title": "Date", + "description": "A standard format for a date value." }, "dateType": { "type": "string", - "title": "Relevance of the date" + "title": "Date Type", + "description": "Relevance of the date for the resources being submitted.", + "enum": [ + "Accepted", + "Available", + "Collected", + "Copyrighted", + "Created", + "Issued", + "Submitted", + "Updated", + "Valid", + "Withdrawn", + "Other" + ] }, "dateInformation": { "type": "string", - "title": "Specific event of the date" + "title": "Date Information", + "description": "Specific information about the date, if appropriate." } }, "additionalProperties": false @@ -248,22 +387,31 @@ }, "descriptions": { "type": "array", - "title": "List of descriptions", + "title": "Descriptions", + "description": "Additional information about the resources that does not fit in any of the other categories, or general descriptions that are supplemental to the descriptions added for Study or Datasets.", "items": { "type": "object", - "title": "Description object", + "title": "description", "properties": { - "lang": { - "type": "string", - "title": "Language code of the description" - }, "description": { "type": "string", - "title": "Additional information that does not fit in any of the other categories" + "title": "description" }, "descriptionType": { "type": "string", - "title": "Type of description" + "title": "Description Type", + "enum": [ + "Abstract", + "Methods", + "Series Information", + "Table Of Contents", + "Technical Info", + "Other" + ] + }, + "lang": { + "type": "string", + "title": "Language" } }, "additionalProperties": false @@ -272,18 +420,20 @@ }, "geoLocations": { "type": "array", - "title": "List of GeoLocations", + "title": "GeoLocations", + "description": "Spatial region or named place where the data was gathered or where the resources are focused.", "items": { "type": "object", - "title": "GeoLocation object", + "title": "GeoLocation", "properties": { "geoLocationPlace": { "type": "string", - "title": "Spatial region or named place where the data was gathered" + "title": "Geolocation Place" }, "geoLocationPoint": { "type": "object", - "title": "A point containing a single latitude-longitude pair", + "title": "Geolocation Point", + "description": "A point location in space. A point contains a single longitude-latitude pair.", "properties": { "pointLongitude": { "type": "string", @@ -298,23 +448,24 @@ }, "geoLocationBox": { "type": "object", - "title": "A box determined by two longitude and two latitude borders", + "title": "Geolocation Box", + "description": "The spatial limits of a box. A box is defined by two geographic points. Left low corner and right upper corner. Each point is defined by its longitude and latitude.", "properties": { "westBoundLongitude": { "type": "string", - "title": "Longitude coordinate of west bound" + "title": "Longitude coordinate of west bound." }, "eastBoundLongitude": { "type": "string", - "title": "Longitude coordinate of east bound" + "title": "Longitude coordinate of east bound." }, "southBoundLatitude": { "type": "string", - "title": "Latitude coordinate of south bound" + "title": "Latitude coordinate of south bound." 
}, "northBoundLatitude": { "type": "string", - "title": "Latitude coordinate of north bound" + "title": "Latitude coordinate of north bound." } } }, @@ -343,38 +494,202 @@ }, "language": { "type": "string", - "title": "Code of the primary language of the resource" - }, - "alternateIdentifiers": { - "type": "array", - "title": "List of alternate identifiers", - "items": { - "type": "object", - "title": "An identifier or identifiers other than the primary Identifier of the resource", - "required": [ - "alternateIdentifier", - "alternateIdentifierType" - ], - "properties": { - "alternateIdentifier": { - "type": "string", - "title": "Alternate identifier info" - }, - "alternateIdentifierType": { - "type": "string", - "title": "Type of alternate identifier" - } - }, - "additionalProperties": false - }, - "uniqueItems": true + "title": "Language", + "description": "Primary language of the submitted resources.", + "enum": [ + "Abkhaz", + "Afar", + "Afrikaans", + "Akan", + "Albanian", + "Amharic", + "Arabic", + "Aragonese", + "Armenian", + "Assamese", + "Avaric", + "Avestan", + "Aymara", + "Azerbaijani", + "Bambara", + "Bashkir", + "Basque", + "Belarusian", + "Bengali", + "Bihari", + "Bislama", + "Bosnian", + "Breton", + "Bulgarian", + "Burmese", + "Catalan", + "Chamorro", + "Chechen", + "Chichewa", + "Chinese", + "Chuvash", + "Cornish", + "Corsican", + "Cree", + "Croatian", + "Czech", + "Danish", + "Divehi", + "Dutch", + "Dzongkha", + "English", + "Esperanto", + "Estonian", + "Ewe", + "Faroese", + "Fijian", + "Finnish", + "French", + "Fula", + "Galician", + "Ganda", + "Georgian", + "German", + "Greek", + "Guaraní", + "Gujarati", + "Haitian", + "Hausa", + "Hebrew", + "Herero", + "Hindi", + "Hiri Motu", + "Hungarian", + "Icelandic", + "Ido", + "Igbo", + "Indonesian", + "Interlingua", + "Interlingue", + "Inuktitut", + "Inupiaq", + "Irish", + "Italian", + "Japanese", + "Javanese", + "Kalaallisut", + "Kannada", + "Kanuri", + "Kashmiri", + "Kazakh", + "Khmer", + "Kikuyu", + "Kinyarwanda", + "Kirundi", + "Komi", + "Kongo", + "Korean", + "Kurdish", + "Kwanyama", + "Kyrgyz", + "Lao", + "Latin", + "Latvian", + "Limburgish", + "Lingala", + "Lithuanian", + "Luba-Katanga", + "Luxembourgish", + "Macedonian", + "Malagasy", + "Malay", + "Malayalam", + "Maltese", + "Manx", + "Māori", + "Marathi", + "Marshallese", + "Mongolian", + "Nauru", + "Navajo", + "Ndonga", + "Nepali", + "Northern Ndebele", + "Northern Sami", + "Norwegian Bokmål", + "Norwegian Nynorsk", + "Norwegian", + "Nuosu", + "Occitan", + "Ojibwe", + "Old Church Slavonic", + "Oriya", + "Oromo", + "Ossetian", + "Pāli", + "Panjabi", + "Pashto", + "Persian", + "Polish", + "Portuguese", + "Quechua", + "Romanian", + "Romansh", + "Russian", + "Samoan", + "Sango", + "Sanskrit", + "Sardinian", + "Scottish Gaelic", + "Serbian", + "Shona", + "Sindhi", + "Sinhala", + "Slovak", + "Slovenian", + "Somali", + "Southern Ndebele", + "Southern Sotho", + "Spanish", + "Sundanese", + "Swahili", + "Swati", + "Swedish", + "Tagalog", + "Tahitian", + "Tajik", + "Tamil", + "Tatar", + "Telugu", + "Thai", + "Tibetan", + "Tigrinya", + "Tonga", + "Tsonga", + "Tswana", + "Turkish", + "Turkmen", + "Twi", + "Ukrainian", + "Urdu", + "Uyghur", + "Uzbek", + "Venda", + "Vietnamese", + "Volapük", + "Walloon", + "Welsh", + "Western Frisian", + "Wolof", + "Xhosa", + "Yiddish", + "Yoruba", + "Zhuang", + "Zulu" + ] }, "relatedIdentifiers": { "type": "array", - "title": "List of related identifiers", + "title": "Related Identifiers", + "description": "Information about a resource related to the 
one being registered, primarily used to provide series information or a text citation where the related resource does not have an identifier. It is also optional to provide such an identifier.", "items": { "type": "object", - "title": "Identifier of related resources", + "title": "Identifier of related resource", "required": [ "relatedIdentifier", "relatedIdentifierType", @@ -383,31 +698,168 @@ "properties": { "relatedIdentifier": { "type": "string", - "title": "Related identifier info" + "title": "Identifier", + "description": "These must be globally unique identifiers and correspond to the type selected. e.g. 10.2022/example.78m9865 for DOI identifier Type" }, "relatedIdentifierType": { "type": "string", - "title": "Type of related identifier" + "title": "Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] }, "relationType": { "type": "string", - "title": "Specification of the relation" + "title": "Relationship Type", + "enum": [ + "Cites", + "Is Cited By", + "Compiles", + "Is Compiled By", + "Continues", + "Is Continued By", + "Describes", + "Is Described By", + "Documents", + "Is Documented By", + "Is Derived From", + "Is Source Of", + "Has Metadata", + "Is Metadata For", + "Has Part", + "Is Part Of", + "Is Supplemented By", + "Is Supplement To", + "Obsoletes", + "Is Obsoleted By", + "References", + "Is Referenced By", + "Requires", + "Is Required By", + "Reviews", + "Is Reviewed By", + "Has Version", + "Is Version Of", + "Is New version Of", + "Is Previous Version Of", + "Is Published In", + "Is Variant Form Of", + "Is Original Form Of", + "Is Identical To" + ] }, "relatedMetadataScheme": { "type": "string", - "title": "Scheme of related metadata" + "title": "Related Metadata Scheme", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example DDI-L" }, "schemeUri": { "type": "string", - "title": "URI (location) of the related metadata scheme" + "title": "Related Metadata Scheme URI", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Example: http://www.ddialliance.org/Specification/DDI-Lifecycle/3.1/XMLSchema/instance.xsd" }, "schemeType": { "type": "string", - "title": "Type of the related metadata scheme" + "title": "Related Metadata Scheme Type", + "description": "To be used only with this relation pair: (Has Metadata/Is Metadata For). Examples: XSD, DDT, Turtle" }, "resourceTypeGeneral": { "type": "string", - "title": "Optional general type name" + "title": "Resource Type", + "enum": [ + "Audiovisual", + "Book", + "Book Chapter", + "Collection", + "Computational Notebook", + "Conference Paper", + "Conference Proceeding", + "Data Paper", + "Dataset", + "Dissertation", + "Event", + "Image", + "Interactive Resource", + "Journal", + "Journal Article", + "Model", + "Output Management Plan", + "Peer Review", + "Physical Object", + "Preprint", + "Report", + "Service", + "Software", + "Sound", + "Standard", + "Text", + "Workflow", + "Other" + ] + } + }, + "additionalProperties": false + }, + "uniqueItems": true + }, + "alternateIdentifiers": { + "type": "array", + "title": "Alternate Identifiers", + "description": "An identifier or identifiers other than the primary or related identifier applied to the resources being registered. 
EGA identifier(s) obtained, that can be used as an alternative to the current resources.", + "items": { + "type": "object", + "title": "Identifier(s) other than the primary identifier of the resources", + "required": [ + "alternateIdentifier", + "alternateIdentifierType" + ], + "properties": { + "alternateIdentifier": { + "type": "string", + "title": "Alternate Identifier" + }, + "alternateIdentifierType": { + "type": "string", + "title": "Alternate Identifier Type", + "enum": [ + "ARK", + "arXiv", + "bibcode", + "DOI", + "EAN13", + "EISSN", + "Handle", + "IGSN", + "ISBN", + "ISSN", + "ISTC", + "LISSN", + "LSID", + "PMID", + "PURL", + "UPC", + "URL", + "URN", + "w3id" + ] } }, "additionalProperties": false @@ -416,26 +868,29 @@ }, "sizes": { "type": "array", - "title": "List of sizes", + "title": "Sizes", + "description": "Size (e.g., bytes, pages, inches, etc.) or duration (extent), e.g., hours, minutes, days, etc., of a resources. Examples: '15 pages', '6 MB', '45 minutes'.", "items": { "type": "string", - "title": "Unstructured size information about the resource" + "title": "Size" } }, "formats": { "type": "array", - "title": "List of formats", + "title": "Formats", + "description": "Use file extension or MIME type where possible, e.g., PDF, XML, MPG or application/pdf, text/xml, video/mpeg. We will pre-fill some of them based on what was filled in metadata.", "items": { "type": "string", - "title": "Technical format of the resource" + "title": "Format" } }, "fundingReferences": { "type": "array", - "title": "List of funding references", - "itmes": { + "title": "Funding References", + "description": "It is recommended to supply funding information when financial support has been received. The funder will be identified by their Research Organization Registry (ROR).", + "items": { "type": "object", - "title": "Information about financial support for the resource", + "title": "Information about financial support for producing the resources", "required": [ "funderName", "funderIdentifier", @@ -444,31 +899,39 @@ "properties": { "funderName": { "type": "string", - "title": "Name of the funding provider" + "title": "Funder Name" }, "funderIdentifier": { "type": "string", - "title": "Unique identifier for funding entity" + "title": "Funder Identifier", + "description": "Unique identifier for funding entity" }, "funderIdentifierType": { "type": "string", - "title": "Type of identifier for funding entity" - }, - "schemeUri": { - "type": ["string", "null"], - "title": "URI (location) of scheme for funder identifier" + "title": "Funder Identity Type", + "description": "The type of funder identifier, one of Crossref Funder ID, GRID, ISNI, or ROR.", + "enum": [ + "Crossref Funder ID", + "GRID", + "ISNI", + "ROR", + "Other" + ] }, "awardNumber": { - "type": ["string", "null"], - "title": "The code assigned by the funder to a sponsored award" + "type": "string", + "title": "Award Number", + "description": "The code assigned by the funder to a sponsored award." }, "awardTitle": { - "type": ["string", "null"], - "title": "The human readable title of the award" + "type": "string", + "title": "Award Title", + "description": "The human readable title of the award." }, "awardUri": { - "type": ["string", "null"], - "title": "URI (location) of the award" + "type": "string", + "title": "Award URI", + "description": "The URI leading to a page provided by the funder for more information about the award (grant)." 
} }, "additionalProperties": false @@ -482,21 +945,76 @@ "type": "object", "title": "The extra DOI info schema", "properties": { - "identifier": { + "studyIdentifier": { "type": "object", - "title": "identifier object", "required": [ - "identifierType", - "doi" + "identifier", + "url" ], "properties": { - "identifierType": { - "type": "string", - "title": "Type of identifier (= DOI)" + "identifier": { + "type": "object", + "title": "identifier object", + "required": [ + "identifierType", + "doi" + ], + "properties": { + "identifierType": { + "type": "string", + "title": "Type of identifier (= DOI)" + }, + "doi": { + "type": "string", + "title": "A persistent identifier for a resource" + } + } }, - "doi": { + "url": { "type": "string", - "title": "A persistent identifier for a resource" + "title": "URL of the digital location of the object" + }, + "types": { + "type": "object", + "title": "Type info of the resource. Multiple types can be listed: ResourceTypeGeneral, schemaOrg etc." + } + } + }, + "datasetIdentifiers": { + "type": "array", + "items": { + "type": "object", + "required": [ + "identifier", + "url" + ], + "properties": { + "identifier": { + "type": "object", + "title": "identifier object", + "required": [ + "identifierType", + "doi" + ], + "properties": { + "identifierType": { + "type": "string", + "title": "Type of identifier (= DOI)" + }, + "doi": { + "type": "string", + "title": "A persistent identifier for a resource" + } + } + }, + "url": { + "type": "string", + "title": "URL of the digital location of the object" + }, + "types": { + "type": "object", + "title": "Type info of the resource. Multiple types can be listed: ResourceTypeGeneral, schemaOrg etc." + } } } }, @@ -504,28 +1022,9 @@ "type": "string", "title": "Full name of publisher from Research Organization Registry" }, - "resourceType": { - "type": "object", - "title": "Type info of the resource", - "required": [ - "type", - "resourceTypeGeneral" - ], - "properties": { - "type": { - "type": "string", - "title": "Name of resource type" - }, - "resourceTypeGeneral": { - "type": "string", - "title": "Mandatory general type name" - } - }, - "additionalProperties": false - }, - "url": { - "type": "string", - "title": "URL of the digital location of the object" + "publicationYear": { + "type": "integer", + "title": "Publication Year" }, "version": { "type": "string", @@ -563,6 +1062,7 @@ "title": "Type of submission", "enum": [ "XML", + "CSV", "Form" ] } @@ -601,6 +1101,7 @@ "title": "Type of submission", "enum": [ "XML", + "CSV", "Form" ] } diff --git a/metadata_backend/helpers/schemas/users.json b/metadata_backend/helpers/schemas/users.json index 0f021b1eb..191b20293 100644 --- a/metadata_backend/helpers/schemas/users.json +++ b/metadata_backend/helpers/schemas/users.json @@ -1,9 +1,10 @@ { "type": "object", - "title": "Folder schema for user objects", + "title": "User object schema", "required": [ "userId", - "name" + "name", + "projects" ], "properties": { "userId": { @@ -14,52 +15,28 @@ "type": "string", "title": "User Name" }, - "drafts": { + "projects": { "type": "array", - "title": "User templates schema", + "title": "User's project affiliations schema", "items": { "type": "object", - "title": "Template objects", + "title": "Project objects", "required": [ - "accessionId", - "schema" + "projectId", + "projectNumber" ], "properties": { - "accessionId": { + "projectId": { "type": "string", - "title": "Accession Id" + "title": "Project internal accession ID" }, - "schema": { + "projectNumber": { "type": "string", - 
"title": "Draft object's schema" - }, - "tags": { - "type": "object", - "title": "Different tags to describe the template object.", - "additionalProperties": true, - "properties": { - "submissionType": { - "type": "string", - "title": "Type of submission", - "enum": [ - "XML", - "Form" - ] - } - } + "title": "Project human friendly ID" } } }, "uniqueItems": true - }, - "folders": { - "type": "array", - "title": "The folders schema", - "items": { - "type": "string", - "title": "Folder Id" - }, - "uniqueItems": true } } -} +} \ No newline at end of file diff --git a/metadata_backend/helpers/validator.py b/metadata_backend/helpers/validator.py index 4e969b2b7..68549ddf3 100644 --- a/metadata_backend/helpers/validator.py +++ b/metadata_backend/helpers/validator.py @@ -1,9 +1,9 @@ """Utility classes for validating XML or JSON files.""" -import json +import ujson import re from io import StringIO -from typing import Any, Dict +from typing import Any, Dict, cast from urllib.error import URLError from aiohttp import web @@ -38,7 +38,7 @@ def resp_body(self) -> str: try: self.schema.validate(self.xml_content) LOG.info("Submitted file is totally valid.") - return json.dumps({"isValid": True}) + return ujson.dumps({"isValid": True}) except ParseError as error: reason = self._parse_error_reason(error) @@ -48,19 +48,20 @@ def resp_body(self) -> str: instance = re.sub(r"^.*?<", "<", line) # strip whitespaces LOG.info("Submitted file does not not contain valid XML syntax.") - return json.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) + return ujson.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) except XMLSchemaValidationError as error: # Parse reason and instance from the validation error message - reason = error.reason - instance = ElementTree.tostring(error.elem, encoding="unicode") + reason = str(error.reason) + _elem = cast(ElementTree.Element, error.elem) + instance = ElementTree.tostring(_elem, encoding="unicode") # Replace element address in reason with instance element if "<" and ">" in reason: instance_parent = "".join((instance.split(">")[0], ">")) reason = re.sub("<[^>]*>", instance_parent + " ", reason) LOG.info("Submitted file is not valid against schema.") - return json.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) + return ujson.dumps({"isValid": False, "detail": {"reason": reason, "instance": instance}}) except URLError as error: reason = f"Faulty file was provided. {error.reason}." 
@@ -76,7 +77,7 @@ def _parse_error_reason(self, error: ParseError) -> str: @property def is_valid(self) -> bool: """Quick method for checking validation result.""" - resp = json.loads(self.resp_body) + resp = ujson.loads(self.resp_body) return resp["isValid"] diff --git a/metadata_backend/server.py b/metadata_backend/server.py index 49de9e973..48b1d59d3 100644 --- a/metadata_backend/server.py +++ b/metadata_backend/server.py @@ -1,25 +1,24 @@ """Functions to launch backend server.""" import asyncio +import secrets +import time import uvloop from aiohttp import web from cryptography.fernet import Fernet -import secrets -import time -from .api.handlers import ( - RESTAPIHandler, - StaticHandler, - SubmissionAPIHandler, - FolderAPIHandler, - UserAPIHandler, - ObjectAPIHandler, -) from .api.auth import AccessHandler -from .api.middlewares import http_error_handler, check_login +from .api.handlers.restapi import RESTAPIHandler +from .api.handlers.static import StaticHandler +from .api.handlers.folder import FolderAPIHandler +from .api.handlers.object import ObjectAPIHandler +from .api.handlers.submission import SubmissionAPIHandler +from .api.handlers.template import TemplatesAPIHandler +from .api.handlers.user import UserAPIHandler from .api.health import HealthHandler -from .conf.conf import create_db_client, frontend_static_files, aai_config +from .api.middlewares import check_login, http_error_handler +from .conf.conf import aai_config, create_db_client, frontend_static_files from .helpers.logger import LOG asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) @@ -34,10 +33,23 @@ async def kill_sess_on_shutdown(app: web.Application) -> None: # Purge the openstack connection from the server app["Session"].pop(key) LOG.debug("Purged connection information for %s :: %s", key, time.ctime()) - app["Cookies"] = set({}) LOG.debug("Removed session") +async def startup(server: web.Application) -> None: + """Add startup web server state configuration.""" + # Mutable_map handles cookie storage, also stores the object that provides + # the encryption we use + server["Crypt"] = Fernet(Fernet.generate_key()) + # Create a signature salt to prevent editing the signature on the client + # side. Hash function doesn't need to be cryptographically secure, it's + # just a convenient way of getting ascii output from byte values. + server["Salt"] = secrets.token_hex(64) + server["Session"] = {} + server["Cookies"] = set({}) + server["OIDC_State"] = set({}) + + async def init() -> web.Application: """Initialise server and setup routes. @@ -51,48 +63,53 @@ async def init() -> web.Application: """ server = web.Application() - # Mutable_map handles cookie storage, also stores the object that provides - # the encryption we use - server["Crypt"] = Fernet(Fernet.generate_key()) - # Create a signature salt to prevent editing the signature on the client - # side. Hash function doesn't need to be cryptographically secure, it's - # just a convenient way of getting ascii output from byte values. 
- server["Salt"] = secrets.token_hex(64) - server["Session"] = {} - server["Cookies"] = set({}) - server["OIDC_State"] = set({}) + server.on_startup.append(startup) server.middlewares.append(http_error_handler) server.middlewares.append(check_login) - _handler = RESTAPIHandler() + _schema = RESTAPIHandler() _object = ObjectAPIHandler() _folder = FolderAPIHandler() _user = UserAPIHandler() _submission = SubmissionAPIHandler() + _template = TemplatesAPIHandler() api_routes = [ - web.get("/schemas", _handler.get_schema_types), - web.get("/schemas/{schema}", _handler.get_json_schema), - web.get("/objects/{schema}/{accessionId}", _object.get_object), - web.delete("/objects/{schema}/{accessionId}", _object.delete_object), + # retrieve schema and informations about it + web.get("/schemas", _schema.get_schema_types), + web.get("/schemas/{schema}", _schema.get_json_schema), + # metadata objects operations web.get("/objects/{schema}", _object.query_objects), web.post("/objects/{schema}", _object.post_object), + web.get("/objects/{schema}/{accessionId}", _object.get_object), web.put("/objects/{schema}/{accessionId}", _object.put_object), + web.patch("/objects/{schema}/{accessionId}", _object.patch_object), + web.delete("/objects/{schema}/{accessionId}", _object.delete_object), + # drafts objects operations + web.post("/drafts/{schema}", _object.post_object), web.get("/drafts/{schema}/{accessionId}", _object.get_object), web.put("/drafts/{schema}/{accessionId}", _object.put_object), web.patch("/drafts/{schema}/{accessionId}", _object.patch_object), - web.patch("/objects/{schema}/{accessionId}", _object.patch_object), web.delete("/drafts/{schema}/{accessionId}", _object.delete_object), - web.post("/drafts/{schema}", _object.post_object), + # template objects operations + web.get("/templates", _template.get_templates), + web.post("/templates/{schema}", _template.post_template), + web.get("/templates/{schema}/{accessionId}", _template.get_template), + web.patch("/templates/{schema}/{accessionId}", _template.patch_template), + web.delete("/templates/{schema}/{accessionId}", _template.delete_template), + # folders/submissions operations web.get("/folders", _folder.get_folders), web.post("/folders", _folder.post_folder), web.get("/folders/{folderId}", _folder.get_folder), web.patch("/folders/{folderId}", _folder.patch_folder), web.delete("/folders/{folderId}", _folder.delete_folder), + # publish submissions web.patch("/publish/{folderId}", _folder.publish_folder), + # users operations web.get("/users/{userId}", _user.get_user), - web.patch("/users/{userId}", _user.patch_user), web.delete("/users/{userId}", _user.delete_user), + # submit web.post("/submit", _submission.submit), + # validate web.post("/validate", _submission.validate), ] server.router.add_routes(api_routes) diff --git a/requirements-dev.in b/requirements-dev.in new file mode 100644 index 000000000..eff2402df --- /dev/null +++ b/requirements-dev.in @@ -0,0 +1,8 @@ +aiofiles # to run integration tests +black +certifi +flake8 +mypy +pip-tools # pip depedencies management +pre-commit +tox diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 000000000..292e458a7 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,91 @@ +# +# This file is autogenerated by pip-compile with python 3.8 +# To update, run: +# +# pip-compile requirements-dev.in +# +aiofiles==0.8.0 + # via -r requirements-dev.in +black==22.3.0 + # via -r requirements-dev.in +certifi==2021.10.8 + # via -r requirements-dev.in +cfgv==3.3.1 + # via pre-commit 
+click==8.1.1 + # via + # black + # pip-tools +distlib==0.3.4 + # via virtualenv +filelock==3.6.0 + # via + # tox + # virtualenv +flake8==4.0.1 + # via -r requirements-dev.in +identify==2.4.12 + # via pre-commit +mccabe==0.6.1 + # via flake8 +mypy==0.942 + # via -r requirements-dev.in +mypy-extensions==0.4.3 + # via + # black + # mypy +nodeenv==1.6.0 + # via pre-commit +packaging==21.3 + # via tox +pathspec==0.9.0 + # via black +pep517==0.12.0 + # via pip-tools +pip-tools==6.5.1 + # via -r requirements-dev.in +platformdirs==2.5.1 + # via + # black + # virtualenv +pluggy==1.0.0 + # via tox +pre-commit==2.18.1 + # via -r requirements-dev.in +py==1.11.0 + # via tox +pycodestyle==2.8.0 + # via flake8 +pyflakes==2.4.0 + # via flake8 +pyparsing==3.0.7 + # via packaging +pyyaml==6.0 + # via pre-commit +six==1.16.0 + # via + # tox + # virtualenv +toml==0.10.2 + # via + # pre-commit + # tox +tomli==2.0.1 + # via + # black + # mypy + # pep517 +tox==3.24.5 + # via -r requirements-dev.in +typing-extensions==4.1.1 + # via mypy +virtualenv==20.14.0 + # via + # pre-commit + # tox +wheel==0.37.1 + # via pip-tools + +# The following packages are considered to be unsafe in a requirements file: +# pip +# setuptools diff --git a/requirements.in b/requirements.in new file mode 100644 index 000000000..8b6c2f4d0 --- /dev/null +++ b/requirements.in @@ -0,0 +1,11 @@ +aiohttp +cryptography +gunicorn +jsonschema +motor +python-dateutil +requests +uvloop +xmlschema +ujson +oidcrp diff --git a/requirements.txt b/requirements.txt index 191c5e38a..7efab8023 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,103 @@ -aiohttp==3.7.4.post0 -cryptography==3.4.8 +# +# This file is autogenerated by pip-compile with python 3.8 +# To update, run: +# +# pip-compile requirements.in +# +aiohttp==3.8.1 + # via -r requirements.in +aiosignal==1.2.0 + # via aiohttp +async-timeout==4.0.2 + # via aiohttp +attrs==21.4.0 + # via + # aiohttp + # jsonschema +certifi==2021.10.8 + # via requests +cffi==1.15.0 + # via cryptography +charset-normalizer==2.0.12 + # via + # aiohttp + # requests +cryptography==36.0.2 + # via + # -r requirements.in + # cryptojwt + # pyopenssl +cryptojwt==1.6.1 + # via oidcmsg +elementpath==2.5.0 + # via xmlschema +filelock==3.6.0 + # via oidcmsg +frozenlist==1.3.0 + # via + # aiohttp + # aiosignal gunicorn==20.1.0 -jsonschema==3.2.0 + # via -r requirements.in +idna==3.3 + # via + # requests + # yarl +importlib-resources==5.6.0 + # via jsonschema +jsonschema==4.4.0 + # via -r requirements.in motor==2.5.1 + # via -r requirements.in +multidict==6.0.2 + # via + # aiohttp + # yarl +oidcmsg==1.6.0 + # via oidcrp +oidcrp==2.1.4 + # via -r requirements.in +pycparser==2.21 + # via cffi +pymongo==3.12.3 + # via motor +pyopenssl==22.0.0 + # via oidcmsg +pyrsistent==0.18.1 + # via jsonschema python-dateutil==2.8.2 + # via -r requirements.in +pyyaml==6.0 + # via + # oidcmsg + # oidcrp +readerwriterlock==1.0.9 + # via cryptojwt +requests==2.27.1 + # via + # -r requirements.in + # cryptojwt + # responses +responses==0.20.0 + # via oidcrp +six==1.16.0 + # via python-dateutil +typing-extensions==4.1.1 + # via readerwriterlock +ujson==5.1.0 + # via -r requirements.in +urllib3==1.26.9 + # via + # requests + # responses uvloop==0.16.0 -xmlschema==1.7.0 -Authlib==0.15.4 + # via -r requirements.in +xmlschema==1.10.0 + # via -r requirements.in +yarl==1.7.2 + # via aiohttp +zipp==3.7.0 + # via importlib-resources + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git 
a/scripts/init_mongo.js b/scripts/init_mongo.js new file mode 100644 index 000000000..e93e9ee53 --- /dev/null +++ b/scripts/init_mongo.js @@ -0,0 +1,17 @@ +// script to create default database collections and indexes +// on container start up + +db = new Mongo().getDB("default"); + +db.createCollection('user', { capped: false }); +db.createCollection('folder', { capped: false }); +db.folder.createIndex({ "dateCreated": -1 }); +db.folder.createIndex({ "datePublished": -1 }); +db.folder.createIndex({ "folderId": 1 }, { unique: true }); +db.user.createIndex({ "userId": 1 }, { unique: true }); +db.folder.createIndex( + { + text_name: "text", + } +) +db.folder.getIndexes() diff --git a/scripts/install-hooks.sh b/scripts/install-hooks.sh index 76a54b267..9cdeebf23 100755 --- a/scripts/install-hooks.sh +++ b/scripts/install-hooks.sh @@ -1,5 +1,5 @@ #!/bin/sh # Install pre-commit hook by running ./install-hooks.sh git_root=$(git rev-parse --show-toplevel) -ln -s $git_root/scripts/pre-commit.sh $git_root/.git/hooks/pre-commit +ln -s "$git_root"/scripts/pre-commit.sh "$git_root/.git/hooks/pre-commit" echo "Symlinked pre-commit hook!" diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh index 1e008ceb1..6c52ba2ad 100755 --- a/scripts/pre-commit.sh +++ b/scripts/pre-commit.sh @@ -3,9 +3,11 @@ # Comment out pre-commit hooks you don't want to use echo "Running tox as a pre-commit hook" -cd $(git rev-parse --show-toplevel) && tox -p auto +root_dir=$(git rev-parse --show-toplevel) -if [ $? -ne 0 ]; then +cd "$root_dir" || exit 1 + +if ! tox -r -p auto ; then echo "==============================" echo "Tests must pass before commit!" echo "Note: Tox also checks non-staged changes, so you might need to stash @@ -13,17 +15,17 @@ if [ $? -ne 0 ]; then exit 1 fi -command -v misspell > /dev/null 2>&1 || echo "Misspell not installed, not running as pre-commit hook" && exit 0 -echo "Running misspell as a pre-commit hook" -# Checking misspell against files and folder not in .gitignore -files=$(git ls-tree HEAD | awk '{print $4}' | tr '\n' ' ') -output=$(cd $(git rev-parse --show-toplevel) && misspell $files) +if ! command -v pyspelling > /dev/null 2>&1; then + echo "pyspelling not installed, not running as pre-commit hook" + exit 0 +fi + +echo "Running pyspelling as a pre-commit hook" +# Checking pyspelling against files and folder not in .gitignore -if [[ $output ]]; then +if ! pyspelling -v -c "$root_dir/.github/config/.spellcheck.yml"; then echo "==============================" echo "Check your spelling errors before commit!"
- echo "You had following errors:" - echo $output - echo "To fix errors with one command, run: misspell -w $files" + echo "To fix errors with one command, run: pyspelling -v -c $root_dir/.github/config/.spellcheck.yml" exit 1 fi diff --git a/setup.py b/setup.py index b336b3302..d13bedbd6 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,8 @@ packages=find_packages(exclude=["tests"]), install_requires=requirements, extras_require={ - "test": ["coverage==5.5", "coveralls==3.2.0", "pytest==6.2.4", "pytest-cov==2.12.1", "tox==3.24.3"], - "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==0.5.2"], + "test": ["coverage==6.3.2", "pytest==7.1.1", "pytest-cov==3.0.0", "tox==3.24.5"], + "docs": ["sphinx >= 1.4", "sphinx_rtd_theme==1.0.0"], }, package_data={ "": [ @@ -40,7 +40,8 @@ "frontend/static/js/*", "frontend/static/media/*", "frontend/static/css/*", - "conf/ena_schemas.json", + "conf/schemas.json", + "conf/metax_references/*.json", ] }, include_package_data=True, diff --git a/tests/coveralls.py b/tests/coveralls.py deleted file mode 100644 index 10ee5e278..000000000 --- a/tests/coveralls.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/env/python - -"""Coveralls settings for travis and local usage.""" - -import os -import sys -from subprocess import call - -if __name__ == "__main__": - if "COVERALLS_REPO_TOKEN" in os.environ: - rc = call("coveralls") - sys.stdout.write("Coveralls report from Github Actions.\n") - raise SystemExit(rc) - else: - sys.stdout.write("Not on Github Actions.\n") diff --git a/tests/integration/clean_db.py b/tests/integration/clean_db.py index 537f99e08..7ba2e82f1 100644 --- a/tests/integration/clean_db.py +++ b/tests/integration/clean_db.py @@ -3,15 +3,20 @@ To be utilised mostly for integration tests """ -from motor.motor_asyncio import AsyncIOMotorClient +import argparse import asyncio import logging -import argparse +import os + +from motor.motor_asyncio import AsyncIOMotorClient serverTimeout = 15000 connectTimeout = 15000 # === Global vars === +DATABASE = os.getenv("MONGO_DATABASE", "default") +AUTHDB = os.getenv("MONGO_AUTHDB", "admin") +HOST = os.getenv("MONGO_HOST", "localhost") FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") LOG = logging.getLogger(__name__) @@ -26,20 +31,40 @@ def create_db_client(url: str) -> AsyncIOMotorClient: return AsyncIOMotorClient(url, connectTimeoutMS=connectTimeout, serverSelectionTimeoutMS=serverTimeout) +async def purge_mongodb(url: str) -> None: + """Erase database.""" + client = create_db_client(url) + LOG.debug(f"current databases: {*await client.list_database_names(),}") + LOG.debug("=== Drop curent database ===") + await client.drop_database(DATABASE) + LOG.debug("=== DONE ===") + + async def clean_mongodb(url: str) -> None: """Clean Collection and recreate it.""" client = create_db_client(url) - LOG.debug(f"current databases: {*await client.list_database_names(),}") - LOG.debug("=== Drop any existing database ===") - await client.drop_database("default") + db = client[DATABASE] + LOG.debug(f"Database to clear: {DATABASE}") + collections = await db.list_collection_names() + LOG.debug(f"=== Collections to be cleared: {collections} ===") + LOG.debug("=== Delete all documents in all collections ===") + for col in collections: + x = await db[col].delete_many({}) + LOG.debug(f"{x.deleted_count}{' documents deleted'}\t{'from '}{col}") + LOG.debug("=== DONE ===") if __name__ == "__main__": parser = 
argparse.ArgumentParser(description="Process some integers.") parser.add_argument("--tls", action="store_true", help="add tls configuration") + parser.add_argument("--purge", action="store_true", help="destroy database") args = parser.parse_args() - url = url = "mongodb://admin:admin@localhost:27017/default?authSource=admin" + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin" if args.tls: _params = "?tls=true&tlsCAFile=./config/cacert&ssl_keyfile=./config/key&ssl_certfile=./config/cert" - url = f"mongodb://admin:admin@localhost:27017/default{_params}&authSource=admin" - asyncio.run(clean_mongodb(url)) + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}{_params}&authSource=admin" + LOG.debug(f"=== Database url {url} ===") + if args.purge: + asyncio.run(purge_mongodb(url)) + else: + asyncio.run(clean_mongodb(url)) diff --git a/tests/integration/mock_auth.py b/tests/integration/mock_auth.py index 1acb9897f..be1edfd3e 100644 --- a/tests/integration/mock_auth.py +++ b/tests/integration/mock_auth.py @@ -1,13 +1,22 @@ """Mock OAUTH2 aiohttp.web server.""" +import logging +import urllib +from os import getenv +from time import time +from typing import Tuple + from aiohttp import web +from authlib.jose import jwk, jwt +from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import rsa -from cryptography.hazmat.backends import default_backend -from authlib.jose import jwt, jwk -from typing import Tuple -import urllib -import logging + +FORMAT = "[%(asctime)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") + +LOG = logging.getLogger("server") +LOG.setLevel(getenv("LOG_LEVEL", "INFO")) def generate_token() -> Tuple: @@ -30,24 +39,31 @@ def generate_token() -> Tuple: return (public_jwk, private_jwk) -nonce = "" +# oidcrp is strict about iat, exp, ttl, so we can't hard code them +iat = int(time()) +ttl = 3600 +exp = iat + ttl + +nonce = "nonce" jwk_pair = generate_token() -user_eppn = "" -user_given_name = "" -user_family_name = "" +user_sub = "test@test.example" +user_given_name = "User" +user_family_name = "test" -header = {"jku": "http://mockauth:8000/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} +mock_auth_url_docker = getenv("OIDC_URL", "http://mockauth:8000") # called from inside docker-network +mock_auth_url_local = getenv("OIDC_URL_TEST", "http://localhost:8000") # called from local machine + +header = {"jku": f"{mock_auth_url_docker}/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} async def setmock(req: web.Request) -> web.Response: """Auth endpoint.""" - global user_eppn, user_family_name, user_given_name - user_eppn = req.query["eppn"] + global user_sub, user_family_name, user_given_name + user_sub = req.query["sub"] user_family_name = req.query["family"] user_given_name = req.query["given"] - - logging.info(user_eppn, user_family_name, user_given_name) + LOG.info(f"{mock_auth_url_local}: {user_sub}, {user_family_name}, {user_given_name}") return web.HTTPOk() @@ -63,7 +79,7 @@ async def auth(req: web.Request) -> web.Response: callback_url = req.query["redirect_uri"] url = f"{callback_url}?{urllib.parse.urlencode(params)}" - logging.info(url) + LOG.info(url) response = web.HTTPSeeOther(url) return response @@ -71,32 +87,40 @@ async def auth(req: web.Request) -> web.Response: async def token(req: web.Request) -> web.Response: """Auth endpoint.""" - global nonce, user_eppn, 
user_family_name, user_given_name + global nonce, user_sub, user_family_name, user_given_name + # oidcrp is strict about iat, exp, ttl, so we can't hard code them + iat = int(time()) + ttl = 3600 + exp = iat + ttl id_token = { "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "sub": "smth", "eduPersonAffiliation": "member;staff", - "eppn": user_eppn, + "sub": user_sub, "displayName": f"{user_given_name} {user_family_name}", - "iss": "http://mockauth:8000", + "iss": mock_auth_url_docker, "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", "given_name": user_given_name, "nonce": nonce, "aud": "aud2", - "acr": "http://mockauth:8000/LoginHaka", + "acr": f"{mock_auth_url_docker}/LoginHaka", "nsAccountLock": "false", "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, + "auth_time": iat, "name": f"{user_given_name} {user_family_name}", "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, + "exp": exp, + "iat": iat, "family_name": user_family_name, - "email": user_eppn, + "email": user_sub, + } + data = { + "access_token": "test", + "id_token": jwt.encode(header, id_token, jwk_pair[1]).decode("utf-8"), + "token_type": "Bearer", + "expires_in": ttl, } - data = {"access_token": "test", "id_token": jwt.encode(header, id_token, jwk_pair[1]).decode("utf-8")} - logging.info(data) + LOG.info(data) return web.json_response(data) @@ -107,35 +131,124 @@ async def jwk_response(request: web.Request) -> web.Response: keys[0]["kid"] = "rsa1" data = {"keys": keys} - logging.info(data) + LOG.info(data) return web.json_response(data) async def userinfo(request: web.Request) -> web.Response: """Mock an authentication to ELIXIR AAI for GA4GH claims.""" - global nonce, user_eppn, user_family_name, user_given_name + global nonce, user_sub, user_family_name, user_given_name user_info = { - "sub": "smth", "eduPersonAffiliation": "member;staff", - "eppn": user_eppn, + "sub": user_sub, "displayName": f"{user_given_name} {user_family_name}", "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", "given_name": user_given_name, - "uid": user_eppn, + "uid": user_sub, "nsAccountLock": "false", "eduPersonScopedAffiliation": "staff@test.what;member@test.what", "name": f"{user_given_name} {user_family_name}", "schacHomeOrganization": "test.what", "family_name": user_family_name, - "email": user_eppn, + "email": user_sub, + "sdSubmitProjects": "1000 2000 3000", } - logging.info(user_info) + LOG.info(user_info) return web.json_response(user_info) +async def oidc_config(request: web.Request) -> web.Response: + """Return standard OIDC configuration.""" + oidc_config_json = { + "issuer": mock_auth_url_docker, + "authorization_endpoint": f"{mock_auth_url_local}/authorize", + "token_endpoint": f"{mock_auth_url_docker}/token", + "userinfo_endpoint": f"{mock_auth_url_docker}/userinfo", + "jwks_uri": f"{mock_auth_url_docker}/keyset", + "response_types_supported": [ + "code", + "id_token", + "token id_token", + "code id_token", + "code token", + "code token id_token", + ], + "subject_types_supported": ["public", "pairwise"], + "grant_types_supported": [ + "authorization_code", + "implicit", + "refresh_token", + "urn:ietf:params:oauth:grant-type:device_code", + ], + "id_token_encryption_alg_values_supported": [ + "RSA1_5", + "RSA-OAEP", + "RSA-OAEP-256", + "A128KW", + "A192KW", + "A256KW", + "A128GCMKW", + "A192GCMKW", + "A256GCMKW", + ], + "id_token_encryption_enc_values_supported": ["A128CBC-HS256"], + "id_token_signing_alg_values_supported": 
["RS256", "RS384", "RS512", "HS256", "HS384", "HS512", "ES256"], + "userinfo_encryption_alg_values_supported": [ + "RSA1_5", + "RSA-OAEP", + "RSA-OAEP-256", + "A128KW", + "A192KW", + "A256KW", + "A128GCMKW", + "A192GCMKW", + "A256GCMKW", + ], + "userinfo_encryption_enc_values_supported": ["A128CBC-HS256"], + "userinfo_signing_alg_values_supported": ["RS256", "RS384", "RS512", "HS256", "HS384", "HS512", "ES256"], + "request_object_signing_alg_values_supported": [ + "none", + "RS256", + "RS384", + "RS512", + "HS256", + "HS384", + "HS512", + "ES256", + "ES384", + "ES512", + ], + "token_endpoint_auth_methods_supported": [ + "client_secret_basic", + "client_secret_post", + "client_secret_jwt", + "private_key_jwt", + ], + "claims_parameter_supported": True, + "request_parameter_supported": True, + "request_uri_parameter_supported": False, + "require_request_uri_registration": False, + "display_values_supported": ["page"], + "scopes_supported": ["openid"], + "response_modes_supported": ["query", "fragment", "form_post"], + "claims_supported": [ + "aud", + "iss", + "sub", + "iat", + "exp", + "acr", + "auth_time", + "ga4gh_passport_v1", + "remoteUserIdentifier", + ], + } + return web.json_response(oidc_config_json) + + def init() -> web.Application: """Start server.""" app = web.Application() @@ -144,6 +257,7 @@ def init() -> web.Application: app.router.add_post("/token", token) app.router.add_get("/keyset", jwk_response) app.router.add_get("/userinfo", userinfo) + app.router.add_get("/.well-known/openid-configuration", oidc_config) return app diff --git a/tests/integration/mock_doi_api.py b/tests/integration/mock_doi_api.py new file mode 100644 index 000000000..4d8e045b9 --- /dev/null +++ b/tests/integration/mock_doi_api.py @@ -0,0 +1,169 @@ +"""Mock aiohttp.web server for DOI API calls.""" + +import json +import logging +from datetime import date, datetime +from os import getenv +import collections.abc + +from aiohttp import web + +FORMAT = "[%(asctime)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") + +LOG = logging.getLogger("server") +LOG.setLevel(getenv("LOG_LEVEL", "INFO")) + +BASE_RESPONSE = { + "data": { + "id": "10.xxxx/yyyy", + "type": "dois", + "attributes": { + "doi": "10.xxxx/yyyy", + "prefix": "10.xxxx", + "suffix": "yyyy", + "identifiers": [{"identifier": "https://mock_doi.org/10.xxxx/yyyy", "identifierType": "DOI"}], + "creators": [], + "titles": [], + "publisher": None, + "container": {}, + "publicationYear": date.today().year, + "subjects": [], + "contributors": [], + "dates": [], + "language": None, + "types": {}, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": None, + "rightsList": [], + "descriptions": [], + "geoLocations": [], + "fundingReferences": [], + "xml": None, + "url": None, + "contentUrl": None, + "metadataVersion": 1, + "schemaVersion": "https://schema.datacite.org/meta/kernel-4", + "source": None, + "isActive": None, + "state": "draft", + "reason": None, + "created": "", + "registered": None, + "updated": "", + }, + "relationships": { + "client": {"data": {"id": "datacite.datacite", "type": "clients"}}, + "media": {"data": []}, + }, + }, + "included": [ + { + "id": "mockcite.mockcite", + "type": "clients", + "attributes": { + "name": "MockCite", + "symbol": "MOCKCITE.MOCKCITE", + "year": date.today().year, + "contactName": "MockCite", + "contactEmail": "support@mock_cite.org", + "description": None, + "domains": "*", + "url": None, + "created": "2010-01-01 
12:00:00.000", + "updated": str(datetime.utcnow()), + "isActive": True, + "hasPassword": True, + }, + "relationships": { + "provider": {"data": {"id": "mockcite", "type": "providers"}}, + "prefixes": {"data": [{"id": "10.xxxx", "type": "prefixes"}]}, + }, + } + ], +} + + +def update_dict(d, u): + """Update values in a dictionary with values from another dictionary.""" + for k, v in u.items(): + if isinstance(v, collections.abc.Mapping): + d[k] = update_dict(d.get(k, {}), v) + else: + d[k] = v + return d + + +async def create(req: web.Request) -> web.Response: + """DOI draft creation endpoint.""" + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.info(reason) + raise web.HTTPBadRequest(reason=reason) + + data = BASE_RESPONSE + try: + _doi = content["data"]["attributes"]["doi"] + data["data"]["id"] = content["data"]["attributes"]["doi"] + data["data"]["attributes"]["doi"] = _doi + data["data"]["attributes"]["prefix"] = _doi.split("/")[0] + data["data"]["attributes"]["suffix"] = _doi.split("/")[1] + data["data"]["attributes"]["identifiers"] = [ + {"identifier": f"https://mock_doi.org/{content['data']['attributes']['doi']}", "identifierType": "DOI"} + ] + except Exception as e: + reason = f"Provided payload did not include required attributes: {e}" + LOG.info(reason) + raise web.HTTPBadRequest(reason=reason) + + data["data"]["attributes"]["created"] = str(datetime.utcnow()) + data["data"]["attributes"]["updated"] = str(datetime.utcnow()) + data["included"][0]["attributes"]["created"] = str(datetime.utcnow()) + data["included"][0]["attributes"]["updated"] = str(datetime.utcnow()) + + return web.json_response(data, status=201) + + +async def update(req: web.Request) -> web.Response: + """DOI update endpoint.""" + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. 
See: {e}" + LOG.info(reason) + raise web.HTTPBadRequest(reason=reason) + + data = BASE_RESPONSE + data["data"]["attributes"]["updated"] = str(datetime.utcnow()) + data["included"][0]["attributes"]["updated"] = str(datetime.utcnow()) + try: + data = update_dict(data, content) + except Exception as e: + reason = f"Provided payload did not include required attributes: {e}" + LOG.info(reason) + raise web.HTTPBadRequest(reason=reason) + + return web.json_response(data, status=200) + + +async def delete(req: web.Request) -> web.Response: + """DOI delete endpoint.""" + + return web.json_response(status=204) + + +def init() -> web.Application: + """Start server.""" + app = web.Application() + app.router.add_post("/dois", create) + app.router.add_put("/dois/{id:.*}", update) + app.router.add_delete("/dois/{id:.*}", delete) + return app + + +if __name__ == "__main__": + web.run_app(init(), port=8001) diff --git a/tests/integration/mock_metax_api.py b/tests/integration/mock_metax_api.py new file mode 100644 index 000000000..c37cd8295 --- /dev/null +++ b/tests/integration/mock_metax_api.py @@ -0,0 +1,368 @@ +"""Mock aiohttp.web server for Metax API calls.""" + +import json +import logging +import os +from datetime import datetime +from typing import Dict +from uuid import uuid4 + +import ujson +from aiohttp import web + +FORMAT = "[%(asctime)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") + +LOG = logging.getLogger("server") +LOG.setLevel(os.getenv("LOG_LEVEL", "DEBUG")) + +# Example error responds from Metax +# { +# "detail": [ +# "Specified organization object does not have a name. If you are using an org identifier from reference data, \ +# then the name will be populated automatically. If your org identifier is not from reference data, \ +# you must provide the organization name. The object that caused the error: {'@type': 'Organization'}" +# ], +# "error_identifier": "2022-01-21T10:27:02-02ad2e36", +# } +# { +# "detail": "[ErrorDetail(string=\"'creator' is a required property., code='invalid')]", +# "error_identifier": "2022-01-21T10:27:02-02ad2e36", +# } + +# mimic db for saved datasets, volatile!! +drafts = {} +published = {} + + +async def get_dataset(req: web.Request) -> web.Response: + """Mock endpoint for retrieving Metax dataset. + + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + metax_id = req.match_info["metax_id"] + LOG.info(f"Retrieving Metax dataset {metax_id}") + if not metax_id: + LOG.error("Query params missing Metax ID.") + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + stuff = list(drafts.keys()) + list(published.keys()) + if metax_id not in stuff: + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + try: + content = drafts[metax_id] + except KeyError: + content = published[metax_id] + + LOG.debug(f"Found {content['state']} dataset {content['identifier']} with data: {content}") + return web.Response( + body=ujson.dumps(content, escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + +async def post_dataset(req: web.Request) -> web.Response: + """Mock endpoint for creating draft Metax dataset. 
+ + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Creating Metax dataset") + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + validate_data(content) + + content["research_dataset"]["preferred_identifier"] = f"draft:{str(uuid4())}" + metax_id = str(uuid4()) + metax_additions = { + "identifier": metax_id, + "preservation_state": 0, + "state": "draft", + "use_doi_for_published": False, + "cumulative_state": 0, + "api_meta": {"version": 2}, + "date_created": f"{datetime.now()}", + "service_created": "sd", + "removed": False, + } + resp_data = dict(content, **metax_additions) + drafts[metax_id] = resp_data + LOG.info(f'Created Metax dataset with identifier {resp_data["identifier"]}') + return web.Response( + body=ujson.dumps(resp_data, escape_forward_slashes=False), + status=201, + content_type="application/json", + ) + + +async def update_dataset(req: web.Request) -> web.Response: + """Mock endpoint for updating Metax dataset. + + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Updating Metax dataset") + metax_id = req.match_info["metax_id"] + if not metax_id: + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + validate_data(content) + + for key, value in content.items(): + drafts[metax_id][key] = value + + drafts[metax_id]["date_modified"] = str(datetime.now()) + + LOG.info(f'Updated Metax dataset with identifier {drafts[metax_id]["identifier"]}') + return web.Response( + body=ujson.dumps(drafts[metax_id], escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + +async def patch_datasets(req: web.Request) -> web.Response: + """Mock endpoint for patching bulk Metax datasets. + + :params req: HTTP request with data for Metax datasets + :return: HTTP response with IDs of patched Metax datasets and possible errors + """ + LOG.info("Patching Metax datasets") + + success = [] + failed = [] + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. 
See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + for dataset in content: + try: + metax_id = dataset["identifier"] + _ = dataset["research_dataset"]["preferred_identifier"] + except KeyError: + raise web.HTTPBadRequest( + reason={ + "detail": "Dataset is missing required identifiers", + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + reason = f"No dataset found with identifier {metax_id}" + LOG.error(reason) + failed.append( + { + "object": { + "detail": reason, + "error_identifier": datetime.now(), + } + } + ) + continue + + for key, value in dataset.items(): + drafts[metax_id][key] = value + + drafts[metax_id]["date_modified"] = str(datetime.now()) + success.append({"object": drafts[metax_id]}) + + LOG.info("Metax datasets patched") + body = {"success": success, "failed": failed} + return web.Response( + body=ujson.dumps(body, escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + +async def patch_dataset(req: web.Request) -> web.Response: + """Mock endpoint for patching Metax dataset. + + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Patching Metax dataset") + metax_id = req.match_info["metax_id"] + if not metax_id: + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + + try: + content = await req.json() + except json.decoder.JSONDecodeError as e: + reason = f"JSON is not correctly formatted. See: {e}" + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest( + reason={ + "detail": reason, + "error_identifier": datetime.now(), + } + ) + for key, value in content.items(): + drafts[metax_id][key] = value + + drafts[metax_id]["date_modified"] = str(datetime.now()) + + LOG.info(f'Updated Metax dataset with identifier {drafts[metax_id]["identifier"]}') + return web.Response( + body=ujson.dumps(drafts[metax_id], escape_forward_slashes=False), + status=200, + content_type="application/json", + ) + + +async def publish_dataset(req: web.Request) -> web.Response: + """Mock endpoint for publishing Metax dataset. 
+ + :params req: HTTP request with data for Metax dataset + :return: HTTP response with mocked Metax dataset data + """ + LOG.info("Publishing Metax dataset") + metax_id = req.query.get("identifier", None) + if not metax_id: + LOG.error("Query params missing Metax ID.") + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id in published: + LOG.error(f"Dataset {metax_id} is already published.") + reason = {"detail": ["Dataset is already published."], "error_identifier": datetime.now()} + raise web.HTTPBadRequest(reason=reason) + if metax_id not in drafts.keys(): + LOG.error(f"No dataset found with identifier {metax_id}") + raise web.HTTPNotFound(reason={"detail": "Not found."}) + + data = drafts[metax_id] + validate_data(data, draft=True) + published[metax_id] = data + del drafts[metax_id] + published[metax_id]["state"] = "published" + published[metax_id]["modified"] = str(datetime.now()) + LOG.info(f"Published Metax dataset with identifier {metax_id}") + return web.Response( + body=ujson.dumps( + {"preferred_identifier": data["research_dataset"]["preferred_identifier"]}, escape_forward_slashes=False + ), + status=200, + content_type="application/json", + ) + + +async def delete_dataset(req: web.Request) -> web.Response: + """Mock endpoint for deleting Metax dataset. + + :params req: HTTP request with Metax dataset id + :return: HTTP response with HTTP status + """ + LOG.info("Deleting Metax dataset") + metax_id = req.match_info["metax_id"] + if not metax_id: + raise web.HTTPBadRequest( + reason={ + "detail": ["Query params missing Metax ID."], + "error_identifier": datetime.now(), + } + ) + if metax_id not in drafts.keys(): + raise web.HTTPNotFound(reason={"detail": "Not found."}) + else: + del drafts[metax_id] + LOG.info(f"Deleted Metax dataset with identifier {metax_id}") + return web.HTTPNoContent() + + +def validate_data(data: Dict, draft=True) -> None: + """Check for required fields in dataset. 
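+ Raises HTTPBadRequest with a JSON-formatted reason when a required top-level or research_dataset field is missing.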
+ + :param data: Metax data to be validated + :param draft: Indicator if dataset needs to be validated as draft or not; default true + """ + LOG.info("Validating payload") + + required = ["data_catalog", "metadata_provider_org", "metadata_provider_user", "research_dataset"] + rd_required = ["title", "description", "preferred_identifier", "access_rights", "publisher"] + + if not draft: + rd_required = rd_required + ["creator"] + if not all(key in data.keys() for key in required): + reason = {"detail": [f"Dataset did not include all required fields: {', '.join(required)}."]} + reason = json.dumps(reason) + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest(reason=reason, content_type="application/json") + if not all(key in data["research_dataset"].keys() for key in rd_required): + reason = {"detail": [f"Research dataset did not include all required fields: {', '.join(rd_required)}."]} + reason = json.dumps(reason) + LOG.error(f"Error while validating payload: {reason}") + raise web.HTTPBadRequest(reason=reason, content_type="application/json") + + +def init() -> web.Application: + """Start server.""" + app = web.Application() + api_routes = [ + web.post("/rest/v2/datasets", post_dataset), + web.put("/rest/v2/datasets/{metax_id}", update_dataset), + web.delete("/rest/v2/datasets/{metax_id}", delete_dataset), + web.post("/rpc/v2/datasets/publish_dataset", publish_dataset), + web.get("/rest/v2/datasets/{metax_id}", get_dataset), + web.patch("/rest/v2/datasets", patch_datasets), + web.patch("/rest/v2/datasets/{metax_id}", patch_dataset), + ] + app.router.add_routes(api_routes) + LOG.info("Metax mock API started") + return app + + +if __name__ == "__main__": + web.run_app(init(), port=8002) diff --git a/tests/integration/mongo_indexes.py b/tests/integration/mongo_indexes.py new file mode 100755 index 000000000..bcde73f72 --- /dev/null +++ b/tests/integration/mongo_indexes.py @@ -0,0 +1,72 @@ +"""Create MongoDB default collections and indexes.""" + +import argparse +import asyncio +import logging +import os + +import pymongo +from motor.motor_asyncio import AsyncIOMotorClient +from pymongo import TEXT + +serverTimeout = 15000 +connectTimeout = 15000 + +# === Global vars === +DATABASE = os.getenv("MONGO_DATABASE", "default") +AUTHDB = os.getenv("MONGO_AUTHDB", "admin") +HOST = os.getenv("MONGO_HOST", "admin") +FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" +logging.basicConfig(format=FORMAT, datefmt="%Y-%m-%d %H:%M:%S") +LOG = logging.getLogger(__name__) +LOG.setLevel(logging.DEBUG) + + +def create_db_client(url: str) -> AsyncIOMotorClient: + """Initialize database client for AioHTTP App. 
+ + :returns: Coroutine-based Motor client for Mongo operations + """ + return AsyncIOMotorClient(url, connectTimeoutMS=connectTimeout, serverSelectionTimeoutMS=serverTimeout) + + +async def create_indexes(url: str) -> None: + """Clean Collection and recreate it.""" + client = create_db_client(url) + db = client[DATABASE] + LOG.debug(f"Current database: {db}") + LOG.debug("=== Create collections ===") + for col in {"folder", "user"}: + try: + await db.create_collection(col) + except pymongo.errors.CollectionInvalid as e: + LOG.debug(f"=== Collection {col} not created due to {str(e)} ===") + pass + LOG.debug("=== Create indexes ===") + + indexes = [ + db.folder.create_index([("dateCreated", -1)]), + db.folder.create_index([("folderId", 1)], unique=True), + db.folder.create_index([("text_name", TEXT)]), + db.user.create_index([("userId", 1)], unique=True), + ] + + for index in indexes: + try: + await index + except Exception as e: + LOG.debug(f"=== Indexes not created due to {str(e)} ===") + pass + LOG.debug("=== DONE ===") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process some integers.") + parser.add_argument("--tls", action="store_true", help="add tls configuration") + args = parser.parse_args() + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin" + if args.tls: + _params = "?tls=true&tlsCAFile=./config/cacert&ssl_keyfile=./config/key&ssl_certfile=./config/cert" + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}{_params}&authSource=admin" + LOG.debug(f"=== Database url {url} ===") + asyncio.run(create_indexes(url)) diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py index f1b56c7aa..1cee4f11c 100644 --- a/tests/integration/run_tests.py +++ b/tests/integration/run_tests.py @@ -4,17 +4,21 @@ Deleting from db is currently not supported, objects added to db in different should be taken into account. 
""" - import asyncio import json import logging -from pathlib import Path +import os +import re import urllib import xml.etree.ElementTree as ET +from datetime import datetime +from pathlib import Path +from uuid import uuid4 import aiofiles import aiohttp from aiohttp import FormData +from motor.motor_asyncio import AsyncIOMotorClient # === Global vars === FORMAT = "[%(asctime)s][%(name)s][%(process)d %(processName)s][%(levelname)-8s](L:%(lineno)s) %(funcName)s: %(message)s" @@ -43,18 +47,27 @@ test_json_files = [ ("study", "SRP000539.json", "SRP000539.json"), ("sample", "SRS001433.json", "SRS001433.json"), + ("dataset", "dataset.json", "dataset.json"), ("run", "ERR000076.json", "ERR000076.json"), ("experiment", "ERX000119.json", "ERX000119.json"), ("analysis", "ERZ266973.json", "ERZ266973.json"), ] -base_url = "http://localhost:5430" -mock_auth_url = "http://localhost:8000" +base_url = os.getenv("BASE_URL", "http://localhost:5430") +mock_auth_url = os.getenv("OIDC_URL_TEST", "http://localhost:8000") objects_url = f"{base_url}/objects" drafts_url = f"{base_url}/drafts" +templates_url = f"{base_url}/templates" folders_url = f"{base_url}/folders" users_url = f"{base_url}/users" submit_url = f"{base_url}/submit" publish_url = f"{base_url}/publish" +metax_url = f"{os.getenv('METAX_URL', 'http://localhost:8002')}/rest/v2/datasets" +auth = aiohttp.BasicAuth(os.getenv("METAX_USER", "sd"), os.getenv("METAX_PASS", "test")) +# to form direct contact to db with create_folder() +DATABASE = os.getenv("MONGO_DATABASE", "default") +AUTHDB = os.getenv("MONGO_AUTHDB", "admin") +HOST = os.getenv("MONGO_HOST", "localhost:27017") +TLS = os.getenv("MONGO_SSL", False) user_id = "current" test_user_given = "Given" @@ -67,10 +80,10 @@ # === Helper functions === -async def login(sess, eppn, given, family): +async def login(sess, sub, given, family): """Mock login.""" params = { - "eppn": eppn, + "sub": sub, "family": family, "given": given, } @@ -83,6 +96,18 @@ async def login(sess, eppn, given, family): LOG.debug("Doing mock user login") +async def get_user_data(sess): + """Get current logged in user's data model. + + :param sess: HTTP session in which request call is made + """ + async with sess.get(f"{base_url}/users/current") as resp: + LOG.debug("Get userdata") + ans = await resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" + return ans + + async def create_request_data(schema, filename): """Create request data from pairs of schemas and filenames. 
@@ -93,7 +118,8 @@ async def create_request_data(schema, filename): path_to_file = testfiles_root / schema / filename path = path_to_file.as_posix() async with aiofiles.open(path, mode="r") as f: - request_data.add_field(schema.upper(), await f.read(), filename=filename, content_type="text/xml") + c_type = "text/xml" if filename[-3:] == "xml" else "text/csv" + request_data.add_field(schema.upper(), await f.read(), filename=filename, content_type=c_type) return request_data @@ -107,7 +133,12 @@ async def create_multi_file_request_data(filepairs): path_to_file = testfiles_root / schema / filename path = path_to_file.as_posix() async with aiofiles.open(path, mode="r") as f: - request_data.add_field(schema.upper(), await f.read(), filename=filename, content_type="text/xml") + request_data.add_field( + schema.upper(), + await f.read(), + filename=filename, + content_type="text/xml", + ) return request_data @@ -124,7 +155,7 @@ async def create_request_json_data(schema, filename): return request_data -async def post_object(sess, schema, filename): +async def post_object(sess, schema, folder_id, filename): """Post one metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -132,14 +163,30 @@ async def post_object(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_data(schema, filename) - async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp: - LOG.debug(f"Adding new object to {schema}, via XML file {filename}") - assert resp.status == 201, "HTTP Status code error" + async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: + LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename}") + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() - return ans["accessionId"], schema + return ans if isinstance(ans, list) else ans["accessionId"], schema -async def post_object_json(sess, schema, filename): +async def post_object_expect_status(sess, schema, folder_id, filename, status): + """Post one metadata object within session, returns accessionId. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param filename: name of the file used for testing. + """ + request_data = await create_request_data(schema, filename) + async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: + LOG.debug(f"Adding new object to {schema}, via XML/CSV file {filename} and expecting status: {status}") + assert resp.status == status, f"HTTP Status code error, got {resp.status}" + if status < 400: + ans = await resp.json() + return ans if isinstance(ans, list) else ans["accessionId"], schema + + +async def post_object_json(sess, schema, folder_id, filename): """Post & put one metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -147,9 +194,9 @@ async def post_object_json(sess, schema, filename): :param filename: name of the file used for testing. 
""" request_data = await create_request_json_data(schema, filename) - async with sess.post(f"{objects_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{objects_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new object to {schema}, via JSON file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] @@ -163,10 +210,10 @@ async def delete_object(sess, schema, accession_id): """ async with sess.delete(f"{objects_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Deleting object {accession_id} from {schema}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" -async def post_draft(sess, schema, filename): +async def post_draft(sess, schema, folder_id, filename): """Post one draft metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -174,14 +221,14 @@ async def post_draft(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_data(schema, filename) - async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{drafts_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via XML file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] -async def post_draft_json(sess, schema, filename): +async def post_draft_json(sess, schema, folder_id, filename): """Post & put one metadata object within session, returns accessionId. :param sess: HTTP session in which request call is made @@ -189,14 +236,14 @@ async def post_draft_json(sess, schema, filename): :param filename: name of the file used for testing. """ request_data = await create_request_json_data(schema, filename) - async with sess.post(f"{drafts_url}/{schema}", data=request_data) as resp: + async with sess.post(f"{drafts_url}/{schema}", params={"folder": folder_id}, data=request_data) as resp: LOG.debug(f"Adding new draft object to {schema}, via JSON file {filename}") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return ans["accessionId"] -async def get_draft(sess, schema, draft_id): +async def get_draft(sess, schema, draft_id, expected_status=200): """Get and return a drafted metadata object. 
:param sess: HTTP session in which request call is made @@ -205,7 +252,7 @@ async def get_draft(sess, schema, draft_id): """ async with sess.get(f"{drafts_url}/{schema}/{draft_id}") as resp: LOG.debug(f"Checking that {draft_id} JSON exists") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == expected_status, f"HTTP Status code error, got {resp.status}" ans = await resp.json() return json.dumps(ans) @@ -221,7 +268,7 @@ async def put_draft(sess, schema, draft_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with sess.put(f"{drafts_url}/{schema}/{draft_id}", data=request_data) as resp: LOG.debug(f"Replace draft object in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == draft_id, "accession ID error" return ans_put["accessionId"] @@ -238,7 +285,24 @@ async def put_object_json(sess, schema, accession_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with sess.put(f"{objects_url}/{schema}/{accession_id}", data=request_data) as resp: LOG.debug(f"Try to replace object in {schema}") - assert resp.status == 415, "HTTP Status code error" + assert resp.status == 415, f"HTTP Status code error, got {resp.status}" + + +async def patch_object_json(sess, schema, accession_id, update_filename): + """Patch one metadata object within session, returns accessionId. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param draft_id: id of the draft + :param update_filename: name of the file used to use for updating data. + """ + request_data = await create_request_json_data(schema, update_filename) + async with sess.patch(f"{objects_url}/{schema}/{accession_id}", data=request_data) as resp: + LOG.debug(f"Try to patch object in {schema}") + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + ans_put = await resp.json() + assert ans_put["accessionId"] == accession_id, "accession ID error" + return ans_put["accessionId"] async def put_object_xml(sess, schema, accession_id, update_filename): @@ -252,7 +316,7 @@ async def put_object_xml(sess, schema, accession_id, update_filename): request_data = await create_request_data(schema, update_filename) async with sess.put(f"{objects_url}/{schema}/{accession_id}", data=request_data) as resp: LOG.debug(f"Replace object with XML data in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == accession_id, "accession ID error" return ans_put["accessionId"] @@ -269,7 +333,7 @@ async def patch_draft(sess, schema, draft_id, update_filename): request_data = await create_request_json_data(schema, update_filename) async with sess.patch(f"{drafts_url}/{schema}/{draft_id}", data=request_data) as resp: LOG.debug(f"Update draft object in {schema}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_put = await resp.json() assert ans_put["accessionId"] == draft_id, "accession ID error" return ans_put["accessionId"] @@ -284,7 +348,90 @@ async def delete_draft(sess, schema, draft_id): """ async with sess.delete(f"{drafts_url}/{schema}/{draft_id}") as resp: LOG.debug(f"Deleting draft object {draft_id} 
from {schema}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" + + +async def post_template_json(sess, schema, filename, project_id): + """Post one metadata object within session, returns accessionId. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param filename: name of the file used for testing. + :param project_id: id of the project the template belongs to + """ + request_data = await create_request_json_data(schema, filename) + request_data = json.loads(request_data) + if type(request_data) is list: + for rd in request_data: + rd["projectId"] = project_id + else: + request_data["projectId"] = project_id + request_data = json.dumps(request_data) + async with sess.post(f"{templates_url}/{schema}", data=request_data) as resp: + LOG.debug(f"Adding new template object to {schema}, via JSON file {filename}") + ans = await resp.json() + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" + if isinstance(ans, list): + return ans + else: + return ans["accessionId"] + + +async def get_templates(sess, project_id): + """Get templates from project. + + :param sess: HTTP session in which request call is made + :param project_id: id of the project the template belongs to + """ + async with sess.get(f"{templates_url}?projectId={project_id}") as resp: + LOG.debug(f"Requesting templates from project={project_id}") + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + ans = await resp.json() + LOG.debug(f"Received {len(ans)} templates") + return ans + + +async def get_template(sess, schema, template_id): + """Get and return a drafted metadata object. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param template_id: id of the draft + """ + async with sess.get(f"{templates_url}/{schema}/{template_id}") as resp: + LOG.debug(f"Checking that {template_id} JSON exists") + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + ans = await resp.json() + return json.dumps(ans) + + +async def patch_template(sess, schema, template_id, update_filename): + """Patch one metadata object within session, return accessionId. + + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param template_id: id of the draft + :param update_filename: name of the file used to use for updating data. + """ + request_data = await create_request_json_data(schema, update_filename) + async with sess.patch(f"{templates_url}/{schema}/{template_id}", data=request_data) as resp: + LOG.debug(f"Update draft object in {schema}") + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + ans_put = await resp.json() + assert ans_put["accessionId"] == template_id, "accession ID error" + return ans_put["accessionId"] + + +async def delete_template(sess, schema, template_id): + """Delete metadata object within session. 
+ + :param sess: HTTP session in which request call is made + :param schema: name of the schema (folder) used for testing + :param template_id: id of the draft + """ + async with sess.delete(f"{templates_url}/{schema}/{template_id}") as resp: + LOG.debug(f"Deleting template object {template_id} from {schema}") + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" async def post_folder(sess, data): @@ -294,9 +441,9 @@ async def post_folder(sess, data): :param data: data used to update the folder """ async with sess.post(f"{folders_url}", data=json.dumps(data)) as resp: - LOG.debug("Adding new folder") - assert resp.status == 201, "HTTP Status code error" ans = await resp.json() + assert resp.status == 201, f"HTTP Status code error {resp.status} {ans}" + LOG.debug(f"Adding new folder {ans['folderId']}") return ans["folderId"] @@ -309,7 +456,7 @@ async def patch_folder(sess, folder_id, json_patch): """ async with sess.patch(f"{folders_url}/{folder_id}", data=json.dumps(json_patch)) as resp: LOG.debug(f"Updating folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_patch = await resp.json() assert ans_patch["folderId"] == folder_id, "folder ID error" return ans_patch["folderId"] @@ -323,7 +470,7 @@ async def publish_folder(sess, folder_id): """ async with sess.patch(f"{publish_url}/{folder_id}") as resp: LOG.debug(f"Publishing folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["folderId"] == folder_id, "folder ID error" return ans["folderId"] @@ -337,7 +484,7 @@ async def delete_folder(sess, folder_id): """ async with sess.delete(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Deleting folder {folder_id}") - assert resp.status == 204, "HTTP Status code error" + assert resp.status == 204, f"HTTP Status code error, got {resp.status}" async def delete_folder_publish(sess, folder_id): @@ -348,7 +495,32 @@ async def delete_folder_publish(sess, folder_id): """ async with sess.delete(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Deleting folder {folder_id}") - assert resp.status == 401, "HTTP Status code error" + assert resp.status == 401, f"HTTP Status code error, got {resp.status}" + + +async def create_folder(data, user): + """Create new object folder to database. 
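+ Writes the folder document directly to MongoDB with motor, bypassing the API.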
+ + :param data: Data as dict to be saved to database + :param user: User id to which data is assigned + :returns: Folder id for the folder inserted to database + """ + url = f"mongodb://{AUTHDB}:{AUTHDB}@{HOST}/{DATABASE}?authSource=admin" + db_client = AsyncIOMotorClient(url, connectTimeoutMS=1000, serverSelectionTimeoutMS=1000) + database = db_client[DATABASE] + + folder_id = uuid4().hex + LOG.info(f"Creating new folder {folder_id}") + data["folderId"] = folder_id + data["text_name"] = " ".join(re.split("[\\W_]", data["name"])) + data["drafts"] = [] + data["metadataObjects"] = [] + try: + await database["folder"].insert_one(data) + return folder_id + + except Exception as e: + LOG.error(f"Folder creation failed due to {str(e)}") async def patch_user(sess, user_id, real_user_id, json_patch): @@ -361,7 +533,7 @@ """ async with sess.patch(f"{users_url}/current", data=json.dumps(json_patch)) as resp: LOG.debug(f"Updating user {real_user_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" ans_patch = await resp.json() assert ans_patch["userId"] == real_user_id, "user ID error" return ans_patch["userId"] @@ -377,7 +549,55 @@ LOG.debug(f"Deleting user {user_id}") # we expect 404 as there is no frontend assert str(resp.url) == f"{base_url}/", "redirect url user delete differs" - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" + + +def extract_folders_object(res, accession_id, draft): + """Extract object from folder metadataObjects with provided accessionId. + + :param res: JSON parsed response from folder query request + :param accession_id: accession ID of reviewed object + :returns: dict of object entry in folder + """ + object = "drafts" if draft else "metadataObjects" + actual_res = next(obj for obj in res[object] if obj["accessionId"] == accession_id) + return actual_res + + +async def check_folders_object_patch(sess, folder_id, schema, accession_id, title, filename, draft=False): + """Check that draft is added correctly to folder. + + Get draft or metadata object from the folder and assert with data + returned from object endpoint itself.
+
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder
+    :param schema: name of the schema (folder) used for testing
+    :param accession_id: accession ID of reviewed object
+    :param title: title of reviewed object
+    :param filename: name of the file used for inserting data
+    :param draft: indication of object draft status, default False
+    """
+    sub_type = "Form" if filename.split(".")[-1] == "json" else filename.split(".")[-1].upper()
+    async with sess.get(f"{folders_url}/{folder_id}") as resp:
+        res = await resp.json()
+        try:
+            actual = extract_folders_object(res, accession_id, draft)
+            expected = {
+                "accessionId": accession_id,
+                "schema": schema if not draft else f"draft-{schema}",
+                "tags": {
+                    "submissionType": sub_type,
+                    "displayTitle": title,
+                    "fileName": filename,
+                },
+            }
+            if sub_type == "Form":
+                del expected["tags"]["fileName"]
+            assert actual == expected, "actual and expected data did not match"
+        except StopIteration:
+            pass
+    return schema


 # === Integration tests ===
@@ -390,34 +610,83 @@ async def test_crud_works(sess, schema, filename, folder_id):

     :param sess: HTTP session in which request call is made
     :param schema: name of the schema (folder) used for testing
-    :param filename: name of the file used for testing.
+    :param filename: name of the file used for testing
     :param folder_id: id of the folder used to group submission
     """
-    accession_id = await post_object(sess, schema, filename)
-    patch_object = [
-        {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": schema}}
-    ]
-    await patch_folder(sess, folder_id, patch_object)
+    accession_id = await post_object(sess, schema, folder_id, filename)
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp:
         LOG.debug(f"Checking that {accession_id[0]} JSON is in {schema}")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"].get("studyTitle", "") if schema == "study" else res.get("title", "")
+    await check_folders_object_patch(sess, folder_id, schema, accession_id[0], title, filename)
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}?format=xml") as resp:
         LOG.debug(f"Checking that {accession_id[0]} XML is in {schema}")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"

     await delete_object(sess, schema, accession_id[0])
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}") as resp:
         LOG.debug(f"Checking that JSON object {accession_id[0]} was deleted")
-        assert resp.status == 404, "HTTP Status code error"
+        assert resp.status == 404, f"HTTP Status code error, got {resp.status}"
     async with sess.get(f"{objects_url}/{schema}/{accession_id[0]}?format=xml") as resp:
         LOG.debug(f"Checking that XML object {accession_id[0]} was deleted")
-        assert resp.status == 404, "HTTP Status code error"
+        assert resp.status == 404, f"HTTP Status code error, got {resp.status}"
+
+    async with sess.get(f"{folders_url}/{folder_id}") as resp:
+        LOG.debug(f"Checking that object {accession_id[0]} was deleted from folder {folder_id}")
+        res = await resp.json()
+        expected_true = not any(d["accessionId"] == accession_id[0] for d in res["metadataObjects"])
+        assert expected_true, f"object {accession_id[0]} still exists"
+
+
+async def test_csv(sess, folder_id):
+    """Test CRUD for a submitted CSV file.
+
+    Tries a valid CSV file first for the sample object, then an empty CSV file,
+    and finally a CSV file that contains an empty row.
+
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder used to group submission
+    """
+    _schema = "sample"
+    _filename = "EGAformat.csv"
+    samples = await post_object(sess, _schema, folder_id, _filename)
+    # the CSV file has 3 rows and we expect all 3 to be added
+    assert len(samples[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(samples[0])}"
+    first_sample = samples[0][0]["accessionId"]
+
+    async with sess.get(f"{objects_url}/{_schema}/{first_sample}") as resp:
+        LOG.debug(f"Checking that {first_sample} JSON is in {_schema}")
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, _schema, first_sample, title, _filename)
+
+    await delete_object(sess, _schema, first_sample)
+    async with sess.get(f"{objects_url}/{_schema}/{first_sample}") as resp:
+        LOG.debug(f"Checking that JSON object {first_sample} was deleted")
+        assert resp.status == 404, f"HTTP Status code error, got {resp.status}"

     async with sess.get(f"{folders_url}/{folder_id}") as resp:
-        LOG.debug(f"Checking that object {accession_id} was deleted from folder {folder_id}")
+        LOG.debug(f"Checking that object {first_sample} was deleted from folder {folder_id}")
         res = await resp.json()
-        expected_true = not any(d["accessionId"] == accession_id for d in res["metadataObjects"])
-        assert expected_true, "draft object still exists"
+        expected_true = not any(d["accessionId"] == first_sample for d in res["metadataObjects"])
+        assert expected_true, f"object {first_sample} still exists"
+
+    _filename = "empty.csv"
+    # status should be 400
+    await post_object_expect_status(sess, _schema, folder_id, _filename, 400)
+
+    _filename = "EGA_sample_w_issue.csv"
+    # status should be 201 but we expect 3 rows, as the CSV has 4 rows one of which is empty
+    samples_2 = await post_object_expect_status(sess, _schema, folder_id, _filename, 201)
+    assert len(samples_2[0]) == 3, f"expected nb of CSV entries does not match, we got: {len(samples_2[0])}"
+
+    for sample in samples_2[0] + samples[0][1:]:
+        await delete_object(sess, _schema, sample["accessionId"])


 async def test_put_objects(sess, folder_id):
@@ -430,13 +699,18 @@ async def test_put_objects(sess, folder_id):
     :param sess: HTTP session in which request call is made
     :param folder_id: id of the folder used to group submission
     """
-    accession_id = await post_object(sess, "study", "SRP000539.xml")
-    patch_object = [
-        {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id[0], "schema": "study"}}
-    ]
-    await patch_folder(sess, folder_id, patch_object)
+    accession_id = await post_object(sess, "study", folder_id, "SRP000539.xml")
     await put_object_json(sess, "study", accession_id[0], "SRP000539.json")
     await put_object_xml(sess, "study", accession_id[0], "SRP000539_put.xml")
+    await check_folders_object_patch(
+        sess,
+        folder_id,
+        "study",
+        accession_id[0],
+        "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing",
+        "SRP000539_put.xml",
+    )
+    await delete_object(sess, "study", accession_id[0])


 async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder_id):
@@ -452,26 +726,31 @@ async def test_crud_drafts_works(sess, schema, orginal_file, update_file, folder
     :param update_file: name of the file used for updating object.
     :param folder_id: id of the folder used to group submission objects
     """
-    draft_id = await post_draft_json(sess, schema, orginal_file)
-    patch_draft_data = [
-        {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": f"draft-{schema}"}}
-    ]
-    await patch_folder(sess, folder_id, patch_draft_data)
+    draft_id = await post_draft_json(sess, schema, folder_id, orginal_file)
+    async with sess.get(f"{drafts_url}/{schema}/{draft_id}") as resp:
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", "")
+    await check_folders_object_patch(sess, folder_id, schema, draft_id, title, orginal_file, draft=True)
+
     accession_id = await put_draft(sess, schema, draft_id, update_file)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
         LOG.debug(f"Checking that {accession_id} JSON is in {schema}")
-        assert resp.status == 200, "HTTP Status code error"
+        assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+        res = await resp.json()
+        title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", "")
+        await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True)

     await delete_draft(sess, schema, accession_id)
     async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp:
         LOG.debug(f"Checking that JSON object {accession_id} was deleted")
-        assert resp.status == 404, "HTTP Status code error"
+        assert resp.status == 404, f"HTTP Status code error, got {resp.status}"

     async with sess.get(f"{folders_url}/{folder_id}") as resp:
         LOG.debug(f"Checking that JSON object {accession_id} was deleted from folder {folder_id}")
         res = await resp.json()
         expected_true = not any(d["accessionId"] == accession_id for d in res["drafts"])
-        assert expected_true, "draft object still exists"
+        assert expected_true, f"draft object {accession_id} still exists"


 async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folder_id):
@@ -487,23 +766,21 @@ async def test_patch_drafts_works(sess, schema, orginal_file, update_file, folde
     :param update_file: name of the file used for updating object.
:param folder_id: id of the folder used to group submission objects """ - draft_id = await post_draft_json(sess, schema, orginal_file) - patch_draft_data = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": f"draft-{schema}"}} - ] - await patch_folder(sess, folder_id, patch_draft_data) + draft_id = await post_draft_json(sess, schema, folder_id, orginal_file) accession_id = await patch_draft(sess, schema, draft_id, update_file) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that {accession_id} JSON is in {schema}") res = await resp.json() + title = res["descriptor"]["studyTitle"] if schema == "study" else res.get("title", None) assert res["centerName"] == "GEOM", "object centerName content mismatch" assert res["alias"] == "GSE10968", "object alias content mismatch" - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + await check_folders_object_patch(sess, folder_id, schema, accession_id, title, update_file, draft=True) await delete_draft(sess, schema, accession_id) async with sess.get(f"{drafts_url}/{schema}/{accession_id}") as resp: LOG.debug(f"Checking that JSON object {accession_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" async def test_querying_works(sess, folder_id): @@ -512,13 +789,9 @@ async def test_querying_works(sess, folder_id): :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects """ - files = await asyncio.gather(*[post_object(sess, schema, filename) for schema, filename in test_xml_files]) - - for accession_id, schema in files: - patch_folder_obj = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_folder_obj) + files = await asyncio.gather( + *[post_object(sess, schema, folder_id, filename) for schema, filename in test_xml_files] + ) queries = { "study": [ @@ -551,7 +824,7 @@ async def test_querying_works(sess, folder_id): async def do_one_query(schema, key, value, expected_status): async with sess.get(f"{objects_url}/{schema}?{key}={value}") as resp: - assert resp.status == expected_status, "HTTP Status code error" + assert resp.status == expected_status, f"HTTP Status code error, got {resp.status}" for schema, schema_queries in queries.items(): LOG.debug(f"Querying {schema} collection with working params") @@ -570,62 +843,236 @@ async def test_getting_all_objects_from_schema_works(sess, folder_id): :param folder_id: id of the folder used to group submission objects """ # Add objects - files = await asyncio.gather(*[post_object(sess, "study", "SRP000539.xml") for _ in range(13)]) - - for accession_id, schema in files: - patch_folder_obj = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": schema}} - ] - await patch_folder(sess, folder_id, patch_folder_obj) + files = await asyncio.gather(*[post_object(sess, "sample", folder_id, "SRS001433.xml") for _ in range(13)]) # Test default values - async with sess.get(f"{objects_url}/study") as resp: + async with sess.get(f"{objects_url}/sample") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 10 assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalObjects"] == 14 + assert 
ans["page"]["totalObjects"] == 13, ans["page"]["totalObjects"] assert len(ans["objects"]) == 10 # Test with custom pagination values - async with sess.get(f"{objects_url}/study?page=2&per_page=3") as resp: + async with sess.get(f"{objects_url}/sample?page=2&per_page=3") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 2 assert ans["page"]["size"] == 3 - assert ans["page"]["totalPages"] == 5 - assert ans["page"]["totalObjects"] == 14 + assert ans["page"]["totalPages"] == 5, ans["page"]["totalPages"] + assert ans["page"]["totalObjects"] == 13, ans["page"]["totalObjects"] assert len(ans["objects"]) == 3 # Test with wrong pagination values - async with sess.get(f"{objects_url}/study?page=-1") as resp: + async with sess.get(f"{objects_url}/sample?page=-1") as resp: assert resp.status == 400 - async with sess.get(f"{objects_url}/study?per_page=0") as resp: + async with sess.get(f"{objects_url}/sample?per_page=0") as resp: assert resp.status == 400 # Delete objects - await asyncio.gather(*[delete_object(sess, "study", accession_id) for accession_id, _ in files]) + await asyncio.gather(*[delete_object(sess, "sample", accession_id) for accession_id, _ in files]) + + +async def test_metax_crud_with_xml(sess, folder_id): + """Test Metax service with study and dataset xml files POST, PATCH, PUBLISH and DELETE reqs. + + :param sess: HTTP session in which request call is made + :param folder_id: id of the folder where objects reside + """ + # POST to object endpoint creates draft dataset in Metax for Study and Dataset + ids = [] + xml_files = set() + for schema, filename, update_filename in { + ("study", "SRP000539.xml", "SRP000539_put.xml"), + ("dataset", "dataset.xml", "dataset_put.xml"), + }: + accession_id, _ = await post_object(sess, schema, folder_id, filename) + xml_files.add((schema, accession_id, update_filename)) + ids.append([schema, accession_id]) + + for object in ids: + schema, accession_id = object + async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp: + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + try: + metax_id = res["metaxIdentifier"] + except KeyError: + assert False, "Metax ID was not in response data" + object.append(metax_id) + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" + metax_res = await metax_resp.json() + assert ( + res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] + ), "Object's DOI was not in Metax response data preferred_identifier" + + # PUT and PATCH to object endpoint updates draft dataset in Metax for Study and Dataset + for schema, accession_id, filename in xml_files: + await put_object_xml(sess, schema, accession_id, filename) + + for _, _, metax_id in ids: + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" + metax_res = await metax_resp.json() + assert ( + metax_res.get("date_modified", None) is not None + ), f"Object with metax id {metax_res['identifier']} was not updated in Metax" + + # DELETE object from Metax + for schema, accession_id, _ in xml_files: + await delete_object(sess, schema, accession_id) + + for _, _, metax_id in ids: + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: + assert metax_resp.status == 404, f"HTTP Status code error - expected 404 Not Found, got 
{metax_resp.status}" + + +async def test_metax_crud_with_json(sess, folder_id): + """Test Metax service with study and dataset json data POST, PATCH, PUBLISH and DELETE reqs. + + :param sess: HTTP session in which request call is made + :param folder_id: id of the folder where objects reside + """ + ids = [] + json_files = set() + for schema, filename, update_filename in { + ("study", "SRP000539.json", "patch.json"), + ("dataset", "dataset.json", "dataset_patch.json"), + }: + accession_id = await post_object_json(sess, schema, folder_id, filename) + json_files.add((schema, accession_id, filename, update_filename)) + ids.append([schema, accession_id]) + + for object in ids: + schema, accession_id = object + async with sess.get(f"{objects_url}/{schema}/{accession_id}") as resp: + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + try: + metax_id = res["metaxIdentifier"] + except KeyError: + assert False, "Metax ID was not in response data" + object.append(metax_id) + async with sess.get(f"{metax_url}/{metax_id}", auth=auth) as metax_resp: + assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}" + metax_res = await metax_resp.json() + assert ( + res.get("doi", None) == metax_res["research_dataset"]["preferred_identifier"] + ), "Object's DOI was not in Metax response data preferred_identifier" + + for schema, accession_id, filename, _ in json_files: + await put_object_json(sess, schema, accession_id, filename) + for schema, accession_id, _, filename in json_files: + await patch_object_json(sess, schema, accession_id, filename) + + for schema, accession_id, _, _ in json_files: + await delete_object(sess, schema, accession_id) + + +async def test_metax_id_not_updated_on_patch(sess, folder_id): + """Test that Metax id cannot be sent in patch. + + :param sess: HTTP session in which request call is made + :param folder_id: id of the folder where objects reside + """ + for schema, filename in { + ("study", "SRP000539.json"), + ("dataset", "dataset.json"), + }: + accession_id = await post_object_json(sess, schema, folder_id, filename) + async with sess.patch(f"{objects_url}/{schema}/{accession_id}", data={"metaxIdentifier": "12345"}) as resp: + LOG.debug(f"Trying to patch object in {schema}") + assert resp.status == 400 + + await delete_object(sess, schema, accession_id) + +async def test_metax_publish_dataset(sess, folder_id): + """Test publishing dataset to Metax service after folder(submission) is published. 
-async def test_crud_folders_works(sess):
+    :param sess: HTTP session in which request call is made
+    :param folder_id: id of the folder where objects reside
+    """
+    # POST to object endpoint creates draft dataset in Metax for Study and Dataset
+    objects = []
+    for schema, filename in {
+        ("study", "SRP000539.xml"),
+        ("dataset", "dataset.xml"),
+    }:
+        accession_id, _ = await post_object(sess, schema, folder_id, filename)
+        objects.append([schema, accession_id])
+
+    for object in objects:
+        schema, object_id = object
+        async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp:
+            assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+            res = await resp.json()
+            object.append(res["metaxIdentifier"])
+
+    # Publish the folder
+    # add the DOI info that is required before a folder can be published
+    doi_data_raw = await create_request_json_data("doi", "test_doi.json")
+    doi_data = json.loads(doi_data_raw)
+    patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}]
+    folder_id = await patch_folder(sess, folder_id, patch_add_doi)
+
+    await publish_folder(sess, folder_id)
+
+    for schema, object_id, metax_id in objects:
+        async with sess.get(f"{objects_url}/{schema}/{object_id}") as resp:
+            assert resp.status == 200, f"HTTP Status code error, got {resp.status}"
+            res = await resp.json()
+            assert res["metaxIdentifier"] == metax_id
+
+        async with sess.get(f"{metax_url}/{metax_id}") as metax_resp:
+            assert metax_resp.status == 200, f"HTTP Status code error, got {metax_resp.status}"
+            metax_res = await metax_resp.json()
+            assert metax_res["state"] == "published"
+
+            # this data is synced with /test_files/doi/test_doi.json
+            # if the data inside that file changes, it must be reflected here as well
+            expected_rd = json.loads(await create_request_json_data("metax", "research_dataset.json"))
+            actual_rd = metax_res["research_dataset"]
+
+            title = res["title"] if schema == "dataset" else res["descriptor"]["studyTitle"]
+            description = res["description"] if schema == "dataset" else res["descriptor"]["studyAbstract"]
+
+            assert actual_rd["title"]["en"] == title
+            assert actual_rd["description"]["en"] == description
+            assert actual_rd["creator"] == expected_rd["creator"]
+            assert (
+                actual_rd["access_rights"]["access_type"]["identifier"]
+                == expected_rd["access_rights"]["access_type"]["identifier"]
+            )
+            assert actual_rd["contributor"] == expected_rd["contributor"]
+            assert actual_rd["curator"] == expected_rd["curator"]
+            assert actual_rd["issued"] == expected_rd["issued"]
+            assert actual_rd["modified"] == expected_rd["modified"]
+            assert actual_rd["other_identifier"][0]["notation"] == expected_rd["other_identifier"][0]["notation"]
+            assert actual_rd["publisher"] == expected_rd["publisher"]
+            assert actual_rd["rights_holder"] == expected_rd["rights_holder"]
+            assert actual_rd["spatial"] == expected_rd["spatial"]
+            assert actual_rd["temporal"] == expected_rd["temporal"]
+
+
+async def test_crud_folders_works(sess, project_id):
     """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs.
:param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Create new folder and check its creation succeeded - folder_data = {"name": "Mock Folder", "description": "Mock Base folder to folder ops"} + folder_data = {"name": "Mock Folder", "description": "Mock Base folder to folder ops", "projectId": project_id} folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder - draft_id = await post_draft(sess, "sample", "SRS001433.xml") - patch_add_draft = [ - {"op": "add", "path": "/drafts/-", "value": [{"accessionId": draft_id, "schema": "draft-sample"}]} - ] - folder_id = await patch_folder(sess, folder_id, patch_add_draft) + draft_id = await post_draft(sess, "sample", folder_id, "SRS001433.xml") async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() @@ -633,71 +1080,100 @@ async def test_crud_folders_works(sess): assert res["name"] == folder_data["name"], "expected folder name does not match" assert res["description"] == folder_data["description"], "folder description content mismatch" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" # Get the draft from the collection within this session and post it to objects collection draft_data = await get_draft(sess, "sample", draft_id) - async with sess.post(f"{objects_url}/sample", data=draft_data) as resp: + async with sess.post(f"{objects_url}/sample", params={"folder": folder_id}, data=draft_data) as resp: LOG.debug("Adding draft to actual objects") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] - # Patch folder so that original draft becomes an object in the folder - patch_folder_move_draft = [ - {"op": "add", "path": "/metadataObjects/-", "value": [{"accessionId": accession_id, "schema": "sample"}]}, - ] - folder_id = await patch_folder(sess, folder_id, patch_folder_move_draft) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert "datePublished" not in res.keys() + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": 
"SRS001433.xml", + }, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Publish the folder + # add a study and dataset for publishing a folder + doi_data_raw = await create_request_json_data("doi", "test_doi.json") + doi_data = json.loads(doi_data_raw) + patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}] + folder_id = await patch_folder(sess, folder_id, patch_add_doi) + + await post_object_json(sess, "study", folder_id, "SRP000539.json") + await post_object(sess, "dataset", folder_id, "dataset.xml") + folder_id = await publish_folder(sess, folder_id) + + await get_draft(sess, "sample", draft_id, 404) # checking the draft was deleted after publication + async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is True, "folder is not published, expected True" + assert "datePublished" in res.keys() + assert "extraInfo" in res.keys() assert res["drafts"] == [], "there are drafts in folder, expected empty" - assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} - ], "folder metadataObjects content mismatch" + assert len(res["metadataObjects"]) == 3, "folder metadataObjects content mismatch" # Delete folder await delete_folder_publish(sess, folder_id) async with sess.get(f"{drafts_url}/sample/{draft_id}") as resp: LOG.debug(f"Checking that JSON object {accession_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" -async def test_crud_folders_works_no_publish(sess): +async def test_crud_folders_works_no_publish(sess, project_id): """Test folders REST api POST, GET, PATCH, PUBLISH and DELETE reqs. 
:param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Create new folder and check its creation succeeded - folder_data = {"name": "Mock Unpublished folder", "description": "test umpublished folder"} + folder_data = {"name": "Mock Unpublished folder", "description": "test umpublished folder", "projectId": project_id} folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Create draft from test XML file and patch the draft into the newly created folder - draft_id = await post_draft(sess, "sample", "SRS001433.xml") - patch_add_draft = [ - {"op": "add", "path": "/drafts/-", "value": [{"accessionId": draft_id, "schema": "draft-sample"}]} - ] - folder_id = await patch_folder(sess, folder_id, patch_add_draft) + draft_id = await post_draft(sess, "sample", folder_id, "SRS001433.xml") async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() @@ -705,57 +1181,71 @@ async def test_crud_folders_works_no_publish(sess): assert res["name"] == folder_data["name"], "expected folder name does not match" assert res["description"] == folder_data["description"], "folder description content mismatch" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, + } + ], "folder drafts content mismatch" assert res["metadataObjects"] == [], "there are objects in folder, expected empty" # Get the draft from the collection within this session and post it to objects collection draft = await get_draft(sess, "sample", draft_id) - async with sess.post(f"{objects_url}/sample", data=draft) as resp: + async with sess.post(f"{objects_url}/sample", params={"folder": folder_id}, data=draft) as resp: LOG.debug("Adding draft to actual objects") - assert resp.status == 201, "HTTP Status code error" + assert resp.status == 201, f"HTTP Status code error, got {resp.status}" ans = await resp.json() assert ans["accessionId"] != draft_id, "draft id does not match expected" accession_id = ans["accessionId"] - # Patch folder so that original draft becomes an object in the folder - patch_folder_move_draft = [ - {"op": "add", "path": "/metadataObjects/-", "value": [{"accessionId": accession_id, "schema": "sample"}]}, - ] - folder_id = await patch_folder(sess, folder_id, patch_folder_move_draft) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was patched") res = await resp.json() assert res["folderId"] == folder_id, "expected folder id does not match" assert res["published"] is False, "folder is published, expected False" - assert res["drafts"] == [{"accessionId": draft_id, "schema": "draft-sample"}], "folder drafts content mismatch" + assert res["drafts"] == [ + { + "accessionId": draft_id, + "schema": "draft-sample", + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, + } + ], "folder 
drafts content mismatch" assert res["metadataObjects"] == [ - {"accessionId": accession_id, "schema": "sample"} + { + "accessionId": accession_id, + "schema": "sample", + "tags": {"submissionType": "Form", "displayTitle": "HapMap sample from Homo sapiens"}, + } ], "folder metadataObjects content mismatch" # Delete folder await delete_folder(sess, folder_id) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was deleted") - assert resp.status == 404, "HTTP Status code error" - - async with sess.get(f"{users_url}/current") as resp: - LOG.debug(f"Checking that folder {folder_id} was deleted from current user") - res = await resp.json() - expected_true = not any(d == accession_id for d in res["folders"]) - assert expected_true, "folder still exists at user" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" -async def test_adding_doi_info_to_folder_works(sess): +async def test_adding_doi_info_to_folder_works(sess, project_id): """Test that proper DOI info can be added to folder and bad DOI info cannot be. :param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Create new folder and check its creation succeeded - folder_data = {"name": "DOI Folder", "description": "Mock Base folder for adding DOI info"} + folder_data = {"name": "DOI Folder", "description": "Mock Base folder for adding DOI info", "projectId": project_id} folder_id = await post_folder(sess, folder_data) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was created") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Get correctly formatted DOI info and patch it into the new folder successfully doi_data_raw = await create_request_json_data("doi", "test_doi.json") @@ -776,7 +1266,7 @@ async def test_adding_doi_info_to_folder_works(sess): patch_add_bad_doi = [{"op": "add", "path": "/doiInfo", "value": {"identifier": {}}}] async with sess.patch(f"{folders_url}/{folder_id}", data=json.dumps(patch_add_bad_doi)) as resp: LOG.debug(f"Tried updating folder {folder_id}") - assert resp.status == 400, "HTTP Status code error" + assert resp.status == 400, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["detail"] == "Provided input does not seem correct for field: 'doiInfo'", "expected error mismatch" @@ -790,7 +1280,7 @@ async def test_adding_doi_info_to_folder_works(sess): patch_add_bad_doi = [{"op": "add", "path": "/extraInfo", "value": {"publisher": "something"}}] async with sess.patch(f"{folders_url}/{folder_id}", data=json.dumps(patch_add_bad_doi)) as resp: LOG.debug(f"Tried updating folder {folder_id}") - assert resp.status == 400, "HTTP Status code error" + assert resp.status == 400, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["detail"] == "Request contains '/extraInfo' key that cannot be updated to folders.", "error mismatch" @@ -798,16 +1288,17 @@ async def test_adding_doi_info_to_folder_works(sess): await delete_folder(sess, folder_id) async with sess.get(f"{folders_url}/{folder_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was deleted") - assert resp.status == 404, "HTTP Status code error" + assert resp.status == 404, f"HTTP Status code error, got {resp.status}" -async def test_getting_paginated_folders(sess): +async def test_getting_paginated_folders(sess, project_id): """Check 
that /folders returns folders with correct paginations. :param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Test default values - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: # The folders received here are from previous # tests where the folders were not deleted assert resp.status == 200 @@ -815,21 +1306,21 @@ async def test_getting_paginated_folders(sess): assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalFolders"] == 6 + assert ans["page"]["totalFolders"] == 7 assert len(ans["folders"]) == 5 # Test with custom pagination values - async with sess.get(f"{folders_url}?page=2&per_page=3") as resp: + async with sess.get(f"{folders_url}?page=2&per_page=3&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 2 assert ans["page"]["size"] == 3 - assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalFolders"] == 6 + assert ans["page"]["totalPages"] == 3 + assert ans["page"]["totalFolders"] == 7 assert len(ans["folders"]) == 3 # Test querying only published folders - async with sess.get(f"{folders_url}?published=true") as resp: + async with sess.get(f"{folders_url}?published=true&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 1 @@ -839,144 +1330,235 @@ async def test_getting_paginated_folders(sess): assert len(ans["folders"]) == 1 # Test querying only draft folders - async with sess.get(f"{folders_url}?published=false") as resp: + async with sess.get(f"{folders_url}?published=false&projectId={project_id}") as resp: assert resp.status == 200 ans = await resp.json() assert ans["page"]["page"] == 1 assert ans["page"]["size"] == 5 - assert ans["page"]["totalPages"] == 1 - assert ans["page"]["totalFolders"] == 5 + assert ans["page"]["totalPages"] == 2 + assert ans["page"]["totalFolders"] == 6 assert len(ans["folders"]) == 5 # Test with wrong pagination values - async with sess.get(f"{folders_url}?page=-1") as resp: + async with sess.get(f"{folders_url}?page=-1&projectId={project_id}") as resp: assert resp.status == 400 - async with sess.get(f"{folders_url}?per_page=0") as resp: + async with sess.get(f"{folders_url}?per_page=0&projectId={project_id}") as resp: assert resp.status == 400 - async with sess.get(f"{folders_url}?published=asdf") as resp: + async with sess.get(f"{folders_url}?published=asdf&projectId={project_id}") as resp: assert resp.status == 400 -async def test_getting_user_items(sess): - """Test querying user's drafts or folders in the user object with GET user request. +async def test_getting_folders_filtered_by_name(sess, project_id): + """Check that /folders returns folders filtered by name. 
:param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ - # Get real user ID - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Reading user {user_id}") - assert resp.status == 200, "HTTP Status code error" - response = await resp.json() - real_user_id = response["userId"] + names = [" filter new ", "_filter_", "-filter-", "_extra-", "_2021special_"] + folders = [] + for name in names: + folder_data = {"name": f"Test{name}name", "description": "Test filtering name", "projectId": project_id} + folders.append(await post_folder(sess, folder_data)) - # Patch user to have a draft - draft_id = await post_draft_json(sess, "study", "SRP000539.json") - patch_drafts_user = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": "draft-study"}} - ] - await patch_user(sess, user_id, real_user_id, patch_drafts_user) + async with sess.get(f"{folders_url}?name=filter&projectId={project_id}") as resp: + ans = await resp.json() + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" + assert ans["page"]["totalFolders"] == 3, f'Shold be 3 returned {ans["page"]["totalFolders"]}' - # Test querying for list of user draft templates - async with sess.get(f"{users_url}/{user_id}?items=drafts") as resp: - LOG.debug(f"Reading user {user_id} drafts") - assert resp.status == 200, "HTTP Status code error" + async with sess.get(f"{folders_url}?name=extra&projectId={project_id}") as resp: ans = await resp.json() - assert ans["page"]["page"] == 1 - assert ans["page"]["size"] == 5 - assert ans["page"]["totalPages"] == 1 - assert ans["page"]["totalDrafts"] == 1 - assert len(ans["drafts"]) == 1 + assert resp.status == 200, f"HTTP Status code error {resp.status} {ans}" + assert ans["page"]["totalFolders"] == 1 - async with sess.get(f"{users_url}/{user_id}?items=drafts&per_page=3") as resp: - LOG.debug(f"Reading user {user_id} drafts") - assert resp.status == 200, "HTTP Status code error" + async with sess.get(f"{folders_url}?name=2021 special&projectId={project_id}") as resp: + assert resp.status == 200 ans = await resp.json() - assert ans["page"]["page"] == 1 - assert ans["page"]["size"] == 3 - assert len(ans["drafts"]) == 1 + assert ans["page"]["totalFolders"] == 0 + + async with sess.get(f"{folders_url}?name=new extra&projectId={project_id}") as resp: + assert resp.status == 200 + ans = await resp.json() + assert ans["page"]["totalFolders"] == 2 + + for folder in folders: + await delete_folder(sess, folder) + - await delete_draft(sess, "study", draft_id) # Future tests will assume the drafts key is empty +async def test_getting_folders_filtered_by_date_created(sess, project_id): + """Check that /folders returns folders filtered by date created. 
- # Test querying for the list of folder IDs - async with sess.get(f"{users_url}/{user_id}?items=folders") as resp: - LOG.debug(f"Reading user {user_id} folder list") - assert resp.status == 200, "HTTP Status code error" + :param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to + """ + async with sess.get(f"{users_url}/current") as resp: ans = await resp.json() - assert ans["page"]["page"] == 1 - assert ans["page"]["size"] == 5 - assert ans["page"]["totalPages"] == 2 - assert ans["page"]["totalFolders"] == 6 - assert len(ans["folders"]) == 5 + user = ans["userId"] + + folders = [] + format = "%Y-%m-%d %H:%M:%S" + + # Test dateCreated within a year + # Create folders with different dateCreated + timestamps = ["2014-12-31 00:00:00", "2015-01-01 00:00:00", "2015-07-15 00:00:00", "2016-01-01 00:00:00"] + for stamp in timestamps: + folder_data = { + "name": f"Test date {stamp}", + "description": "Test filtering date", + "dateCreated": datetime.strptime(stamp, format).timestamp(), + "projectId": project_id, + } + folders.append(await create_folder(folder_data, user)) - # Test the same with a bad query param - async with sess.get(f"{users_url}/{user_id}?items=bad") as resp: - LOG.debug(f"Reading user {user_id} but with faulty item descriptor") - assert resp.status == 400, "HTTP Status code error" + async with sess.get( + f"{folders_url}?date_created_start=2015-01-01&date_created_end=2015-12-31&projectId={project_id}" + ) as resp: + ans = await resp.json() + assert resp.status == 200, f"returned status {resp.status}, error {ans}" + assert ans["page"]["totalFolders"] == 2, f'Shold be 2 returned {ans["page"]["totalFolders"]}' + + # Test dateCreated within a month + # Create folders with different dateCreated + timestamps = ["2013-01-31 00:00:00", "2013-02-02 00:00:00", "2013-03-29 00:00:00", "2013-04-01 00:00:00"] + for stamp in timestamps: + folder_data = { + "name": f"Test date {stamp}", + "description": "Test filtering date", + "dateCreated": datetime.strptime(stamp, format).timestamp(), + "projectId": project_id, + } + folders.append(await create_folder(folder_data, user)) + async with sess.get( + f"{folders_url}?date_created_start=2013-02-01&date_created_end=2013-03-30&projectId={project_id}" + ) as resp: + ans = await resp.json() + assert resp.status == 200, f"returned status {resp.status}, error {ans}" + assert ans["page"]["totalFolders"] == 2, f'Shold be 2 returned {ans["page"]["totalFolders"]}' + + # Test dateCreated within a day + # Create folders with different dateCreated + timestamps = [ + "2012-01-14 23:59:59", + "2012-01-15 00:00:01", + "2012-01-15 23:59:59", + "2012-01-16 00:00:01", + ] + for stamp in timestamps: + folder_data = { + "name": f"Test date {stamp}", + "description": "Test filtering date", + "dateCreated": datetime.strptime(stamp, format).timestamp(), + "projectId": project_id, + } + folders.append(await create_folder(folder_data, user)) + + async with sess.get( + f"{folders_url}?date_created_start=2012-01-15&date_created_end=2012-01-15&projectId={project_id}" + ) as resp: + ans = await resp.json() + assert resp.status == 200, f"returned status {resp.status}, error {ans}" + assert ans["page"]["totalFolders"] == 2, f'Shold be 2 returned {ans["page"]["totalFolders"]}' -async def test_crud_users_works(sess): + # Test parameters date_created_... 
and name together + async with sess.get( + f"{folders_url}?name=2013&date_created_start=2012-01-01&date_created_end=2016-12-31&projectId={project_id}" + ) as resp: + ans = await resp.json() + assert resp.status == 200, f"returned status {resp.status}, error {ans}" + assert ans["page"]["totalFolders"] == 4, f'Shold be 4 returned {ans["page"]["totalFolders"]}' + + for folder in folders: + await delete_folder(sess, folder) + + +async def test_crud_users_works(sess, project_id): """Test users REST api GET, PATCH and DELETE reqs. :param sess: HTTP session in which request call is made + :param project_id: id of the project the folder belongs to """ # Check user exists in database (requires an user object to be mocked) async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Reading user {user_id}") - assert resp.status == 200, "HTTP Status code error" - response = await resp.json() - real_user_id = response["userId"] + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" # Add user to session and create a patch to add folder to user - folder_not_published = {"name": "Mock User Folder", "description": "Mock folder for testing users"} + folder_not_published = { + "name": "Mock User Folder", + "description": "Mock folder for testing users", + "projectId": project_id, + } folder_id = await post_folder(sess, folder_not_published) - patch_add_folder = [{"op": "add", "path": "/folders/-", "value": [folder_id]}] - await patch_user(sess, user_id, real_user_id, patch_add_folder) - async with sess.get(f"{users_url}/{user_id}") as resp: + + async with sess.get(f"{folders_url}/{folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {folder_id} was added") res = await resp.json() - assert res["userId"] == real_user_id, "user id does not match" - assert res["name"] == f"{test_user_given} {test_user_family}", "user name mismatch" - assert res["drafts"] == [], "user drafts content mismatch" - assert folder_id in res["folders"], "folder added missing mismatch" + assert res["name"] == folder_not_published["name"] + assert res["projectId"] == folder_not_published["projectId"] - folder_published = {"name": "Another test Folder", "description": "Test published folder does not get deleted"} + folder_published = { + "name": "Another test Folder", + "description": "Test published folder does not get deleted", + "projectId": project_id, + } publish_folder_id = await post_folder(sess, folder_published) + + # add a study and dataset for publishing a folder + doi_data_raw = await create_request_json_data("doi", "test_doi.json") + doi_data = json.loads(doi_data_raw) + patch_add_doi = [{"op": "add", "path": "/doiInfo", "value": doi_data}] + await patch_folder(sess, publish_folder_id, patch_add_doi) + + await post_object_json(sess, "study", publish_folder_id, "SRP000539.json") + await post_object(sess, "dataset", publish_folder_id, "dataset.xml") + await publish_folder(sess, publish_folder_id) - async with sess.get(f"{folders_url}/{publish_folder_id}") as resp: + async with sess.get(f"{folders_url}/{publish_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {publish_folder_id} was published") res = await resp.json() assert res["published"] is True, "folder is not published, expected True" - folder_not_published = {"name": "Delete Folder", "description": "Mock folder to delete while testing users"} + folder_not_published = { + "name": "Delete Folder", + "description": "Mock folder to delete while testing users", + "projectId": project_id, + } 
delete_folder_id = await post_folder(sess, folder_not_published) - patch_delete_folder = [{"op": "add", "path": "/folders/-", "value": [delete_folder_id]}] - await patch_user(sess, user_id, real_user_id, patch_delete_folder) - async with sess.get(f"{users_url}/{user_id}") as resp: + async with sess.get(f"{folders_url}/{delete_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {delete_folder_id} was added") res = await resp.json() - assert delete_folder_id in res["folders"], "deleted folder added does not exists" + assert res["name"] == folder_not_published["name"] + assert res["projectId"] == folder_not_published["projectId"] await delete_folder(sess, delete_folder_id) - async with sess.get(f"{users_url}/{user_id}") as resp: + async with sess.get(f"{folders_url}/{delete_folder_id}?projectId={project_id}") as resp: LOG.debug(f"Checking that folder {delete_folder_id} was deleted") + assert resp.status == 404 + + template_id = await post_template_json(sess, "study", "SRP000539_template.json", project_id) + await patch_template(sess, "study", template_id, "patch.json") + async with sess.get(f"{templates_url}/study/{template_id}") as resp: + LOG.debug(f"Checking that template: {template_id} was added") res = await resp.json() - assert delete_folder_id not in res["folders"], "delete folder still exists at user" + assert res["accessionId"] == template_id + assert res["projectId"] == project_id + assert res["identifiers"]["primaryId"] == "SRP000539" - draft_id = await post_draft_json(sess, "study", "SRP000539.json") - patch_drafts_user = [ - {"op": "add", "path": "/drafts/-", "value": {"accessionId": draft_id, "schema": "draft-study"}} - ] - await patch_user(sess, user_id, real_user_id, patch_drafts_user) - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that draft {draft_id} was added") + async with sess.get(f"{templates_url}?projectId={project_id}") as resp: + LOG.debug("Checking that template display title was updated in separate templates list") res = await resp.json() - assert res["drafts"][0]["accessionId"] == draft_id, "draft added does not exists" + assert res[0]["tags"]["displayTitle"] == "new name" - await delete_draft(sess, "study", draft_id) + await delete_template(sess, "study", template_id) + async with sess.get(f"{templates_url}/study/{template_id}") as resp: + LOG.debug(f"Checking that template {template_id} was deleted") + assert resp.status == 404 - async with sess.get(f"{users_url}/{user_id}") as resp: - LOG.debug(f"Checking that draft {draft_id} was added") - res = await resp.json() - assert len(res["drafts"]) == 0, "draft was not deleted from users" + template_ids = await post_template_json(sess, "study", "SRP000539_list.json", project_id) + assert len(template_ids) == 2, "templates could not be added as batch" + templates = await get_templates(sess, project_id) + + assert len(templates) == 2, f"should be 2 templates, got {len(templates)}" + assert templates[0]["schema"] == "template-study", "wrong template schema" # Delete user await delete_user(sess, user_id) @@ -984,56 +1566,41 @@ async def test_crud_users_works(sess): # this check is not needed but good to do async with sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Checking that user {user_id} was deleted") - assert resp.status == 401, "HTTP Status code error" + assert resp.status == 401, f"HTTP Status code error, got {resp.status}" -async def test_get_folders(sess, folder_id: str): +async def test_get_folders(sess, folder_id: str, project_id: str): """Test 
folders REST api GET . :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects + :param project_id: id of the project the folder belongs to """ - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() + LOG.error(response) assert len(response["folders"]) == 1 assert response["page"] == {"page": 1, "size": 5, "totalPages": 1, "totalFolders": 1} assert response["folders"][0]["folderId"] == folder_id -async def test_get_folders_objects(sess, folder_id: str): +async def test_get_folders_objects(sess, folder_id: str, project_id: str): """Test folders REST api GET with objects. :param sess: HTTP session in which request call is made :param folder_id: id of the folder used to group submission objects + :param project_id: id of the project the folder belongs to """ - accession_id = await post_object_json(sess, "study", "SRP000539.json") - patch_add_object = [ - {"op": "add", "path": "/metadataObjects/-", "value": {"accessionId": accession_id, "schema": "study"}} - ] - await patch_folder(sess, folder_id, patch_add_object) - async with sess.get(f"{folders_url}") as resp: + accession_id = await post_object_json(sess, "study", folder_id, "SRP000539.json") + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" - response = await resp.json() - assert len(response["folders"]) == 1 - assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id - assert "tags" not in response["folders"][0]["metadataObjects"][0] - patch_add_more_object = [ - { - "op": "add", - "path": "/metadataObjects/0/tags", - "value": {"submissionType": "Form"}, - } - ] - await patch_folder(sess, folder_id, patch_add_more_object) - async with sess.get(f"{folders_url}") as resp: - LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id + assert "tags" in response["folders"][0]["metadataObjects"][0] assert response["folders"][0]["metadataObjects"][0]["tags"]["submissionType"] == "Form" patch_change_tags_object = [ @@ -1044,14 +1611,16 @@ async def test_get_folders_objects(sess, folder_id: str): } ] await patch_folder(sess, folder_id, patch_change_tags_object) - async with sess.get(f"{folders_url}") as resp: + async with sess.get(f"{folders_url}?projectId={project_id}") as resp: LOG.debug(f"Reading folder {folder_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() assert len(response["folders"]) == 1 assert response["folders"][0]["metadataObjects"][0]["accessionId"] == accession_id assert response["folders"][0]["metadataObjects"][0]["tags"]["submissionType"] == "XML" + await delete_object(sess, "study", accession_id) + async def test_submissions_work(sess, folder_id): """Test actions in submission XML files. 
@@ -1062,38 +1631,58 @@ async def test_submissions_work(sess, folder_id): # Post original submission with two 'add' actions sub_files = [("submission", "ERA521986_valid.xml"), ("study", "SRP000539.xml"), ("sample", "SRS001433.xml")] submission_data = await create_multi_file_request_data(sub_files) - async with sess.post(f"{submit_url}", data=submission_data) as resp: + + async with sess.post(f"{submit_url}", params={"folder": folder_id}, data=submission_data) as resp: LOG.debug("Checking initial submission worked") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert len(res) == 2, "expected 2 objects" assert res[0]["schema"] == "study", "expected first element to be study" assert res[1]["schema"] == "sample", "expected second element to be sample" study_access_id = res[0]["accessionId"] - patch = [ - { - "op": "add", - "path": "/metadataObjects/-", - "value": {"accessionId": res[0]["accessionId"], "schema": res[0]["schema"]}, - }, - { - "op": "add", - "path": "/metadataObjects/-", - "value": {"accessionId": res[1]["accessionId"], "schema": res[1]["schema"]}, - }, - ] - await patch_folder(sess, folder_id, patch) + sample_access_id = res[1]["accessionId"] # Sanity check that the study object was inserted correctly before modifying it async with sess.get(f"{objects_url}/study/{study_access_id}") as resp: LOG.debug("Sanity checking that previous object was added correctly") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["accessionId"] == study_access_id, "study accession id does not match" assert res["alias"] == "GSE10966", "study alias does not match" assert res["descriptor"]["studyTitle"] == ( "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing" ), "study title does not match" + metax_id = res.get("metaxIdentifier", None) + doi = res.get("doi", None) + assert metax_id is not None + assert doi is not None + + # check that objects are added to folder + async with sess.get(f"{folders_url}/{folder_id}") as resp: + LOG.debug(f"Checking that folder {folder_id} was patched") + res = await resp.json() + expected_study = { + "accessionId": study_access_id, + "schema": "study", + "tags": { + "submissionType": "XML", + "displayTitle": ( + "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing" + ), + "fileName": "SRP000539.xml", + }, + } + assert expected_study in res["metadataObjects"], "folder metadataObjects content mismatch" + expected_sample = { + "accessionId": sample_access_id, + "schema": "sample", + "tags": { + "submissionType": "XML", + "displayTitle": "HapMap sample from Homo sapiens", + "fileName": "SRS001433.xml", + }, + } + assert expected_sample in res["metadataObjects"], "folder metadataObjects content mismatch" # Give test file the correct accession id LOG.debug("Sharing the correct accession ID created in this test instance") @@ -1109,7 +1698,7 @@ async def test_submissions_work(sess, folder_id): more_submission_data = await create_multi_file_request_data(sub_files) async with sess.post(f"{submit_url}", data=more_submission_data) as resp: LOG.debug("Checking object in initial submission was modified") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert len(res) == 2, "expected 2 
objects" new_study_access_id = res[0]["accessionId"] @@ -1118,13 +1707,33 @@ async def test_submissions_work(sess, folder_id): # Check the modified object was inserted correctly async with sess.get(f"{objects_url}/study/{new_study_access_id}") as resp: LOG.debug("Checking that previous object was modified correctly") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["accessionId"] == new_study_access_id, "study accession id does not match" assert res["alias"] == "GSE10966", "study alias does not match" assert res["descriptor"]["studyTitle"] == ( "Different title for testing purposes" ), "updated study title does not match" + assert res["metaxIdentifier"] == metax_id + assert res["doi"] == doi + + # check that study is updated to folder + async with sess.get(f"{folders_url}/{folder_id}") as resp: + LOG.debug(f"Checking that folder {folder_id} was patched") + res = await resp.json() + expected_study = { + "accessionId": study_access_id, + "schema": "study", + "tags": { + "submissionType": "XML", + "displayTitle": "Different title for testing purposes", + "fileName": "SRP000539_modified.xml", + }, + } + assert expected_study in res["metadataObjects"], "folder metadataObjects content mismatch" + + await delete_object(sess, "sample", sample_access_id) + await delete_object(sess, "study", study_access_id) # Remove the accession id that was used for testing from test file LOG.debug("Sharing the correct accession ID created in this test instance") @@ -1143,7 +1752,7 @@ async def test_health_check(sess): """ async with sess.get(f"{base_url}/health") as resp: LOG.debug("Checking that health status is ok") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" res = await resp.json() assert res["status"] == "Ok" assert res["services"]["database"]["status"] == "Ok" @@ -1156,6 +1765,8 @@ async def main(): LOG.debug("=== Login other mock user ===") await login(sess, other_test_user, other_test_user_given, other_test_user_family) + user_data = await get_user_data(sess) + project_id = user_data["projects"][0]["projectId"] # Test add, modify, validate and release action with submissions # added to validate that objects belong to a specific user @@ -1163,29 +1774,38 @@ async def main(): submission_folder = { "name": "submission test 1", "description": "submission test folder 1", + "projectId": project_id, } submission_folder_id = await post_folder(sess, submission_folder) - await test_get_folders(sess, submission_folder_id) - await test_get_folders_objects(sess, submission_folder_id) + await test_get_folders(sess, submission_folder_id, project_id) + await test_get_folders_objects(sess, submission_folder_id, project_id) await test_submissions_work(sess, submission_folder_id) async with aiohttp.ClientSession() as sess: LOG.debug("=== Login mock user ===") await login(sess, test_user, test_user_given, test_user_family) + user_data = await get_user_data(sess) + project_id = user_data["projects"][0]["projectId"] # Test adding and getting objects LOG.debug("=== Testing basic CRUD operations ===") basic_folder = { "name": "basic test", "description": "basic test folder", + "projectId": project_id, } basic_folder_id = await post_folder(sess, basic_folder) + # test XML files await asyncio.gather(*[test_crud_works(sess, schema, file, basic_folder_id) for schema, file in test_xml_files]) + # test CSV files + await test_csv(sess, 
basic_folder_id) + put_object_folder = { "name": "test put object", "description": "put object test folder", + "projectId": project_id, } put_object_folder = await post_folder(sess, put_object_folder) @@ -1196,6 +1816,7 @@ async def main(): draft_folder = { "name": "basic test draft", "description": "basic test draft folder", + "projectId": project_id, } draft_folder_id = await post_folder(sess, draft_folder) await asyncio.gather( @@ -1215,6 +1836,7 @@ async def main(): query_folder = { "name": "basic test query", "description": "basic test query folder", + "projectId": project_id, } query_folder_id = await post_folder(sess, query_folder) await test_querying_works(sess, query_folder_id) @@ -1224,26 +1846,46 @@ async def main(): pagination_folder = { "name": "basic test pagination", "description": "basic test pagination folder", + "projectId": project_id, } pagination_folder_id = await post_folder(sess, pagination_folder) await test_getting_all_objects_from_schema_works(sess, pagination_folder_id) # Test creating, reading, updating and deleting folders LOG.debug("=== Testing basic CRUD folder operations ===") - await test_crud_folders_works(sess) - await test_crud_folders_works_no_publish(sess) - await test_adding_doi_info_to_folder_works(sess) + await test_crud_folders_works(sess, project_id) + await test_crud_folders_works_no_publish(sess, project_id) + await test_adding_doi_info_to_folder_works(sess, project_id) # Test getting a list of folders and draft templates owned by the user LOG.debug("=== Testing getting folders, draft folders and draft templates with pagination ===") - await test_getting_paginated_folders(sess) - await test_getting_user_items(sess) + await test_getting_paginated_folders(sess, project_id) + LOG.debug("=== Testing getting folders filtered with name and date created ===") + await test_getting_folders_filtered_by_name(sess, project_id) + # too much of a hassle to make the test work with a TLS db connection in GitHub + # must be improved in the next integration test iteration + if not TLS: + await test_getting_folders_filtered_by_date_created(sess, project_id) + + # Test that study and dataset objects connect to Metax and save the Metax id to the db + LOG.debug("=== Testing Metax integration related basic CRUD operations for study and dataset ===") + metax_folder = { + "name": "basic test pagination", + "description": "basic test pagination folder", + "projectId": project_id, + } + metax_folder_id = await post_folder(sess, metax_folder) + await test_metax_crud_with_xml(sess, metax_folder_id) + await test_metax_crud_with_json(sess, metax_folder_id) + await test_metax_id_not_updated_on_patch(sess, metax_folder_id) + await test_metax_publish_dataset(sess, metax_folder_id) # Test add, modify, validate and release action with submissions LOG.debug("=== Testing actions within submissions ===") submission_folder = { "name": "submission test", "description": "submission test folder", + "projectId": project_id, } submission_folder_id = await post_folder(sess, submission_folder) await test_submissions_work(sess, submission_folder_id) @@ -1255,14 +1897,14 @@ async def main(): # Test reading, updating and deleting users # this needs to be done last as it deletes users LOG.debug("=== Testing basic CRUD user operations ===") - await test_crud_users_works(sess) + await test_crud_users_works(sess, project_id) # Remove the remaining user in the test database async with aiohttp.ClientSession() as sess: await login(sess, other_test_user, other_test_user_given, other_test_user_family) async with
sess.get(f"{users_url}/{user_id}") as resp: LOG.debug(f"Reading user {user_id}") - assert resp.status == 200, "HTTP Status code error" + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" response = await resp.json() real_user_id = response["userId"] await delete_user(sess, real_user_id) diff --git a/tests/mockups.py b/tests/mockups.py index f3c77d6b3..baad36809 100644 --- a/tests/mockups.py +++ b/tests/mockups.py @@ -3,39 +3,9 @@ import hashlib from os import urandom import yarl -import json +import ujson import cryptography.fernet -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.primitives.asymmetric import rsa -from cryptography.hazmat.backends import default_backend -from authlib.jose import jwt, jwk -from typing import Tuple - - -class MockResponse: - """Mock-up class for HTTP response.""" - - def __init__(self, text, status): - """Initialize Mock Response.""" - self._text = text - self.status = status - - async def text(self): - """Get Mock Response body.""" - return self._text - - async def json(self): - """Get Mock Response body.""" - return self._text - - async def __aexit__(self, exc_type, exc, tb): - """Return async exit.""" - pass - - async def __aenter__(self): - """Return async enter.""" - return self class Mock_Request: @@ -112,107 +82,5 @@ def add_csrf_to_cookie(cookie, req, bad_sign=False): def encrypt_cookie(cookie, req): """Add encrypted cookie to request.""" - cookie_crypted = req.app["Crypt"].encrypt(json.dumps(cookie).encode("utf-8")).decode("utf-8") + cookie_crypted = req.app["Crypt"].encrypt(ujson.dumps(cookie).encode("utf-8")).decode("utf-8") req.cookies["MTD_SESSION"] = cookie_crypted - - -def generate_token() -> Tuple: - """Generate RSA Key pair to be used to sign token and the JWT Token itself.""" - private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048, backend=default_backend()) - public_key = private_key.public_key().public_bytes( - encoding=serialization.Encoding.PEM, format=serialization.PublicFormat.SubjectPublicKeyInfo - ) - pem = private_key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.TraditionalOpenSSL, - encryption_algorithm=serialization.NoEncryption(), - ) - # we set no `exp` and other claims as they are optional in a real scenario these should bde set - # See available claims here: https://www.iana.org/assignments/jwt/jwt.xhtml - # the important claim is the "authorities" - public_jwk = jwk.dumps(public_key, kty="RSA") - private_jwk = jwk.dumps(pem, kty="RSA") - - return (public_jwk, private_jwk) - - -jwk_pair = generate_token() - -keys = [jwk_pair[0]] -keys[0]["kid"] = "rsa1" -jwk_data = {"keys": keys} -header = {"jku": "http://mockauth:8000/jwk", "kid": "rsa1", "alg": "RS256", "typ": "JWT"} -id_token = { - "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "sub": "smth", - "eduPersonAffiliation": "member;staff", - "eppn": "eppn@test.fi", - "displayName": "test user", - "iss": "http://iss.domain.com:5430", - "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", - "given_name": "user", - "nonce": "nonce", - "aud": "aud2", - "acr": "http://iss.domain.com:5430/LoginHaka", - "nsAccountLock": "false", - "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, - "name": "test user", - "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, - "family_name": "test", - "email": "eppn@test.fi", -} -id_token_no_sub = { - "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - 
"eduPersonAffiliation": "member;staff", - "eppn": "eppn@test.fi", - "displayName": "test user", - "iss": "http://iss.domain.com:5430", - "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", - "given_name": "user", - "nonce": "nonce", - "aud": "aud2", - "acr": "http://iss.domain.com:5430/LoginHaka", - "nsAccountLock": "false", - "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, - "name": "test user", - "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, - "family_name": "test", - "email": "eppn@test.fi", -} -id_token_bad_nonce = { - "at_hash": "fSi3VUa5i2o2SgY5gPJZgg", - "eduPersonAffiliation": "member;staff", - "eppn": "eppn@test.fi", - "sub": "smth", - "displayName": "test user", - "iss": "http://iss.domain.com:5430", - "schacHomeOrganizationType": "urn:schac:homeOrganizationType:test:other", - "given_name": "user", - "nonce": "", - "aud": "aud2", - "acr": "http://iss.domain.com:5430/LoginHaka", - "nsAccountLock": "false", - "eduPersonScopedAffiliation": "staff@test.what;member@test.what", - "auth_time": 1606579533, - "name": "test user", - "schacHomeOrganization": "test.what", - "exp": 9999999999, - "iat": 1561621913, - "family_name": "test", - "email": "eppn@test.fi", -} -jwt_data = {"access_token": "test", "id_token": jwt.encode(header, id_token, jwk_pair[1]).decode("utf-8")} -jwt_data_claim_miss = { - "access_token": "test", - "id_token": jwt.encode(header, id_token_no_sub, jwk_pair[1]).decode("utf-8"), -} -jwt_data_bad_nonce = { - "access_token": "test", - "id_token": jwt.encode(header, id_token_bad_nonce, jwk_pair[1]).decode("utf-8"), -} diff --git a/tests/test_auth.py b/tests/test_auth.py index 4262efff6..182a5d1c5 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,22 +1,15 @@ """Test API auth endpoints.""" -from aiohttp.web_exceptions import HTTPForbidden, HTTPUnauthorized, HTTPBadRequest +from aiohttp.web_exceptions import HTTPForbidden, HTTPInternalServerError, HTTPSeeOther, HTTPBadRequest from metadata_backend.api.auth import AccessHandler from unittest.mock import MagicMock, patch -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop +from aiohttp.test_utils import AioHTTPTestCase from metadata_backend.api.middlewares import generate_cookie from metadata_backend.server import init from .mockups import ( - Mock_Request, - MockResponse, get_request_with_fernet, - jwt_data, - jwk_data, - jwt_data_claim_miss, - jwt_data_bad_nonce, ) from unittest import IsolatedAsyncioTestCase -import json class AccessHandlerFailTestCase(AioHTTPTestCase): @@ -31,23 +24,25 @@ async def setUpAsync(self): access_config = {} self.patch_access_handler = patch("metadata_backend.api.auth.AccessHandler", **access_config, spec=True) self.MockedAccessHandler = self.patch_access_handler.start() + self.app = await self.get_application() + self.server = await self.get_server(self.app) + self.client = await self.get_client(self.server) + + await self.client.start_server() async def tearDownAsync(self): """Cleanup mocked stuff.""" self.patch_access_handler.stop() + await self.client.close() - @unittest_run_loop async def test_login_with_default_config_values(self): - """Test that login raises 404 when the AUTH_URL env variable is not a proper endpoint.""" - self.client.app["OIDC_State"] = set() - response = await self.client.get("/aai") - self.assertEqual(response.status, 404) - resp_json = await response.json() - self.assertEqual(resp_json["instance"], "/authorize") - # Also check that we have 
regisitered oidc state - self.assertEqual(1, len(self.client.app["OIDC_State"])) + """Test that login raises 500 when OIDC is improperly configured.""" + with patch("oidcrp.rp_handler.RPHandler.begin", side_effect=Exception): + response = await self.client.get("/aai") + self.assertEqual(response.status, 500) + resp_json = await response.json() + self.assertEqual("OIDC authorization request failed.", resp_json["detail"]) - @unittest_run_loop async def test_callback_fails_without_query_params(self): """Test that callback endpoint raises 400 if no params provided in the request.""" response = await self.client.get("/callback") @@ -55,24 +50,14 @@ async def test_callback_fails_without_query_params(self): resp_json = await response.json() self.assertEqual("AAI response is missing mandatory params, received: ", resp_json["detail"]) - @unittest_run_loop async def test_callback_fails_with_wrong_oidc_state(self): """Test that callback endpoint raises 403 when state in the query is not the same as specified in session.""" - self.client.app["Session"] = {} - self.client.app["OIDC_State"] = set() - response = await self.client.get("/callback?state=wrong_value&code=code") - self.assertEqual(response.status, 403) - resp_json = await response.json() - self.assertEqual(resp_json["detail"], "Bad user session.") + with patch("oidcrp.rp_handler.RPHandler.get_session_information", side_effect=KeyError): + response = await self.client.get("/callback?state=wrong_value&code=code") + self.assertEqual(response.status, 403) + resp_json = await response.json() + self.assertEqual(resp_json["detail"], "Bad user session.") - @unittest_run_loop - async def test_callback_(self): - """Test that callback.""" - self.client.app["OIDC_State"] = set(("mo_state_value",)) - response = await self.client.get("/callback?state=mo_state_value&code=code") - self.assertIn(response.status, (403, 500)) - - @unittest_run_loop async def test_logout_works(self): """Test that logout revokes all tokens.""" request = get_request_with_fernet() @@ -96,53 +81,48 @@ def setUp(self): "domain": "http://domain.com:5430", "redirect": "http://domain.com:5430", "scope": "openid profile email", - "iss": "http://iss.domain.com:5430", "callback_url": "http://domain.com:5430/callback", - "auth_url": "http://auth.domain.com:5430/authorize", - "token_url": "http://auth.domain.com:5430/token", - "user_info": "http://auth.domain.com:5430/userinfo", - "revoke_url": "http://auth.domain.com:5430/revoke", - "jwk_server": "http://auth.domain.com:5430/jwk", - "auth_referer": "http://auth.domain.com:5430", + "oidc_url": "http://auth.domain.com:5430", + "auth_method": "code", } self.AccessHandler = AccessHandler(access_config) - self.AccessHandler.nonce = "nonce" def tearDown(self): """Cleanup mocked stuff.""" pass - async def test_get_jwk_fail(self): - """Test retrieving JWK exception.""" - with patch("aiohttp.ClientSession.get", side_effect=HTTPUnauthorized): - with self.assertRaises(HTTPUnauthorized): - await self.AccessHandler._get_key() - - async def test_jwk_key(self): - """Test get jwk key.""" - data = { - "kty": "oct", - "kid": "018c0ae5-4d9b-471b-bfd6-eef314bc7037", - "use": "sig", - "alg": "HS256", - "k": "hJtXIZ2uSN5kbQfbtTNWbpdmhkV8FJG-Onbc6mxCcYg", + async def test_set_user_no_update(self): + """Test set user success.""" + request = get_request_with_fernet() + session_id = "session_id" + new_user_id = "USR12345" + + request.app["db_client"] = MagicMock() + request.app["Session"] = {session_id: {}} + user_data = { + "sub": "user@test.fi", + "given_name": 
"User", + "family_name": "Test", + "projects": { + "projectId": "internal_1000", + "projectNumber": "1000", + }, + } + old_user_data = { + "projects": { + "projectId": "internal_1000", + "projectNumber": "1000", + } } - resp = MockResponse(json.dumps(data), 200) - with patch("aiohttp.ClientSession.get", return_value=resp): - result = await self.AccessHandler._get_key() - self.assertEqual(result, json.dumps(data)) + with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): + with patch("metadata_backend.api.operators.UserOperator.read_user", return_value=old_user_data): + await self.AccessHandler._set_user(request, session_id, user_data) - async def test_set_user_fail(self): - """Test set user raises exception.""" - request = Mock_Request() - tk = ("something",) - session_id = "session_id" - with patch("aiohttp.ClientSession.get", side_effect=HTTPUnauthorized): - with self.assertRaises(HTTPBadRequest): - await self.AccessHandler._set_user(request, session_id, tk) + self.assertIn("user_info", request.app["Session"][session_id]) + self.assertEqual(new_user_id, request.app["Session"][session_id]["user_info"]) - async def test_set_user(self): + async def test_set_user_with_update(self): """Test set user success.""" request = get_request_with_fernet() session_id = "session_id" @@ -150,88 +130,121 @@ async def test_set_user(self): request.app["db_client"] = MagicMock() request.app["Session"] = {session_id: {}} - tk = "something" - data = { - "eppn": "eppn@test.fi", + user_data = { + "sub": "user@test.fi", "given_name": "User", "family_name": "Test", + "projects": { + "projectId": "internal_1000", + "projectNumber": "1000", + }, + } + old_user_data = { + "projects": { + "projectId": "internal_2000", + "projectNumber": "2000", + } } - resp = MockResponse(data, 200) - with patch("aiohttp.ClientSession.get", return_value=resp): - with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): - await self.AccessHandler._set_user(request, session_id, tk) + with patch("metadata_backend.api.operators.UserOperator.create_user", return_value=new_user_id): + with patch("metadata_backend.api.operators.UserOperator.read_user", return_value=old_user_data): + with patch("metadata_backend.api.operators.UserOperator.update_user", return_value=new_user_id): + await self.AccessHandler._set_user(request, session_id, user_data) self.assertIn("user_info", request.app["Session"][session_id]) self.assertEqual(new_user_id, request.app["Session"][session_id]["user_info"]) - async def test_callback_fail(self): - """Test callback fails.""" + async def test_login_fail(self): + """Test login fails due to bad OIDCRP config.""" + # OIDCRP init fails, because AAI config endpoint request fails request = get_request_with_fernet() - request.query["state"] = "state" - request.query["code"] = "code" - request.app["Session"] = {} - request.app["OIDC_State"] = set(("state",)) - resp_no_token = MockResponse({}, 200) - resp_400 = MockResponse({}, 400) - - with patch("aiohttp.ClientSession.post", return_value=resp_no_token): - with self.assertRaises(HTTPBadRequest): - await self.AccessHandler.callback(request) + with self.assertRaises(HTTPInternalServerError): + await self.AccessHandler.login(request) - with patch("aiohttp.ClientSession.post", return_value=resp_400): - with self.assertRaises(HTTPBadRequest): - await self.AccessHandler.callback(request) + async def test_login_pass(self): + """Test login redirects user.""" + response = {"url": "some url"} + request = 
get_request_with_fernet() + with patch("oidcrp.rp_handler.RPHandler.begin", return_value=response): + with self.assertRaises(HTTPSeeOther): + await self.AccessHandler.login(request) async def test_callback_pass(self): """Test callback correct validation.""" request = get_request_with_fernet() request.query["state"] = "state" request.query["code"] = "code" - request.app["Session"] = {} - request.app["Cookies"] = set({}) - request.app["OIDC_State"] = set(("state",)) - resp_token = MockResponse(jwt_data, 200) - resp_jwk = MockResponse(jwk_data, 200) - - with patch("aiohttp.ClientSession.post", return_value=resp_token): - with patch("aiohttp.ClientSession.get", return_value=resp_jwk): - with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): - await self.AccessHandler.callback(request) + session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} + finalize = { + "token": "token", + "userinfo": {"sub": "user", "given_name": "name", "family_name": "name", "sdSubmitProjects": "1000"}, + } + with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): + with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize): + with patch( + "metadata_backend.api.auth.AccessHandler._process_projects", + return_value=[{"projectId": "internal_1000", "projectNumber": "1000"}], + ): + with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): + await self.AccessHandler.callback(request) async def test_callback_missing_claim(self): """Test callback missing claim validation.""" request = get_request_with_fernet() request.query["state"] = "state" request.query["code"] = "code" - request.app["Session"] = {} - request.app["Cookies"] = set({}) - request.app["OIDC_State"] = set(("state",)) - resp_token = MockResponse(jwt_data_claim_miss, 200) - resp_jwk = MockResponse(jwk_data, 200) + session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} + finalize = { + "token": "token", + "userinfo": {"given_name": "some", "family_name": "one", "sdSubmitProjects": "1000"}, + } + with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): + with patch("oidcrp.rp_handler.RPHandler.finalize", return_value=finalize): + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) - with patch("aiohttp.ClientSession.post", return_value=resp_token): - with patch("aiohttp.ClientSession.get", return_value=resp_jwk): - with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): - with self.assertRaises(HTTPUnauthorized): - await self.AccessHandler.callback(request) + async def test_callback_fail_finalize(self): + """Test callback fail finalize.""" + request = get_request_with_fernet() + request.query["state"] = "state" + request.query["code"] = "code" + + session = {"iss": "http://auth.domain.com:5430", "auth_request": {}} + with patch("oidcrp.rp_handler.RPHandler.get_session_information", return_value=session): + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) - async def test_callback_bad_claim(self): - """Test callback bad nonce validation.""" + async def test_callback_bad_state(self): + """Test callback bad state validation.""" request = get_request_with_fernet() request.query["state"] = "state" request.query["code"] = "code" - request.app["OIDC_State"] = set() - request.app["Session"] = {} - request.app["Cookies"] = set({}) - resp_token = MockResponse(jwt_data_bad_nonce, 200) - resp_jwk = MockResponse(jwk_data, 
200) + with self.assertRaises(HTTPForbidden): + await self.AccessHandler.callback(request) - with patch("aiohttp.ClientSession.post", return_value=resp_token): - with patch("aiohttp.ClientSession.get", return_value=resp_jwk): - with patch("metadata_backend.api.auth.AccessHandler._set_user", return_value=None): - with self.assertRaises(HTTPForbidden): - await self.AccessHandler.callback(request) + async def test_callback_missing_state(self): + """Test callback missing state validation.""" + request = get_request_with_fernet() + request.query["code"] = "code" + + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) + + async def test_callback_missing_code(self): + """Test callback missing code validation.""" + request = get_request_with_fernet() + request.query["state"] = "state" + + with self.assertRaises(HTTPBadRequest): + await self.AccessHandler.callback(request) + + async def test_process_projects(self): + """Test that process projects returns accession IDs.""" + request = get_request_with_fernet() + request.app["db_client"] = MagicMock() + with patch("metadata_backend.api.operators.ProjectOperator.create_project", return_value="accession_id"): + processed_projects = await self.AccessHandler._process_projects(request, ["1000"]) + self.assertEqual(processed_projects, [{"projectId": "accession_id", "projectNumber": "1000"}]) diff --git a/tests/test_db_service.py b/tests/test_db_service.py index b88748710..afa304f9e 100644 --- a/tests/test_db_service.py +++ b/tests/test_db_service.py @@ -35,8 +35,6 @@ def setUp(self): self.user_stub = { "userId": self.user_id_stub, "name": "name", - "drafts": ["EGA123456", "EGA1234567"], - "folders": ["EGA1234569"], } self.data_stub = { "accessionId": self.id_stub, diff --git a/tests/test_doi.py b/tests/test_doi.py new file mode 100644 index 000000000..0e8818aa6 --- /dev/null +++ b/tests/test_doi.py @@ -0,0 +1,45 @@ +"""Test the DOI registering tool.""" +import unittest +from unittest.mock import patch + +from aiohttp import web + +from metadata_backend.helpers.doi import DOIHandler + + +class DOITestCase(unittest.TestCase): + """DOI registering class test case.""" + + def setUp(self): + """Set up class for tests.""" + self.doi = DOIHandler() + + async def test_400_is_raised(self): + """Test 400 is raised when request to DataCite supposedly fails.""" + with patch("aiohttp.ClientSession.post") as mocked_post: + mocked_post.return_value.status_code = 400 + with self.assertRaises(web.HTTPBadRequest) as err: + await self.doi.create_draft() + self.assertEqual(str(err.exception), "DOI API draft creation request failed with code: 400") + + async def test_create_doi_draft_works(self): + """Test DOI info is returned correctly when request succeeds.""" + with patch("aiohttp.ClientSession.post") as mocked_post: + mocked_post.return_value.status = 201 + mocked_post.return_value.json.return_value = { + "data": { + "id": "10.xxxx/yyyyy", + "type": "dois", + "attributes": { + "doi": "10.xxxx/yyyyy", + "prefix": "10.xxxx", + "suffix": "yyyyy", + "identifiers": [{"identifier": "https://doi.org/10.xxxx/yyyyy", "identifierType": "DOI"}], + }, + } + } + + output = await self.doi.create_draft() + assert mocked_post.called + result = {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} + self.assertEqual(output, result) diff --git a/tests/test_files/dataset/dataset.json b/tests/test_files/dataset/dataset.json new file mode 100644 index 000000000..84930d4a7 --- /dev/null +++ b/tests/test_files/dataset/dataset.json @@ -0,0 +1,7 @@ +{ +
"title": "Test Dataset", + "datasetType": [ + "Amplicon sequencing" + ], + "description": "ome cool test description for json dataset" +} diff --git a/tests/test_files/dataset/dataset_patch.json b/tests/test_files/dataset/dataset_patch.json new file mode 100644 index 000000000..c7ed09a55 --- /dev/null +++ b/tests/test_files/dataset/dataset_patch.json @@ -0,0 +1,4 @@ +{ + "title": "Updated Dataset", + "description": "one cool test description for updated dataset" +} diff --git a/tests/test_files/dataset/dataset_put.xml b/tests/test_files/dataset/dataset_put.xml new file mode 100644 index 000000000..787ae60ca --- /dev/null +++ b/tests/test_files/dataset/dataset_put.xml @@ -0,0 +1,18 @@ + + + + test updated + some cool test description updated + Exome sequencing + Genotyping by array + + + + + + + + + + + diff --git a/tests/test_files/doi/test_doi.json b/tests/test_files/doi/test_doi.json index 3f3257dec..dfa38fb4a 100644 --- a/tests/test_files/doi/test_doi.json +++ b/tests/test_files/doi/test_doi.json @@ -22,6 +22,7 @@ "subjectScheme": "Fields of Science and Technology (FOS)" } ], + "keywords": "test,keyword", "contributors": [ { "name": "Contributor, Test", @@ -36,14 +37,90 @@ "affiliationIdentifierScheme": "ROR" } ], - "contributorType": "Researcher", - "nameIdentifiers": [ + "contributorType": "Researcher" + }, + { + "name": "Curator, Test", + "nameType": "Personal", + "givenName": "Test", + "familyName": "Curator", + "affiliation": [ { - "schemeUri": null, - "nameIdentifier": null, - "nameIdentifierScheme": null + "name": "affiliation place", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/test3", + "affiliationIdentifierScheme": "ROR" } - ] + ], + "contributorType": "Data Curator" + }, + { + "name": "Rights, Holder", + "nameType": "Personal", + "givenName": "Rights", + "familyName": "Holder", + "affiliation": [ + { + "name": "affiliation place", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/test3", + "affiliationIdentifierScheme": "ROR" + } + ], + "contributorType": "Rights Holder" + } + ], + "dates": [ + { + "date": "2020-10-10/2022-01-10", + "dateType": "Collected" + }, + { + "date": "2020-10-10", + "dateType": "Issued" + }, + { + "date": "2020-10-11", + "dateType": "Issued" + }, + { + "date": "2022-01-10", + "dateType": "Updated" + }, + { + "date": "2022-01-11", + "dateType": "Updated" + }, + { + "date": "2022-01-10", + "dateType": "Available" + } + ], + "geoLocations": [ + { + "geoLocationPlace": "Helsinki" + }, + { + "geoLocationPoint": { + "pointLongitude": "24.9384", + "pointLatitude": "60.1699" + }, + "geoLocationBox": { + "westBoundLongitude": "24.8994938494", + "eastBoundLongitude": "25.1845034857", + "southBoundLatitude": "60.1396430193", + "northBoundLatitude": "60.2431299506" + } + } + ], + "language": "Assamese", + "sizes": [ + "30000" + ], + "alternateIdentifiers": [ + { + "alternateIdentifier": "arXiv:9912.12345v2", + "alternateIdentifierType": "arXiv" } ] } diff --git a/tests/test_files/experiment/ERX000119.json b/tests/test_files/experiment/ERX000119.json index ba4a1d80b..110ab4fa6 100644 --- a/tests/test_files/experiment/ERX000119.json +++ b/tests/test_files/experiment/ERX000119.json @@ -1,4 +1,4 @@ -{ +{ "title": "Experiment", "alias": "NA18504.3", "centerName": "MPIMG", @@ -54,7 +54,6 @@ } }, "platform": "AB SOLiD System", - "processing": "true", "experimentAttributes": [ { "tag": "center_name", @@ -66,4 +65,4 @@ "units": "MB" } ] -} +} \ No newline at end of file diff --git 
a/tests/test_files/metax/research_dataset.json b/tests/test_files/metax/research_dataset.json new file mode 100644 index 000000000..8b7ba8925 --- /dev/null +++ b/tests/test_files/metax/research_dataset.json @@ -0,0 +1,110 @@ +{ + "title": { + "en": "test" + }, + "issued": "2020-10-10", + "creator": [ + { + "name": "Creator, Test", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test1" + } + } + ], + "curator": [ + { + "name": "Curator, Test", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test3" + } + } + ], + "spatial": [ + { + "geographic_name": "Helsinki" + }, + { + "as_wkt": [ + "POINT(24.9384 60.1699)", + "POLYGON((24.8994938494 60.2431299506, 25.1845034857 60.2431299506, 25.1845034857 60.1396430193, 24.8994938494 60.1396430193, 24.8994938494 60.2431299506))" + ] + } + ], + "modified": "2022-01-11T00:00:00+03:00", + "temporal": [ + { + "end_date": "2022-01-10T00:00:00+03:00", + "start_date": "2020-10-10T00:00:00+03:00" + } + ], + "publisher": { + "name": { + "en": "CSC Sensitive Data Services for Research", + "fi": "CSC:n Arkaluonteisen datan palveluiden aineistokatalogi" + }, + "@type": "Organization" + }, + "contributor": [ + { + "name": "Contributor, Test", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test2" + } + } + ], + "description": { + "en": "some cool test description" + }, + "access_rights": { + "access_type": { + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type", + "identifier": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted", + "pref_label": { + "en": "Restricted use", + "fi": "Saatavuutta rajoitettu", + "und": "Saatavuutta rajoitettu" + } + } + }, + "rights_holder": [ + { + "name": "Rights, Holder", + "@type": "Person", + "member_of": { + "name": { + "en": "affiliation place" + }, + "@type": "Organization", + "identifier": "https://ror.org/test3" + } + } + ], + "other_identifier": [ + { + "type": { + "in_scheme": "http://uri.suomi.fi/codelist/fairdata/identifier_type", + "identifier": "http://uri.suomi.fi/codelist/fairdata/identifier_type/code/arxiv", + "pref_label": { + "en": "arXiv identifer", + "und": "arXiv identifer" + } + }, + "notation": "arXiv:9912.12345v2" + } + ] +} diff --git a/tests/test_files/sample/EGA_sample_w_issue.csv b/tests/test_files/sample/EGA_sample_w_issue.csv new file mode 100644 index 000000000..26efa6ff0 --- /dev/null +++ b/tests/test_files/sample/EGA_sample_w_issue.csv @@ -0,0 +1,4 @@ +title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype +test sample,test1,A test sample metadata based on the EGA submitter portal format,123456789abc,,,unknown,,,,some disease state +,,,,,,,,,, +third test sample,test3,One more test sample metadata,123456789abc,,,unknown,,,,some disease state diff --git a/tests/test_files/sample/EGAformat.csv b/tests/test_files/sample/EGAformat.csv new file mode 100644 index 000000000..6261a5584 --- /dev/null +++ b/tests/test_files/sample/EGAformat.csv @@ -0,0 +1,4 @@ +title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype +test sample,test1,A test sample metadata based on the EGA submitter portal format,123456789abc,,,unknown,,,,some disease state +another test sample,test2,Another test sample 
metadata,123456789abc,,,unknown,,,,some disease state +third test sample,test3,One more test sample metadata,123456789abc,,,unknown,,,,some disease state diff --git a/tests/test_files/sample/empty.csv b/tests/test_files/sample/empty.csv new file mode 100644 index 000000000..a5ac6cc45 --- /dev/null +++ b/tests/test_files/sample/empty.csv @@ -0,0 +1 @@ +title,alias,description,subjectId,bioSampleId,caseOrControl,gender,organismPart,cellLine,region,phenotype diff --git a/tests/test_files/study/SRP000539_list.json b/tests/test_files/study/SRP000539_list.json new file mode 100644 index 000000000..59b44201f --- /dev/null +++ b/tests/test_files/study/SRP000539_list.json @@ -0,0 +1,79 @@ +[ + { + "template": { + "centerName": "GEO", + "alias": "GSE10966", + "identifiers": { + "primaryId": "SRP000539", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ + { + "xrefDb": "pubmed", + "xrefId": "18423832" + } + ], + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + }, + "tags": {"submissionType": "Form"} + }, + { + "template": { + "centerName": "GEO", + "alias": "GSE10967", + "identifiers": { + "primaryId": "SRP000538", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ + { + "xrefDb": "pubmed", + "xrefId": "18423832" + } + ], + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + } + } +] diff --git a/tests/test_files/study/SRP000539_put.xml b/tests/test_files/study/SRP000539_put.xml index 126ced42a..ba92a661e 100644 --- a/tests/test_files/study/SRP000539_put.xml +++ b/tests/test_files/study/SRP000539_put.xml @@ -6,7 +6,7 @@ GSE10966 - Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing + Highly integrated epigenome maps in Arabidopsis - updated Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. 
Keywords: diff --git a/tests/test_files/study/SRP000539_template.json b/tests/test_files/study/SRP000539_template.json new file mode 100644 index 000000000..2c189ab34 --- /dev/null +++ b/tests/test_files/study/SRP000539_template.json @@ -0,0 +1,41 @@ +{ + "template": { + "centerName": "GEO", + "alias": "GSE10966", + "identifiers": { + "primaryId": "SRP000539", + "externalId": [ + { + "namespace": "BioProject", + "label": "primary", + "value": "PRJNA108793" + }, + { + "namespace": "GEO", + "value": "GSE10966" + } + ] + }, + "descriptor": { + "studyTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing", + "studyType": "Other", + "studyAbstract": "Part of a set of highly integrated epigenome maps for Arabidopsis thaliana. Keywords: Illumina high-throughput bisulfite sequencing Overall design: Whole genome shotgun bisulfite sequencing of wildtype Arabidopsis plants (Columbia-0), and met1, drm1 drm2 cmt3, and ros1 dml2 dml3 null mutants using the Illumina Genetic Analyzer.", + "centerProjectName": "GSE10966" + }, + "studyLinks": [ + { + "xrefDb": "pubmed", + "xrefId": "18423832" + } + ], + "studyAttributes": [ + { + "tag": "parent_bioproject", + "value": "PRJNA107265" + } + ] + }, + "tags": { + "displayTitle": "Highly integrated epigenome maps in Arabidopsis - whole genome shotgun bisulfite sequencing" + } +} diff --git a/tests/test_files/study/patch.json b/tests/test_files/study/patch.json index 5b7ed1c93..196832d91 100644 --- a/tests/test_files/study/patch.json +++ b/tests/test_files/study/patch.json @@ -1,4 +1,8 @@ { "centerName": "GEOM", - "alias": "GSE10968" + "alias": "GSE10968", + "index": 0, + "tags": { + "displayTitle": "new name" + } } \ No newline at end of file diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 1bbe85595..db1aaf03b 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -1,15 +1,17 @@ """Test API endpoints from handlers module.""" from pathlib import Path -from unittest.mock import patch +from unittest.mock import call, patch from aiohttp import FormData -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop - +from aiohttp.test_utils import AioHTTPTestCase, make_mocked_coro +from metadata_backend.api.handlers.object import ObjectAPIHandler +from metadata_backend.api.handlers.restapi import RESTAPIHandler from metadata_backend.api.middlewares import generate_cookie -from .mockups import get_request_with_fernet from metadata_backend.server import init +from .mockups import get_request_with_fernet + class HandlersTestCase(AioHTTPTestCase): """API endpoint class test cases.""" @@ -22,6 +24,14 @@ async def get_application(self): server["Session"] = {"user_info": ["value", "value"]} return server + def authenticate(self, client): + """Authenticate client.""" + request = get_request_with_fernet() + request.app["Crypt"] = client.app["Crypt"] + cookie, cookiestring = generate_cookie(request) + client.app["Session"] = {cookie["id"]: {"access_token": "mock_token_value", "user_info": {}}} + client._session.cookie_jar.update_cookies({"MTD_SESSION": cookiestring}) + async def setUpAsync(self): """Configure default values for testing and other modules. @@ -29,6 +39,13 @@ async def setUpAsync(self): methods. Also sets up reusable test variables for different test methods. 
""" + self.app = await self.get_application() + self.server = await self.get_server(self.app) + self.client = await self.get_client(self.server) + + await self.client.start_server() + self.authenticate(self.client) + self.test_ega_string = "EGA123456" self.query_accessionId = ("EDAG3991701442770179",) self.page_num = 3 @@ -52,71 +69,66 @@ async def setUpAsync(self): {"accessionId": "EGA123456", "schema": "sample"}, ], "drafts": [], + "doiInfo": {"creators": [{"name": "Creator, Test"}]}, } self.user_id = "USR12345678" self.test_user = { "userId": self.user_id, "name": "tester", - "drafts": [], - "folders": ["FOL12345678"], } - class_parser = "metadata_backend.api.handlers.XMLToJSONParser" - class_operator = "metadata_backend.api.handlers.Operator" - class_xmloperator = "metadata_backend.api.handlers.XMLOperator" - class_folderoperator = "metadata_backend.api.handlers.FolderOperator" - class_useroperator = "metadata_backend.api.handlers.UserOperator" - operator_config = { + self._draf_doi_data = { + "identifier": { + "identifierType": "DOI", + "doi": "https://doi.org/10.xxxx/yyyyy", + }, + "types": { + "bibtex": "misc", + "citeproc": "collection", + "schemaOrg": "Collection", + "resourceTypeGeneral": "Collection", + }, + } + + self.operator_config = { "read_metadata_object.side_effect": self.fake_operator_read_metadata_object, "query_metadata_database.side_effect": self.fake_operator_query_metadata_object, "create_metadata_object.side_effect": self.fake_operator_create_metadata_object, "delete_metadata_object.side_effect": self.fake_operator_delete_metadata_object, "update_metadata_object.side_effect": self.fake_operator_update_metadata_object, "replace_metadata_object.side_effect": self.fake_operator_replace_metadata_object, + "create_metax_info.side_effect": self.fake_operator_create_metax_info, } - xmloperator_config = { + self.xmloperator_config = { "read_metadata_object.side_effect": self.fake_xmloperator_read_metadata_object, "create_metadata_object.side_effect": self.fake_xmloperator_create_metadata_object, "replace_metadata_object.side_effect": self.fake_xmloperator_replace_metadata_object, } - folderoperator_config = { + self.folderoperator_config = { "create_folder.side_effect": self.fake_folderoperator_create_folder, "read_folder.side_effect": self.fake_folderoperator_read_folder, "delete_folder.side_effect": self.fake_folderoperator_delete_folder, "check_object_in_folder.side_effect": self.fake_folderoperator_check_object, - "get_collection_objects.side_effect": self.fake_folderoperator_get_collection_objects, } - useroperator_config = { + self.useroperator_config = { "create_user.side_effect": self.fake_useroperator_create_user, "read_user.side_effect": self.fake_useroperator_read_user, "filter_user.side_effect": self.fake_useroperator_filter_user, - "check_user_has_doc.side_effect": self.fake_useroperator_user_has_folder, } - self.patch_parser = patch(class_parser, spec=True) - self.patch_operator = patch(class_operator, **operator_config, spec=True) - self.patch_xmloperator = patch(class_xmloperator, **xmloperator_config, spec=True) - self.patch_folderoperator = patch(class_folderoperator, **folderoperator_config, spec=True) - self.patch_useroperator = patch(class_useroperator, **useroperator_config, spec=True) - self.MockedParser = self.patch_parser.start() - self.MockedOperator = self.patch_operator.start() - self.MockedXMLOperator = self.patch_xmloperator.start() - self.MockedFolderOperator = self.patch_folderoperator.start() - self.MockedUserOperator = 
self.patch_useroperator.start() - # Set up authentication - request = get_request_with_fernet() - request.app["Crypt"] = self.client.app["Crypt"] - cookie, cookiestring = generate_cookie(request) - self.client.app["Session"] = {cookie["id"]: {"access_token": "mock_token_value", "user_info": {}}} - self.client._session.cookie_jar.update_cookies({"MTD_SESSION": cookiestring}) + self.doi_handler = { + "create_draft.side_effect": self.fake_doi_create_draft, + "set_state.side_effect": self.fake_doi_set_state, + "delete.side_effect": self.fake_doi_delete, + } + + RESTAPIHandler._handle_check_ownership = make_mocked_coro(True) + ObjectAPIHandler._delete_metax_dataset = make_mocked_coro() async def tearDownAsync(self): """Cleanup mocked stuff.""" - self.patch_parser.stop() - self.patch_operator.stop() - self.patch_xmloperator.stop() - self.patch_folderoperator.stop() - self.patch_useroperator.stop() + + await self.client.close() def create_submission_data(self, files): """Create request data from pairs of schemas and filenames.""" @@ -124,11 +136,43 @@ def create_submission_data(self, files): for schema, filename in files: schema_path = "study" if schema == "fake" else schema path_to_file = self.TESTFILES_ROOT / schema_path / filename - data.add_field( - schema.upper(), open(path_to_file.as_posix(), "r"), filename=path_to_file.name, content_type="text/xml" - ) + # Differentiate between xml and csv + if filename[-3:] == "xml": + data.add_field( + schema.upper(), + open(path_to_file.as_posix(), "r"), + filename=path_to_file.name, + content_type="text/xml", + ) + elif filename[-3:] == "csv": + # files = {schema.upper(): open(path_to_file.as_posix(), "r")} + data.add_field( + schema.upper(), + open(path_to_file.as_posix(), "r"), + filename=path_to_file.name, + content_type="text/csv", + ) return data + def get_file_data(self, schema, filename): + """Read file contents as plain text.""" + path_to_file = self.TESTFILES_ROOT / schema / filename + with open(path_to_file.as_posix(), mode="r") as csv_file: + _reader = csv_file.read() + return _reader + + async def fake_doi_create_draft(self, prefix): + """Fake DOI draft creation to return a mocked DOI.""" + return {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} + + async def fake_doi_set_state(self, data): + """Fake DOI set state operation to return a mocked DOI.""" + return {"fullDOI": "10.xxxx/yyyyy", "dataset": "https://doi.org/10.xxxx/yyyyy"} + + async def fake_doi_delete(self, doi): + """Fake DOI delete operation to return None.""" + return None + async def fake_operator_read_metadata_object(self, schema_type, accession_id): """Fake read operation to return mocked JSON.""" return (self.metadata_json, "application/json") @@ -143,15 +187,15 @@ async def fake_xmloperator_read_metadata_object(self, schema_type, accession_id) async def fake_xmloperator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string + return {"accessionId": self.test_ega_string} async def fake_xmloperator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string + return {"accessionId": self.test_ega_string} async def fake_operator_create_metadata_object(self, schema_type, content): """Fake create operation to return mocked accessionId.""" - return self.test_ega_string + return {"accessionId": self.test_ega_string} async def fake_operator_update_metadata_object(self, schema_type, accession_id, content): """Fake update operation to return mocked accessionId.""" @@ -159,12 +203,16 @@ async def
fake_operator_update_metadata_object(self, schema_type, accession_id, async def fake_operator_replace_metadata_object(self, schema_type, accession_id, content): """Fake replace operation to return mocked accessionId.""" - return self.test_ega_string + return {"accessionId": self.test_ega_string} async def fake_operator_delete_metadata_object(self, schema_type, accession_id): """Fake delete operation to await successful operation indicator.""" return True + async def fake_operator_create_metax_info(self, schema_type, accession_id, data): + """Fake update operation to await successful operation indicator.""" + return True + async def fake_folderoperator_create_folder(self, content): """Fake create operation to return mocked folderId.""" return self.folder_id @@ -182,14 +230,6 @@ async def fake_folderoperator_check_object(self, schema_type, accession_id): data = True, self.folder_id, False return data - async def fake_folderoperator_get_collection_objects(self, schema_type, accession_id): - """Fake get collection of objects in folder.""" - return ["EDAG3991701442770179", "EGA123456"] - - async def fake_useroperator_user_has_folder(self, schema_type, user_id, folder_id): - """Fake check object in folder.""" - return True - async def fake_useroperator_create_user(self, content): """Fake user operation to return mocked userId.""" return self.user_id @@ -202,42 +242,10 @@ async def fake_useroperator_filter_user(self, query, item_type, page, per_page): """Fake read operation to return mocked user.""" return self.test_user[item_type], len(self.test_user[item_type]) - @unittest_run_loop - async def test_submit_endpoint_submission_does_not_fail(self): - """Test that submission with valid SUBMISSION.xml does not fail.""" - files = [("submission", "ERA521986_valid.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/submit", data=data) - self.assertEqual(response.status, 200) - self.assertEqual(response.content_type, "application/json") - - @unittest_run_loop - async def test_submit_endpoint_fails_without_submission_xml(self): - """Test that basic POST submission fails with no submission.xml. - - User should also be notified for missing file. - """ - files = [("analysis", "ERZ266973.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/submit", data=data) - failure_text = "There must be a submission.xml file in submission." - self.assertEqual(response.status, 400) - self.assertIn(failure_text, await response.text()) - @unittest_run_loop - async def test_submit_endpoint_fails_with_many_submission_xmls(self): - """Test submission fails when there's too many submission.xml -files. +class APIHandlerTestCase(HandlersTestCase): + """Schema API endpoint class test cases.""" - User should be notified for submitting too many files. - """ - files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/submit", data=data) - failure_text = "You should submit only one submission.xml file." 
- self.assertEqual(response.status, 400) - self.assertIn(failure_text, await response.text()) - - @unittest_run_loop async def test_correct_schema_types_are_returned(self): """Test api endpoint for all schema types.""" response = await self.client.get("/schemas") @@ -257,7 +265,6 @@ async def test_correct_schema_types_are_returned(self): for schema_type in schema_types: self.assertIn(schema_type, response_text) - @unittest_run_loop async def test_correct_study_schema_are_returned(self): """Test api endpoint for study schema types.""" response = await self.client.get("/schemas/study") @@ -265,13 +272,11 @@ async def test_correct_study_schema_are_returned(self): self.assertIn("study", response_text) self.assertNotIn("submission", response_text) - @unittest_run_loop async def test_raises_invalid_schema(self): """Test api endpoint for study schema types.""" response = await self.client.get("/schemas/something") self.assertEqual(response.status, 404) - @unittest_run_loop async def test_raises_not_found_schema(self): """Test api endpoint for study schema types.""" response = await self.client.get("/schemas/project") @@ -279,108 +284,300 @@ async def test_raises_not_found_schema(self): resp_json = await response.json() self.assertEqual(resp_json["detail"], "The provided schema type could not be found. (project)") - @unittest_run_loop + +class SubmissionHandlerTestCase(HandlersTestCase): + """Submission API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. + """ + + await super().setUpAsync() + class_parser = "metadata_backend.api.handlers.submission.XMLToJSONParser" + self.patch_parser = patch(class_parser, spec=True) + self.MockedParser = self.patch_parser.start() + + class_xmloperator = "metadata_backend.api.handlers.submission.XMLOperator" + self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) + self.MockedXMLOperator = self.patch_xmloperator.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_parser.stop() + self.patch_xmloperator.stop() + + async def test_submit_endpoint_submission_does_not_fail(self): + """Test that submission with valid SUBMISSION.xml does not fail.""" + files = [("submission", "ERA521986_valid.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/submit", data=data) + self.assertEqual(response.status, 200) + self.assertEqual(response.content_type, "application/json") + + async def test_submit_endpoint_fails_without_submission_xml(self): + """Test that basic POST submission fails with no submission.xml. + + User should also be notified for missing file. + """ + files = [("analysis", "ERZ266973.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/submit", data=data) + failure_text = "There must be a submission.xml file in submission." + self.assertEqual(response.status, 400) + self.assertIn(failure_text, await response.text()) + + async def test_submit_endpoint_fails_with_many_submission_xmls(self): + """Test submission fails when there's too many submission.xml -files. + + User should be notified for submitting too many files. 
+ """ + files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/submit", data=data) + failure_text = "You should submit only one submission.xml file." + self.assertEqual(response.status, 400) + self.assertIn(failure_text, await response.text()) + + async def test_validation_passes_for_valid_xml(self): + """Test validation endpoint for valid xml.""" + files = [("study", "SRP000539.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + self.assertEqual(response.status, 200) + self.assertIn('{"isValid":true}', await response.text()) + + async def test_validation_fails_bad_schema(self): + """Test validation fails for bad schema and valid xml.""" + files = [("fake", "SRP000539.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + self.assertEqual(response.status, 404) + + async def test_validation_fails_for_invalid_xml_syntax(self): + """Test validation endpoint for XML with bad syntax.""" + files = [("study", "SRP000539_invalid.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + resp_dict = await response.json() + self.assertEqual(response.status, 200) + self.assertIn("Faulty XML file was given, mismatched tag", resp_dict["detail"]["reason"]) + + async def test_validation_fails_for_invalid_xml(self): + """Test validation endpoint for invalid xml.""" + files = [("study", "SRP000539_invalid2.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + resp_dict = await response.json() + self.assertEqual(response.status, 200) + self.assertIn("value must be one of", resp_dict["detail"]["reason"]) + + async def test_validation_fails_with_too_many_files(self): + """Test validation endpoint for too many files.""" + files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] + data = self.create_submission_data(files) + response = await self.client.post("/validate", data=data) + reason = "Only one file can be sent to this endpoint at a time." + self.assertEqual(response.status, 400) + self.assertIn(reason, await response.text()) + + +class ObjectHandlerTestCase(HandlersTestCase): + """Object API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. 
+ """ + + await super().setUpAsync() + + self._mock_draft_doi = "metadata_backend.api.handlers.object.ObjectAPIHandler._draft_doi" + + class_xmloperator = "metadata_backend.api.handlers.object.XMLOperator" + self.patch_xmloperator = patch(class_xmloperator, **self.xmloperator_config, spec=True) + self.MockedXMLOperator = self.patch_xmloperator.start() + + class_operator = "metadata_backend.api.handlers.object.Operator" + self.patch_operator = patch(class_operator, **self.operator_config, spec=True) + self.MockedOperator = self.patch_operator.start() + + class_csv_parser = "metadata_backend.api.handlers.common.CSVToJSONParser" + self.patch_csv_parser = patch(class_csv_parser, spec=True) + self.MockedCSVParser = self.patch_csv_parser.start() + + class_folderoperator = "metadata_backend.api.handlers.object.FolderOperator" + self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) + self.MockedFolderOperator = self.patch_folderoperator.start() + + class_metaxhandler = "metadata_backend.api.handlers.object.MetaxServiceHandler" + self.patch_metaxhandler = patch(class_metaxhandler, spec=True) + self.MockedMetaxHandler = self.patch_metaxhandler.start() + self.MockedMetaxHandler().post_dataset_as_draft.return_value = "123-456" + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_xmloperator.stop() + self.patch_csv_parser.stop() + self.patch_folderoperator.stop() + self.patch_operator.stop() + self.patch_metaxhandler.stop() + async def test_submit_object_works(self): """Test that submission is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", data=data) - self.assertEqual(response.status, 201) - self.assertIn(self.test_ega_string, await response.text()) - self.MockedXMLOperator().create_metadata_object.assert_called_once() + with patch(self._mock_draft_doi, return_value=self._draf_doi_data): + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) + self.assertEqual(response.status, 201) + self.assertIn(self.test_ega_string, await response.text()) + self.MockedXMLOperator().create_metadata_object.assert_called_once() - @unittest_run_loop async def test_submit_object_works_with_json(self): """Test that JSON submission is handled, operator is called.""" json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } - response = await self.client.post("/objects/study", json=json_req) - self.assertEqual(response.status, 201) - self.assertIn(self.test_ega_string, await response.text()) - self.MockedOperator().create_metadata_object.assert_called_once() + with patch(self._mock_draft_doi, return_value=self._draf_doi_data): + response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req) + self.assertEqual(response.status, 201) + self.assertIn(self.test_ega_string, await response.text()) + self.MockedOperator().create_metadata_object.assert_called_once() - @unittest_run_loop async def test_submit_object_missing_field_json(self): """Test that JSON has missing property.""" json_req = {"centerName": "GEO", "alias": "GSE10966"} - response = await self.client.post("/objects/study", json=json_req) - reason = "Provided input does not seem correct 
because: " "''descriptor' is a required property'"
+        response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req)
+        reason = "Provided input does not seem correct because: ''descriptor' is a required property'"
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())
 
-    @unittest_run_loop
     async def test_submit_object_bad_field_json(self):
         """Test that JSON has bad studyType."""
         json_req = {
             "centerName": "GEO",
             "alias": "GSE10966",
-            "descriptor": {"studyTitle": "Highly", "studyType": "ceva"},
+            "descriptor": {
+                "studyTitle": "Highly",
+                "studyType": "ceva",
+                "studyAbstract": "abstract description for testing",
+            },
         }
-        response = await self.client.post("/objects/study", json=json_req)
-        reason = "Provided input does not seem correct for field: " "'descriptor'"
+        response = await self.client.post("/objects/study", params={"folder": "some id"}, json=json_req)
+        reason = "Provided input does not seem correct for field: 'descriptor'"
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())
 
-    @unittest_run_loop
     async def test_post_object_bad_json(self):
         """Test that post JSON is badly formated."""
         json_req = {
             "centerName": "GEO",
             "alias": "GSE10966",
-            "descriptor": {"studyTitle": "Highly", "studyType": "Other"},
+            "descriptor": {
+                "studyTitle": "Highly",
+                "studyType": "Other",
+                "studyAbstract": "abstract description for testing",
+            },
         }
-        response = await self.client.post("/objects/study", data=json_req)
-        reason = "JSON is not correctly formatted. " "See: Expecting value: line 1 column 1"
+        response = await self.client.post("/objects/study", params={"folder": "some id"}, data=json_req)
+        reason = "JSON is not correctly formatted. See: Expecting value: line 1 column 1"
         self.assertEqual(response.status, 400)
         self.assertIn(reason, await response.text())
 
-    @unittest_run_loop
+    async def test_post_object_works_with_csv(self):
+        """Test that CSV file is parsed and submitted as json."""
+        files = [("sample", "EGAformat.csv")]
+        data = self.create_submission_data(files)
+        file_content = self.get_file_data("sample", "EGAformat.csv")
+        self.MockedCSVParser().parse.return_value = [{}, {}, {}]
+        response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data)
+        json_resp = await response.json()
+        self.assertEqual(response.status, 201)
+        self.assertEqual(self.test_ega_string, json_resp[0]["accessionId"])
+        parse_calls = [
+            call(
+                "sample",
+                file_content,
+            )
+        ]
+        op_calls = [call("sample", {}), call("sample", {}), call("sample", {})]
+        self.MockedCSVParser().parse.assert_has_calls(parse_calls, any_order=True)
+        self.MockedOperator().create_metadata_object.assert_has_calls(op_calls, any_order=True)
+
+    async def test_post_object_error_with_empty(self):
+        """Test multipart request post fails when no objects are parsed."""
+        files = [("sample", "empty.csv")]
+        data = self.create_submission_data(files)
+        response = await self.client.post("/objects/sample", params={"folder": "some id"}, data=data)
+        json_resp = await response.json()
+        self.assertEqual(response.status, 400)
+        self.assertEqual(json_resp["detail"], "Request data seems empty.")
+        self.MockedCSVParser().parse.assert_called_once()
+
     async def test_put_object_bad_json(self):
         """Test that put JSON is badly formated."""
         json_req = {
             "centerName": "GEO",
             "alias": "GSE10966",
-            "descriptor": {"studyTitle": "Highly", "studyType": "Other"},
+            "descriptor": {
+                "studyTitle": "Highly",
+                "studyType": "Other",
+                "studyAbstract": 
"abstract description for testing", + }, } call = "/drafts/study/EGA123456" response = await self.client.put(call, data=json_req) - reason = "JSON is not correctly formatted. " "See: Expecting value: line 1 column 1" + reason = "JSON is not correctly formatted. See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) - @unittest_run_loop async def test_patch_object_bad_json(self): """Test that patch JSON is badly formated.""" json_req = {"centerName": "GEO", "alias": "GSE10966"} call = "/drafts/study/EGA123456" response = await self.client.patch(call, data=json_req) - reason = "JSON is not correctly formatted. " "See: Expecting value: line 1 column 1" + reason = "JSON is not correctly formatted. See: Expecting value: line 1 column 1" self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) - @unittest_run_loop async def test_submit_draft_works_with_json(self): """Test that draft JSON submission is handled, operator is called.""" json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } - response = await self.client.post("/drafts/study", json=json_req) + response = await self.client.post("/drafts/study", params={"folder": "some id"}, json=json_req) self.assertEqual(response.status, 201) self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().create_metadata_object.assert_called_once() - @unittest_run_loop async def test_put_draft_works_with_json(self): """Test that draft JSON put method is handled, operator is called.""" json_req = { "centerName": "GEO", "alias": "GSE10966", - "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, + "descriptor": { + "studyTitle": "Highly", + "studyType": "Other", + "studyAbstract": "abstract description for testing", + }, } call = "/drafts/study/EGA123456" response = await self.client.put(call, json=json_req) @@ -388,7 +585,6 @@ async def test_put_draft_works_with_json(self): self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().replace_metadata_object.assert_called_once() - @unittest_run_loop async def test_put_draft_works_with_xml(self): """Test that put XML submisssion is handled, XMLOperator is called.""" files = [("study", "SRP000539.xml")] @@ -399,7 +595,6 @@ async def test_put_draft_works_with_xml(self): self.assertIn(self.test_ega_string, await response.text()) self.MockedXMLOperator().replace_metadata_object.assert_called_once() - @unittest_run_loop async def test_patch_draft_works_with_json(self): """Test that draft JSON patch method is handled, operator is called.""" json_req = {"centerName": "GEO", "alias": "GSE10966"} @@ -409,7 +604,6 @@ async def test_patch_draft_works_with_json(self): self.assertIn(self.test_ega_string, await response.text()) self.MockedOperator().update_metadata_object.assert_called_once() - @unittest_run_loop async def test_patch_draft_raises_with_xml(self): """Test that patch XML submisssion raises error.""" files = [("study", "SRP000539.xml")] @@ -418,17 +612,15 @@ async def test_patch_draft_raises_with_xml(self): response = await self.client.patch(call, data=data) self.assertEqual(response.status, 415) - @unittest_run_loop async def test_submit_object_fails_with_too_many_files(self): """Test that sending two files to endpoint results failure.""" files = [("study", "SRP000539.xml"), 
("study", "SRP000539_copy.xml")] data = self.create_submission_data(files) - response = await self.client.post("/objects/study", data=data) + response = await self.client.post("/objects/study", params={"folder": "some id"}, data=data) reason = "Only one file can be sent to this endpoint at a time." self.assertEqual(response.status, 400) self.assertIn(reason, await response.text()) - @unittest_run_loop async def test_get_object(self): """Test that accessionId returns correct JSON object.""" url = f"/objects/study/{self.query_accessionId}" @@ -437,7 +629,6 @@ async def test_get_object(self): self.assertEqual(response.content_type, "application/json") self.assertEqual(self.metadata_json, await response.json()) - @unittest_run_loop async def test_get_draft_object(self): """Test that draft accessionId returns correct JSON object.""" url = f"/drafts/study/{self.query_accessionId}" @@ -446,7 +637,6 @@ async def test_get_draft_object(self): self.assertEqual(response.content_type, "application/json") self.assertEqual(self.metadata_json, await response.json()) - @unittest_run_loop async def test_get_object_as_xml(self): """Test that accessionId with XML query returns XML object.""" url = f"/objects/study/{self.query_accessionId}" @@ -455,7 +645,6 @@ async def test_get_object_as_xml(self): self.assertEqual(response.content_type, "text/xml") self.assertEqual(self.metadata_xml, await response.text()) - @unittest_run_loop async def test_query_is_called_and_returns_json_in_correct_format(self): """Test query method calls operator and returns mocked JSON object.""" url = f"/objects/study?studyType=foo&name=bar&page={self.page_num}" f"&per_page={self.page_size}" @@ -475,15 +664,14 @@ async def test_query_is_called_and_returns_json_in_correct_format(self): self.assertEqual(self.page_num, args[2]) self.assertEqual(self.page_size, args[3]) - @unittest_run_loop async def test_delete_is_called(self): """Test query method calls operator and returns status correctly.""" url = "/objects/study/EGA123456" - response = await self.client.delete(url) - self.assertEqual(response.status, 204) - self.MockedOperator().delete_metadata_object.assert_called_once() + with patch("metadata_backend.api.handlers.object.DOIHandler.delete", return_value=None): + response = await self.client.delete(url) + self.assertEqual(response.status, 204) + self.MockedOperator().delete_metadata_object.assert_called_once() - @unittest_run_loop async def test_query_fails_with_xml_format(self): """Test query method calls operator and returns status correctly.""" url = "/objects/study?studyType=foo&name=bar&format=xml" @@ -492,54 +680,6 @@ async def test_query_fails_with_xml_format(self): self.assertEqual(response.status, 400) self.assertIn("xml-formatted query results are not supported", json_resp["detail"]) - @unittest_run_loop - async def test_validation_passes_for_valid_xml(self): - """Test validation endpoint for valid xml.""" - files = [("study", "SRP000539.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - self.assertEqual(response.status, 200) - self.assertIn('{"isValid": true}', await response.text()) - - @unittest_run_loop - async def test_validation_fails_bad_schema(self): - """Test validation fails for bad schema and valid xml.""" - files = [("fake", "SRP000539.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - self.assertEqual(response.status, 404) - - @unittest_run_loop - async def 
test_validation_fails_for_invalid_xml_syntax(self): - """Test validation endpoint for XML with bad syntax.""" - files = [("study", "SRP000539_invalid.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - resp_dict = await response.json() - self.assertEqual(response.status, 200) - self.assertIn("Faulty XML file was given, mismatched tag", resp_dict["detail"]["reason"]) - - @unittest_run_loop - async def test_validation_fails_for_invalid_xml(self): - """Test validation endpoint for invalid xml.""" - files = [("study", "SRP000539_invalid2.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - resp_dict = await response.json() - self.assertEqual(response.status, 200) - self.assertIn("value must be one of", resp_dict["detail"]["reason"]) - - @unittest_run_loop - async def test_validation_fails_with_too_many_files(self): - """Test validation endpoint for too many files.""" - files = [("submission", "ERA521986_valid.xml"), ("submission", "ERA521986_valid2.xml")] - data = self.create_submission_data(files) - response = await self.client.post("/validate", data=data) - reason = "Only one file can be sent to this endpoint at a time." - self.assertEqual(response.status, 400) - self.assertIn(reason, await response.text()) - - @unittest_run_loop async def test_operations_fail_for_wrong_schema_type(self): """Test 404 error is raised if incorrect schema name is given.""" get_resp = await self.client.get("/objects/bad_scehma_name/some_id") @@ -547,7 +687,7 @@ async def test_operations_fail_for_wrong_schema_type(self): json_get_resp = await get_resp.json() self.assertIn("Specified schema", json_get_resp["detail"]) - post_rep = await self.client.post("/objects/bad_scehma_name") + post_rep = await self.client.post("/objects/bad_scehma_name", params={"folder": "some id"}) self.assertEqual(post_rep.status, 404) post_json_rep = await post_rep.json() self.assertIn("Specified schema", post_json_rep["detail"]) @@ -567,7 +707,6 @@ async def test_operations_fail_for_wrong_schema_type(self): json_get_resp = await get_resp.json() self.assertIn("Specified schema", json_get_resp["detail"]) - @unittest_run_loop async def test_query_with_invalid_pagination_params(self): """Test that 400s are raised correctly with pagination.""" get_resp = await self.client.get("/objects/study?page=2?title=joo") @@ -577,26 +716,115 @@ async def test_query_with_invalid_pagination_params(self): get_resp = await self.client.get("/objects/study?per_page=0") self.assertEqual(get_resp.status, 400) - @unittest_run_loop + +class UserHandlerTestCase(HandlersTestCase): + """User API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. 
+ """ + + await super().setUpAsync() + class_useroperator = "metadata_backend.api.handlers.user.UserOperator" + self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) + self.MockedUserOperator = self.patch_useroperator.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_useroperator.stop() + + async def test_get_user_works(self): + """Test user object is returned when correct user id is given.""" + response = await self.client.get("/users/current") + self.assertEqual(response.status, 200) + self.MockedUserOperator().read_user.assert_called_once() + json_resp = await response.json() + self.assertEqual(self.test_user, json_resp) + + async def test_user_deletion_is_called(self): + """Test that user object would be deleted.""" + self.MockedUserOperator().read_user.return_value = self.test_user + self.MockedUserOperator().delete_user.return_value = None + await self.client.delete("/users/current") + self.MockedUserOperator().delete_user.assert_called_once() + + +class FolderHandlerTestCase(HandlersTestCase): + """Folder API endpoint class test cases.""" + + async def setUpAsync(self): + """Configure default values for testing and other modules. + + This patches used modules and sets default return values for their + methods. + """ + + await super().setUpAsync() + + class_doihandler = "metadata_backend.api.handlers.folder.DOIHandler" + self.patch_doihandler = patch(class_doihandler, **self.doi_handler, spec=True) + self.MockedDoiHandler = self.patch_doihandler.start() + + self._mock_prepare_doi = "metadata_backend.api.handlers.folder.FolderAPIHandler._prepare_doi_update" + + class_folderoperator = "metadata_backend.api.handlers.folder.FolderOperator" + self.patch_folderoperator = patch(class_folderoperator, **self.folderoperator_config, spec=True) + self.MockedFolderOperator = self.patch_folderoperator.start() + + class_useroperator = "metadata_backend.api.handlers.folder.UserOperator" + self.patch_useroperator = patch(class_useroperator, **self.useroperator_config, spec=True) + self.MockedUserOperator = self.patch_useroperator.start() + + class_operator = "metadata_backend.api.handlers.folder.Operator" + self.patch_operator = patch(class_operator, **self.operator_config, spec=True) + self.MockedOperator = self.patch_operator.start() + + class_metaxhandler = "metadata_backend.api.handlers.folder.MetaxServiceHandler" + self.patch_metaxhandler = patch(class_metaxhandler, spec=True) + self.MockedMetaxHandler = self.patch_metaxhandler.start() + + async def tearDownAsync(self): + """Cleanup mocked stuff.""" + await super().tearDownAsync() + self.patch_doihandler.stop() + self.patch_folderoperator.stop() + self.patch_useroperator.stop() + self.patch_operator.stop() + self.patch_metaxhandler.stop() + async def test_folder_creation_works(self): """Test that folder is created and folder ID returned.""" - json_req = {"name": "test", "description": "test folder"} + json_req = {"name": "test", "description": "test folder", "projectId": "1000"} + with patch( + "metadata_backend.api.operators.ProjectOperator._check_project_exists", + return_value=True, + ): + response = await self.client.post("/folders", json=json_req) + json_resp = await response.json() + self.MockedFolderOperator().create_folder.assert_called_once() + self.assertEqual(response.status, 201) + self.assertEqual(json_resp["folderId"], self.folder_id) + + async def test_folder_creation_with_missing_name_fails(self): + """Test that folder 
creation fails when missing name in request.""" + json_req = {"description": "test folder", "projectId": "1000"} response = await self.client.post("/folders", json=json_req) json_resp = await response.json() - self.MockedFolderOperator().create_folder.assert_called_once() - self.assertEqual(response.status, 201) - self.assertEqual(json_resp["folderId"], self.folder_id) + self.assertEqual(response.status, 400) + self.assertIn("'name' is a required property", json_resp["detail"]) - @unittest_run_loop - async def test_folder_creation_with_missing_data_fails(self): - """Test that folder creation fails when missing data in request.""" - json_req = {"description": "test folder"} + async def test_folder_creation_with_missing_project_fails(self): + """Test that folder creation fails when missing project in request.""" + json_req = {"description": "test folder", "name": "name"} response = await self.client.post("/folders", json=json_req) json_resp = await response.json() self.assertEqual(response.status, 400) - self.assertIn("'name' is a required property", json_resp["detail"]) + self.assertIn("'projectId' is a required property", json_resp["detail"]) - @unittest_run_loop async def test_folder_creation_with_empty_body_fails(self): """Test that folder creation fails when no data in request.""" response = await self.client.post("/folders") @@ -604,11 +832,10 @@ async def test_folder_creation_with_empty_body_fails(self): self.assertEqual(response.status, 400) self.assertIn("JSON is not correctly formatted.", json_resp["detail"]) - @unittest_run_loop async def test_get_folders_with_1_folder(self): """Test get_folders() endpoint returns list with 1 folder.""" self.MockedFolderOperator().query_folders.return_value = (self.test_folder, 1) - response = await self.client.get("/folders") + response = await self.client.get("/folders?projectId=1000") self.MockedFolderOperator().query_folders.assert_called_once() self.assertEqual(response.status, 200) result = { @@ -622,11 +849,10 @@ async def test_get_folders_with_1_folder(self): } self.assertEqual(await response.json(), result) - @unittest_run_loop async def test_get_folders_with_no_folders(self): """Test get_folders() endpoint returns empty list.""" self.MockedFolderOperator().query_folders.return_value = ([], 0) - response = await self.client.get("/folders") + response = await self.client.get("/folders?projectId=1000") self.MockedFolderOperator().query_folders.assert_called_once() self.assertEqual(response.status, 200) result = { @@ -640,25 +866,23 @@ async def test_get_folders_with_no_folders(self): } self.assertEqual(await response.json(), result) - @unittest_run_loop async def test_get_folders_with_bad_params(self): """Test get_folders() with faulty pagination parameters.""" - response = await self.client.get("/folders?page=ayylmao") + response = await self.client.get("/folders?page=ayylmao&projectId=1000") self.assertEqual(response.status, 400) resp = await response.json() self.assertEqual(resp["detail"], "page parameter must be a number, now it is ayylmao") - response = await self.client.get("/folders?page=1&per_page=-100") + response = await self.client.get("/folders?page=1&per_page=-100&projectId=1000") self.assertEqual(response.status, 400) resp = await response.json() self.assertEqual(resp["detail"], "per_page parameter must be over 0") - response = await self.client.get("/folders?published=yes") + response = await self.client.get("/folders?published=yes&projectId=1000") self.assertEqual(response.status, 400) resp = await response.json() 
self.assertEqual(resp["detail"], "'published' parameter must be either 'true' or 'false'") - @unittest_run_loop async def test_get_folder_works(self): """Test folder is returned when correct folder id is given.""" response = await self.client.get("/folders/FOL12345678") @@ -667,17 +891,15 @@ async def test_get_folder_works(self): json_resp = await response.json() self.assertEqual(self.test_folder, json_resp) - @unittest_run_loop async def test_update_folder_fails_with_wrong_key(self): """Test that folder does not update when wrong keys are provided.""" data = [{"op": "add", "path": "/objects"}] response = await self.client.patch("/folders/FOL12345678", json=data) self.assertEqual(response.status, 400) json_resp = await response.json() - reason = "Request contains '/objects' key that cannot be " "updated to folders." + reason = "Request contains '/objects' key that cannot be updated to folders." self.assertEqual(reason, json_resp["detail"]) - @unittest_run_loop async def test_update_folder_passes(self): """Test that folder would update with correct keys.""" self.MockedFolderOperator().update_folder.return_value = self.folder_id @@ -688,17 +910,28 @@ async def test_update_folder_passes(self): json_resp = await response.json() self.assertEqual(json_resp["folderId"], self.folder_id) - @unittest_run_loop async def test_folder_is_published(self): - """Test that folder would be published.""" + """Test that folder would be published and DOI would be added.""" + self.MockedDoiHandler().set_state.return_value = None self.MockedFolderOperator().update_folder.return_value = self.folder_id - response = await self.client.patch("/publish/FOL12345678") - self.MockedFolderOperator().update_folder.assert_called_once() - self.assertEqual(response.status, 200) - json_resp = await response.json() - self.assertEqual(json_resp["folderId"], self.folder_id) + self.MockedMetaxHandler().update_dataset_with_doi_info.return_value = None + self.MockedMetaxHandler().publish_dataset.return_value = None + with patch( + self._mock_prepare_doi, + return_value=( + {"id": "prefix/suffix-study", "attributes": {"url": "http://metax_id", "types": {}}}, + [{"id": "prefix/suffix-dataset", "attributes": {"url": "http://metax_id", "types": {}}}], + [ + {"doi": "prefix/suffix-study", "metaxIdentifier": "metax_id"}, + {"doi": "prefix/suffix-dataset", "metaxIdentifier": "metax_id"}, + ], + ), + ): + response = await self.client.patch("/publish/FOL12345678") + self.assertEqual(response.status, 200) + json_resp = await response.json() + self.assertEqual(json_resp["folderId"], self.folder_id) - @unittest_run_loop async def test_folder_deletion_is_called(self): """Test that folder would be deleted.""" self.MockedFolderOperator().read_folder.return_value = self.test_folder @@ -706,110 +939,3 @@ async def test_folder_deletion_is_called(self): self.MockedFolderOperator().read_folder.assert_called_once() self.MockedFolderOperator().delete_folder.assert_called_once() self.assertEqual(response.status, 204) - - @unittest_run_loop - async def test_get_user_works(self): - """Test user object is returned when correct user id is given.""" - response = await self.client.get("/users/current") - self.assertEqual(response.status, 200) - self.MockedUserOperator().read_user.assert_called_once() - json_resp = await response.json() - self.assertEqual(self.test_user, json_resp) - - @unittest_run_loop - async def test_get_user_drafts_with_no_drafts(self): - """Test getting user drafts when user has no drafts.""" - response = await 
self.client.get("/users/current?items=drafts") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 0, - "totalDrafts": 0, - }, - "drafts": [], - } - self.assertEqual(json_resp, result) - - @unittest_run_loop - async def test_get_user_drafts_with_1_draft(self): - """Test getting user drafts when user has 1 draft.""" - user = self.test_user - user["drafts"].append(self.metadata_json) - self.MockedUserOperator().filter_user.return_value = (user["drafts"], 1) - response = await self.client.get("/users/current?items=drafts") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 1, - "totalDrafts": 1, - }, - "drafts": [self.metadata_json], - } - self.assertEqual(json_resp, result) - - @unittest_run_loop - async def test_get_user_folder_list(self): - """Test get user with folders url returns a folder ID.""" - self.MockedUserOperator().filter_user.return_value = (self.test_user["folders"], 1) - response = await self.client.get("/users/current?items=folders") - self.assertEqual(response.status, 200) - self.MockedUserOperator().filter_user.assert_called_once() - json_resp = await response.json() - result = { - "page": { - "page": 1, - "size": 5, - "totalPages": 1, - "totalFolders": 1, - }, - "folders": ["FOL12345678"], - } - self.assertEqual(json_resp, result) - - @unittest_run_loop - async def test_get_user_items_with_bad_param(self): - """Test that error is raised if items parameter in query is not drafts or folders.""" - response = await self.client.get("/users/current?items=wrong_thing") - self.assertEqual(response.status, 400) - json_resp = await response.json() - self.assertEqual( - json_resp["detail"], "wrong_thing is a faulty item parameter. 
Should be either folders or drafts" - ) - - @unittest_run_loop - async def test_user_deletion_is_called(self): - """Test that user object would be deleted.""" - self.MockedUserOperator().read_user.return_value = self.test_user - self.MockedUserOperator().delete_user.return_value = None - await self.client.delete("/users/current") - self.MockedUserOperator().read_user.assert_called_once() - self.MockedUserOperator().delete_user.assert_called_once() - - @unittest_run_loop - async def test_update_user_fails_with_wrong_key(self): - """Test that user object does not update when forbidden keys are provided.""" - data = [{"op": "add", "path": "/userId"}] - response = await self.client.patch("/users/current", json=data) - self.assertEqual(response.status, 400) - json_resp = await response.json() - reason = "Request contains '/userId' key that cannot be updated to user object" - self.assertEqual(reason, json_resp["detail"]) - - @unittest_run_loop - async def test_update_user_passes(self): - """Test that user object would update with correct keys.""" - self.MockedUserOperator().update_user.return_value = self.user_id - data = [{"op": "add", "path": "/drafts/-", "value": [{"accessionId": "3", "schema": "sample"}]}] - response = await self.client.patch("/users/current", json=data) - self.MockedUserOperator().update_user.assert_called_once() - self.assertEqual(response.status, 200) - json_resp = await response.json() - self.assertEqual(json_resp["userId"], self.user_id) diff --git a/tests/test_health.py b/tests/test_health.py index db20cbdcc..7002562f2 100644 --- a/tests/test_health.py +++ b/tests/test_health.py @@ -2,7 +2,7 @@ from unittest.mock import patch -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop +from aiohttp.test_utils import AioHTTPTestCase from metadata_backend.server import init @@ -24,15 +24,22 @@ async def setUpAsync(self): self.patch_motorclient = patch(class_motorclient, **motorclient_config, spec=True) self.MockedMotorClient = self.patch_motorclient.start() + self.app = await self.get_application() + self.server = await self.get_server(self.app) + self.client = await self.get_client(self.server) + + await self.client.start_server() + async def tearDownAsync(self): """Cleanup mocked stuff.""" self.patch_motorclient.stop() + await self.client.close() + async def fake_asynciomotorclient_server_info(self): """Fake server info method for a motor client.""" return True - @unittest_run_loop async def test_health_check_is_down(self): """Test that the health check returns a partially down status because a mongo db is not connected.""" response = await self.client.get("/health") diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index ba83a8ee2..25c9542f7 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -2,7 +2,8 @@ import unittest from aiohttp import FormData, web -from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop +from aiohttp.test_utils import AioHTTPTestCase +from pathlib import Path from metadata_backend.server import init from metadata_backend.api.middlewares import generate_cookie, decrypt_cookie, _check_csrf @@ -17,7 +18,6 @@ async def get_application(self): """Retrieve web Application for test.""" return await init() - @unittest_run_loop async def test_bad_HTTP_request_converts_into_json_response(self): """Test that middleware reformats 400 error with problem details.""" data = _create_improper_data() @@ -29,7 +29,6 @@ async def test_bad_HTTP_request_converts_into_json_response(self): self.assertIn("There 
must be a submission.xml file in submission.", resp_dict["detail"]) self.assertIn("/submit", resp_dict["instance"]) - @unittest_run_loop async def test_bad_url_returns_json_response(self): """Test that unrouted api url returns a 404 in JSON format.""" response = await self.client.get("/objects/swagadagamaster") @@ -40,13 +39,14 @@ async def test_bad_url_returns_json_response(self): def _create_improper_data(): - """Create request data that produces a 404 error. + """Create request data that produces a 400 error. Submission method in API handlers raises Bad Request (400) error if 'submission' is not included on the first field of request """ + path_to_file = Path(__file__).parent / "test_files" / "study" / "SRP000539_invalid.xml" data = FormData() - data.add_field("study", "content of a file", filename="file", content_type="text/xml") + data.add_field("STUDY", open(path_to_file.as_posix(), "r"), filename="file", content_type="text/xml") return data @@ -97,7 +97,7 @@ def test_check_csrf_idp_skip(self): """Test check_csrf when skipping referer from auth endpoint.""" with unittest.mock.patch( "metadata_backend.api.middlewares.aai_config", - new={"auth_referer": "http://idp:3000"}, + new={"oidc_url": "http://idp:3000"}, ): testreq = get_request_with_fernet() cookie, _ = generate_cookie(testreq) diff --git a/tests/test_operators.py b/tests/test_operators.py index 20ca13851..7178365ea 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -2,22 +2,25 @@ import datetime import re import unittest +from unittest import IsolatedAsyncioTestCase +from unittest.mock import MagicMock, call, patch from uuid import uuid4 -from unittest.mock import MagicMock, patch, call from aiohttp.web import HTTPBadRequest, HTTPNotFound, HTTPUnprocessableEntity -from unittest import IsolatedAsyncioTestCase - +from aiohttp.test_utils import make_mocked_coro from multidict import MultiDict, MultiDictProxy -from pymongo.errors import ConnectionFailure +from pymongo.errors import ConnectionFailure, OperationFailure from metadata_backend.api.operators import ( FolderOperator, Operator, XMLOperator, UserOperator, + ProjectOperator, ) +from .mockups import get_request_with_fernet + class AsyncIterator: """Async iterator based on range.""" @@ -74,9 +77,19 @@ def setUp(self): other patches and mocks for tests. 
""" self.client = MagicMock() + self.project_id = "project_1000" + self.project_generated_id = "64fbdce1c69b436e8d6c91fd746064d4" self.accession_id = uuid4().hex self.folder_id = uuid4().hex self.test_folder = { + "folderId": self.folder_id, + "projectId": self.project_generated_id, + "name": "Mock folder", + "description": "test mock folder", + "published": False, + "metadataObjects": [{"accessionId": "EGA1234567", "schema": "study"}], + } + self.test_folder_no_project = { "folderId": self.folder_id, "name": "Mock folder", "description": "test mock folder", @@ -88,8 +101,6 @@ def setUp(self): self.test_user = { "userId": self.user_generated_id, "name": "tester", - "drafts": [], - "folders": [], } class_dbservice = "metadata_backend.api.operators.DBService" self.patch_dbservice = patch(class_dbservice, spec=True) @@ -112,6 +123,12 @@ def setUp(self): autospec=True, ) self.patch_user.start() + self.patch_project = patch( + ("metadata_backend.api.operators.ProjectOperator._generate_project_id"), + return_value=self.project_generated_id, + autospec=True, + ) + self.patch_project.start() def tearDown(self): """Stop patchers.""" @@ -119,6 +136,7 @@ def tearDown(self): self.patch_accession.stop() self.patch_folder.stop() self.patch_user.stop() + self.patch_project.stop() async def test_reading_metadata_works(self): """Test JSON is read from db correctly.""" @@ -191,9 +209,9 @@ async def test_json_create_passes_and_returns_accessionId(self): "descriptor": {"studyTitle": "Highly", "studyType": "Other"}, } operator.db_service.create.return_value = True - accession = await operator.create_metadata_object("study", data) + data = await operator.create_metadata_object("study", data) operator.db_service.create.assert_called_once() - self.assertEqual(accession, self.accession_id) + self.assertEqual(data["accessionId"], self.accession_id) async def test_json_replace_passes_and_returns_accessionId(self): """Test replace method for JSON works.""" @@ -205,9 +223,9 @@ async def test_json_replace_passes_and_returns_accessionId(self): operator = Operator(self.client) operator.db_service.exists.return_value = True operator.db_service.replace.return_value = True - accession = await operator.replace_metadata_object("study", self.accession_id, data) + data = await operator.replace_metadata_object("study", self.accession_id, data) operator.db_service.replace.assert_called_once() - self.assertEqual(accession, self.accession_id) + self.assertEqual(data["accessionId"], self.accession_id) async def test_json_replace_raises_if_not_exists(self): """Test replace method raises error.""" @@ -264,19 +282,19 @@ async def test_xml_create_passes_and_returns_accessionId(self): operator.db_service.create.return_value = True with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=self.accession_id, + return_value={"accessionId": self.accession_id}, ): with patch("metadata_backend.api.operators.XMLToJSONParser"): - accession = await operator.create_metadata_object("study", "") + data = await operator.create_metadata_object("study", "") operator.db_service.create.assert_called_once() - self.assertEqual(accession, self.accession_id) + self.assertEqual(data["accessionId"], self.accession_id) async def test_correct_data_is_set_to_json_when_creating(self): """Test operator creates object and adds necessary info.""" operator = Operator(self.client) with patch( ("metadata_backend.api.operators.Operator._insert_formatted_object_to_db"), - return_value=self.accession_id, + 
return_value=True, ) as mocked_insert: with patch("metadata_backend.api.operators.datetime") as m_date: m_date.utcnow.return_value = datetime.datetime(2020, 4, 14) @@ -290,9 +308,9 @@ async def test_correct_data_is_set_to_json_when_creating(self): "publishDate": datetime.datetime(2020, 6, 14), }, ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) - async def test_wront_data_is_set_to_json_when_replacing(self): + async def test_wrong_data_is_set_to_json_when_replacing(self): """Test operator replace catches error.""" operator = Operator(self.client) with patch("metadata_backend.api.operators.Operator._replace_object_from_db", return_value=self.accession_id): @@ -320,13 +338,17 @@ async def test_correct_data_is_set_to_json_when_replacing(self): ) as mocked_insert: with patch("metadata_backend.api.operators.datetime") as m_date: m_date.utcnow.return_value = datetime.datetime(2020, 4, 14) + self.MockedDbService().read.return_value = { + "accessionId": self.accession_id, + "dateModified": datetime.datetime(2020, 4, 14), + } acc = await (operator._format_data_to_replace_and_add_to_db("study", self.accession_id, {})) mocked_insert.assert_called_once_with( "study", self.accession_id, {"accessionId": self.accession_id, "dateModified": datetime.datetime(2020, 4, 14)}, ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) async def test_correct_data_is_set_to_json_when_updating(self): """Test operator updates object and adds necessary info.""" @@ -375,18 +397,18 @@ async def test_correct_data_is_set_to_xml_when_creating(self): xml_data = "" with patch( ("metadata_backend.api.operators.Operator._format_data_to_create_and_add_to_db"), - return_value=self.accession_id, + return_value={"accessionId": self.accession_id}, ): with patch( ("metadata_backend.api.operators.XMLOperator._insert_formatted_object_to_db"), - return_value=self.accession_id, + return_value=True, ) as m_insert: with patch("metadata_backend.api.operators.XMLToJSONParser"): acc = await (operator._format_data_to_create_and_add_to_db("study", xml_data)) m_insert.assert_called_once_with( "xml-study", {"accessionId": self.accession_id, "content": xml_data} ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) async def test_correct_data_is_set_to_xml_when_replacing(self): """Test XMLoperator replaces object and adds necessary info.""" @@ -395,7 +417,7 @@ async def test_correct_data_is_set_to_xml_when_replacing(self): xml_data = "" with patch( "metadata_backend.api.operators.Operator._format_data_to_replace_and_add_to_db", - return_value=self.accession_id, + return_value={"accessionId": self.accession_id}, ): with patch( "metadata_backend.api.operators.XMLOperator._replace_object_from_db", @@ -408,7 +430,7 @@ async def test_correct_data_is_set_to_xml_when_replacing(self): self.accession_id, {"accessionId": self.accession_id, "content": xml_data}, ) - self.assertEqual(acc, self.accession_id) + self.assertEqual(acc["accessionId"], self.accession_id) async def test_deleting_metadata_deletes_json_and_xml(self): """Test metadata is deleted.""" @@ -615,6 +637,92 @@ async def test_query_skip_and_limit_are_set_correctly(self): operator.db_service.do_aggregate.assert_has_calls(calls, any_order=True) self.assertEqual(operator.db_service.do_aggregate.call_count, 2) + async def test_get_object_project_connfail(self): + """Test get object project, db connection failure.""" + operator = Operator(self.client) 
+ operator.db_service.query.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_opfail(self): + """Test get object project, db operation failure.""" + operator = Operator(self.client) + operator.db_service.query.side_effect = OperationFailure("err") + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_passes(self): + """Test get object project returns project id.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder]) + result = await operator.get_object_project("template", self.accession_id) + operator.db_service.query.assert_called_once_with("template", {"accessionId": self.accession_id}) + self.assertEqual(result, self.project_generated_id) + + async def test_get_object_project_fails(self): + """Test get object project returns nothing and raises an error.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_fails_missing_project(self): + """Test get object project returns faulty object record that is missing project id.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder_no_project]) + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("template", self.accession_id) + + async def test_get_object_project_fails_invalid_collection(self): + """Test get object project raises bad request on invalid collection.""" + operator = Operator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_object_project("something", self.accession_id) + + async def test_get_folder_project_connfail(self): + """Test get folder project, db connection failure.""" + operator = FolderOperator(self.client) + operator.db_service.query.side_effect = ConnectionFailure + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_opfail(self): + """Test get folder project, db operation failure.""" + operator = FolderOperator(self.client) + operator.db_service.query.side_effect = OperationFailure("err") + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_passes(self): + """Test get folder project returns project id.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([self.test_folder]) + result = await operator.get_folder_project(self.folder_id) + operator.db_service.query.assert_called_once_with("folder", {"folderId": self.folder_id}) + self.assertEqual(result, self.project_generated_id) + + async def test_get_folder_project_fails(self): + """Test get folder project returns nothing and raises an error.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_fails_missing_project(self): + """Test get folder project returns faulty folder record that is missing project id.""" + operator = FolderOperator(self.client) + 
operator.db_service.query.return_value = AsyncIterator([self.test_folder_no_project]) + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + + async def test_get_folder_project_fails_invalid_collection(self): + """Test get folder project raises bad request on invalid collection.""" + operator = FolderOperator(self.client) + operator.db_service.query.return_value = AsyncIterator([]) + with self.assertRaises(HTTPBadRequest): + await operator.get_folder_project(self.folder_id) + async def test_create_folder_works_and_returns_folderId(self): """Test create method for folders work.""" operator = FolderOperator(self.client) @@ -774,6 +882,13 @@ async def test_folder_object_remove_fails(self): with self.assertRaises(HTTPBadRequest): await operator.remove_object(self.test_folder, "study", self.accession_id) + async def test_check_folder_exists_passes(self): + """Test fails exists passes.""" + operator = FolderOperator(self.client) + operator.db_service.exists.return_value = True + await operator.check_folder_exists(self.folder_id) + operator.db_service.exists.assert_called_once() + async def test_check_folder_exists_fails(self): """Test fails exists fails.""" operator = FolderOperator(self.client) @@ -809,7 +924,7 @@ async def test_delete_folder_fails(self): async def test_create_user_works_and_returns_userId(self): """Test create method for users work.""" operator = UserOperator(self.client) - data = "externalId", "name" + data = {"user_id": "externalId", "real_name": "name", "projects": ""} operator.db_service.exists_user_by_external_id.return_value = None operator.db_service.create.return_value = True user = await operator.create_user(data) @@ -819,7 +934,7 @@ async def test_create_user_works_and_returns_userId(self): async def test_create_user_on_create_fails(self): """Test create method fails on db create.""" operator = UserOperator(self.client) - data = "externalId", "name" + data = {"user_id": "externalId", "real_name": "name", "projects": ""} operator.db_service.exists_user_by_external_id.return_value = None operator.db_service.create.return_value = False with self.assertRaises(HTTPBadRequest): @@ -828,45 +943,47 @@ async def test_create_user_on_create_fails(self): async def test_check_user_doc_fails(self): """Test check user doc fails.""" + request = get_request_with_fernet() + request.app["db_client"] = MagicMock() operator = UserOperator(self.client) - operator.db_service.query.side_effect = ConnectionFailure with self.assertRaises(HTTPBadRequest): - await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) + await operator.check_user_has_doc(request, "something", self.user_generated_id, self.folder_id) async def test_check_user_doc_passes(self): - """Test check user doc returns proper data.""" - operator = UserOperator(self.client) - operator.db_service.query.return_value = AsyncIterator(["1"]) - result = await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) - operator.db_service.query.assert_called_once_with( - "user", {"folders": {"$elemMatch": {"$eq": self.folder_id}}, "userId": self.user_generated_id} - ) - self.assertTrue(result) - - async def test_check_user_doc_multiple_folders_fails(self): - """Test check user doc returns multiple unique folders.""" + """Test check user doc passes when object has same project id and user.""" + UserOperator.check_user_has_doc = make_mocked_coro(True) + request = get_request_with_fernet() + request.app["db_client"] = MagicMock() operator = 
UserOperator(self.client) - operator.db_service.query.return_value = AsyncIterator(["1", "2"]) - with self.assertRaises(HTTPUnprocessableEntity): - await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) - operator.db_service.query.assert_called_once_with( - "user", {"folders": {"$elemMatch": {"$eq": self.folder_id}}, "userId": self.user_generated_id} - ) - - async def test_check_user_doc_no_data(self): - """Test check user doc returns no data.""" - operator = UserOperator(self.client) - operator.db_service.query.return_value = AsyncIterator([]) - result = await operator.check_user_has_doc("folders", self.user_generated_id, self.folder_id) - operator.db_service.query.assert_called_once_with( - "user", {"folders": {"$elemMatch": {"$eq": self.folder_id}}, "userId": self.user_generated_id} - ) - self.assertFalse(result) + with patch( + "metadata_backend.api.operators.FolderOperator.get_folder_project", + return_value=self.project_generated_id, + ): + with patch( + "metadata_backend.api.middlewares.decrypt_cookie", + return_value={"id": "test"}, + ): + with patch( + "metadata_backend.api.middlewares.get_session", + return_value={"user_info": {}}, + ): + with patch( + "metadata_backend.api.operators.UserOperator.read_user", + return_value={"userId": "test"}, + ): + with patch( + "metadata_backend.api.operators.UserOperator.check_user_has_project", + return_value=True, + ): + result = await operator.check_user_has_doc( + request, "folders", self.user_generated_id, self.folder_id + ) + self.assertTrue(result) async def test_create_user_works_existing_userId(self): """Test create method for existing user.""" operator = UserOperator(self.client) - data = "eppn", "name" + data = {"user_id": "eppn", "real_name": "name", "projects": ""} operator.db_service.exists_user_by_external_id.return_value = self.user_generated_id user = await operator.create_user(data) operator.db_service.create.assert_not_called() @@ -874,7 +991,7 @@ async def test_create_user_works_existing_userId(self): async def test_create_user_fails(self): """Test create user fails.""" - data = "eppn", "name" + data = {"user_id": "eppn", "real_name": "name", "projects": ""} operator = UserOperator(self.client) operator.db_service.exists_user_by_external_id.side_effect = ConnectionFailure with self.assertRaises(HTTPBadRequest): @@ -897,6 +1014,13 @@ async def test_read_user_fails(self): with self.assertRaises(HTTPBadRequest): await operator.read_user(self.user_id) + async def test_check_user_exists_passes(self): + """Test user exists passes.""" + operator = UserOperator(self.client) + operator.db_service.exists.return_value = True + await operator._check_user_exists(self.user_id) + operator.db_service.exists.assert_called_once() + async def test_check_user_exists_fails(self): """Test user exists fails.""" operator = UserOperator(self.client) @@ -957,50 +1081,167 @@ async def test_deleting_user_fails(self): with self.assertRaises(HTTPBadRequest): await operator.delete_user(self.user_id) - async def test_user_objects_remove_passes(self): - """Test remove objects method for users works.""" + async def test_check_user_has_project_passes(self): + """Test check user has project and doesn't raise an exception.""" + operator = UserOperator(self.client) + operator.db_service.query.return_value = AsyncIterator(["1"]) + result = await operator.check_user_has_project(self.project_generated_id, self.user_generated_id) + operator.db_service.query.assert_called_once_with( + "user", + {"projects": {"$elemMatch": {"projectId": 
self.project_generated_id}}, "userId": self.user_generated_id},
+        )
+        self.assertTrue(result)
+
+    async def test_check_user_has_no_project(self):
+        """Test check user does not have project and the check returns False."""
+        operator = UserOperator(self.client)
+        operator.db_service.query.return_value = AsyncIterator([])
+        result = await operator.check_user_has_project(self.project_generated_id, self.user_generated_id)
+        operator.db_service.query.assert_called_once_with(
+            "user",
+            {"projects": {"$elemMatch": {"projectId": self.project_generated_id}}, "userId": self.user_generated_id},
+        )
+        self.assertFalse(result)
+
+    async def test_check_user_has_project_connfail(self):
+        """Test check user has project, db connection failure."""
         operator = UserOperator(self.client)
+        operator.db_service.query.side_effect = ConnectionFailure
+        with self.assertRaises(HTTPBadRequest):
+            await operator.check_user_has_project(self.project_generated_id, self.user_generated_id)
+
+    async def test_check_user_has_project_opfail(self):
+        """Test check user has project, db operation failure."""
+        operator = UserOperator(self.client)
+        operator.db_service.query.side_effect = OperationFailure("err")
+        with self.assertRaises(HTTPBadRequest):
+            await operator.check_user_has_project(self.project_generated_id, self.user_generated_id)
+
+    async def test_create_project_works_and_returns_projectId(self):
+        """Test create method for projects work."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists_project_by_external_id.return_value = None
+        operator.db_service.create.return_value = True
+        project = await operator.create_project(self.project_id)
+        operator.db_service.create.assert_called_once()
+        self.assertEqual(project, self.project_generated_id)
+
+    async def test_create_project_works_existing_projectId(self):
+        """Test create method for an existing project."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists_project_by_external_id.return_value = self.project_generated_id
+        project = await operator.create_project(self.project_id)
+        operator.db_service.create.assert_not_called()
+        self.assertEqual(project, self.project_generated_id)
+
+    async def test_create_project_on_create_fails(self):
+        """Test create method fails on db create."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists_project_by_external_id.return_value = None
+        operator.db_service.create.return_value = False
+        with self.assertRaises(HTTPBadRequest):
+            await operator.create_project(self.project_id)
+        operator.db_service.create.assert_called_once()
+
+    async def test_create_project_fails(self):
+        """Test create project fails."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists_project_by_external_id.side_effect = ConnectionFailure
+        with self.assertRaises(HTTPBadRequest):
+            await operator.create_project(self.project_id)
+
+    async def test_check_project_exists_fails(self):
+        """Test project exists fails."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.return_value = False
+        with self.assertRaises(HTTPNotFound):
+            await operator._check_project_exists(self.project_id)
+        operator.db_service.exists.assert_called_once()
+
+    async def test_check_project_exists_passes(self):
+        """Test project exists passes."""
+        operator = ProjectOperator(self.client)
         operator.db_service.exists.return_value = True
-        operator.db_service.remove.return_value = self.test_user
-        await operator.remove_objects(self.user_generated_id, "study", ["id"])
+        await 
operator._check_project_exists(self.project_id)
+        operator.db_service.exists.assert_called_once()
+
+    async def test_project_objects_remove_passes(self):
+        """Test remove objects method for projects works."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.exists.return_value = True
+        operator.db_service.remove.return_value = None
+        await operator.remove_templates(self.project_generated_id, ["id"])
         operator.db_service.exists.assert_called_once()
         operator.db_service.remove.assert_called_once()
         self.assertEqual(len(operator.db_service.remove.mock_calls), 1)
 
-    async def test_user_objects_remove_fails(self):
-        """Test remove objects method for users fails."""
-        operator = UserOperator(self.client)
+    async def test_project_objects_remove_fails(self):
+        """Test remove objects method for projects fails."""
+        operator = ProjectOperator(self.client)
         operator.db_service.exists.return_value = True
         operator.db_service.remove.side_effect = ConnectionFailure
         with self.assertRaises(HTTPBadRequest):
-            await operator.remove_objects(self.user_generated_id, "study", ["id"])
+            await operator.remove_templates(self.project_generated_id, ["id"])
 
-    async def test_user_objects_append_passes(self):
-        """Test append objects method for users works."""
-        operator = UserOperator(self.client)
+    async def test_project_objects_append_passes(self):
+        """Test append objects method for projects works."""
+        operator = ProjectOperator(self.client)
         operator.db_service.exists.return_value = True
-        operator.db_service.append.return_value = self.test_user
-        await operator.assign_objects(self.user_generated_id, "study", [])
+        operator.db_service.append.return_value = True
+        await operator.assign_templates(self.project_generated_id, [])
         operator.db_service.exists.assert_called_once()
         operator.db_service.append.assert_called_once()
         self.assertEqual(len(operator.db_service.append.mock_calls), 1)
 
-    async def test_user_objects_append_on_result_fails(self):
-        """Test append objects method for users fails on db response validation."""
-        operator = UserOperator(self.client)
+    async def test_project_objects_append_on_result_fails(self):
+        """Test append objects method for projects fails on db response validation."""
+        operator = ProjectOperator(self.client)
         operator.db_service.exists.return_value = True
         operator.db_service.append.return_value = False
         with self.assertRaises(HTTPBadRequest):
-            await operator.assign_objects(self.user_generated_id, "study", [])
+            await operator.assign_templates(self.project_generated_id, [])
         operator.db_service.exists.assert_called_once()
         operator.db_service.append.assert_called_once()
 
-    async def test_user_objects_assing_fails(self):
-        """Test append objects method for users fails."""
-        operator = UserOperator(self.client)
+    async def test_project_objects_assign_fails(self):
+        """Test append objects method for projects fails."""
+        operator = ProjectOperator(self.client)
         operator.db_service.exists.side_effect = ConnectionFailure
         with self.assertRaises(HTTPBadRequest):
-            await operator.assign_objects(self.user_generated_id, "study", [])
+            await operator.assign_templates(self.project_generated_id, [])
+
+    async def test_update_project_fail_no_project(self):
+        """Test that project which does not exist can not be updated."""
+        operator = ProjectOperator(self.client)
+        with self.assertRaises(HTTPNotFound):
+            with patch(
+                "metadata_backend.api.operators.ProjectOperator._check_project_exists", side_effect=HTTPNotFound
+            ):
+                await operator.update_project(self.project_generated_id, [])
+
+    async def 
+        """Test project update failure with database connection failure."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.patch.side_effect = ConnectionFailure
+        with self.assertRaises(HTTPBadRequest):
+            with patch("metadata_backend.api.operators.ProjectOperator._check_project_exists", return_value=True):
+                await operator.update_project(self.project_generated_id, [])
+
+    async def test_update_project_fail_general(self):
+        """Test project update failure with general error."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.patch.return_value = False
+        with self.assertRaises(HTTPBadRequest):
+            with patch("metadata_backend.api.operators.ProjectOperator._check_project_exists", return_value=True):
+                await operator.update_project(self.project_generated_id, [])
+
+    async def test_update_project_pass(self):
+        """Test project update passes."""
+        operator = ProjectOperator(self.client)
+        operator.db_service.patch.return_value = True
+        with patch("metadata_backend.api.operators.ProjectOperator._check_project_exists", return_value=True):
+            pid = await operator.update_project(self.project_generated_id, [])
+        self.assertEqual(pid, self.project_generated_id)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 8b0f63544..5a2d1e2c4 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -4,21 +4,22 @@
 
 from aiohttp import web
 
-from metadata_backend.helpers.parser import XMLToJSONParser, jsonpatch_mongo
+from metadata_backend.helpers.parser import XMLToJSONParser, CSVToJSONParser, jsonpatch_mongo
 from pymongo import UpdateOne
 
 
 class ParserTestCase(unittest.TestCase):
-    """API endpoint class test cases."""
+    """Parser Test Cases."""
 
     TESTFILES_ROOT = Path(__file__).parent / "test_files"
 
     def setUp(self):
         """Configure variables for tests."""
-        self.parser = XMLToJSONParser()
+        self.xml_parser = XMLToJSONParser()
+        self.csv_parser = CSVToJSONParser()
 
-    def load_xml_from_file(self, submission, filename):
-        """Load XML as string from given file."""
+    def load_file_to_text(self, submission, filename):
+        """Load XML or CSV as a string from given file."""
         path_to_xml_file = self.TESTFILES_ROOT / submission / filename
         return path_to_xml_file.read_text()
 
@@ -27,8 +28,8 @@ def test_study_is_parsed(self):
 
         Tests for some values that converted JSON should have.
         """
-        study_xml = self.load_xml_from_file("study", "SRP000539.xml")
-        study_json = self.parser.parse("study", study_xml)
+        study_xml = self.load_file_to_text("study", "SRP000539.xml")
+        study_json = self.xml_parser.parse("study", study_xml)
         self.assertIn("Highly integrated epigenome maps in Arabidopsis", study_json["descriptor"]["studyTitle"])
         self.assertIn("18423832", study_json["studyLinks"][0]["xrefId"])
 
@@ -37,8 +38,8 @@ def test_sample_is_parsed(self):
 
         Tests for some values that converted JSON should have.
         """
-        sample_xml = self.load_xml_from_file("sample", "SRS001433.xml")
-        sample_json = self.parser.parse("sample", sample_xml)
+        sample_xml = self.load_file_to_text("sample", "SRS001433.xml")
+        sample_json = self.xml_parser.parse("sample", sample_xml)
         self.assertIn("Human HapMap individual NA18758", sample_json["description"])
         self.assertIn("Homo sapiens", sample_json["sampleName"]["scientificName"])
 
@@ -47,8 +48,8 @@ def test_experiment_is_parsed(self):
 
         Tests for some values that convert JSON should have.
""" - experiment_xml = self.load_xml_from_file("experiment", "ERX000119.xml") - experiment_json = self.parser.parse("experiment", experiment_xml) + experiment_xml = self.load_file_to_text("experiment", "ERX000119.xml") + experiment_json = self.xml_parser.parse("experiment", experiment_xml) self.assertIn( "SOLiD sequencing of Human HapMap individual NA18504", experiment_json["design"]["designDescription"] ) @@ -58,8 +59,8 @@ def test_run_is_parsed(self): Tests for some values that convert JSON should have. """ - run_xml = self.load_xml_from_file("run", "ERR000076.xml") - run_json = self.parser.parse("run", run_xml) + run_xml = self.load_file_to_text("run", "ERR000076.xml") + run_json = self.xml_parser.parse("run", run_xml) self.assertIn("ERA000/ERA000014/srf/BGI-FC304RWAAXX_5.srf", run_json["files"][0]["filename"]) self.assertIn("ERX000037", run_json["experimentRef"][0]["accessionId"]) @@ -68,11 +69,11 @@ def test_analysis_is_parsed(self): Tests for some values that convert JSON should have. """ - analysis_xml = self.load_xml_from_file("analysis", "ERZ266973.xml") - analysis_json = self.parser.parse("analysis", analysis_xml) + analysis_xml = self.load_file_to_text("analysis", "ERZ266973.xml") + analysis_json = self.xml_parser.parse("analysis", analysis_xml) self.assertIn( "GCA_000001405.1", - analysis_json["analysisType"]["processedReads"]["assembly"]["accessionId"], + analysis_json["analysisType"]["processedReads"]["assembly"]["accession"], ) def test_submission_is_parsed(self): @@ -80,20 +81,40 @@ def test_submission_is_parsed(self): Test for specific actions in submission. """ - submission_xml = self.load_xml_from_file("submission", "ERA521986_valid.xml") - submission_json = self.parser.parse("submission", submission_xml) + submission_xml = self.load_file_to_text("submission", "ERA521986_valid.xml") + submission_json = self.xml_parser.parse("submission", submission_xml) self.assertEqual({"schema": "study", "source": "SRP000539.xml"}, submission_json["actions"]["action"][0]["add"]) def test_error_raised_when_schema_not_found(self): - """Test 400 is returned when schema.""" + """Test 400 is returned when schema type is invalid.""" with self.assertRaises(web.HTTPBadRequest): - self.parser._load_schema("None") + self.xml_parser._load_schema("None") def test_error_raised_when_input_xml_not_valid_xml(self): """Give parser XML with broken syntax, should fail.""" - study_xml = self.load_xml_from_file("study", "SRP000539_invalid.xml") + study_xml = self.load_file_to_text("study", "SRP000539_invalid.xml") with self.assertRaises(web.HTTPBadRequest): - self.parser.parse("study", study_xml) + self.xml_parser.parse("study", study_xml) + + def test_csv_sample_is_parsed(self): + """Test that a CSV sample is parsed and validated.""" + sample_csv = self.load_file_to_text("sample", "EGAformat.csv") + result = self.csv_parser.parse("sample", sample_csv) + self.assertEqual(len(result), 3) + self.assertEqual("test sample", result[0]["title"]) + self.assertEqual({"taxonId": 9606}, result[0]["sampleName"]) + + def test_csv_parse_with_wrong_schema(self): + """Test 400 is raised with wrong schema type.""" + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("wrong", "id,title\n,\n") + + def test_empty_csv_raises_error(self): + """Test 400 is raised with an empty or an incomplete CSV input.""" + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("sample", "") + with self.assertRaises(web.HTTPBadRequest): + self.csv_parser.parse("sample", "id,title,description\n") def 
         """Test JSON patch to mongo query conversion."""
diff --git a/tests/test_server.py b/tests/test_server.py
index dd3e29f8b..958f74ef4 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -6,7 +6,7 @@
 from unittest.mock import patch
 
 from aiohttp import web
-from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop
+from aiohttp.test_utils import AioHTTPTestCase
 
 from metadata_backend.server import init, main
 
@@ -33,19 +33,16 @@ async def get_application(self):
         """Retrieve web Application for test."""
         return await init()
 
-    @unittest_run_loop
     async def test_init(self):
         """Test everything works in init()."""
         server = await self.get_application()
         self.assertIs(type(server), web.Application)
 
-    @unittest_run_loop
     async def test_api_routes_are_set(self):
         """Test correct amount of api (no frontend) routes is set."""
         server = await self.get_application()
         self.assertIs(len(server.router.resources()), 19)
 
-    @unittest_run_loop
     async def test_frontend_routes_are_set(self):
         """Test correct routes are set when frontend folder is exists."""
         frontend_static = "metadata_backend.server.frontend_static_files"
diff --git a/tox.ini b/tox.ini
index b724108db..ebda48ea8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,8 +4,9 @@ skipsdist = True
 
 [flake8]
 max-line-length = 120
-ignore = D202, D203,D212,D213,D404,W503,ANN101
-exclude = .git/, ./venv/, ./.tox/, build/, metadata_backend.egg-info/
+# ANN401 will be fixed in a separate PR
+ignore = D202, D203, D212, D213, D404, W503, ANN101, ANN401
+exclude = .git/, ./env/, ./venv/, ./.tox/, build/, metadata_backend.egg-info/
 # Not using type hints in tests, ignore all errors
 per-file-ignores =
     tests/*:ANN
@@ -30,6 +31,8 @@ deps =
     -rrequirements.txt
     mypy
     types-python-dateutil
+    types-ujson
+    types-requests
 # Mypy fails if 3rd party library doesn't have type hints configured.
 # Alternative to ignoring imports would be to write custom stub files, which
 # could be done at some point.
@@ -49,13 +52,11 @@ deps =
 commands = bandit -r metadata_backend/
 
 [testenv]
-passenv = COVERALLS_REPO_TOKEN
 deps =
     .[test]
     -rrequirements.txt
 # Stop after first failure
 commands = py.test -x --cov=metadata_backend tests/
-    python {toxinidir}/tests/coveralls.py
 
 [gh-actions]
 python =