From 673c520c88934fc7ea1a4905f0f24446e6e1e0b3 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:43:22 +0200 Subject: [PATCH 01/11] add json schemas for validating messages --- sda_orchestrator/schemas/__init__.py | 1 + sda_orchestrator/schemas/dataset-mapping.json | 47 ++++++ sda_orchestrator/schemas/inbox-remove.json | 39 +++++ sda_orchestrator/schemas/inbox-rename.json | 49 ++++++ sda_orchestrator/schemas/inbox-upload.json | 154 +++++++++++++++++ .../schemas/ingestion-accession-request.json | 146 ++++++++++++++++ .../schemas/ingestion-accession.json | 154 +++++++++++++++++ .../schemas/ingestion-completion.json | 157 ++++++++++++++++++ .../schemas/ingestion-trigger.json | 145 ++++++++++++++++ .../schemas/ingestion-user-error.json | 145 ++++++++++++++++ setup.py | 10 +- 11 files changed, 1044 insertions(+), 3 deletions(-) create mode 100644 sda_orchestrator/schemas/__init__.py create mode 100644 sda_orchestrator/schemas/dataset-mapping.json create mode 100644 sda_orchestrator/schemas/inbox-remove.json create mode 100644 sda_orchestrator/schemas/inbox-rename.json create mode 100644 sda_orchestrator/schemas/inbox-upload.json create mode 100644 sda_orchestrator/schemas/ingestion-accession-request.json create mode 100644 sda_orchestrator/schemas/ingestion-accession.json create mode 100644 sda_orchestrator/schemas/ingestion-completion.json create mode 100644 sda_orchestrator/schemas/ingestion-trigger.json create mode 100644 sda_orchestrator/schemas/ingestion-user-error.json diff --git a/sda_orchestrator/schemas/__init__.py b/sda_orchestrator/schemas/__init__.py new file mode 100644 index 0000000..da2d329 --- /dev/null +++ b/sda_orchestrator/schemas/__init__.py @@ -0,0 +1 @@ +"""JSON Schemas and function for validating messages.""" diff --git a/sda_orchestrator/schemas/dataset-mapping.json b/sda_orchestrator/schemas/dataset-mapping.json new file mode 100644 index 0000000..603713c --- /dev/null +++ b/sda_orchestrator/schemas/dataset-mapping.json @@ -0,0 +1,47 @@ +{ + "title": "JSON schema for dataset mapping message interface. Derived from Federated EGA schemas.", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/dataset-mapping.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "type", + "dataset_id", + "accession_ids" + ], + "additionalProperties": true, + "properties": { + "type": { + "$id": "#/properties/type", + "type": "string", + "title": "The message type", + "description": "The message type", + "const": "mapping" + }, + "dataset_id": { + "$id": "#/properties/dataset_id", + "type": "string", + "title": "The Accession identifier for the dataset", + "description": "The Accession identifier for the dataset", + "pattern": "^\\S+$", + "examples": [ + "anyidentifier" + ] + }, + "accession_ids": { + "$id": "#/properties/accession_ids", + "type": "array", + "title": "The file stable ids in that dataset", + "description": "The file stable ids in that dataset", + "examples": [ + [ + "anyidentifier" + ] + ], + "additionalItems": false, + "items": { + "type": "string", + "pattern": "^\\S+$" + } + } + } +} \ No newline at end of file diff --git a/sda_orchestrator/schemas/inbox-remove.json b/sda_orchestrator/schemas/inbox-remove.json new file mode 100644 index 0000000..c371d6b --- /dev/null +++ b/sda_orchestrator/schemas/inbox-remove.json @@ -0,0 +1,39 @@ +{ + "title": "JSON schema for Local EGA inbox remove message interface", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-remove.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "user", + "filepath", + "operation" + ], + "additionalProperties": true, + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The unique identifier to the file location", + "description": "The unique identifier to the file location", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "operation": { + "$id": "#/properties/operation", + "type": "string", + "const": "remove", + "title": "The operation type", + "description": "The operation type" + } + } +} diff --git a/sda_orchestrator/schemas/inbox-rename.json b/sda_orchestrator/schemas/inbox-rename.json new file mode 100644 index 0000000..d557b2c --- /dev/null +++ b/sda_orchestrator/schemas/inbox-rename.json @@ -0,0 +1,49 @@ +{ + "title": "JSON schema for Local EGA inbox rename message interface", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-rename.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "user", + "filepath", + "oldpath", + "operation" + ], + "additionalProperties": true, + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "oldpath": { + "$id": "#/properties/oldpath", + "type": "string", + "title": "The old filepath", + "description": "The old filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-old-file.c4gh" + ] + }, + "operation": { + "$id": "#/properties/operation", + "type": "string", + "const": "rename", + "title": "The operation type", + "description": "The operation type" + } + } +} diff --git a/sda_orchestrator/schemas/inbox-upload.json b/sda_orchestrator/schemas/inbox-upload.json new file mode 100644 index 0000000..3a1a9d6 --- /dev/null +++ b/sda_orchestrator/schemas/inbox-upload.json @@ -0,0 +1,154 @@ +{ + "title": "JSON schema for Local EGA inbox upload message interface", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/inbox-upload.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "user", + "filepath", + "operation" + ], + "additionalProperties": true, + "definitions": { + "checksum-sha256": { + "$id": "#/definitions/checksum-sha256", + "type": "object", + "title": "The sha256 checksum schema", + "description": "A representation of a sha256 checksum value", + "examples": [ + { + "type": "sha256", + "value": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-sha256/properties/type", + "type": "string", + "const": "sha256", + "title": "The checksum type schema", + "description": "We use sha256" + }, + "value": { + "$id": "#/definitions/checksum-sha256/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + }, + "checksum-md5": { + "$id": "#/definitions/checksum-md5", + "type": "object", + "title": "The md5 checksum schema", + "description": "A representation of a md5 checksum value", + "examples": [ + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-md5/properties/type", + "type": "string", + "const": "md5", + "title": "The checksum type schema", + "description": "We use md5" + }, + "value": { + "$id": "#/definitions/checksum-md5/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{32}$", + "examples": [ + "7Ac236b1a8dce2dac89e7cf45d2b48BD" + ] + } + } + } + }, + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "operation": { + "$id": "#/properties/operation", + "type": "string", + "const": "upload", + "title": "The operation type", + "description": "The operation type" + }, + "filesize": { + "$id": "#/properties/filesize", + "type": "integer", + "title": "The size of the file in bytes", + "description": "The size of the file in bytes" + }, + "file_last_modified": { + "$id": "#/properties/file_last_modified", + "type": "integer", + "title": "The UNIX timestamp of last modification on the file", + "description": "The UNIX timestamp of last modification on the file" + }, + "encrypted_checksums": { + "$id": "#/properties/encrypted_checksums", + "type": "array", + "title": "The checksums of the encrypted file", + "description": "An explanation about the purpose of this instance.", + "examples": [ + [ + { + "type": "sha256", + "value": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + }, + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ] + ], + "additionalItems": false, + "items": { + "anyOf": [ + { + "$ref": "#/definitions/checksum-sha256" + }, + { + "$ref": "#/definitions/checksum-md5" + } + ] + } + } + } +} \ No newline at end of file diff --git a/sda_orchestrator/schemas/ingestion-accession-request.json b/sda_orchestrator/schemas/ingestion-accession-request.json new file mode 100644 index 0000000..61bd8ba --- /dev/null +++ b/sda_orchestrator/schemas/ingestion-accession-request.json @@ -0,0 +1,146 @@ +{ + "title": "JSON schema for Local EGA message interface for requesting an Accession ID to Central EGA", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-accession-request.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "user", + "filepath", + "decrypted_checksums" + ], + "additionalProperties": true, + "definitions": { + "checksum-sha256": { + "$id": "#/definitions/checksum-sha256", + "type": "object", + "title": "The sha256 checksum schema", + "description": "A representation of a sha256 checksum value", + "examples": [ + { + "type": "sha256", + "value": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-sha256/properties/type", + "type": "string", + "const": "sha256", + "title": "The checksum type schema", + "description": "We use sha256" + }, + "value": { + "$id": "#/definitions/checksum-sha256/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + }, + "checksum-md5": { + "$id": "#/definitions/checksum-md5", + "type": "object", + "title": "The md5 checksum schema", + "description": "A representation of a md5 checksum value", + "examples": [ + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-md5/properties/type", + "type": "string", + "const": "md5", + "title": "The checksum type schema", + "description": "We use md5" + }, + "value": { + "$id": "#/definitions/checksum-md5/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{32}$", + "examples": [ + "7Ac236b1a8dce2dac89e7cf45d2b48BD" + ] + } + } + } + }, + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "decrypted_checksums": { + "$id": "#/properties/decrypted_checksums", + "type": "array", + "title": "The checksums of the original file", + "description": "The checksums of the original file. The md5 one is required", + "examples": [ + [ + { + "type": "sha256", + "value": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + }, + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ] + ], + "contains": { + "type": "object", + "properties": { + "type": { + "const": "md5" + } + }, + "required": [ + "type" + ] + }, + "additionalItems": false, + "items": { + "anyOf": [ + { + "$ref": "#/definitions/checksum-sha256" + }, + { + "$ref": "#/definitions/checksum-md5" + } + ] + } + } + } +} diff --git a/sda_orchestrator/schemas/ingestion-accession.json b/sda_orchestrator/schemas/ingestion-accession.json new file mode 100644 index 0000000..844e363 --- /dev/null +++ b/sda_orchestrator/schemas/ingestion-accession.json @@ -0,0 +1,154 @@ +{ + "title": "JSON schema for accession message interface. Derived from Federated EGA schemas.", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-accession.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "type", + "user", + "filepath", + "accession_id", + "decrypted_checksums" + ], + "additionalProperties": true, + "definitions": { + "checksum-sha256": { + "$id": "#/definitions/checksum-sha256", + "type": "object", + "title": "The sha256 checksum schema", + "description": "A representation of a sha256 checksum value", + "examples": [ + { + "type": "sha256", + "value": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-sha256/properties/type", + "type": "string", + "const": "sha256", + "title": "The checksum type schema", + "description": "We use sha256" + }, + "value": { + "$id": "#/definitions/checksum-sha256/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + }, + "checksum-md5": { + "$id": "#/definitions/checksum-md5", + "type": "object", + "title": "The md5 checksum schema", + "description": "A representation of a md5 checksum value", + "examples": [ + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-md5/properties/type", + "type": "string", + "const": "md5", + "title": "The checksum type schema", + "description": "We use md5" + }, + "value": { + "$id": "#/definitions/checksum-md5/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{32}$", + "examples": [ + "7Ac236b1a8dce2dac89e7cf45d2b48BD" + ] + } + } + } + }, + "properties": { + "type": { + "$id": "#/properties/type", + "type": "string", + "title": "The message type", + "description": "The message type", + "const": "accession" + }, + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "accession_id": { + "$id": "#/properties/accession_id", + "type": "string", + "title": "The Accession identifier", + "description": "The Accession identifier", + "pattern": "^\\S+$", + "examples": [ + "anyidentifier" + ] + }, + "decrypted_checksums": { + "$id": "#/properties/decrypted_checksums", + "type": "array", + "title": "The checksums of the original file", + "description": "The checksums of the original file. The md5 one is required", + "examples": [ + [ + { + "type": "sha256", + "value": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + }, + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ] + ], + "additionalItems": false, + "items": { + "anyOf": [ + { + "$ref": "#/definitions/checksum-sha256" + }, + { + "$ref": "#/definitions/checksum-md5" + } + ] + } + } + } +} \ No newline at end of file diff --git a/sda_orchestrator/schemas/ingestion-completion.json b/sda_orchestrator/schemas/ingestion-completion.json new file mode 100644 index 0000000..6e60343 --- /dev/null +++ b/sda_orchestrator/schemas/ingestion-completion.json @@ -0,0 +1,157 @@ +{ + "title": "JSON schema for sending message for ingestion completion. Derived from Federated EGA schemas.", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-completion.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "user", + "filepath", + "decrypted_checksums", + "accession_id" + ], + "additionalProperties": true, + "definitions": { + "checksum-sha256": { + "$id": "#/definitions/checksum-sha256", + "type": "object", + "title": "The sha256 checksum schema", + "description": "A representation of a sha256 checksum value", + "examples": [ + { + "type": "sha256", + "value": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-sha256/properties/type", + "type": "string", + "const": "sha256", + "title": "The checksum type schema", + "description": "We use sha256" + }, + "value": { + "$id": "#/definitions/checksum-sha256/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + }, + "checksum-md5": { + "$id": "#/definitions/checksum-md5", + "type": "object", + "title": "The md5 checksum schema", + "description": "A representation of a md5 checksum value", + "examples": [ + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-md5/properties/type", + "type": "string", + "const": "md5", + "title": "The checksum type schema", + "description": "We use md5" + }, + "value": { + "$id": "#/definitions/checksum-md5/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{32}$", + "examples": [ + "7Ac236b1a8dce2dac89e7cf45d2b48BD" + ] + } + } + } + }, + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "accession_id": { + "$id": "#/properties/accession_id", + "type": "string", + "title": "The Accession identifier", + "description": "The Accession identifier", + "pattern": "^\\S+$", + "examples": [ + "anyidentifier" + ] + }, + "decrypted_checksums": { + "$id": "#/properties/decrypted_checksums", + "type": "array", + "title": "The checksums of the original file", + "description": "The checksums of the original file. The md5 one is required", + "examples": [ + [ + { + "type": "sha256", + "value": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + }, + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ] + ], + "contains": { + "type": "object", + "properties": { + "type": { + "const": "md5" + } + }, + "required": [ + "type" + ] + }, + "additionalItems": false, + "items": { + "anyOf": [ + { + "$ref": "#/definitions/checksum-sha256" + }, + { + "$ref": "#/definitions/checksum-md5" + } + ] + } + } + } +} \ No newline at end of file diff --git a/sda_orchestrator/schemas/ingestion-trigger.json b/sda_orchestrator/schemas/ingestion-trigger.json new file mode 100644 index 0000000..4047339 --- /dev/null +++ b/sda_orchestrator/schemas/ingestion-trigger.json @@ -0,0 +1,145 @@ +{ + "title": "JSON schema for Local EGA ingestion trigger message interface", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-ingest.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "type", + "user", + "filepath" + ], + "additionalProperties": true, + "definitions": { + "checksum-sha256": { + "$id": "#/definitions/checksum-sha256", + "type": "object", + "title": "The sha256 checksum schema", + "description": "A representation of a sha256 checksum value", + "examples": [ + { + "type": "sha256", + "value": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-sha256/properties/type", + "type": "string", + "const": "sha256", + "title": "The checksum type schema", + "description": "We use sha256" + }, + "value": { + "$id": "#/definitions/checksum-sha256/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + }, + "checksum-md5": { + "$id": "#/definitions/checksum-md5", + "type": "object", + "title": "The md5 checksum schema", + "description": "A representation of a md5 checksum value", + "examples": [ + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-md5/properties/type", + "type": "string", + "const": "md5", + "title": "The checksum type schema", + "description": "We use md5" + }, + "value": { + "$id": "#/definitions/checksum-md5/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{32}$", + "examples": [ + "7Ac236b1a8dce2dac89e7cf45d2b48BD" + ] + } + } + } + }, + "properties": { + "type": { + "$id": "#/properties/type", + "type": "string", + "title": "The message type", + "description": "The message type", + "enum": [ + "ingest", + "cancel" + ] + }, + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "encrypted_checksums": { + "$id": "#/properties/encrypted_checksums", + "type": "array", + "title": "The checksums of the encrypted file", + "description": "An explanation about the purpose of this instance.", + "examples": [ + [ + { + "type": "sha256", + "value": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + }, + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ] + ], + "additionalItems": false, + "items": { + "anyOf": [ + { + "$ref": "#/definitions/checksum-sha256" + }, + { + "$ref": "#/definitions/checksum-md5" + } + ] + } + } + } +} diff --git a/sda_orchestrator/schemas/ingestion-user-error.json b/sda_orchestrator/schemas/ingestion-user-error.json new file mode 100644 index 0000000..e9677ec --- /dev/null +++ b/sda_orchestrator/schemas/ingestion-user-error.json @@ -0,0 +1,145 @@ +{ + "title": "JSON schema for Local EGA message interface to Central EGA", + "$id": "https://github.com/EGA-archive/LocalEGA/tree/master/schemas/ingestion-user-error.json", + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": [ + "user", + "filepath", + "reason" + ], + "additionalProperties": true, + "definitions": { + "checksum-sha256": { + "$id": "#/definitions/checksum-sha256", + "type": "object", + "title": "The sha256 checksum schema", + "description": "A representation of a sha256 checksum value", + "examples": [ + { + "type": "sha256", + "value": "82e4e60e7beb3db2e06a00a079788f7d71f75b61a4b75f28c4c942703dabb6d6" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-sha256/properties/type", + "type": "string", + "const": "sha256", + "title": "The checksum type schema", + "description": "We use sha256" + }, + "value": { + "$id": "#/definitions/checksum-sha256/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{64}$", + "examples": [ + "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + ] + } + } + }, + "checksum-md5": { + "$id": "#/definitions/checksum-md5", + "type": "object", + "title": "The md5 checksum schema", + "description": "A representation of a md5 checksum value", + "examples": [ + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ], + "required": [ + "type", + "value" + ], + "additionalProperties": false, + "properties": { + "type": { + "$id": "#/definitions/checksum-md5/properties/type", + "type": "string", + "const": "md5", + "title": "The checksum type schema", + "description": "We use md5" + }, + "value": { + "$id": "#/definitions/checksum-md5/properties/value", + "type": "string", + "title": "The checksum value in hex format", + "description": "The checksum value in (case-insensitive) hex format", + "pattern": "^[a-fA-F0-9]{32}$", + "examples": [ + "7Ac236b1a8dce2dac89e7cf45d2b48BD" + ] + } + } + } + }, + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The username", + "description": "The username", + "examples": [ + "user.name@central-ega.eu" + ] + }, + "filepath": { + "$id": "#/properties/filepath", + "type": "string", + "title": "The new filepath", + "description": "The new filepath", + "examples": [ + "/ega/inbox/user.name@central-ega.eu/the-file.c4gh" + ] + }, + "reason": { + "$id": "#/properties/reason", + "type": "string", + "title": "A user-related error", + "description": "The reason why the error occured", + "examples": [ + "File not found in the inbox", + "Invalid file decryption" + ] + }, + "encrypted_checksums": { + "$id": "#/properties/encrypted_checksums", + "type": "array", + "title": "The checksums of the original file", + "description": "The checksums of the original file. The md5 one is required", + "examples": [ + [ + { + "type": "sha256", + "value": "82E4e60e7beb3db2e06A00a079788F7d71f75b61a4b75f28c4c942703dabb6d6" + }, + { + "type": "md5", + "value": "7Ac236b1a8dce2dac89e7cf45d2b48BD" + } + ] + ], + "additionalItems": false, + "items": { + "anyOf": [ + { + "$ref": "#/definitions/checksum-sha256" + }, + { + "$ref": "#/definitions/checksum-md5" + } + ] + } + } + } +} diff --git a/setup.py b/setup.py index 36590fa..7e650e0 100644 --- a/setup.py +++ b/setup.py @@ -16,9 +16,13 @@ author_email="", description="SDA orchestrator", long_description="", - packages=["sda_orchestrator", "sda_orchestrator/utils"], + packages=["sda_orchestrator", "sda_orchestrator/utils", "sda_orchestrator/schemas"], # If any package contains *.json, include them: - package_data={"": ["*.html"]}, + package_data={ + "": [ + "schemas/*.json", + ] + }, entry_points={ "console_scripts": [ "sdainbox=sda_orchestrator.inbox_consume:main", @@ -40,7 +44,7 @@ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.7", ], - install_requires=["amqpstorm"], + install_requires=["amqpstorm", "jsonschema"], extras_require={ "test": ["coverage", "coveralls", "pytest", "pytest-cov", "tox"], }, From 51ffa0d230fe89b4b0e4b92c7843e00633381fd7 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:43:42 +0200 Subject: [PATCH 02/11] add jsonschema module as dependency --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 46c0b7e..c8de5ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -amqpstorm \ No newline at end of file +amqpstorm +jsonschema \ No newline at end of file From c399649f720432b61899653e1b60207f006f37f3 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:48:57 +0200 Subject: [PATCH 03/11] send error message if validation for smth fails --- sda_orchestrator/utils/consumer.py | 58 ++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/sda_orchestrator/utils/consumer.py b/sda_orchestrator/utils/consumer.py index ab1bbfe..0ddc32d 100644 --- a/sda_orchestrator/utils/consumer.py +++ b/sda_orchestrator/utils/consumer.py @@ -1,14 +1,19 @@ """Message Broker Consumer class.""" import time -from typing import Union -from amqpstorm import Connection, AMQPError, Message -from .logger import LOG +import os +import json import ssl from pathlib import Path -import os +from typing import Union from distutils.util import strtobool +from amqpstorm import Connection, AMQPError, Message + +from .logger import LOG +from jsonschema.exceptions import ValidationError +from ..schemas.validate import ValidateJSON, load_schema + class Consumer: """CEGA message consumer.""" @@ -102,12 +107,51 @@ def handle_message(self, message: Message) -> None: """Handle message.""" pass + def _error_message(self, message: Message, reason: str) -> None: + """Send formated error message to error queue.""" + channel = self.connection.channel() # type: ignore + properties = { + "content_type": "application/json", + "headers": {}, + "correlation_id": message.correlation_id, + "delivery_mode": 2, + } + original_message = json.loads(message.body) + + error_trigger = {"user": original_message["user"], "filepath": original_message["filepath"], "reason": reason} + + if "encrypted_checksums" in original_message: + error_trigger["encrypted_checksums"] = original_message["encrypted_checksums"] + + if "decrypted_checksums" in original_message: + error_trigger["decrypted_checksums"] = original_message["decrypted_checksums"] + + error_msg = json.dumps(error_trigger) + LOG.debug(f"Error Message: {error_msg}") + ValidateJSON(load_schema("ingestion-user-error")).validate(json.loads(error_msg)) + + error = Message.create(channel, error_msg, properties) + error.publish(os.environ.get("ERROR_QUEUE", "error"), exchange=os.environ.get("BROKER_EXCHANGE", "sda")) + + channel.close() + + LOG.info( + f"Published error message (corr-id: {message.correlation_id} filepath: {original_message['filepath']}, ", + f"user: {original_message['user']}, with reason: {reason})", + ) + def __call__(self, message: Message) -> None: """Process the message body.""" try: self.handle_message(message) - except Exception as error: - LOG.error("Something went wrong: {0}".format(error)) - message.reject(requeue=False) + except (ValidationError, Exception) as error: + try: + self._error_message(message, f"Exception occurred: {error}") + except ValidationError: + LOG.error("Could not validate the error message. Not properly formatted.") + except Exception as error: + LOG.error(f"Exception occurred: {error}") + finally: + message.reject(requeue=False) else: message.ack() From 1f0390ac921fd2818cabf6a9b2674f1fcecf4fd4 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:49:46 +0200 Subject: [PATCH 04/11] validate JSON based on schema function --- sda_orchestrator/schemas/validate.py | 46 ++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 sda_orchestrator/schemas/validate.py diff --git a/sda_orchestrator/schemas/validate.py b/sda_orchestrator/schemas/validate.py new file mode 100644 index 0000000..19a953e --- /dev/null +++ b/sda_orchestrator/schemas/validate.py @@ -0,0 +1,46 @@ +"""Validate JSON module with Draft7Validator.""" + +import json +from jsonschema import Draft7Validator, validators + +from typing import Any, Dict, Generator +from pathlib import Path + + +def load_schema(name: str) -> Dict: + """Load JSON schemas.""" + module_path = Path(__file__).resolve().parent + path = module_path.joinpath(f"{name}.json") + + with open(str(path), "r") as fp: + data = fp.read() + + return json.loads(data) + + +def extend_with_default(validator_class: Draft7Validator) -> Draft7Validator: + """Include default values present in JSON Schema.""" + validate_properties = validator_class.VALIDATORS["properties"] + + def set_defaults(validator: Draft7Validator, properties: Dict, instance: Any, schema: str) -> Generator: + """Set defaults in validator.""" + for property, subschema in properties.items(): + if "default" in subschema: + instance.setdefault(property, subschema["default"]) + + for error in validate_properties( + validator, + properties, + instance, + schema, + ): + # Difficult to unit test + yield error # pragma: no cover + + return validators.extend( + validator_class, + {"properties": set_defaults}, + ) + + +ValidateJSON = extend_with_default(Draft7Validator) From a8616952be7211dbf4c1e907c044618cda289290 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:49:50 +0200 Subject: [PATCH 05/11] validate complete consumer incoming & outgoing msg --- sda_orchestrator/complete_consume.py | 61 ++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/sda_orchestrator/complete_consume.py b/sda_orchestrator/complete_consume.py index a33b39b..abc02e0 100644 --- a/sda_orchestrator/complete_consume.py +++ b/sda_orchestrator/complete_consume.py @@ -5,6 +5,8 @@ from .utils.logger import LOG import os from .utils.id_ops import generate_dataset_id +from jsonschema.exceptions import ValidationError +from .schemas.validate import ValidateJSON, load_schema class CompleteConsumer(Consumer): @@ -15,35 +17,60 @@ def handle_message(self, message: Message) -> None: try: complete_msg = json.loads(message.body) - LOG.info(f"Completed message received: {complete_msg} .") - properties = { - "content_type": "application/json", - "headers": {}, - "correlation_id": message.correlation_id, - "delivery_mode": 2, - } + LOG.debug(f"MQ Message body: {message.body} .") + LOG.debug(f"Complete Consumer message received: {complete_msg} .") + LOG.info( + f"Received work (corr-id: {message.correlation_id} filepath: {complete_msg['filepath']}, \ + user: {complete_msg['user']}, accessionid: {complete_msg['accession_id']}, \ + decryptedChecksums: {complete_msg['decrypted_checksums']})", + ) - channel = self.connection.channel() # type: ignore - datasetID = generate_dataset_id(complete_msg["user"], complete_msg["filepath"]) + ValidateJSON(load_schema("ingestion-completion")).validate(complete_msg) + + # Send message to mappings queue for dataset to file mapping accessionID = complete_msg["accession_id"] - content = { - "type": "mapping", - "dataset_id": datasetID, - "accession_ids": [accessionID], - } - mapping = Message.create(channel, json.dumps(content), properties) + datasetID = generate_dataset_id(complete_msg["user"], complete_msg["filepath"]) + self._publish_mappings(message, accessionID, datasetID) + + except ValidationError: + LOG.error("Could not validate the ingestion complete message. Not properly formatted.") + raise + + except Exception as error: + LOG.error(f"Error occurred in complete consumer: {error}.") + raise + + def _publish_mappings(self, message: Message, accessionID: str, datasetID: str) -> None: + """Publish message with dataset to accession ID mapping.""" + properties = { + "content_type": "application/json", + "headers": {}, + "correlation_id": message.correlation_id, + "delivery_mode": 2, + } + try: + + channel = self.connection.channel() # type: ignore + mappings_trigger = {"type": "mapping", "dataset_id": datasetID, "accession_ids": [accessionID]} + + mappings_msg = json.dumps(mappings_trigger) + ValidateJSON(load_schema("dataset-mapping")).validate(json.loads(mappings_msg)) + + mapping = Message.create(channel, mappings_msg, properties) mapping.publish( os.environ.get("MAPPINGS_QUEUE", "mappings"), exchange=os.environ.get("BROKER_EXCHANGE", "sda") ) channel.close() + LOG.info( f"Sent the message to mappings queue to set dataset ID {datasetID} for file \ with accessionID {accessionID}." ) - except Exception as error: - LOG.error("Something went wrong: {0}".format(error)) + except ValidationError: + LOG.error("Could not validate the ingestion mappings message. Not properly formatted.") + raise Exception("Could not validate the ingestion mappings message. Not properly formatted.") def main() -> None: From 066cfdc73ef1ae7aa2b2295dd438b02fa142f362 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:49:57 +0200 Subject: [PATCH 06/11] validate inbox consumer incoming & outgoing msg --- sda_orchestrator/inbox_consume.py | 76 ++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/sda_orchestrator/inbox_consume.py b/sda_orchestrator/inbox_consume.py index 8c545a1..cd9988a 100644 --- a/sda_orchestrator/inbox_consume.py +++ b/sda_orchestrator/inbox_consume.py @@ -1,10 +1,13 @@ """Message Broker inbox step consumer.""" import json +from typing import Dict from amqpstorm import Message from .utils.consumer import Consumer from .utils.logger import LOG import os from pathlib import Path +from jsonschema.exceptions import ValidationError +from .schemas.validate import ValidateJSON, load_schema class InboxConsumer(Consumer): @@ -13,29 +16,70 @@ class InboxConsumer(Consumer): def handle_message(self, message: Message) -> None: """Handle message.""" try: - inbx_msg = json.loads(message.body) - properties = {"content_type": "application/json", "headers": {}, "correlation_id": message.correlation_id} + inbox_msg = json.loads(message.body) + + LOG.debug(f"MQ Message body: {message.body} .") + LOG.debug(f"Inbox Consumer message received: {inbox_msg} .") + LOG.info( + f"Received work (corr-id: {message.correlation_id} filepath: {inbox_msg['filepath']}, \ + user: {inbox_msg['user']} with operation: {inbox_msg['operation']})", + ) + + if inbox_msg["operation"] == "upload": + ValidateJSON(load_schema("inbox-upload")).validate(inbox_msg) + elif inbox_msg["operation"] == "rename": + ValidateJSON(load_schema("inbox-rename")).validate(inbox_msg) + elif inbox_msg["operation"] == "remove": + ValidateJSON(load_schema("inbox-remove")).validate(inbox_msg) + else: + LOG.error("Un-identified inbox operation.") + # we check if this is a path with a suffix or a name - test_path = Path(inbx_msg["filepath"]) + test_path = Path(inbox_msg["filepath"]) if test_path.suffix == "" or test_path.name in ["", ".", ".."]: + LOG.error(f"file: {test_path} does not appear to be a correct path.") raise FileNotFoundError + # Create the files message. # we keep the encrypted_checksum but it can also be missing + self._publish_ingest(message, inbox_msg) + + except ValidationError: + LOG.error("Could not validate the inbox message. Not properly formatted.") + raise + + except Exception as error: + LOG.error(f"Error occurred in inbox consumer: {error}.") + raise + + def _publish_ingest(self, message: Message, inbox_msg: Dict) -> None: + """Publish message with dataset to accession ID mapping.""" + properties = { + "content_type": "application/json", + "headers": {}, + "correlation_id": message.correlation_id, + "delivery_mode": 2, + } + try: channel = self.connection.channel() # type: ignore - content = { - "type": "ingest", - "user": inbx_msg["user"], - "filepath": inbx_msg["filepath"], - } - if "encrypted_checksums" in inbx_msg: - content["encrypted_checksums"] = inbx_msg["encrypted_checksums"] - sent = Message.create(channel, json.dumps(content), properties) - - sent.publish(os.environ.get("INGEST_QUEUE", "ingest"), exchange=os.environ.get("BROKER_EXCHANGE", "sda")) + + ingest_trigger = {"type": "ingest", "user": inbox_msg["user"], "filepath": inbox_msg["filepath"]} + if "encrypted_checksums" in inbox_msg: + ingest_trigger["encrypted_checksums"] = inbox_msg["encrypted_checksums"] + + ingest_msg = json.dumps(ingest_trigger) + ValidateJSON(load_schema("ingestion-trigger")).validate(json.loads(ingest_msg)) + + ingest = Message.create(channel, ingest_msg, properties) + + ingest.publish(os.environ.get("INGEST_QUEUE", "ingest"), exchange=os.environ.get("BROKER_EXCHANGE", "sda")) channel.close() - LOG.info(f'Sent the message to ingest queue to trigger ingestion for filepath: {inbx_msg["filepath"]}.') - except Exception as error: - LOG.error("Something went wrong: {0}".format(error)) + + LOG.info(f'Sent the message to ingest queue to trigger ingestion for filepath: {inbox_msg["filepath"]}.') + + except ValidationError: + LOG.error("Could not validate the ingest trigger message. Not properly formatted.") + raise Exception("Could not validate the ingest trigger message. Not properly formatted.") def main() -> None: From 90a3200366bbd368bb4ddca38d6953f247cfeb91 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:50:04 +0200 Subject: [PATCH 07/11] validate verify consumer incoming & outgoing msg --- sda_orchestrator/verified_consume.py | 65 ++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/sda_orchestrator/verified_consume.py b/sda_orchestrator/verified_consume.py index 516035e..a05aec6 100644 --- a/sda_orchestrator/verified_consume.py +++ b/sda_orchestrator/verified_consume.py @@ -1,10 +1,13 @@ """Message Broker verify step consumer.""" import json +from typing import Dict from amqpstorm import Message from .utils.consumer import Consumer from .utils.logger import LOG import os from .utils.id_ops import generate_accession_id +from jsonschema.exceptions import ValidationError +from .schemas.validate import ValidateJSON, load_schema class VerifyConsumer(Consumer): @@ -13,26 +16,53 @@ class VerifyConsumer(Consumer): def handle_message(self, message: Message) -> None: """Handle message.""" try: - cmp_msg = json.loads(message.body) - properties = { - "content_type": "application/json", - "headers": {}, - "correlation_id": message.correlation_id, - "delivery_mode": 2, - } + verify_msg = json.loads(message.body) + + LOG.debug(f"MQ Message body: {message.body} .") + LOG.debug(f"Verify Consumer message received: {verify_msg} .") + LOG.info( + f"Received work (corr-id: {message.correlation_id} filepath: {verify_msg['filepath']}, \ + user: {verify_msg['user']}, \ + decryptedChecksums: {verify_msg['decrypted_checksums']})", + ) + ValidateJSON(load_schema("ingestion-accession-request")).validate(verify_msg) + + accessionID = generate_accession_id() + self._publish_accessionID(message, accessionID, verify_msg) + + except ValidationError: + LOG.error("Could not validate the verify message. Not properly formatted.") + raise + except Exception as error: + LOG.error(f"Error occurred in verify consumer: {error}.") + raise + + def _publish_accessionID(self, message: Message, accessionID: str, verify_msg: Dict) -> None: + """Publish message with dataset to accession ID mapping.""" + properties = { + "content_type": "application/json", + "headers": {}, + "correlation_id": message.correlation_id, + "delivery_mode": 2, + } + try: # Create the message. channel = self.connection.channel() # type: ignore - accessionID = generate_accession_id() - content = { + + accession_trigger = { "type": "accession", - "user": cmp_msg["user"], - "filepath": cmp_msg["filepath"], - "decrypted_checksums": cmp_msg["decrypted_checksums"], + "user": verify_msg["user"], + "filepath": verify_msg["filepath"], + "decrypted_checksums": verify_msg["decrypted_checksums"], "accession_id": accessionID, } - accession = Message.create(channel, json.dumps(content), properties) - checksum_data = list(filter(lambda x: x["type"] == "sha256", cmp_msg["decrypted_checksums"])) + + accession_msg = json.dumps(accession_trigger) + ValidateJSON(load_schema("ingestion-accession")).validate(json.loads(accession_msg)) + + accession = Message.create(channel, accession_msg, properties) + checksum_data = list(filter(lambda x: x["type"] == "sha256", verify_msg["decrypted_checksums"])) decrypted_checksum = checksum_data[0]["value"] accession.publish( os.environ.get("ACCESSIONIDS_QUEUE", "accessionIDs"), exchange=os.environ.get("BROKER_EXCHANGE", "sda") @@ -40,12 +70,13 @@ def handle_message(self, message: Message) -> None: channel.close() LOG.info( - f'Sent the message to accessionIDs queue to set accession ID for file {cmp_msg["filepath"]} \ + f'Sent the message to accessionIDs queue to set accession ID for file {verify_msg["filepath"]} \ with checksum {decrypted_checksum}.' ) - except Exception as error: - LOG.error("Something went wrong: {0}".format(error)) + except ValidationError: + LOG.error("Could not validate the ingestion accession message. Not properly formatted.") + raise Exception("Could not validate the ingestion accession message. Not properly formatted.") def main() -> None: From d79fa2ec4681f037bc09f728a321f775a38397f2 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:50:13 +0200 Subject: [PATCH 08/11] update dockerfile --- Dockerfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 520f0d7..0cb14b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ -FROM python:3.7-alpine3.10 as BUILD +FROM python:3.7-alpine3.11 as BUILD -RUN apk add --no-cache git postgresql-libs postgresql-dev gcc musl-dev libffi-dev make gnupg && \ +RUN apk add --no-cache git gcc musl-dev libffi-dev make gnupg && \ rm -rf /var/cache/apk/* COPY requirements.txt /root/sdaorch/requirements.txt @@ -11,7 +11,7 @@ RUN pip install --upgrade pip && \ pip install -r /root/sdaorch/requirements.txt && \ pip install /root/sdaorch -FROM python:3.7-alpine3.10 +FROM python:3.7-alpine3.11 LABEL maintainer "NeIC System Developers" LABEL org.label-schema.schema-version="1.0" @@ -28,9 +28,8 @@ COPY --from=BUILD /usr/local/bin/sdaverified /usr/local/bin/ ADD supervisor.conf /etc/ -RUN addgroup -g 1000 sda && \ - adduser -D -u 1000 -G sda sda +RUN echo "nobody:x:65534:65534:nobody:/:/sbin/nologin" > passwd -USER 1000 +USER 65534 ENTRYPOINT ["supervisord", "--configuration", "/etc/supervisor.conf"] \ No newline at end of file From eb88caed4bd1ab0ad06ee1cae8a200fd01e68368 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 22:56:18 +0200 Subject: [PATCH 09/11] bump to version 0.4.0 --- sda_orchestrator/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sda_orchestrator/__init__.py b/sda_orchestrator/__init__.py index 41d3d63..b5af6d2 100644 --- a/sda_orchestrator/__init__.py +++ b/sda_orchestrator/__init__.py @@ -1,5 +1,5 @@ """SDA Orchestrator service for coordinating messages and mapping file id to dataset id.""" __title__ = "sda_orchestrator" -__version__ = "0.3.0" +__version__ = "0.4.0" __author__ = "NeIC System Developers" From 4cd18e2fb1bf2e13e31eff82e1e234a30703f89f Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Tue, 8 Dec 2020 23:04:27 +0200 Subject: [PATCH 10/11] add schemas license --- sda_orchestrator/schemas/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sda_orchestrator/schemas/__init__.py b/sda_orchestrator/schemas/__init__.py index da2d329..602df9e 100644 --- a/sda_orchestrator/schemas/__init__.py +++ b/sda_orchestrator/schemas/__init__.py @@ -1 +1,5 @@ -"""JSON Schemas and function for validating messages.""" +"""JSON Schemas and function for validating messages. + +Schemas are provided by https://github.com/EGA-archive/LocalEGA/tree/master/ingestion/schemas +Under Apache 2.0 license +""" From 3548ccdf04e157f9670d9925d2da7d140ea5ae31 Mon Sep 17 00:00:00 2001 From: Stefan Negru Date: Thu, 10 Dec 2020 16:27:36 +0200 Subject: [PATCH 11/11] check schema path exists --- sda_orchestrator/schemas/validate.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sda_orchestrator/schemas/validate.py b/sda_orchestrator/schemas/validate.py index 19a953e..2d21f2d 100644 --- a/sda_orchestrator/schemas/validate.py +++ b/sda_orchestrator/schemas/validate.py @@ -5,6 +5,7 @@ from typing import Any, Dict, Generator from pathlib import Path +from ..utils.logger import LOG def load_schema(name: str) -> Dict: @@ -12,10 +13,14 @@ def load_schema(name: str) -> Dict: module_path = Path(__file__).resolve().parent path = module_path.joinpath(f"{name}.json") - with open(str(path), "r") as fp: - data = fp.read() + if path.is_file(): + with open(str(path), "r") as fp: + data = fp.read() - return json.loads(data) + return json.loads(data) + else: + LOG.error(f"Schema file {name} not found.") + raise FileNotFoundError def extend_with_default(validator_class: Draft7Validator) -> Draft7Validator: