diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0aa9ac468..a082aceca3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: # to `docker run` and bind-mount an /etc/passwd that maps that to `developer`. # We also need write permissions for the group chmod -R g+w . && sudo chgrp -R 1000 . && make format && sudo chgrp -R $(id -g) . - make openapi + make -C lambdas openapi make check_clean make pep8 AZUL_DEBUG=0 GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} make test diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 25e7b794be..70c8d0c8e0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -73,7 +73,7 @@ test: script: - make format # Any ill-formatted sources, ... - test "$azul_is_sandbox" = 1 && make requirements_update # ... stale transitive dependencies ... - - make openapi # ... or changes to the canned OpenAPI definition document ... + - make -C lambdas openapi # ... or changes to the canned OpenAPI definition document ... - make check_clean # would dirty up the working copy and fail the build. - make pep8 - AZUL_DEBUG=0 make test diff --git a/Makefile b/Makefile index e521cc68bf..a438a36a1f 100644 --- a/Makefile +++ b/Makefile @@ -242,7 +242,3 @@ integration_test: check_python check_branch $(project_root)/lambdas/service/.cha .PHONY: check_clean check_clean: check_env git diff --exit-code && git diff --cached --exit-code - -.PHONY: openapi -openapi: - python $(project_root)/scripts/generate_openapi_document.py diff --git a/README.md b/README.md index 157c681c15..195c4ba757 100644 --- a/README.md +++ b/README.md @@ -2384,7 +2384,7 @@ Changes to the OpenAPI definition are tracked in the source tree. When making changes that affect the definition, run: ``` -make openapi +make -C lambdas openapi ``` and commit any modifications to the `openapi.json` file. 
Failure to do so will diff --git a/lambdas/Makefile b/lambdas/Makefile index de7bd5553f..cf78d594d6 100644 --- a/lambdas/Makefile +++ b/lambdas/Makefile @@ -17,4 +17,8 @@ service: check_env .PHONY: clean clean: check_env - for d in indexer service layer; do $(MAKE) -C $$d clean; done + for d in indexer service layer; do $(MAKE) -C $$d clean || ! break; done + +.PHONY: openapi +openapi: check_env + for d in indexer service; do $(MAKE) -C $$d openapi || ! break; done diff --git a/lambdas/indexer/app.py b/lambdas/indexer/app.py index d1fe5250d7..41e6a714fc 100644 --- a/lambdas/indexer/app.py +++ b/lambdas/indexer/app.py @@ -28,6 +28,7 @@ SignatureHelper, ) from azul.indexer.index_controller import ( + Action, IndexController, ) from azul.indexer.log_forwarding_controller import ( @@ -38,6 +39,14 @@ ) from azul.openapi import ( format_description, + params, + schema, +) +from azul.openapi.responses import ( + json_content, +) +from azul.openapi.spec import ( + CommonEndpointSpecs, ) log = logging.getLogger(__name__) @@ -46,10 +55,8 @@ 'openapi': '3.0.1', 'info': { 'title': config.indexer_name, - # FIXME: Swagger UI for indexer is a stub - # https://github.com/DataBiosphere/azul/issues/5051 'description': format_description(''' - This is the indexer component for Azul. + This is the internal API for Azul's indexer component. 
'''), 'version': '1.0' } @@ -103,14 +110,17 @@ def static_resource(file): return app.swagger_resource(file) -@app.route('/openapi', methods=['GET'], cors=True) +common_specs = CommonEndpointSpecs(app_name='indexer') + + +@app.route('/openapi', methods=['GET'], cors=True, **common_specs.openapi) def openapi(): return Response(status_code=200, headers={'content-type': 'application/json'}, body=app.spec()) -@app.route('/version', methods=['GET'], cors=True) +@app.route('/version', methods=['GET'], cors=True, **common_specs.version) def version(): from azul.changelog import ( compact_changes, @@ -121,27 +131,39 @@ def version(): } -@app.route('/health', methods=['GET'], cors=True) +@app.route('/health', methods=['GET'], cors=True, **common_specs.full_health) def health(): return app.health_controller.health() -@app.route('/health/basic', methods=['GET'], cors=True) +@app.route('/health/basic', + methods=['GET'], + cors=True, + **common_specs.basic_health) def basic_health(): return app.health_controller.basic_health() -@app.route('/health/cached', methods=['GET'], cors=True) +@app.route('/health/cached', + methods=['GET'], + cors=True, + **common_specs.cached_health) def cached_health(): return app.health_controller.cached_health() -@app.route('/health/fast', methods=['GET'], cors=True) +@app.route('/health/fast', + methods=['GET'], + cors=True, + **common_specs.fast_health) def fast_health(): return app.health_controller.fast_health() -@app.route('/health/{keys}', methods=['GET'], cors=True) +@app.route('/health/{keys}', + methods=['GET'], + cors=True, + **common_specs.custom_health) def health_by_key(keys: Optional[str] = None): return app.health_controller.custom_health(keys) @@ -151,7 +173,65 @@ def update_health_cache(_event: chalice.app.CloudWatchEvent): app.health_controller.update_cache() -@app.route('/{catalog}/{action}', methods=['POST']) +@app.route('/{catalog}/{action}', methods=['POST'], method_spec={ + 'tags': ['Indexing'], + 'summary': 'Notify the 
indexer to perform an action on a bundle', + 'description': format_description(''' + Queue a bundle for addition to or deletion from the index. + + The request must be authenticated using HMAC via the ``signature`` + header. Each Azul deployment has its own unique HMAC key. The HMAC + components are the request method, request path, and the SHA256 digest + of the request body. + + A valid HMAC header proves that the client is in possession of the + secret HMAC key and that the request wasn't tampered with while + travelling between client and service, even though the latter is not + strictly necessary considering that TLS is used to encrypt the entire + exchange. Internal clients can obtain the secret key from the + environment they are running in, and that they share with the service. + External clients must have been given the secret key. The now-defunct + DSS was such an external client. The Azul indexer provided the HMAC + secret to DSS when it registered with DSS to be notified about bundle + additions/deletions. These days only internal clients use this endpoint. 
+ '''), + 'requestBody': { + 'description': 'Contents of the notification', + 'required': True, + **json_content(schema.object( + bundle_fqid=schema.object( + uuid=str, + version=str, + source=schema.object( + id=str, + spec=str + ) + ) + )) + }, + 'parameters': [ + params.path('catalog', + schema.enum(*config.catalogs), + description='The name of the catalog to notify.'), + params.path('action', + schema.enum(Action.add.name, Action.delete.name), + description='Which action to perform.'), + params.header('signature', + str, + description='HMAC authentication signature.') + ], + 'responses': { + '200': { + 'description': 'Notification was successfully queued for processing' + }, + '400': { + 'description': 'Request was rejected due to malformed parameters' + }, + '401': { + 'description': 'Request lacked a valid HMAC header' + } + } +}) def post_notification(catalog: CatalogName, action: str): """ Receive a notification event and queue it for indexing or deletion. diff --git a/lambdas/indexer/openapi.json b/lambdas/indexer/openapi.json new file mode 100644 index 0000000000..6d54aaf8b7 --- /dev/null +++ b/lambdas/indexer/openapi.json @@ -0,0 +1,755 @@ +{ + "openapi": "3.0.1", + "info": { + "title": "azul_indexer", + "description": "\nThis is the internal API for Azul's indexer component.\n", + "version": "1.0" + }, + "paths": { + "/openapi": { + "get": { + "summary": "Return OpenAPI specifications for this REST API", + "description": "\nThis endpoint returns the [OpenAPI specifications](https://github.com/OAI/OpenAPI-Specification) for this REST\nAPI. 
These are the specifications used to generate the page\nyou are visiting now.\n", + "responses": { + "200": { + "description": "200 response", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "openapi": { + "type": "string" + }, + "info": { + "type": "object", + "properties": {}, + "additionalProperties": false + }, + "tags": { + "type": "object", + "properties": {}, + "additionalProperties": false + }, + "servers": { + "type": "object", + "properties": {}, + "additionalProperties": false + }, + "paths": { + "type": "object", + "properties": {}, + "additionalProperties": false + }, + "components": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + }, + "required": [ + "openapi", + "info", + "tags", + "servers", + "paths", + "components" + ], + "additionalProperties": false + } + } + } + } + }, + "tags": [ + "Auxiliary" + ] + } + }, + "/version": { + "get": { + "summary": "Describe current version of this REST API", + "tags": [ + "Auxiliary" + ], + "responses": { + "200": { + "description": "Version endpoint is reachable.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "git": { + "type": "object", + "properties": { + "commit": { + "type": "string" + }, + "dirty": { + "type": "boolean" + } + }, + "required": [ + "commit", + "dirty" + ], + "additionalProperties": false + }, + "changes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": { + "type": "string" + }, + "issues": { + "type": "array", + "items": { + "type": "string" + } + }, + "upgrade": { + "type": "array", + "items": { + "type": "string" + } + }, + "notes": { + "type": "string" + } + }, + "required": [ + "title", + "issues", + "upgrade" + ], + "additionalProperties": false + } + } + }, + "required": [ + "git", + "changes" + ], + "additionalProperties": false + } + } + } + } + } + } + }, + "/health": { + "get": { + "summary": "Complete health check", + 
"description": "\nHealth check of the indexer REST API and all\nresources it depends on. This may take long time to complete\nand exerts considerable load on the API. For that reason it\nshould not be requested frequently or by automated\nmonitoring facilities that would be better served by the\n[`/health/fast`](#operations-Auxiliary-get_health_fast) or\n[`/health/cached`](#operations-Auxiliary-get_health_cached)\nendpoints.\n", + "responses": { + "200": { + "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "other_lambdas": {}, + "queues": {}, + "progress": {}, + "api_endpoints": {}, + "elasticsearch": {}, + "up": true + } + } + } + }, + "503": { + "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues 
used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "other_lambdas": {}, + "queues": {}, + "progress": {}, + "api_endpoints": {}, + "elasticsearch": {}, + "up": false + } + } + } + } + }, + "tags": [ + "Auxiliary" + ] + } + }, + "/health/basic": { + "get": { + "summary": "Basic health check", + "description": "\nHealth check of only the REST API itself, excluding other\nresources that it depends on. 
A 200 response indicates that\nthe indexer is reachable via HTTP(S) but nothing\nmore.\n", + "responses": { + "200": { + "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "up": true + } + } + } + }, + "503": { + "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "up": false + } + } + } + } + }, + "tags": [ + "Auxiliary" + ] + } + }, + "/health/cached": { + "get": { + "summary": "Cached health check for continuous monitoring", + "description": "\nReturn a cached copy of the\n[`/health/fast`](#operations-Auxiliary-get_health_fast)\nresponse. This endpoint is optimized for continuously\nrunning, distributed health monitors such as Route 53 health\nchecks. The cache ensures that the indexer is not\noverloaded by these types of health monitors. 
The cache is\nupdated every minute.\n", + "responses": { + "200": { + "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "elasticsearch": {}, + "queues": {}, + "progress": {}, + "up": true + } + } + } + }, + "503": { + "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + 
] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "elasticsearch": {}, + "queues": {}, + "progress": {}, + "up": false + } + } + } + } + }, + "tags": [ + "Auxiliary" + ] + } + }, + "/health/fast": { + "get": { + "summary": "Fast health check", + "description": "\nPerformance-optimized health check of the REST API and other\ncritical resources that it depends on. This endpoint can be\nrequested more frequently than\n[`/health`](#operations-Auxiliary-get_health) but\nperiodically scheduled, automated requests should be made to\n[`/health/cached`](#operations-Auxiliary-get_health_cached).\n", + "responses": { + "200": { + "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "elasticsearch": {}, + "queues": {}, + "progress": {}, + "up": true + } + } + } + }, + "503": { + "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* 
`progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "elasticsearch": {}, + "queues": {}, + "progress": {}, + "up": false + } + } + } + } + }, + "tags": [ + "Auxiliary" + ] + } + }, + "/health/{keys}": { + "parameters": [ + { + "name": "keys", + "in": "path", + "required": true, + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "api_endpoints", + "elasticsearch", + "other_lambdas", + "progress", + "queues" + ] + } + }, + "description": "\nA comma-separated list of keys selecting the health\nchecks to be performed. Each key corresponds to an\nentry in the response.\n" + } + ], + "get": { + "summary": "Selective health check", + "description": "\nThis endpoint allows clients to request a health check on a\nspecific set of resources. 
Each resource is identified by a\n*key*, the same key under which the resource appears in a\n[`/health`](#operations-Auxiliary-get_health) response.\n", + "responses": { + "200": { + "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + true + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "other_lambdas": {}, + "queues": {}, + "progress": {}, + "api_endpoints": {}, + "elasticsearch": {}, + "up": true + } + } + } + }, + "503": { + "description": "\nAt least one of the checked resources\nis not healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the 
Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`false`.\n\n\nAt least one of the nested `up` keys\nis `false`.\n", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": { + "type": "object", + "properties": { + "up": { + "type": "boolean", + "enum": [ + false + ] + } + }, + "required": [ + "up" + ], + "additionalProperties": true + } + }, + "example": { + "other_lambdas": {}, + "queues": {}, + "progress": {}, + "api_endpoints": {}, + "elasticsearch": {}, + "up": false + } + } + } + } + }, + "tags": [ + "Auxiliary" + ] + } + }, + "/{catalog}/{action}": { + "post": { + "tags": [ + "Indexing" + ], + "summary": "Notify the indexer to perform an action on a bundle", + "description": "\nQueue a bundle for addition to or deletion from the index.\n\nThe request must be authenticated using HMAC via the ``signature``\nheader. Each Azul deployment has its own unique HMAC key. The HMAC\ncomponents are the request method, request path, and the SHA256 digest\nof the request body.\n\nA valid HMAC header proves that the client is in possession of the\nsecret HMAC key and that the request wasn't tampered with while\ntravelling between client and service, even though the latter is not\nstrictly necessary considering that TLS is used to encrypt the entire\nexchange. Internal clients can obtain the secret key from the\nenvironment they are running in, and that they share with the service.\nExternal clients must have been given the secret key. The now-defunct\nDSS was such an external client. The Azul indexer provided the HMAC\nsecret to DSS when it registered with DSS to be notified about bundle\nadditions/deletions. 
These days only internal clients use this endpoint.\n", + "requestBody": { + "description": "Contents of the notification", + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "bundle_fqid": { + "type": "object", + "properties": { + "uuid": { + "type": "string" + }, + "version": { + "type": "string" + }, + "source": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "spec": { + "type": "string" + } + }, + "required": [ + "id", + "spec" + ], + "additionalProperties": false + } + }, + "required": [ + "uuid", + "version", + "source" + ], + "additionalProperties": false + } + }, + "required": [ + "bundle_fqid" + ], + "additionalProperties": false + } + } + } + }, + "parameters": [ + { + "name": "catalog", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "dcp2" + ] + }, + "description": "The name of the catalog to notify." + }, + { + "name": "action", + "in": "path", + "required": true, + "schema": { + "type": "string", + "enum": [ + "add", + "delete" + ] + }, + "description": "Which action to perform." + }, + { + "name": "signature", + "in": "header", + "required": true, + "schema": { + "type": "string" + }, + "description": "HMAC authentication signature." 
+ } + ], + "responses": { + "200": { + "description": "Notification was successfully queued for processing" + }, + "400": { + "description": "Request was rejected due to malformed parameters" + }, + "401": { + "description": "Request lacked a valid HMAC header" + } + } + } + } + }, + "tags": [], + "servers": [ + { + "url": "http://localhost/" + } + ] +} \ No newline at end of file diff --git a/lambdas/lambdas.mk b/lambdas/lambdas.mk index a71ac5e50f..2a0949c891 100644 --- a/lambdas/lambdas.mk +++ b/lambdas/lambdas.mk @@ -49,3 +49,7 @@ clean: git_clean_recursive package: check_branch check_python check_aws config environ compile python -m azul.changelog vendor chalice package --stage $(AZUL_DEPLOYMENT_STAGE) --pkg-format terraform .chalice/terraform + +.PHONY: openapi +openapi: check_python + python $(project_root)/scripts/generate_openapi_document.py diff --git a/lambdas/service/app.py b/lambdas/service/app.py index 4ea8b3ebc2..b781fd30fb 100644 --- a/lambdas/service/app.py +++ b/lambdas/service/app.py @@ -58,7 +58,6 @@ AccessMethod, ) from azul.health import ( - Health, HealthController, ) from azul.indexer.document import ( @@ -75,6 +74,9 @@ responses, schema, ) +from azul.openapi.spec import ( + CommonEndpointSpecs, +) from azul.plugins import ( ManifestFormat, MetadataPlugin, @@ -439,174 +441,49 @@ def oauth2_redirect(): body=oauth2_redirect_html) -@app.route('/openapi', methods=['GET'], cors=True, method_spec={ - 'summary': 'Return OpenAPI specifications for this service', - 'description': 'This endpoint returns the [OpenAPI specifications]' - '(https://github.com/OAI/OpenAPI-Specification) for this ' - 'service. 
These are the specifications used to generate the ' - 'page you are visiting now.', - 'responses': { - '200': { - 'description': '200 response', - **responses.json_content( - schema.object( - openapi=str, - **{ - k: schema.object() - for k in ('info', 'tags', 'servers', 'paths', 'components') - } - ) - ) - } - }, - 'tags': ['Auxiliary'] -}) +common_specs = CommonEndpointSpecs(app_name='service') + + +@app.route('/openapi', methods=['GET'], cors=True, **common_specs.openapi) def openapi(): return Response(status_code=200, headers={'content-type': 'application/json'}, body=app.spec()) -health_up_key = { - 'up': format_description(''' - indicates the overall result of the health check - '''), -} - -fast_health_keys = { - **{ - prop.key: format_description(prop.description) - for prop in Health.fast_properties['service'] - }, - **health_up_key -} - -health_all_keys = { - **{ - prop.key: format_description(prop.description) - for prop in Health.all_properties - }, - **health_up_key -} - - -def health_spec(health_keys: dict): - return { - 'responses': { - f'{200 if up else 503}': { - 'description': format_description(f''' - {'The' if up else 'At least one of the'} checked resources - {'are' if up else 'is not'} healthy. - - The response consists of the following keys: - - ''') + ''.join(f'* `{k}` {v}' for k, v in health_keys.items()) + format_description(f''' - - The top-level `up` key of the response is - `{'true' if up else 'false'}`. - - ''') + (format_description(f''' - {'All' if up else 'At least one'} of the nested `up` keys - {'are `true`' if up else 'is `false`'}. 
- ''') if len(health_keys) > 1 else ''), - **responses.json_content( - schema.object( - additional_properties=schema.object( - additional_properties=True, - up=schema.enum(up) - ), - up=schema.enum(up) - ), - example={ - k: up if k == 'up' else {} for k in health_keys - } - ) - } for up in [True, False] - }, - 'tags': ['Auxiliary'] - } - - -@app.route('/health', methods=['GET'], cors=True, method_spec={ - 'summary': 'Complete health check', - 'description': format_description(''' - Health check of the service and all resources it depends on. This may - take long time to complete and exerts considerable load on the service. - For that reason it should not be requested frequently or by automated - monitoring facilities that would be better served by the - [`/health/fast`](#operations-Auxiliary-get_health_fast) or - [`/health/cached`](#operations-Auxiliary-get_health_cached) endpoints. - '''), - **health_spec(health_all_keys) -}) +@app.route('/health', methods=['GET'], cors=True, **common_specs.full_health) def health(): return app.health_controller.health() -@app.route('/health/basic', methods=['GET'], cors=True, method_spec={ - 'summary': 'Basic health check', - 'description': format_description(''' - Health check of only the REST API itself, excluding other resources - the service depends on. A 200 response indicates that the service is - reachable via HTTP(S) but nothing more. - '''), - **health_spec(health_up_key) -}) +@app.route('/health/basic', + methods=['GET'], + cors=True, + **common_specs.basic_health) def basic_health(): return app.health_controller.basic_health() -@app.route('/health/cached', methods=['GET'], cors=True, method_spec={ - 'summary': 'Cached health check for continuous monitoring', - 'description': format_description(''' - Return a cached copy of the - [`/health/fast`](#operations-Auxiliary-get_health_fast) response. - This endpoint is optimized for continuously running, distributed health - monitors such as Route 53 health checks. 
The cache ensures that the - service is not overloaded by these types of health monitors. The cache - is updated every minute. - '''), - **health_spec(fast_health_keys) -}) +@app.route('/health/cached', + methods=['GET'], + cors=True, + **common_specs.cached_health) def cached_health(): return app.health_controller.cached_health() -@app.route('/health/fast', methods=['GET'], cors=True, method_spec={ - 'summary': 'Fast health check', - 'description': format_description(''' - Performance-optimized health check of the REST API and other critical - resources the service depends on. This endpoint can be requested more - frequently than [`/health`](#operations-Auxiliary-get_health) but - periodically scheduled, automated requests should be made to - [`/health/cached`](#operations-Auxiliary-get_health_cached). - '''), - **health_spec(fast_health_keys) -}) +@app.route('/health/fast', + methods=['GET'], + cors=True, + **common_specs.fast_health) def fast_health(): return app.health_controller.fast_health() -@app.route('/health/{keys}', methods=['GET'], cors=True, method_spec={ - 'summary': 'Selective health check', - 'description': format_description(''' - This endpoint allows clients to request a health check on a specific set - of resources. Each resource is identified by a *key*, the same key - under which the resource appears in a - [`/health`](#operations-Auxiliary-get_health) response. - '''), - **health_spec(health_all_keys) -}, path_spec={ - 'parameters': [ - params.path( - 'keys', - type_=schema.array(schema.enum(*sorted(Health.all_keys))), - description=''' - A comma-separated list of keys selecting the health checks to be - performed. Each key corresponds to an entry in the response. 
- ''') - ], -}) +@app.route('/health/{keys}', + methods=['GET'], + cors=True, + **common_specs.custom_health) def custom_health(keys: Optional[str] = None): return app.health_controller.custom_health(keys) @@ -616,31 +493,7 @@ def update_health_cache(_event: chalice.app.CloudWatchEvent): app.health_controller.update_cache() -@app.route('/version', methods=['GET'], cors=True, method_spec={ - 'summary': 'Describe current version of the Azul service', - 'tags': ['Auxiliary'], - 'responses': { - '200': { - 'description': 'Version endpoint is reachable.', - **responses.json_content( - schema.object( - git=schema.object( - commit=str, - dirty=bool - ), - changes=schema.array( - schema.object( - title=str, - issues=schema.array(str), - upgrade=schema.array(str), - notes=schema.optional(str) - ) - ) - ) - ) - } - } -}) +@app.route('/version', methods=['GET'], cors=True, **common_specs.version) def version(): from azul.changelog import ( compact_changes, diff --git a/lambdas/service/openapi.json b/lambdas/service/openapi.json index 3608a0fda8..d26dacd72d 100644 --- a/lambdas/service/openapi.json +++ b/lambdas/service/openapi.json @@ -26,8 +26,8 @@ "paths": { "/openapi": { "get": { - "summary": "Return OpenAPI specifications for this service", - "description": "This endpoint returns the [OpenAPI specifications](https://github.com/OAI/OpenAPI-Specification) for this service. These are the specifications used to generate the page you are visiting now.", + "summary": "Return OpenAPI specifications for this REST API", + "description": "\nThis endpoint returns the\n[OpenAPI specifications](https://github.com/OAI/OpenAPI-Specification)\nfor this REST API. These are the specifications used to generate\nthe page you are visiting now.\n", "responses": { "200": { "description": "200 response", @@ -87,7 +87,7 @@ "/health": { "get": { "summary": "Complete health check", - "description": "\nHealth check of the service and all resources it depends on.
This may\ntake long time to complete and exerts considerable load on the service.\nFor that reason it should not be requested frequently or by automated\nmonitoring facilities that would be better served by the\n[`/health/fast`](#operations-Auxiliary-get_health_fast) or\n[`/health/cached`](#operations-Auxiliary-get_health_cached) endpoints.\n", + "description": "\nHealth check of the service REST API and all\nresources it depends on. This may take long time to complete\nand exerts considerable load on the API. For that reason it\nshould not be requested frequently or by automated\nmonitoring facilities that would be better served by the\n[`/health/fast`](#operations-Auxiliary-get_health_fast) or\n[`/health/cached`](#operations-Auxiliary-get_health_cached)\nendpoints.\n", "responses": { "200": { "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", @@ -186,7 +186,7 @@ "/health/basic": { "get": { "summary": "Basic health check", - "description": "\nHealth check of only the REST API itself, excluding other resources\nthe service depends on. A 200 response indicates that the service is\nreachable via HTTP(S) but nothing more.\n", + "description": "\nHealth check of only the REST API itself, excluding other\nresources that it depends on. 
A 200 response indicates that\nthe service is reachable via HTTP(S) but nothing\nmore.\n", "responses": { "200": { "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n", @@ -275,7 +275,7 @@ "/health/cached": { "get": { "summary": "Cached health check for continuous monitoring", - "description": "\nReturn a cached copy of the\n[`/health/fast`](#operations-Auxiliary-get_health_fast) response.\nThis endpoint is optimized for continuously running, distributed health\nmonitors such as Route 53 health checks. The cache ensures that the\nservice is not overloaded by these types of health monitors. The cache\nis updated every minute.\n", + "description": "\nReturn a cached copy of the\n[`/health/fast`](#operations-Auxiliary-get_health_fast)\nresponse. This endpoint is optimized for continuously\nrunning, distributed health monitors such as Route 53 health\nchecks. The cache ensures that the service is not\noverloaded by these types of health monitors. The cache is\nupdated every minute.\n", "responses": { "200": { "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", @@ -368,7 +368,7 @@ "/health/fast": { "get": { "summary": "Fast health check", - "description": "\nPerformance-optimized health check of the REST API and other critical\nresources the service depends on. 
This endpoint can be requested more\nfrequently than [`/health`](#operations-Auxiliary-get_health) but\nperiodically scheduled, automated requests should be made to\n[`/health/cached`](#operations-Auxiliary-get_health_cached).\n", + "description": "\nPerformance-optimized health check of the REST API and other\ncritical resources that it depends on. This endpoint can be\nrequested more frequently than\n[`/health`](#operations-Auxiliary-get_health) but\nperiodically scheduled, automated requests should be made to\n[`/health/cached`](#operations-Auxiliary-get_health_cached).\n", "responses": { "200": { "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", @@ -477,12 +477,12 @@ ] } }, - "description": "\nA comma-separated list of keys selecting the health checks to be\nperformed. Each key corresponds to an entry in the response.\n" + "description": "\nA comma-separated list of keys selecting the health\nchecks to be performed. Each key corresponds to an\nentry in the response.\n" } ], "get": { "summary": "Selective health check", - "description": "\nThis endpoint allows clients to request a health check on a specific set\nof resources. Each resource is identified by a *key*, the same key\nunder which the resource appears in a\n[`/health`](#operations-Auxiliary-get_health) response.\n", + "description": "\nThis endpoint allows clients to request a health check on a\nspecific set of resources.
Each resource is identified by a\n*key*, the same key under which the resource appears in a\n[`/health`](#operations-Auxiliary-get_health) response.\n", "responses": { "200": { "description": "\nThe checked resources\nare healthy.\n\nThe response consists of the following keys:\n\n* `other_lambdas` \nIndicates whether the companion REST API responds to HTTP requests.\n* `queues` \nReturns information about the SQS queues used by the indexer.\n* `progress` \nThe number of Data Store bundles pending to be indexed and the number\nof index documents in need of updating.\n* `api_endpoints` \nIndicates whether important service API endpoints are operational.\n* `elasticsearch` \nIndicates whether the Elasticsearch cluster is responsive.\n* `up` \nindicates the overall result of the health check\n\n\nThe top-level `up` key of the response is\n`true`.\n\n\nAll of the nested `up` keys\nare `true`.\n", @@ -580,7 +580,7 @@ }, "/version": { "get": { - "summary": "Describe current version of the Azul service", + "summary": "Describe current version of this REST API", "tags": [ "Auxiliary" ], diff --git a/scripts/generate_openapi_document.py b/scripts/generate_openapi_document.py index 2da3b8a294..df988c87f0 100644 --- a/scripts/generate_openapi_document.py +++ b/scripts/generate_openapi_document.py @@ -1,5 +1,7 @@ import json -import os +from pathlib import ( + Path, +) from unittest.mock import ( PropertyMock, patch, @@ -12,6 +14,9 @@ from azul import ( config, ) +from azul.chalice import ( + AzulChaliceApp, +) from azul.files import ( write_file_atomically, ) @@ -30,6 +35,8 @@ def main(): sources=set()) } + lambda_name = Path.cwd().name + # To create a normalized OpenAPI document, we patch any # deployment-specific variables that affect the document. 
with patch.object(target=type(config), @@ -38,19 +45,24 @@ def main(): return_value=catalogs): assert config.catalogs == catalogs with patch.object(target=config, - attribute='service_function_name', - return_value='azul_service'): - assert config.service_name == 'azul_service' - service_endpoint = furl('http://localhost') + attribute=f'{lambda_name}_function_name', + return_value=f'azul_{lambda_name}'): + assert getattr(config, f'{lambda_name}_name') == f'azul_{lambda_name}' with patch.object(target=type(config), - attribute='service_endpoint', - new=service_endpoint): - assert config.service_endpoint == service_endpoint - app_module = load_app_module('service') - app_spec = app_module.app.spec() - doc_path = os.path.join(config.project_root, 'lambdas/service/openapi.json') - with write_file_atomically(doc_path) as file: - json.dump(app_spec, file, indent=4) + attribute='enable_log_forwarding', + new_callable=PropertyMock, + return_value=False): + assert not config.enable_log_forwarding + lambda_endpoint = furl('http://localhost') + with patch.object(target=AzulChaliceApp, + attribute='base_url', + new=lambda_endpoint): + app_module = load_app_module(lambda_name) + assert app_module.app.base_url == lambda_endpoint + app_spec = app_module.app.spec() + doc_path = Path(config.project_root) / 'lambdas' / lambda_name / 'openapi.json' + with write_file_atomically(doc_path) as file: + json.dump(app_spec, file, indent=4) if __name__ == '__main__': diff --git a/src/azul/openapi/params.py b/src/azul/openapi/params.py index 9490ee74af..f1b153ac50 100644 --- a/src/azul/openapi/params.py +++ b/src/azul/openapi/params.py @@ -54,6 +54,25 @@ def query(name: str, type_: Union[TYPE, schema.optional], **kwargs: PrimitiveJSO return _make_param(name, in_='query', type_=type_, **kwargs) +def header(name: str, type_: Union[TYPE, schema.optional], **kwargs: PrimitiveJSON) -> JSON: + """ + Returns an OpenAPI `parameters` specification of a request header. 
+ + >>> from azul.doctests import assert_json + >>> assert_json(header('X-foo', schema.optional(int))) + { + "name": "X-foo", + "in": "header", + "required": false, + "schema": { + "type": "integer", + "format": "int64" + } + } + """ + return _make_param(name, in_='header', type_=type_, **kwargs) + + def _make_param(name: str, in_: str, type_: Union[TYPE, schema.optional], **kwargs: PrimitiveJSON) -> JSON: is_optional = isinstance(type_, schema.optional) if is_optional: diff --git a/src/azul/openapi/spec.py b/src/azul/openapi/spec.py new file mode 100644 index 0000000000..c14708aa36 --- /dev/null +++ b/src/azul/openapi/spec.py @@ -0,0 +1,234 @@ +import attr + +from azul import ( + JSON, +) +from azul.health import ( + Health, +) +from azul.openapi import ( + format_description, + params, + responses, + schema, +) + + +@attr.s(auto_attribs=True, frozen=True, kw_only=True) +class CommonEndpointSpecs: + app_name: str + + _up_key = { + 'up': format_description(''' + indicates the overall result of the health check + '''), + } + + @property + def _fast_keys(self): + return { + **{ + prop.key: format_description(prop.description) + for prop in Health.fast_properties[self.app_name] + }, + **self._up_key + } + + _all_keys = { + **{ + prop.key: format_description(prop.description) + for prop in Health.all_properties + }, + **_up_key + } + + def _health_spec(self, health_keys: dict) -> JSON: + return { + 'responses': { + f'{200 if up else 503}': { + 'description': format_description(f''' + {'The' if up else 'At least one of the'} checked resources + {'are' if up else 'is not'} healthy. + + The response consists of the following keys: + + ''') + ''.join(f'* `{k}` {v}' for k, v in health_keys.items()) + format_description(f''' + + The top-level `up` key of the response is + `{'true' if up else 'false'}`. + + ''') + (format_description(f''' + {'All' if up else 'At least one'} of the nested `up` keys + {'are `true`' if up else 'is `false`'}. 
+ ''') if len(health_keys) > 1 else ''), + **responses.json_content( + schema.object( + additional_properties=schema.object( + additional_properties=True, + up=schema.enum(up) + ), + up=schema.enum(up) + ), + example={ + k: up if k == 'up' else {} for k in health_keys + } + ) + } for up in [True, False] + }, + 'tags': ['Auxiliary'] + } + + @property + def full_health(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Complete health check', + 'description': format_description(f''' + Health check of the {self.app_name} REST API and all + resources it depends on. This may take long time to complete + and exerts considerable load on the API. For that reason it + should not be requested frequently or by automated + monitoring facilities that would be better served by the + [`/health/fast`](#operations-Auxiliary-get_health_fast) or + [`/health/cached`](#operations-Auxiliary-get_health_cached) + endpoints. + '''), + **self._health_spec(self._all_keys) + } + } + + @property + def basic_health(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Basic health check', + 'description': format_description(f''' + Health check of only the REST API itself, excluding other + resources that it depends on. A 200 response indicates that + the {self.app_name} is reachable via HTTP(S) but nothing + more. + '''), + **self._health_spec(self._up_key) + } + } + + @property + def cached_health(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Cached health check for continuous monitoring', + 'description': format_description(f''' + Return a cached copy of the + [`/health/fast`](#operations-Auxiliary-get_health_fast) + response. This endpoint is optimized for continuously + running, distributed health monitors such as Route 53 health + checks. The cache ensures that the {self.app_name} is not + overloaded by these types of health monitors. The cache is + updated every minute. 
+ '''), + **self._health_spec(self._fast_keys) + } + } + + @property + def fast_health(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Fast health check', + 'description': format_description(''' + Performance-optimized health check of the REST API and other + critical resources that it depends on. This endpoint can be + requested more frequently than + [`/health`](#operations-Auxiliary-get_health) but + periodically scheduled, automated requests should be made to + [`/health/cached`](#operations-Auxiliary-get_health_cached). + '''), + **self._health_spec(self._fast_keys) + } + } + + @property + def custom_health(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Selective health check', + 'description': format_description(''' + This endpoint allows clients to request a health check on a + specific set of resources. Each resource is identified by a + *key*, the same key under which the resource appears in a + [`/health`](#operations-Auxiliary-get_health) response. + '''), + **self._health_spec(self._all_keys) + }, + 'path_spec': { + 'parameters': [ + params.path( + 'keys', + type_=schema.array(schema.enum(*sorted(Health.all_keys))), + description=''' + A comma-separated list of keys selecting the health + checks to be performed. Each key corresponds to an + entry in the response. + ''') + ] + } + } + + @property + def openapi(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Return OpenAPI specifications for this REST API', + 'description': format_description(''' + This endpoint returns the + [OpenAPI specifications](https://github.com/OAI/OpenAPI-Specification) + for this REST API. These are the specifications used to generate + the page you are visiting now.
+ '''), + 'responses': { + '200': { + 'description': '200 response', + **responses.json_content( + schema.object( + openapi=str, + **{ + k: schema.object() + for k in ('info', 'tags', 'servers', 'paths', 'components') + } + ) + ) + } + }, + 'tags': ['Auxiliary'] + } + } + + @property + def version(self) -> JSON: + return { + 'method_spec': { + 'summary': 'Describe current version of this REST API', + 'tags': ['Auxiliary'], + 'responses': { + '200': { + 'description': 'Version endpoint is reachable.', + **responses.json_content( + schema.object( + git=schema.object( + commit=str, + dirty=bool + ), + changes=schema.array( + schema.object( + title=str, + issues=schema.array(str), + upgrade=schema.array(str), + notes=schema.optional(str) + ) + ) + ) + ) + } + } + } + }