diff --git a/lambdas/service/app.py b/lambdas/service/app.py index 405652c806..3334b120fb 100644 --- a/lambdas/service/app.py +++ b/lambdas/service/app.py @@ -73,6 +73,9 @@ from azul.logging import ( configure_app_logging, ) +from azul.maintenance import ( + MaintenanceService, +) from azul.openapi import ( application_json, format_description as fd, @@ -870,6 +873,44 @@ def get_integrations(): body=json.dumps(body)) +@app.route( + '/maintenance/schedule', + methods=['GET'], + cors=True, + method_spec={ + 'summary': 'A maintenance schedule as an JSON object', + 'tags': ['Auxiliary'], + 'responses': { + '200': { + 'description': fd(''' + This object may be hanceforth refered to as "the schedule" + or `schedule`. + The `start` time of an event is its `actual_start` if set, + or its `planned_start` otherwise. The `end` time of an event + is its `actual_end` if set, or its `start` plus + `planned_duration` otherwise. All events in the schedule are + sorted by their `start` time. No two events have the same + `start` time. Each event defines an interval + `[e.start, e.end)` and there is no overlap between these + intervals. + + A pending event is one where `actual_start` is absent. An + active event is one where `actual_start` is present but + `actual_end` is absent. There can be at most one active + event. + ''') + } + } + } +) +def get_maintenance_schedule(): + service = MaintenanceService() + schedule = service.get_schedule.to_json() + return Response(status_code=200, + headers={'content-type': 'application/json'}, + body=json.dumps(schedule)) + + @app.route( '/index/catalogs', methods=['GET'], diff --git a/lambdas/service/openapi.json b/lambdas/service/openapi.json index 2e75a0f1da..0c9de5a5a2 100644 --- a/lambdas/service/openapi.json +++ b/lambdas/service/openapi.json @@ -653,6 +653,19 @@ } } }, + "/maintenance/schedule": { + "get": { + "summary": "A maintenance schedule as an JSON object", + "tags": [ + "Auxiliary" + ], + "responses": { + "200": { + "description": "\nThis object may be hanceforth refered to as \"the schedule\"\nor `schedule`.\nThe `start` time of an event is its `actual_start` if set,\nor its `planned_start` otherwise. The `end` time of an event\nis its `actual_end` if set, or its `start` plus\n`planned_duration` otherwise. All events in the schedule are\nsorted by their `start` time. No two events have the same\n`start` time. Each event defines an interval\n`[e.start, e.end)` and there is no overlap between these\n intervals.\n\n A pending event is one where `actual_start` is absent. An\n active event is one where `actual_start` is present but\n `actual_end` is absent. There can be at most one active\n event.\n" + } + } + } + }, "/index/catalogs": { "get": { "summary": "List all available catalogs.", diff --git a/scripts/manage_maintenance.py b/scripts/manage_maintenance.py new file mode 100644 index 0000000000..6654f4e57c --- /dev/null +++ b/scripts/manage_maintenance.py @@ -0,0 +1,142 @@ +""" +This is a command line utility for managing announcement of maintenance events. +Reads the JSON from designated bucket, deserializes the model from it, validates +the model, applies an action to it, serializes the model back to JSON and +finally uploads it back to the bucket where the service exposes it. The service +must also validate the model before returning it. +""" +import argparse +from datetime import ( + timedelta, +) +import json +import re +import sys + +from azul import ( + require, +) +from azul.args import ( + AzulArgumentHelpFormatter, +) +from azul.maintenance import ( + MaintenanceService, +) + + +def parse_duration(duration: str) -> timedelta: + """ + >>> parse_duration('1d') + datetime.timedelta(days=1) + >>> parse_duration('24 hours') + datetime.timedelta(days=1) + >>> parse_duration('.5 Days 12 hours') + datetime.timedelta(days=1) + + >>> parse_duration('2h20Min') + datetime.timedelta(seconds=8400) + >>> parse_duration('1 H 80m') + datetime.timedelta(seconds=8400) + >>> parse_duration('140 Minutes') + datetime.timedelta(seconds=8400) + + >>> parse_duration('2 Days 3hours 4min 5 secs') + datetime.timedelta(days=2, seconds=11045) + >>> parse_duration('1d 25h') + datetime.timedelta(days=2, seconds=3600) + >>> parse_duration('1m30s') + datetime.timedelta(seconds=90) + + >>> parse_duration('Bad foo') + Traceback (most recent call last): + ... + azul.RequirementError: Try a duration such as "2d 6hrs", "1.5 Days", or "15m" + """ + + pattern = r'(\d*\.?\d+)\s*(d|h|m|s)' + matches = re.findall(pattern, duration.lower()) + require(bool(matches), 'Try a duration such as "2d 6hrs", "1.5 Days", or "15m"') + time_delta = {'days': 0, 'hours': 0, 'minutes': 0, 'seconds': 0} + for value, unit in matches: + value = float(value) + match unit: + case 'd': + time_delta['days'] += value + case 'h': + time_delta['hours'] += value + case 'm': + time_delta['minutes'] += value + case 's': + time_delta['seconds'] += value + return timedelta(**time_delta) + + +def main(args: list[str]): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=AzulArgumentHelpFormatter) + subparsers = parser.add_subparsers(dest="command") + list_parser = subparsers.add_parser("list", help="List events in JSON form") + list_parser.add_argument("--all", action="store_true", + help="Include completed events") + add_parser = subparsers.add_parser("add", help="Schedule an event") + add_parser.add_argument("--start", required=True, + help="Event start time (ISO format)") + add_parser.add_argument("--duration", required=True, + help="Event duration (e.g., '1h30m', '2d')") + add_parser.add_argument("--description", required=True, + help="Event description") + add_parser.add_argument("--partial-responses", nargs="+", + help="Catalog names for partial responses") + add_parser.add_argument("--degraded-performance", nargs="+", + help="Catalog names for degraded performance") + add_parser.add_argument("--service-unavailable", nargs="+", + help="Catalog names for service unavailability") + cancel_parser = subparsers.add_parser("cancel", + help="Cancel a pending event") + cancel_parser.add_argument("--index", type=int, required=True, + help="Index of the event to cancel") + subparsers.add_parser("start", help="Activate a pending event") + subparsers.add_parser("end", help="Complete the active event") + adjust_parser = subparsers.add_parser("adjust", + help="Modify the active event") + adjust_parser.add_argument("--duration", required=True, + help="New event duration (e.g., '1h30m', '2d')") + + args = parser.parse_args(args) + + service = MaintenanceService() + + if args.command == "list": + events = service.get_schedule + if args.all: + events = events.to_json() + else: + active = events.active_event() + active = {} if active is None else {'active': active.to_json()} + pending = events.pending_events() + pending = {'pending': list(pe.to_json() for pe in pending)} + events = active | pending + elif args.command == "add": + duration = int(parse_duration(args.duration).total_seconds()) + events = service.provision_event(planned_start=args.start, + planned_duration=duration, + description=args.description, + partial=args.partial_responses, + degraded=args.degraded_performance, + unavailable=args.service_unavailable) + events = service.add_event(events) + elif args.command == "cancel": + events = service.cancel_event(args.index) + elif args.command == "start": + events = service.start_event() + elif args.command == "end": + events = service.end_event() + elif args.command == "adjust": + events = service.adjust_event(parse_duration(args.duration)) + else: + assert False, 'Invalid command' + print(json.dumps(events, indent=4)) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/src/azul/maintenance.py b/src/azul/maintenance.py index 4e56ed9270..77de486e86 100644 --- a/src/azul/maintenance.py +++ b/src/azul/maintenance.py @@ -11,9 +11,9 @@ from operator import ( attrgetter, ) -import sys from typing import ( Iterator, + Self, Sequence, ) @@ -24,13 +24,22 @@ ) from azul import ( + CatalogName, JSON, + cached_property, + config, reject, require, ) from azul.collections import ( adict, ) +from azul.deployment import ( + aws, +) +from azul.service.storage_service import ( + StorageObjectNotFound, +) from azul.time import ( format_dcp2_datetime, parse_dcp2_datetime, @@ -46,7 +55,7 @@ class MaintenanceImpactKind(Enum): @attrs.define class MaintenanceImpact: kind: MaintenanceImpactKind - affected_catalogs: list[str] + affected_catalogs: list[CatalogName] @classmethod def from_json(cls, impact: JSON): @@ -58,8 +67,9 @@ def to_json(self) -> JSON: affected_catalogs=self.affected_catalogs) def validate(self): - require(all(isinstance(c, str) and c for c in self.affected_catalogs), - 'Invalid catalog name/pattern') + require(all( + isinstance(c, CatalogName) and c for c in self.affected_catalogs), + 'Invalid catalog name/pattern') require(all({0: True, 1: c[-1] == '*'}.get(c.count('*'), False) for c in self.affected_catalogs), 'Invalid catalog pattern') @@ -75,7 +85,7 @@ class MaintenanceEvent: actual_end: datetime | None @classmethod - def from_json(cls, event: JSON): + def from_json(cls, event: JSON) -> Self: return cls(planned_start=cls._parse_datetime(event['planned_start']), planned_duration=timedelta(seconds=event['planned_duration']), description=event['description'], @@ -139,7 +149,7 @@ def validate(self): starts = set(e.start for e in self.events) require(len(starts) == len(self.events), 'Start times are not distinct') - # Since starts are distinct, we'll never need the end as a tie breaker + # Since starts are distinct, we'll never need the end as a tie-breaker intervals = [(e.start, e.end) for e in self.events] require(intervals == sorted(intervals), 'Events are not sorted by start time') @@ -154,9 +164,9 @@ def validate(self): def pending_events(self) -> list[MaintenanceEvent]: """ Returns a list of pending events in this schedule. The elements in the - returned list can be modified until another method is invoked on this schedule. The - modifications will be reflected in ``self.events`` but the caller is - responsible for ensuring they don't invalidate this schedule. + returned list can be modified until another method is invoked on this + schedule. The modifications will be reflected in ``self.events`` but the + caller is responsible for ensuring they don't invalidate this schedule. """ events = enumerate(self.events) for i, e in events: @@ -164,6 +174,13 @@ def pending_events(self) -> list[MaintenanceEvent]: return self.events[i:] return [] + def past_events(self) -> list[MaintenanceEvent]: + return [ + e + for e in self.events + if e.actual_end is not None and e.actual_start is not None + ] + def active_event(self) -> MaintenanceEvent | None: return only(self._active_events()) @@ -192,6 +209,14 @@ def add_event(self, event: MaintenanceEvent): self.events = events raise + def adjust_event(self, additional_duration: timedelta): + event = self.active_event() + reject(event is None, 'No active event') + event.planned_duration += additional_duration + self._heal(event, iter(self.pending_events())) + assert self.active_event() is not None + return event + def cancel_event(self, index: int) -> MaintenanceEvent: event = self.pending_events()[index] self.events.remove(event) @@ -223,3 +248,108 @@ def _heal(self, next_event.planned_start = event.end event = next_event self.validate() + + +class MaintenanceService: + + @property + def bucket(self): + return aws.shared_bucket + + @property + def object_key(self): + return f'azul/{config.deployment_stage}/azul.json' + + @cached_property + def client(self): + return aws.s3 + + @property + def _get_schedule(self) -> JSON: + try: + response = self.client.get_object(Bucket=self.bucket, + Key=self.object_key) + except self.client.exceptions.NoSuchKey: + raise StorageObjectNotFound + else: + return json.loads(response['Body'].read()) + + @property + def get_schedule(self) -> MaintenanceSchedule: + schedule = self._get_schedule + schedule = MaintenanceSchedule.from_json(schedule['maintenance']['schedule']) + schedule.validate() + return schedule + + def put_schedule(self, schedule: MaintenanceSchedule): + schedule = schedule.to_json() + return self.client.put_object(Bucket=self.bucket, + Key=self.object_key, + Body=json.dumps({ + "maintenance": { + "schedule": schedule + } + }).encode()) + + def provision_event(self, + planned_start: str, + planned_duration: int, + description: str, + partial: list[str] | None = None, + degraded: list[str] | None = None, + unavailable: list[str] | None = None) -> MaintenanceEvent: + """ + Uses the given inmput parametes to provision a new MaintenanceEvent. + This new MaintenanceEvent object can then be added as an event to an + existing schedule. It is primarily used by `add_event` to create and add + events to the maintenance schedule. + """ + partial = [{ + 'kind': 'partial_responses', + 'affected_catalogs': partial + }] if partial is not None else [] + degraded = [{ + 'kind': 'degraded_performance', + 'affected_catalogs': degraded + }] if degraded is not None else [] + unavailable = [{ + 'kind': 'service_unavailable', + 'affected_catalogs': unavailable + }] if unavailable is not None else [] + impacts = [*partial, *degraded, *unavailable] + return MaintenanceEvent.from_json({ + 'planned_start': planned_start, + 'planned_duration': planned_duration, + 'description': description, + 'impacts': impacts + }) + + def add_event(self, event: MaintenanceEvent) -> JSON: + schedule = self.get_schedule + schedule.add_event(event) + self.put_schedule(schedule) + return schedule.to_json() + + def cancel_event(self, index: int) -> JSON: + schedule = self.get_schedule + event = schedule.cancel_event(index) + self.put_schedule(schedule) + return event.to_json() + + def start_event(self) -> JSON: + schedule = self.get_schedule + event = schedule.start_event() + self.put_schedule(schedule) + return event.to_json() + + def end_event(self) -> JSON: + schedule = self.get_schedule + event = schedule.end_event() + self.put_schedule(schedule) + return event.to_json() + + def adjust_event(self, additional_duration: timedelta) -> JSON: + schedule = self.get_schedule + event = schedule.adjust_event(additional_duration) + self.put_schedule(schedule) + return event.to_json() diff --git a/test/test_doctests.py b/test/test_doctests.py index c4bbe4023e..36b658683a 100644 --- a/test/test_doctests.py +++ b/test/test_doctests.py @@ -104,6 +104,7 @@ def load_tests(_loader, tests, _ignore): load_script('can_bundle'), load_script('envhook'), load_script('export_environment'), + load_script('manage_maintenance'), load_module(root + '/.flake8/azul_flake8.py', 'azul_flake8'), load_module(root + '/.github/workflows/schedule.py', 'schedule'), test_tagging,