diff --git a/.gitignore b/.gitignore index 8a4be15f614a..8f788ddc636b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ docker-compose.override.yaml node_modules/ +dev/tuf.* dev/example.sql dev/prod.sql dev/prod.sql.xz @@ -29,6 +30,7 @@ warehouse/.commit warehouse/static/components warehouse/static/dist warehouse/admin/static/dist +warehouse/tuf/dist tags *.sw* diff --git a/Makefile b/Makefile index 3b92f72c3cde..a3ddb0f3e95e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ DB := example IPYTHON := no +LOCALES := $(shell .state/env/bin/python -c "from warehouse.i18n import KNOWN_LOCALES; print(' '.join(set(KNOWN_LOCALES)-{'en'}))") +WAREHOUSE_CLI := docker-compose run --rm web python -m warehouse # set environment variable WAREHOUSE_IPYTHON_SHELL=1 if IPython # needed in development environment @@ -94,6 +96,16 @@ initdb: .state/docker-build-web docker-compose run --rm web python -m warehouse classifiers sync $(MAKE) reindex +inittuf: + $(WAREHOUSE_CLI) tuf keypair --name root --path /opt/warehouse/src/dev/tuf.root + $(WAREHOUSE_CLI) tuf keypair --name snapshot --path /opt/warehouse/src/dev/tuf.snapshot + $(WAREHOUSE_CLI) tuf keypair --name targets --path /opt/warehouse/src/dev/tuf.targets + $(WAREHOUSE_CLI) tuf keypair --name timestamp --path /opt/warehouse/src/dev/tuf.timestamp + $(WAREHOUSE_CLI) tuf keypair --name bins --path /opt/warehouse/src/dev/tuf.bins + $(WAREHOUSE_CLI) tuf keypair --name bin-n --path /opt/warehouse/src/dev/tuf.bin-n + $(WAREHOUSE_CLI) tuf new-repo + $(WAREHOUSE_CLI) tuf build-targets + reindex: .state/docker-build-web docker-compose run --rm web python -m warehouse search reindex diff --git a/Procfile b/Procfile index c4ccc5b214d7..a8149ff1c783 100644 --- a/Procfile +++ b/Procfile @@ -4,3 +4,4 @@ web-uploads: bin/start-web ddtrace-run python -m gunicorn.app.wsgiapp -c gunicor worker: bin/start-worker celery -A warehouse worker -Q default -l info --max-tasks-per-child 32 worker-malware: bin/start-worker celery -A warehouse 
worker -Q malware -l info --max-tasks-per-child 32 worker-beat: bin/start-worker celery -A warehouse beat -S redbeat.RedBeatScheduler -l info +worker-tuf: bin/start-worker celery -A warehouse worker -Q tuf -l info --max-tasks-per-child 32 diff --git a/dev/environment b/dev/environment index c48ad202e5b0..3c964d9361ba 100644 --- a/dev/environment +++ b/dev/environment @@ -57,3 +57,13 @@ TWOFACTORREQUIREMENT_ENABLED=true TWOFACTORMANDATE_AVAILABLE=true TWOFACTORMANDATE_ENABLED=true OIDC_ENABLED=true + +TUF_KEY_BACKEND=warehouse.tuf.services.LocalKeyService key.path=/opt/warehouse/src/dev +TUF_STORAGE_BACKEND=warehouse.tuf.services.LocalStorageService +TUF_REPO_BACKEND=warehouse.tuf.services.LocalRepositoryService repo.path=/opt/warehouse/src/warehouse/tuf/dist +TUF_ROOT_SECRET="an insecure private key password" +TUF_SNAPSHOT_SECRET="an insecure private key password" +TUF_TARGETS_SECRET="an insecure private key password" +TUF_TIMESTAMP_SECRET="an insecure private key password" +TUF_BINS_SECRET="an insecure private key password" +TUF_BIN_N_SECRET="an insecure private key password" diff --git a/dev/tufkeys/bin-n b/dev/tufkeys/bin-n new file mode 100644 index 000000000000..f1ba72f72ea2 --- /dev/null +++ b/dev/tufkeys/bin-n @@ -0,0 +1 @@ 
+2f0570511d84b133d6e1f875920587a9@@@@100000@@@@dfacbe8d4966935d16b5c9b7910e7b01189f83e0db2d5169eb4d7a2edab91924@@@@f3f67a27b03a24c8b767f5721bc345b5@@@@e9855ff4f41d1ec10877b3476eabd99c86f2162ef16172aaa17200af3ec97a0e7b05a389e3084a8c79aa2756fd999eaa67f5462e37a024b66feba784a3e70850d8d2466165ba86d745cac2a526d44439892f46a3342d1c890589c833f652942283b59441b214625b275de95d16ec199cb4fbbd7fa8b1442153fbac65db18d9c9e1345b37a7fa850d2ffe0d13035f28d68a3b47ddb310750002e8e96e751b633f06e4d9c70fa117de12d848cab845f830e082c51e0ad42342f2c24869b091e3c7dae70410578fbb6877a975983aa3dde6aa699e4e45ba7114c4e373109d2128c5211b3471390cb0ef09da3cd5e552813a906f604038218f7891213cefa10fbb7ba6c01e98c821ba45b69504d5ac2d41feeafccc974d82b89c33d200b6ceb04a7af47f08f8258cfea9a6855d3cbe91bc0892ddaabd0f508a6ed85f811dcff66e0fed066f3607f5a5f1ccecc80ad1ede362 \ No newline at end of file diff --git a/dev/tufkeys/bin-n.pub b/dev/tufkeys/bin-n.pub new file mode 100644 index 000000000000..23af98de62af --- /dev/null +++ b/dev/tufkeys/bin-n.pub @@ -0,0 +1 @@ +{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "ca2694d781367f94974c5176cae6c7290fe3b65f03a5c6331fe500b8c700f3aa"}} \ No newline at end of file diff --git a/dev/tufkeys/bins b/dev/tufkeys/bins new file mode 100644 index 000000000000..d72696818560 --- /dev/null +++ b/dev/tufkeys/bins @@ -0,0 +1 @@ 
+c4509bc89725ce7b1467b4d596536564@@@@100000@@@@47c863bddaad67e2482760e0da329f3f9170dee5de9e9265cd23873947b4a373@@@@8333670a562d5b2fa37559bb4372de32@@@@d567e40c7d544b4d41f023d208cacdb76009991fb1dc4681e8b7ebae42d5756c21434937061297dfe264150d3ab9c00985d4217ca18c70904d8c1becc9e172a1677221d09d7f9de5b6778340bf26d305dcf34556066e744abcf96bd489e612d6ea9c483838f21bf06646d250b5d7f380cccf57eb990387baf28c3fd00815b1db3a34418d879718607fb54ccfb1fe20edc06bdb765dce6d3617dba5cc5aff41f5eab62a6e0f8e095a97dda3784dcb95f97af5a1c4de7fb0424d659469305cafb092a209ab78932444ffb497286cb44e4ba360e0025f71fc108e8f3b2fac794a05242ef5167bdcd5e4a1a3573805cb91b8359bcb317eb026c2ee102db096b50b5f6fc478af5146dbcd9e2ada046d6a392854e779ffa4b2bdbb0ea90d41eb6030fd0d98f0aa34510a2a6e9292d5095140864658d8db63701b661ed8ee3b6a428dfb9163e7918942ffb0506f290a76fc4703 \ No newline at end of file diff --git a/dev/tufkeys/bins.pub b/dev/tufkeys/bins.pub new file mode 100644 index 000000000000..d0ef047f9a51 --- /dev/null +++ b/dev/tufkeys/bins.pub @@ -0,0 +1 @@ +{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "a4361da485b8ae5cb10915d5254d96f05ed3d8d4c87979c5a203504b4b65ecf5"}} \ No newline at end of file diff --git a/dev/tufkeys/root b/dev/tufkeys/root new file mode 100644 index 000000000000..ddb1d5081269 --- /dev/null +++ b/dev/tufkeys/root @@ -0,0 +1 @@ 
+83589fb72074fcb8533c8c4414015c30@@@@100000@@@@be3d99c519272cc8b8c1150ca03450c0aac21d084a0dc79966faca4d05787ff1@@@@dccf47ed06f92da01e9de35113477e88@@@@72e37b2acdb2115888975e66990009e6f6a4cdd94ff77473e056d16ef33e9ef6ea450308f1b4962f544a0796d7b7f788b35a6ae33d6cc83e56cc416440716418102d39573692e87350935088bc08b4b82d910731a0f536bebded8f80e49c3df052da2bf657ed6a1f64463da477905bc8a1bc8fb65215653a6efa3e0a74b2fabcb65d96fc1f1c31196878e9eb39eca1cfb709b69a400f159f5e8f8dc9b765590fb5cac705170149b71c0f54951d239eafaa9ea9b8c3ac0897e57e5c8f027f8d160d91e3995da00535ced37ff41bccd31e7ed5076c22b448f9a7e701a2024de56b839fd7be4689c23f8b81102fe4711585300862b01f499408295723f995775de33ce69e666883bb25302adb1ce6b21ac710ceeacefacc316aa80432a7d8e9819d78913707b8daef35d03667ab11727e6ea6bc44b9356449f6d6aed53825f2fd1397c68d2669b2b1895874678234a42afd \ No newline at end of file diff --git a/dev/tufkeys/root.pub b/dev/tufkeys/root.pub new file mode 100644 index 000000000000..61e15f87d712 --- /dev/null +++ b/dev/tufkeys/root.pub @@ -0,0 +1 @@ +{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "5cb1a1622f72ae901bc2ed25546503bae70b53308833385a11b6630fb52c9bc9"}} \ No newline at end of file diff --git a/dev/tufkeys/snapshot b/dev/tufkeys/snapshot new file mode 100644 index 000000000000..4b899fc99d94 --- /dev/null +++ b/dev/tufkeys/snapshot @@ -0,0 +1 @@ 
+97074b6eccb9d5920192f231249c33dc@@@@100000@@@@3965a3500a3de4f6a052f9f26efd2714affae878daec6df53c50fe20fb3b5732@@@@dbc2d164f88ea8d77b093db20475db0c@@@@d6db7159767d46f6f255a2184b2615df892f067ca0861dba18ccc5b6b7dce95d5d2b377d0d8424d39fb0b337e9bd086a6405804a55e7c8d591f090dc55d38c0820a6bc2ddd27078e1bc482fa4f19a66941cf023680d032da70a8ac6bb5c9fc7454a273d014ef569f7a79f89145c29ade6a5f56752a9e4b5f3a4ee0426680b3d1921a962a6edee5c77661439f7bc95674fbc009ecf903c20fbac786e94164bc43df1fb8536bc6666ce098cfad411c852154e4dd7dda0b9046cd4310d7ae9b822be3cb7b62e66e73718424743e2297b091d2cd8cfce108f1f6a2e17ad382655c517d8131da3a72ac1f2ae8cd760705bd17ec5ff6834c86e8f08fa804152774146a3bda063ae29a01be00723b96e3370d9f046a1287ad4ccaa007cc016d59bf935268cb8b7946bb5b5480962137ed6cb449df43aa83d3d086e4b10e7c33110847c9bc1ee2c24ebbd66e989c6e28c52ac28d \ No newline at end of file diff --git a/dev/tufkeys/snapshot.pub b/dev/tufkeys/snapshot.pub new file mode 100644 index 000000000000..92093748d91e --- /dev/null +++ b/dev/tufkeys/snapshot.pub @@ -0,0 +1 @@ +{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "c19d2c8532667d7784f5b33f5bdccd529b52c5d55754b4ef989a261bf1695431"}} \ No newline at end of file diff --git a/dev/tufkeys/targets b/dev/tufkeys/targets new file mode 100644 index 000000000000..a36285e32f8e --- /dev/null +++ b/dev/tufkeys/targets @@ -0,0 +1 @@ 
+e25efe7833ec8682ff0d09366d264fe5@@@@100000@@@@ecb082633529fc0d392b3d0c8f36972423c91060bbfc1903a8e390deeb650817@@@@3f116f9b022d8e1e3959e99ee800f5cd@@@@45defac0e6ef65003645fd34e3a39a47579e6da578529fe425bd9798e6007cb901d4003e866068a853e17faeba365367f63ead34fac88bca0938f308e2556913c8f9ed56892262704b0a7dcb25bedf61fcd8c72af6eaa4b8630936b704f2aae364313211a32d3bac84d88273fd2e2ee48f1c3d7bef8de74a800124f469bacad916ed3d1242b2c691548b68d3e890a8ef19d3e56817a94e1e508136a8f6cd4d5af4012a355e874bfe446504b75991187f76f521dc3b589d1eae126d978e1e7b268688e1992729cb820f5057352e6c916af4ce448bf0a26d1cf52968a5398c6c516cae831f7f75a7885489fae79fb3a9f8faae0dc32ceaf17106df7834ffc4e28589df0d886a08f51751d2c15999d538197a1e392647ffc81a314e87f936e2087762f0d6004a862789e27021e11e489818320da3bd8801f208deb0b4aa4d1f6319712729bb716c71837ac229d216180ef4 \ No newline at end of file diff --git a/dev/tufkeys/targets.pub b/dev/tufkeys/targets.pub new file mode 100644 index 000000000000..fe0f800e39af --- /dev/null +++ b/dev/tufkeys/targets.pub @@ -0,0 +1 @@ +{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "715ea559e2fa80a2bad3c50836c55ce33d256fa7ad5468931cc76a8a10e86d37"}} \ No newline at end of file diff --git a/dev/tufkeys/timestamp b/dev/tufkeys/timestamp new file mode 100644 index 000000000000..a77ed1a42473 --- /dev/null +++ b/dev/tufkeys/timestamp @@ -0,0 +1 @@ 
+7a59984187faa4893ceba030f5c5b4a7@@@@100000@@@@7ec97a52365194434c8294e50ceb454222194670104eeacce30846222cdd2756@@@@20d9afcaebb214eb40b3827d6730f318@@@@7763c7a7fd2d9897ad9951149bbd00ced57d9a68339ce0b1a1dbfc7cbb1defdd6d2a87178c140a9de2180e58616f6004a53f384356743b3aad3d54d073e70d8a6c9420ee9e6880405d969f59ddac072235740259868af52a9c0d4582ce3cd0435e9dc0138f0571b02629721f11308e5a6fb26f75d2bd9d298da3bfaad4ec6aaa80d0d81a2e22690d8416b188e132b87bc5ef89d1460e4f7b2c12dac555ad99212b30c73d5f3ef30a77e0eb2f0f82e88c869ef585a8707267be76619c7a3e430563be1d7d1073ed1706bea7d5232e784674c104920d4f44382beeb8d07cfe0d9f5ec0812497d48d2f8b71634a2ae29ca9bf2732d4e0c3216a62b318d11dfc1608f0c9f9e8eff142b45cce4ac7bc91c4967fcd9b3b5484bf016375100391321c54e6a3da85cfb99c6e429da2a586ea15d2f09e2f7acb3e5358bef10f0116b9f3da2aa2c49f9fb07ffcaea4677144bc1969 \ No newline at end of file diff --git a/dev/tufkeys/timestamp.pub b/dev/tufkeys/timestamp.pub new file mode 100644 index 000000000000..cece0173a650 --- /dev/null +++ b/dev/tufkeys/timestamp.pub @@ -0,0 +1 @@ +{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "5e12fa7b474cb886cdc0cd14c5017b09b3759125ab7e6985bd9c5953063de333"}} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 4d7095d3bee9..83027ab4eb9a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -129,6 +129,7 @@ services: DEVEL: "yes" command: hupper -m celery -A warehouse worker -B -S redbeat.RedBeatScheduler -l info volumes: + - ./dev:/opt/warehouse/src/dev:z - ./warehouse:/opt/warehouse/src/warehouse:z env_file: dev/environment environment: diff --git a/requirements/main.in b/requirements/main.in index eb7f0c078d40..e9fc78ac0743 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -63,6 +63,7 @@ stripe structlog transaction trove-classifiers +tuf==0.15.0 typeguard webauthn>=1.0.0,<2.0.0 whitenoise diff --git a/tests/conftest.py b/tests/conftest.py index bfd3af365868..a116ad5007b8 
100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -255,6 +255,8 @@ def app_config(database): "sponsorlogos.backend": "warehouse.admin.services.LocalSponsorLogoStorage", "billing.backend": "warehouse.subscriptions.services.MockStripeBillingService", "mail.backend": "warehouse.email.services.SMTPEmailSender", + "tuf.key_backend": "warehouse.tuf.services.LocalKeyService", + "tuf.repo_backend": "warehouse.tuf.services.LocalRepositoryService", "malware_check.backend": ( "warehouse.malware.services.PrinterMalwareCheckService" ), diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 6a81494280bf..e7ff3722b597 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -358,6 +358,7 @@ def __init__(self): pretend.call(".organizations"), pretend.call(".subscriptions"), pretend.call(".packaging"), + pretend.call(".tuf"), pretend.call(".redirects"), pretend.call(".routes"), pretend.call(".sponsors"), @@ -410,7 +411,8 @@ def __init__(self): ), ] assert configurator_obj.add_static_view.calls == [ - pretend.call("static", "warehouse:static/dist/", cache_max_age=315360000) + pretend.call("tuf", "warehouse:tuf/dist/metadata.staged/"), + pretend.call("static", "warehouse:static/dist/", cache_max_age=315360000), ] assert configurator_obj.add_cache_buster.calls == [ pretend.call("warehouse:static/dist/", cachebuster_obj) diff --git a/tests/unit/test_tasks.py b/tests/unit/test_tasks.py index 6ea032e5e425..fb146271a097 100644 --- a/tests/unit/test_tasks.py +++ b/tests/unit/test_tasks.py @@ -508,8 +508,12 @@ def test_includeme(env, ssl, broker_url, expected_url, transport_options): "task_queues": ( Queue("default", routing_key="task.#"), Queue("malware", routing_key="malware.#"), + Queue("tuf", routing_key="tuf.#"), ), - "task_routes": {"warehouse.malware.tasks.*": {"queue": "malware"}}, + "task_routes": { + "warehouse.malware.tasks.*": {"queue": "malware"}, + "warehouse.tuf.tasks.*": {"queue": "tuf"}, + }, "REDBEAT_REDIS_URL": 
(config.registry.settings["celery.scheduler_url"]), }.items(): assert app.conf[key] == value diff --git a/warehouse/cli/tuf.py b/warehouse/cli/tuf.py new file mode 100644 index 000000000000..9350759e62e8 --- /dev/null +++ b/warehouse/cli/tuf.py @@ -0,0 +1,176 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import click + +from tuf import repository_tool + +from warehouse.cli import warehouse +from warehouse.config import Environment +from warehouse.tuf import utils +from warehouse.tuf.constants import BIN_N_COUNT, TOPLEVEL_ROLES, Role + + +def _make_backsigned_fileinfo_from_file(file): + return utils.make_fileinfo(file, custom={"backsigned": True}) + + +def _key_service(config): + key_service_class = config.maybe_dotted(config.registry.settings["tuf.key_backend"]) + return key_service_class.create_service(None, config) + + +def _repository_service(config): + repo_service_class = config.maybe_dotted( + config.registry.settings["tuf.repo_backend"] + ) + return repo_service_class.create_service(None, config) + + +def _set_expiration_for_role(config, role_obj, role_name): + # If we're initializing TUF for development purposes, give + # every role a long expiration time so that developers don't have to + # continually re-initialize it. 
+ if config.registry.settings["warehouse.env"] == Environment.development: + role_obj.expiration = datetime.datetime.utcnow() + datetime.timedelta( + seconds=config.registry.settings["tuf.development_metadata_expiry"] + ) + else: + role_obj.expiration = datetime.datetime.utcnow() + datetime.timedelta( + seconds=config.registry.settings[f"tuf.{role_name}.expiry"] + ) + + +@warehouse.group() # pragma: no-branch +def tuf(): + """ + Manage Warehouse's TUF state. + """ + + +@tuf.command() +@click.pass_obj +@click.option("--name", "name_", help="The name of the TUF role for this keypair") +@click.option("--path", "path_", help="The basename of the Ed25519 keypair to generate") +def keypair(config, name_, path_): + """ + Generate a new TUF keypair, for development purposes. + """ + + repository_tool.generate_and_write_ed25519_keypair( + path_, password=config.registry.settings[f"tuf.{name_}.secret"] + ) + + +@tuf.command() +@click.pass_obj +def new_repo(config): + """ + Initialize the TUF repository from scratch, including a brand new root.
+ """ + + repository = repository_tool.create_new_repository( + config.registry.settings["tuf.repo.path"] + ) + + key_service = _key_service(config) + for role in TOPLEVEL_ROLES: + role_obj = getattr(repository, role) + role_obj.threshold = config.registry.settings[f"tuf.{role}.threshold"] + _set_expiration_for_role(config, role_obj, role) + + pubkeys = key_service.pubkeys_for_role(role) + privkeys = key_service.privkeys_for_role(role) + if len(pubkeys) < role_obj.threshold or len(privkeys) < role_obj.threshold: + raise click.ClickException( + f"Unable to initialize TUF repo: {role} needs {role_obj.threshold} keys" + ) + + for pubkey in pubkeys: + role_obj.add_verification_key(pubkey) + + for privkey in privkeys: + role_obj.load_signing_key(privkey) + + repository.mark_dirty(TOPLEVEL_ROLES) + repository.writeall( + consistent_snapshot=True, + ) + + +@tuf.command() +@click.pass_obj +def build_targets(config): + """ + Given an initialized (but empty) TUF repository, create the delegated + targets role (bins) and its hashed bin delegations (each bin-n). + """ + + repo_service = _repository_service(config) + repository = repo_service.load_repository() + + # Load signing keys. We do this upfront for the top-level roles. + key_service = _key_service(config) + for role in ["snapshot", "targets", "timestamp"]: + role_obj = getattr(repository, role) + + [role_obj.load_signing_key(k) for k in key_service.privkeys_for_role(role)] + + # NOTE: TUF normally does delegations by path patterns (i.e., globs), but PyPI + # doesn't store its uploads on the same logical host as the TUF repository. + # The last parameter to `delegate` is a special sentinel for this.
+ repository.targets.delegate( + Role.BINS.value, key_service.pubkeys_for_role(Role.BINS.value), ["*"] + ) + bins_role = repository.targets(Role.BINS.value) + _set_expiration_for_role(config, bins_role, Role.BINS.value) + + for privkey in key_service.privkeys_for_role(Role.BINS.value): + bins_role.load_signing_key(privkey) + + bins_role.delegate_hashed_bins( + [], + key_service.pubkeys_for_role(Role.BIN_N.value), + BIN_N_COUNT, + ) + + dirty_roles = ["snapshot", "targets", "timestamp", Role.BINS.value] + for bin_n_role in bins_role.delegations: + _set_expiration_for_role(config, bin_n_role, Role.BIN_N.value) + dirty_roles.append(bin_n_role.rolename) + + for privkey in key_service.privkeys_for_role(Role.BIN_N.value): + for bin_n_role in bins_role.delegations: + bin_n_role.load_signing_key(privkey) + + # Collect the "paths" for every PyPI package. These are packages already in + # existence, so we'll add some additional data to their targets to + # indicate that we're back-signing them. + from warehouse.db import Session + from warehouse.packaging.models import File + + db = Session(bind=config.registry["sqlalchemy.engine"]) + for file in db.query(File).all(): + fileinfo = _make_backsigned_fileinfo_from_file(file) + bins_role.add_target_to_bin( + file.path, + number_of_bins=BIN_N_COUNT, + fileinfo=fileinfo, + ) + + repository.mark_dirty(dirty_roles) + repository.writeall( + consistent_snapshot=True, + use_existing_fileinfo=True, + ) diff --git a/warehouse/config.py b/warehouse/config.py index 08a7ad3f56fc..3ebde77017ba 100644 --- a/warehouse/config.py +++ b/warehouse/config.py @@ -228,6 +228,12 @@ def configure(settings=None): default=21600, # 6 hours ) maybe_set_compound(settings, "billing", "backend", "BILLING_BACKEND") + maybe_set(settings, "tuf.root.secret", "TUF_ROOT_SECRET") + maybe_set(settings, "tuf.snapshot.secret", "TUF_SNAPSHOT_SECRET") + maybe_set(settings, "tuf.targets.secret", "TUF_TARGETS_SECRET") + maybe_set(settings, "tuf.timestamp.secret", 
"TUF_TIMESTAMP_SECRET") + maybe_set(settings, "tuf.bins.secret", "TUF_BINS_SECRET") + maybe_set(settings, "tuf.bin-n.secret", "TUF_BIN_N_SECRET") maybe_set_compound(settings, "files", "backend", "FILES_BACKEND") maybe_set_compound(settings, "simple", "backend", "SIMPLE_BACKEND") maybe_set_compound(settings, "docs", "backend", "DOCS_BACKEND") @@ -237,6 +243,9 @@ def configure(settings=None): maybe_set_compound(settings, "metrics", "backend", "METRICS_BACKEND") maybe_set_compound(settings, "breached_passwords", "backend", "BREACHED_PASSWORDS") maybe_set_compound(settings, "malware_check", "backend", "MALWARE_CHECK_BACKEND") + maybe_set_compound(settings, "tuf", "key_backend", "TUF_KEY_BACKEND") + maybe_set_compound(settings, "tuf", "storage_backend", "TUF_STORAGE_BACKEND") + maybe_set_compound(settings, "tuf", "repo_backend", "TUF_REPO_BACKEND") # Pythondotorg integration settings maybe_set(settings, "pythondotorg.host", "PYTHONDOTORG_HOST", default="python.org") @@ -358,6 +367,10 @@ def configure(settings=None): ], ) + # For development only: this artificially prolongs the expirations of any + # Warehouse-generated TUF metadata by approximately one year. + settings.setdefault("tuf.development_metadata_expiry", 31536000) + # Actually setup our Pyramid Configurator with the values pulled in from # the environment as well as the ones passed in to the configure function. config = Configurator(settings=settings) @@ -565,6 +578,13 @@ def configure(settings=None): # Allow the packaging app to register any services it has. config.include(".packaging") + # Register TUF support for package integrity + config.include(".tuf") + + # Serve the TUF metadata files. + # TODO: This should be routed to the TUF GCS bucket. 
+ config.add_static_view("tuf", "warehouse:tuf/dist/metadata.staged/") + # Configure redirection support config.include(".redirects") diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index 088e8557a22c..65204948173b 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -60,7 +60,7 @@ Release, Role, ) -from warehouse.packaging.tasks import update_bigquery_release_files +from warehouse.tuf.interfaces import IRepositoryService from warehouse.utils import http, readme from warehouse.utils.project import add_project, validate_project_name from warehouse.utils.security_policy import AuthenticationMethod @@ -1372,56 +1372,8 @@ def file_upload(request): }, ) - # We are flushing the database requests so that we - # can access the server default values when initiating celery - # tasks. - request.db.flush() - - # Push updates to BigQuery - dist_metadata = { - "metadata_version": form["metadata_version"].data, - "name": form["name"].data, - "version": form["version"].data, - "summary": form["summary"].data, - "description": form["description"].data, - "author": form["author"].data, - "description_content_type": form["description_content_type"].data, - "author_email": form["author_email"].data, - "maintainer": form["maintainer"].data, - "maintainer_email": form["maintainer_email"].data, - "license": form["license"].data, - "keywords": form["keywords"].data, - "classifiers": form["classifiers"].data, - "platform": form["platform"].data, - "home_page": form["home_page"].data, - "download_url": form["download_url"].data, - "requires_python": form["requires_python"].data, - "pyversion": form["pyversion"].data, - "filetype": form["filetype"].data, - "comment": form["comment"].data, - "requires": form["requires"].data, - "provides": form["provides"].data, - "obsoletes": form["obsoletes"].data, - "requires_dist": form["requires_dist"].data, - "provides_dist": form["provides_dist"].data, - "obsoletes_dist": form["obsoletes_dist"].data, 
- "requires_external": form["requires_external"].data, - "project_urls": form["project_urls"].data, - "filename": file_data.filename, - "python_version": file_data.python_version, - "packagetype": file_data.packagetype, - "comment_text": file_data.comment_text, - "size": file_data.size, - "has_signature": file_data.has_signature, - "md5_digest": file_data.md5_digest, - "sha256_digest": file_data.sha256_digest, - "blake2_256_digest": file_data.blake2_256_digest, - "path": file_data.path, - "uploaded_via": file_data.uploaded_via, - "upload_time": file_data.upload_time, - } - if not request.registry.settings.get("warehouse.release_files_table") is None: - request.task(update_bigquery_release_files).delay(dist_metadata) + repository = request.find_service(IRepositoryService) + repository.add_target(file_) # Log a successful upload metrics.increment("warehouse.upload.ok", tags=[f"filetype:{form.filetype.data}"]) diff --git a/warehouse/tasks.py b/warehouse/tasks.py index b49b07c63856..c2362a1ea28d 100644 --- a/warehouse/tasks.py +++ b/warehouse/tasks.py @@ -206,8 +206,12 @@ def includeme(config): task_queues=( Queue("default", routing_key="task.#"), Queue("malware", routing_key="malware.#"), + Queue("tuf", routing_key="tuf.#"), ), - task_routes={"warehouse.malware.tasks.*": {"queue": "malware"}}, + task_routes={ + "warehouse.malware.tasks.*": {"queue": "malware"}, + "warehouse.tuf.tasks.*": {"queue": "tuf"}, + }, task_serializer="json", worker_disable_rate_limits=True, REDBEAT_REDIS_URL=s["celery.scheduler_url"], diff --git a/warehouse/tuf/__init__.py b/warehouse/tuf/__init__.py new file mode 100644 index 000000000000..8a1878f0e2c4 --- /dev/null +++ b/warehouse/tuf/__init__.py @@ -0,0 +1,71 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from celery.schedules import crontab + +from warehouse.tuf.interfaces import IKeyService, IRepositoryService, IStorageService +from warehouse.tuf.tasks import bump_bin_ns, bump_snapshot + + +def includeme(config): + config.add_settings( + { + "tuf.keytype": "ed25519", + "tuf.root.threshold": 1, + "tuf.root.expiry": 31536000, + "tuf.snapshot.threshold": 1, + "tuf.snapshot.expiry": 86400, + "tuf.targets.threshold": 1, + "tuf.targets.expiry": 31536000, + "tuf.timestamp.threshold": 1, + "tuf.timestamp.expiry": 86400, + "tuf.bins.threshold": 1, + "tuf.bins.expiry": 31536000, + "tuf.bin-n.threshold": 1, + # NOTE: This is a deviation from PEP 458, as published: the PEP + # stipulates that bin-n metadata expires every 24 hours, which is + # both burdensome for mirrors and requires a large number of redundant + # signing operations even when the targets themselves do not change. + # An amended version of the PEP should be published, at which point + # this note can be removed. 
+ "tuf.bin-n.expiry": 604800, + "tuf.spec_version": "1.0.0", + } + ) + + key_service_class = config.maybe_dotted(config.registry.settings["tuf.key_backend"]) + config.register_service_factory(key_service_class.create_service, IKeyService) + + storage_service_class = config.maybe_dotted( + config.registry.settings["tuf.storage_backend"] + ) + config.register_service_factory( + storage_service_class.create_service, IStorageService + ) + + repo_service_class = config.maybe_dotted( + config.registry.settings["tuf.repo_backend"] + ) + config.register_service_factory( + repo_service_class.create_service, IRepositoryService + ) + + # Per PEP 458: The snapshot and timestamp metadata expire every 24 hours. + # We conservatively bump them every 6 hours. + # Note that bumping the snapshot causes us to bump the timestamp, so we + # only need to explicitly bump the former. + # NOTE: PEP 458 currently specifies that each bin-n role expires every 24 hours, + # but Warehouse sets them to expire every 7 days instead. See the corresponding + # note on "tuf.bin-n.expiry" above. + # We conservatively bump all delegated bins at least once daily. + config.add_periodic_task(crontab(minute=0, hour="*/6"), bump_snapshot) + config.add_periodic_task(crontab(minute=0, hour=0), bump_bin_ns) diff --git a/warehouse/tuf/constants.py b/warehouse/tuf/constants.py new file mode 100644 index 000000000000..0f42a1162629 --- /dev/null +++ b/warehouse/tuf/constants.py @@ -0,0 +1,37 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import enum + + +@enum.unique +class Role(enum.Enum): + ROOT: str = "root" + SNAPSHOT: str = "snapshot" + TARGETS: str = "targets" + TIMESTAMP: str = "timestamp" + BINS: str = "bins" + BIN_N: str = "bin-n" + + +TOPLEVEL_ROLES = [ + Role.ROOT.value, + Role.SNAPSHOT.value, + Role.TARGETS.value, + Role.TIMESTAMP.value, +] + +HASH_ALGORITHM = "blake2b" + +TUF_REPO_LOCK = "tuf-repo" + +BIN_N_COUNT = 16384 diff --git a/warehouse/tuf/interfaces.py b/warehouse/tuf/interfaces.py new file mode 100644 index 000000000000..7a4e4915abab --- /dev/null +++ b/warehouse/tuf/interfaces.py @@ -0,0 +1,68 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from zope.interface import Interface + + +class IKeyService(Interface): + def create_service(context, request): + """ + Create the service, given the context and request for which it is being + created. + """ + + def pubkeys_for_role(rolename): + """ + Return a list of (TUF-formatted) public keys for the given TUF role. + """ + + def privkeys_for_role(rolename): + """ + Return a list of (TUF-formatted) private keys for the given TUF role. + """ + + +class IStorageService(Interface): + def create_service(context, request): + """ + Create the service, given the context and request for which it is being + created. + """ + + def get_backend(): + """ + Return an implementation of `securesystemslib.storage.StorageBackendInterface`. 
+ """ + + +class IRepositoryService(Interface): + def create_service(context, request): + """ + Create the service, given the context and request for which it is being + created. + """ + + def load_repository(): + """ + Return a TUF Repository object for direct manipulation of the underlying + repository. + + NOTE: The Repository object returned from this method cannot be manipulated + safely by multiple tasks or threads. It should only be used during + TUF initialization or offline maintenance tasks. + """ + + def add_target(file, backsigned=False): + """ + Given a warehouse.packaging.models.File, add it to the TUF + repository. + """ diff --git a/warehouse/tuf/services.py b/warehouse/tuf/services.py new file mode 100644 index 000000000000..3b39951d2b64 --- /dev/null +++ b/warehouse/tuf/services.py @@ -0,0 +1,120 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
import os.path
import warnings

from tuf import repository_tool
from zope.interface import implementer

from warehouse.tuf.interfaces import IKeyService, IRepositoryService, IStorageService
from warehouse.tuf.tasks import add_target
from warehouse.tuf.utils import GCSBackend, LocalBackend, make_fileinfo


class InsecureKeyWarning(UserWarning):
    """Warning category emitted whenever a LocalKeyService is instantiated."""


@implementer(IKeyService)
class LocalKeyService:
    """
    Key service that reads ed25519 key files from a local directory.

    Keys are expected at `tuf.{rolename}` (private) and `tuf.{rolename}.pub`
    (public) under the configured key path.
    """

    def __init__(self, key_path, request):
        warnings.warn(
            "LocalKeyService is intended only for use in development, you "
            "should not use it in production to avoid unnecessary key exposure.",
            InsecureKeyWarning,
        )

        self._key_path = key_path
        self._request = request

    @classmethod
    def create_service(cls, context, request):
        """Build the service from the `tuf.key.path` setting."""
        return cls(request.registry.settings["tuf.key.path"], request)

    def pubkeys_for_role(self, rolename):
        """Return a one-element list holding the role's public key."""
        pubkey_path = os.path.join(self._key_path, f"tuf.{rolename}.pub")
        return [repository_tool.import_ed25519_publickey_from_file(pubkey_path)]

    def privkeys_for_role(self, rolename):
        """
        Return a one-element list holding the role's private key.

        The key file is decrypted with the `tuf.{rolename}.secret` setting.
        """
        privkey_path = os.path.join(self._key_path, f"tuf.{rolename}")
        password = self._request.registry.settings[f"tuf.{rolename}.secret"]
        return [
            repository_tool.import_ed25519_privatekey_from_file(
                privkey_path, password=password
            )
        ]


@implementer(IStorageService)
class LocalStorageService:
    """Storage service backed by a LocalBackend."""

    def __init__(self, request):
        self._store = LocalBackend(request)

    @classmethod
    def create_service(cls, context, request):
        return cls(request)

    def get_backend(self):
        """Return the LocalBackend created for this service."""
        return self._store


@implementer(IStorageService)
class GCSStorageService:
    """Storage service backed by a GCSBackend."""

    def __init__(self, request):
        self._store = GCSBackend(request)

    @classmethod
    def create_service(cls, context, request):
        return cls(request)

    def get_backend(self):
        """Return the GCSBackend created for this service."""
        return self._store


@implementer(IRepositoryService)
class LocalRepositoryService:
    """
    Repository service for a TUF repository stored at a local path.

    `executor` is the callable used to enqueue the `add_target` task;
    create_service() passes `request.task(add_target).delay`.
    """

    def __init__(self, repo_path, executor):
        self._repo_path = repo_path
        self._executor = executor

    @classmethod
    def create_service(cls, context, request):
        """Build the service from the `tuf.repo.path` setting."""
        return cls(
            request.registry.settings["tuf.repo.path"],
            request.task(add_target).delay,
        )

    def load_repository(self):
        """Load the repository at the configured path with repository_tool."""
        return repository_tool.load_repository(self._repo_path)

    def add_target(self, file, custom=None):
        """Build the fileinfo for `file` and enqueue it for addition to the repo."""
        fileinfo = make_fileinfo(file, custom=custom)
        self._executor(file.path, fileinfo)


@implementer(IRepositoryService)
class GCSRepositoryService:
    """
    Repository service for a TUF repository stored behind a GCSBackend.

    `executor` is the callable used to enqueue the `add_target` task;
    create_service() passes `request.task(add_target).delay`.
    """

    def __init__(self, executor, request):
        # The executor must be kept: add_target() below calls self._executor,
        # which previously raised AttributeError because it was never stored.
        self._executor = executor
        self._store = GCSBackend(request)

    @classmethod
    def create_service(cls, context, request):
        return cls(request.task(add_target).delay, request)

    def load_repository(self):
        """Load the repository named "tuf" through the GCS storage backend."""
        return repository_tool.load_repository("tuf", storage_backend=self._store)

    def add_target(self, file, custom=None):
        """Build the fileinfo for `file` and enqueue it for addition to the repo."""
        fileinfo = make_fileinfo(file, custom=custom)
        self._executor(file.path, fileinfo)


# Original patch header for the file that follows:
# diff --git a/warehouse/tuf/tasks.py b/warehouse/tuf/tasks.py
# new file mode 100644
# index 000000000000..b83ef2889960
# --- /dev/null
# +++ b/warehouse/tuf/tasks.py
# @@ -0,0 +1,323 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import timedelta

import redis

from securesystemslib.util import get_file_hashes
from tuf.api import metadata

from warehouse.tasks import task
from warehouse.tuf import utils
from warehouse.tuf.constants import HASH_ALGORITHM, TUF_REPO_LOCK, Role
from warehouse.tuf.interfaces import IKeyService, IStorageService


def _repo_lock(request):
    """Return the Redis lock that every TUF task holds while it runs."""
    client = redis.StrictRedis.from_url(
        request.registry.settings["celery.scheduler_url"]
    )
    return client.lock(TUF_REPO_LOCK)


def _load_state(request):
    """
    Look up the key and storage services and load the current timestamp and
    snapshot metadata.

    Returns a `(key_service, storage_backend, timestamp, snapshot)` tuple. The
    storage backend lets us read and write individual metadata files without
    loading the entire repository.
    """
    storage_service = request.find_service(IStorageService)
    key_service = request.find_service(IKeyService)
    storage_backend = storage_service.get_backend()

    # The timestamp is always at `timestamp.json`; it records the snapshot
    # version, which find_snapshot() uses to load `{VERSION}.snapshot.json`.
    timestamp = metadata.Metadata.from_json_file("timestamp.json", storage_backend)
    snapshot = utils.find_snapshot(timestamp.signed, storage_backend)

    return key_service, storage_backend, timestamp, snapshot


def _bump(md, role, request):
    """Bump the version and expiry of `md` using the `tuf.{role}.expiry` setting."""
    utils.bump_metadata(
        md.signed,
        timedelta(seconds=request.registry.settings[f"tuf.{role.value}.expiry"]),
    )


def _sign(md, role, key_service):
    """Sign `md` with each private key the key service returns for `role`."""
    for key in key_service.privkeys_for_role(role.value):
        md.sign(key)


def _publish_snapshot(request, key_service, storage_backend, snapshot, timestamp):
    """
    Bump, sign and write the snapshot, then update, bump, sign and write the
    timestamp so that it references the new snapshot.
    """
    # The snapshot is written to `{VERSION}.snapshot.json`, where VERSION is
    # the version it has after the bump.
    _bump(snapshot, Role.SNAPSHOT, request)
    _sign(snapshot, Role.SNAPSHOT, key_service)
    snapshot_filename = f"{snapshot.signed.version}.snapshot.json"
    snapshot.to_json_file(snapshot_filename, storage_backend)

    # NOTE(ww): Calling get_file_hashes here causes us to round-trip
    # through the object store just to compute our snapshot's hash.
    # Maybe add a function to securesystemslib that does the digest
    # calculation on a string/bytes.
    timestamp.signed.update(
        snapshot.signed.version,
        len(snapshot.to_json().encode()),
        get_file_hashes(
            snapshot_filename,
            hash_algorithms=[HASH_ALGORITHM],
            storage_backend=storage_backend,
        ),
    )
    _bump(timestamp, Role.TIMESTAMP, request)
    _sign(timestamp, Role.TIMESTAMP, key_service)
    timestamp.to_json_file("timestamp.json", storage_backend)


@task(bind=True, ignore_result=True, acks_late=True)
def bump_snapshot(task, request):
    """
    Re-signs the TUF snapshot role, incrementing its version and renewing its
    expiration period.

    Bumping the snapshot transitively bumps the timestamp role.
    """
    with _repo_lock(request):
        key_service, storage_backend, timestamp, snapshot = _load_state(request)
        _publish_snapshot(request, key_service, storage_backend, snapshot, timestamp)


@task(bind=True, ignore_result=True, acks_late=True)
def bump_bin_ns(task, request):
    """
    Re-signs every delegated role listed in the snapshot (everything except
    `targets.json`), then re-signs the snapshot and the timestamp.
    """
    with _repo_lock(request):
        key_service, storage_backend, timestamp, snapshot = _load_state(request)

        # NOTE(review): every entry other than `targets.json` is signed with
        # the bin-n keys and bin-n expiry; if the snapshot also lists a
        # `bins.json` role it probably needs the BINS key instead. Confirm.
        for role_name, role_info in snapshot.signed.meta.items():
            if role_name == "targets.json":
                continue

            # `role_name` already carries the `.json` suffix.
            delegated_bin = metadata.Metadata.from_json_file(
                f"{role_info['version']}.{role_name}", storage_backend
            )

            _bump(delegated_bin, Role.BIN_N, request)
            _sign(delegated_bin, Role.BIN_N, key_service)

            # Write under the *new* version, as add_target does; writing back
            # to the old filename would leave the snapshot pointing at a
            # `{VERSION}.{ROLE}` file that does not exist.
            delegated_bin.to_json_file(
                f"{delegated_bin.signed.version}.{role_name}", storage_backend
            )

            # The role map lives on the signed payload, not on the Metadata
            # container, so it has to be reached through `snapshot.signed`.
            snapshot.signed.meta[role_name]["version"] = delegated_bin.signed.version

        _publish_snapshot(request, key_service, storage_backend, snapshot, timestamp)


@task(bind=True, ignore_result=True, acks_late=True)
def add_target(task, request, filepath, fileinfo):
    """
    Adds the target `filepath`, described by `fileinfo`, to its delegated bin
    and re-signs the bin, the snapshot and the timestamp.
    """
    with _repo_lock(request):
        key_service, storage_backend, timestamp, snapshot = _load_state(request)

        # The target's name determines which delegated bin it belongs to; the
        # snapshot supplies that bin's current version.
        delegated_bin_name, delegated_bin = utils.find_delegated_bin(
            filepath, snapshot.signed, storage_backend
        )

        # Bump the bin, add the target, then sign: signing has to come last so
        # that the signature covers the new target.
        _bump(delegated_bin, Role.BIN_N, request)
        delegated_bin.signed.update(filepath, fileinfo)
        _sign(delegated_bin, Role.BIN_N, key_service)
        delegated_bin.to_json_file(
            f"{delegated_bin.signed.version}.{delegated_bin_name}.json",
            storage_backend,
        )

        # Record the new bin version under exactly the key that
        # find_delegated_bin() reads. The key is set directly rather than via
        # Snapshot.update() so it does not depend on whether update() appends
        # `.json` to the role name itself.
        # TODO(ww): Fill in length and hashes?
        snapshot.signed.meta[f"{delegated_bin_name}.json"] = {
            "version": delegated_bin.signed.version
        }

        _publish_snapshot(request, key_service, storage_backend, snapshot, timestamp)


# Original patch header for the file that follows:
# diff --git a/warehouse/tuf/utils.py b/warehouse/tuf/utils.py
# new file mode 100644
# index 000000000000..8c6ae0c99abe
# --- /dev/null
# +++ b/warehouse/tuf/utils.py
# @@ -0,0 +1,174 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from contextlib import contextmanager
from io import BytesIO

import tuf.formats
import tuf.repository_lib

from google.cloud.exceptions import GoogleCloudError, NotFound
from securesystemslib.exceptions import StorageError
from securesystemslib.storage import FilesystemBackend, StorageBackendInterface
from tuf.api import metadata

from warehouse.tuf.constants import BIN_N_COUNT


def make_fileinfo(file, custom=None):
    """
    Given a warehouse.packaging.models.File, create a TUF-compliant
    "fileinfo" dictionary suitable for addition to a delegated bin.

    The optional "custom" kwarg can be used to supply additional custom
    metadata (e.g., metadata for indicating backsigning).
    """
    hashes = {"blake2b": file.blake2_256_digest}
    return tuf.formats.make_targets_fileinfo(file.size, hashes, custom=custom)


def bump_metadata(metadata, delta):
    """
    Given a tuf.api.metadata.Signed, bump its version and expiration (with the given
    timedelta).
    """
    metadata.bump_version()
    metadata.bump_expiration(delta=delta)


def find_snapshot(timestamp, storage_backend):
    """
    Given a tuf.api.metadata.Timestamp model, return the Metadata container
    for the consistent snapshot that it references.
    """
    snapshot_version = timestamp.meta["snapshot.json"]["version"]

    return metadata.Metadata.from_json_file(
        f"{snapshot_version}.snapshot.json", storage_backend
    )


def find_delegated_bin(filepath, snapshot, storage_backend):
    """
    Given a new target filepath and a tuf.api.metadata.Snapshot model,
    return a tuple of the bin name and tuf.api.metadata.Metadata container for
    the consistent delegated targets bin that the target belongs in.
    """

    # TODO: This probably isn't using the right hash function.
    filepath_hash = tuf.repository_lib.get_target_hash(filepath)
    bin_name = tuf.repository_lib.find_bin_for_target_hash(filepath_hash, BIN_N_COUNT)
    bin_version = snapshot.meta[f"{bin_name}.json"]["version"]

    return bin_name, metadata.Metadata.from_json_file(
        f"{bin_version}.{bin_name}.json", storage_backend
    )


class LocalBackend(StorageBackendInterface):
    """
    Storage backend that keeps TUF metadata on the local filesystem.

    Every path is joined onto the `metadata.staged` directory under the
    `tuf.repo.path` setting and then handed to securesystemslib's
    FilesystemBackend.
    """

    def __init__(self, request):
        self._filesystem_backend = FilesystemBackend()
        self._repo_path = os.path.join(
            request.registry.settings["tuf.repo.path"], "metadata.staged"
        )

    def _resolve(self, filepath):
        # Anchor a repository-relative path inside the staged metadata directory.
        return os.path.join(self._repo_path, filepath)

    def get(self, filepath):
        return self._filesystem_backend.get(self._resolve(filepath))

    def put(self, fileobj, filepath):
        return self._filesystem_backend.put(fileobj, self._resolve(filepath))

    def remove(self, filepath):
        return self._filesystem_backend.remove(self._resolve(filepath))

    def getsize(self, filepath):
        return self._filesystem_backend.getsize(self._resolve(filepath))

    def create_folder(self, filepath):
        return self._filesystem_backend.create_folder(self._resolve(filepath))

    def list_folder(self, filepath):
        return self._filesystem_backend.list_folder(self._resolve(filepath))


class GCSBackend(StorageBackendInterface):
    """
    Storage backend that keeps TUF metadata in a Google Cloud Storage bucket.

    The client comes from the `gcloud.gcs` service and the bucket name from the
    `tuf.bucket` setting. Failures are reported as securesystemslib
    StorageError, chained to the underlying Google Cloud exception.
    """

    def __init__(self, request):
        self._client = request.find_service(name="gcloud.gcs")
        # NOTE: This needs to be created.
        self._bucket = self._client.get_bucket(request.registry.settings["tuf.bucket"])

    @contextmanager
    def get(self, filepath):
        """Yield a BytesIO holding the blob's contents."""
        try:
            contents = self._bucket.blob(filepath).download_as_string()
        except NotFound as exc:
            raise StorageError(f"{filepath} not found") from exc

        # Yield outside the try block: a NotFound raised by the caller's
        # `with` body must not be reported as this file being missing.
        yield BytesIO(contents)

    def put(self, fileobj, filepath):
        """Upload `fileobj` to the blob at `filepath`."""
        try:
            blob = self._bucket.blob(filepath)
            # NOTE(ww): rewind=True reflects the behavior of the securesystemslib
            # implementation of StorageBackendInterface, which seeks to the file
            # start. I'm not sure it's actually required.
            blob.upload_from_file(fileobj, rewind=True)
        except GoogleCloudError as exc:
            # The underlying error stays available as __cause__.
            raise StorageError(f"couldn't store to {filepath}") from exc

    def remove(self, filepath):
        """Delete the blob at `filepath`."""
        try:
            self._bucket.blob(filepath).delete()
        except NotFound as exc:
            raise StorageError(f"{filepath} not found") from exc

    def getsize(self, filepath):
        """Return the size of the blob at `filepath`."""
        blob = self._bucket.get_blob(filepath)

        if blob is None:
            raise StorageError(f"{filepath} not found")

        return blob.size

    def create_folder(self, filepath):
        """Create an empty blob whose name ends in "/"; no-op for an empty path."""
        if not filepath:
            return

        if not filepath.endswith("/"):
            filepath = f"{filepath}/"

        try:
            blob = self._bucket.blob(filepath)
            blob.upload_from_string(b"")
        except GoogleCloudError as exc:
            raise StorageError(f"couldn't create folder: {filepath}") from exc

    def list_folder(self, filepath):
        """Return the names of all blobs whose name starts with `filepath` + "/"."""
        if not filepath.endswith("/"):
            filepath = f"{filepath}/"

        # NOTE: The `nextPageToken` appears to be required due to an
        # implementation detail leak.
        # See https://github.com/googleapis/google-cloud-python/issues/7875
        blobs = self._client.list_blobs(
            self._bucket, prefix=filepath, fields="items(name),nextPageToken"
        )
        return [blob.name for blob in blobs]