From 809c960f3ff10b79be4f253aea006faf00752140 Mon Sep 17 00:00:00 2001 From: Kairo de Araujo Date: Thu, 10 Feb 2022 18:49:51 +0000 Subject: [PATCH] WIP: TUF Initialization using python-tuf 1.0.0 This work refactors the [Draft PR](https://github.com/pypa/warehouse/pull/7488) by @ woodruffw, to build a new repository tool on top of the Python-TUF Metadata API, and use it instead of the Python-TUF repository tool that was deprecated in v1.0.0. Part of #10672 Signed-off-by: Kairo de Araujo --- .gitignore | 2 +- Makefile | 21 +- dev/environment | 9 +- dev/tufkeys/bin-n | 1 - dev/tufkeys/bin-n.pub | 1 - dev/tufkeys/bins | 1 - dev/tufkeys/bins.pub | 1 - dev/tufkeys/root | 1 - dev/tufkeys/root.pub | 1 - dev/tufkeys/snapshot | 1 - dev/tufkeys/snapshot.pub | 1 - dev/tufkeys/targets | 1 - dev/tufkeys/targets.pub | 1 - dev/tufkeys/timestamp | 1 - dev/tufkeys/timestamp.pub | 1 - docker-compose.yml | 5 + requirements/main.in | 3 +- requirements/main.txt | 12 +- tests/conftest.py | 3 +- tests/unit/test_config.py | 1 - tests/unit/test_routes.py | 8 +- tests/unit/tuf/__init__.py | 11 + tests/unit/tuf/test_services.py | 54 ++++ warehouse/cli/tuf.py | 227 ++++++++------- warehouse/config.py | 6 +- warehouse/forklift/legacy.py | 54 +++- warehouse/packaging/utils.py | 8 +- warehouse/routes.py | 4 +- warehouse/tuf/README.md | 242 ++++++++++++++++ warehouse/tuf/__init__.py | 15 +- warehouse/tuf/constants.py | 7 - warehouse/tuf/hash_bins.py | 90 ++++++ warehouse/tuf/interfaces.py | 77 +++-- warehouse/tuf/repository.py | 490 ++++++++++++++++++++++++++++++++ warehouse/tuf/services.py | 391 +++++++++++++++++++++---- warehouse/tuf/tasks.py | 305 ++------------------ warehouse/tuf/utils.py | 174 ------------ 37 files changed, 1527 insertions(+), 704 deletions(-) delete mode 100644 dev/tufkeys/bin-n delete mode 100644 dev/tufkeys/bin-n.pub delete mode 100644 dev/tufkeys/bins delete mode 100644 dev/tufkeys/bins.pub delete mode 100644 dev/tufkeys/root delete mode 100644 dev/tufkeys/root.pub 
delete mode 100644 dev/tufkeys/snapshot delete mode 100644 dev/tufkeys/snapshot.pub delete mode 100644 dev/tufkeys/targets delete mode 100644 dev/tufkeys/targets.pub delete mode 100644 dev/tufkeys/timestamp delete mode 100644 dev/tufkeys/timestamp.pub create mode 100644 tests/unit/tuf/__init__.py create mode 100644 tests/unit/tuf/test_services.py create mode 100644 warehouse/tuf/README.md create mode 100644 warehouse/tuf/hash_bins.py create mode 100644 warehouse/tuf/repository.py delete mode 100644 warehouse/tuf/utils.py diff --git a/.gitignore b/.gitignore index 8f788ddc636b..f4e885597c79 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ docker-compose.override.yaml node_modules/ -dev/tuf.* +dev/tufkeys/ dev/example.sql dev/prod.sql dev/prod.sql.xz diff --git a/Makefile b/Makefile index 78edda573342..72d01ab2f511 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,5 @@ DB := example IPYTHON := no -LOCALES := $(shell .state/env/bin/python -c "from warehouse.i18n import KNOWN_LOCALES; print(' '.join(set(KNOWN_LOCALES)-{'en'}))") WAREHOUSE_CLI := docker-compose run --rm web python -m warehouse # set environment variable WAREHOUSE_IPYTHON_SHELL=1 if IPython @@ -94,14 +93,17 @@ initdb: .state/docker-build-web $(MAKE) reindex inittuf: - $(WAREHOUSE_CLI) tuf keypair --name root --path /opt/warehouse/src/dev/tuf.root - $(WAREHOUSE_CLI) tuf keypair --name snapshot --path /opt/warehouse/src/dev/tuf.snapshot - $(WAREHOUSE_CLI) tuf keypair --name targets --path /opt/warehouse/src/dev/tuf.targets - $(WAREHOUSE_CLI) tuf keypair --name timestamp --path /opt/warehouse/src/dev/tuf.timestamp - $(WAREHOUSE_CLI) tuf keypair --name bins --path /opt/warehouse/src/dev/tuf.bins - $(WAREHOUSE_CLI) tuf keypair --name bin-n --path /opt/warehouse/src/dev/tuf.bin-n - $(WAREHOUSE_CLI) tuf new-repo - $(WAREHOUSE_CLI) tuf build-targets + $(WAREHOUSE_CLI) tuf dev keypair --name root --path /opt/warehouse/src/dev/tufkeys/root + $(WAREHOUSE_CLI) tuf dev keypair --name snapshot --path 
/opt/warehouse/src/dev/tufkeys/snapshot + $(WAREHOUSE_CLI) tuf dev keypair --name targets --path /opt/warehouse/src/dev/tufkeys/targets1 + $(WAREHOUSE_CLI) tuf dev keypair --name targets --path /opt/warehouse/src/dev/tufkeys/targets2 + $(WAREHOUSE_CLI) tuf dev keypair --name timestamp --path /opt/warehouse/src/dev/tufkeys/timestamp + $(WAREHOUSE_CLI) tuf dev keypair --name bins --path /opt/warehouse/src/dev/tufkeys/bins + $(WAREHOUSE_CLI) tuf dev keypair --name bin-n --path /opt/warehouse/src/dev/tufkeys/bin-n + $(WAREHOUSE_CLI) tuf dev init-repo + $(WAREHOUSE_CLI) tuf dev init-delegations + $(WAREHOUSE_CLI) tuf dev add-all-packages + $(WAREHOUSE_CLI) tuf dev add-all-indexes reindex: .state/docker-build-web docker-compose run --rm web python -m warehouse search reindex @@ -111,6 +113,7 @@ shell: .state/docker-build-web clean: rm -rf dev/*.sql + rm -rf dev/tufkeys purge: stop clean rm -rf .state diff --git a/dev/environment b/dev/environment index 4a46a7c1ea81..8bf1043f8f61 100644 --- a/dev/environment +++ b/dev/environment @@ -49,12 +49,13 @@ GITHUB_TOKEN_SCANNING_META_API_URL="http://notgithub:8000/meta/public_keys/token TWOFACTORREQUIREMENT_ENABLED=true TWOFACTORMANDATE_AVAILABLE=true TWOFACTORMANDATE_ENABLED=true -TUF_KEY_BACKEND=warehouse.tuf.services.LocalKeyService key.path=/opt/warehouse/src/dev -TUF_STORAGE_BACKEND=warehouse.tuf.services.LocalStorageService -TUF_REPO_BACKEND=warehouse.tuf.services.LocalRepositoryService repo.path=/opt/warehouse/src/warehouse/tuf/dist + +TUF_KEY_BACKEND=warehouse.tuf.services.LocalKeyService key.path=/opt/warehouse/src/dev/tufkeys +TUF_STORAGE_BACKEND=warehouse.tuf.services.LocalStorageService repo.path=/var/opt/warehouse/tuf_metadata/ url=http://localhost:9001/metadata/{path} +TUF_REPOSITORY_BACKEND=warehouse.tuf.services.RepositoryService TUF_ROOT_SECRET="an insecure private key password" TUF_SNAPSHOT_SECRET="an insecure private key password" TUF_TARGETS_SECRET="an insecure private key password" TUF_TIMESTAMP_SECRET="an 
insecure private key password" TUF_BINS_SECRET="an insecure private key password" -TUF_BIN_N_SECRET="an insecure private key password" +TUF_BIN_N_SECRET="an insecure private key password" \ No newline at end of file diff --git a/dev/tufkeys/bin-n b/dev/tufkeys/bin-n deleted file mode 100644 index f1ba72f72ea2..000000000000 --- a/dev/tufkeys/bin-n +++ /dev/null @@ -1 +0,0 @@ -2f0570511d84b133d6e1f875920587a9@@@@100000@@@@dfacbe8d4966935d16b5c9b7910e7b01189f83e0db2d5169eb4d7a2edab91924@@@@f3f67a27b03a24c8b767f5721bc345b5@@@@e9855ff4f41d1ec10877b3476eabd99c86f2162ef16172aaa17200af3ec97a0e7b05a389e3084a8c79aa2756fd999eaa67f5462e37a024b66feba784a3e70850d8d2466165ba86d745cac2a526d44439892f46a3342d1c890589c833f652942283b59441b214625b275de95d16ec199cb4fbbd7fa8b1442153fbac65db18d9c9e1345b37a7fa850d2ffe0d13035f28d68a3b47ddb310750002e8e96e751b633f06e4d9c70fa117de12d848cab845f830e082c51e0ad42342f2c24869b091e3c7dae70410578fbb6877a975983aa3dde6aa699e4e45ba7114c4e373109d2128c5211b3471390cb0ef09da3cd5e552813a906f604038218f7891213cefa10fbb7ba6c01e98c821ba45b69504d5ac2d41feeafccc974d82b89c33d200b6ceb04a7af47f08f8258cfea9a6855d3cbe91bc0892ddaabd0f508a6ed85f811dcff66e0fed066f3607f5a5f1ccecc80ad1ede362 \ No newline at end of file diff --git a/dev/tufkeys/bin-n.pub b/dev/tufkeys/bin-n.pub deleted file mode 100644 index 23af98de62af..000000000000 --- a/dev/tufkeys/bin-n.pub +++ /dev/null @@ -1 +0,0 @@ -{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "ca2694d781367f94974c5176cae6c7290fe3b65f03a5c6331fe500b8c700f3aa"}} \ No newline at end of file diff --git a/dev/tufkeys/bins b/dev/tufkeys/bins deleted file mode 100644 index d72696818560..000000000000 --- a/dev/tufkeys/bins +++ /dev/null @@ -1 +0,0 @@ 
-c4509bc89725ce7b1467b4d596536564@@@@100000@@@@47c863bddaad67e2482760e0da329f3f9170dee5de9e9265cd23873947b4a373@@@@8333670a562d5b2fa37559bb4372de32@@@@d567e40c7d544b4d41f023d208cacdb76009991fb1dc4681e8b7ebae42d5756c21434937061297dfe264150d3ab9c00985d4217ca18c70904d8c1becc9e172a1677221d09d7f9de5b6778340bf26d305dcf34556066e744abcf96bd489e612d6ea9c483838f21bf06646d250b5d7f380cccf57eb990387baf28c3fd00815b1db3a34418d879718607fb54ccfb1fe20edc06bdb765dce6d3617dba5cc5aff41f5eab62a6e0f8e095a97dda3784dcb95f97af5a1c4de7fb0424d659469305cafb092a209ab78932444ffb497286cb44e4ba360e0025f71fc108e8f3b2fac794a05242ef5167bdcd5e4a1a3573805cb91b8359bcb317eb026c2ee102db096b50b5f6fc478af5146dbcd9e2ada046d6a392854e779ffa4b2bdbb0ea90d41eb6030fd0d98f0aa34510a2a6e9292d5095140864658d8db63701b661ed8ee3b6a428dfb9163e7918942ffb0506f290a76fc4703 \ No newline at end of file diff --git a/dev/tufkeys/bins.pub b/dev/tufkeys/bins.pub deleted file mode 100644 index d0ef047f9a51..000000000000 --- a/dev/tufkeys/bins.pub +++ /dev/null @@ -1 +0,0 @@ -{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "a4361da485b8ae5cb10915d5254d96f05ed3d8d4c87979c5a203504b4b65ecf5"}} \ No newline at end of file diff --git a/dev/tufkeys/root b/dev/tufkeys/root deleted file mode 100644 index ddb1d5081269..000000000000 --- a/dev/tufkeys/root +++ /dev/null @@ -1 +0,0 @@ 
-83589fb72074fcb8533c8c4414015c30@@@@100000@@@@be3d99c519272cc8b8c1150ca03450c0aac21d084a0dc79966faca4d05787ff1@@@@dccf47ed06f92da01e9de35113477e88@@@@72e37b2acdb2115888975e66990009e6f6a4cdd94ff77473e056d16ef33e9ef6ea450308f1b4962f544a0796d7b7f788b35a6ae33d6cc83e56cc416440716418102d39573692e87350935088bc08b4b82d910731a0f536bebded8f80e49c3df052da2bf657ed6a1f64463da477905bc8a1bc8fb65215653a6efa3e0a74b2fabcb65d96fc1f1c31196878e9eb39eca1cfb709b69a400f159f5e8f8dc9b765590fb5cac705170149b71c0f54951d239eafaa9ea9b8c3ac0897e57e5c8f027f8d160d91e3995da00535ced37ff41bccd31e7ed5076c22b448f9a7e701a2024de56b839fd7be4689c23f8b81102fe4711585300862b01f499408295723f995775de33ce69e666883bb25302adb1ce6b21ac710ceeacefacc316aa80432a7d8e9819d78913707b8daef35d03667ab11727e6ea6bc44b9356449f6d6aed53825f2fd1397c68d2669b2b1895874678234a42afd \ No newline at end of file diff --git a/dev/tufkeys/root.pub b/dev/tufkeys/root.pub deleted file mode 100644 index 61e15f87d712..000000000000 --- a/dev/tufkeys/root.pub +++ /dev/null @@ -1 +0,0 @@ -{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "5cb1a1622f72ae901bc2ed25546503bae70b53308833385a11b6630fb52c9bc9"}} \ No newline at end of file diff --git a/dev/tufkeys/snapshot b/dev/tufkeys/snapshot deleted file mode 100644 index 4b899fc99d94..000000000000 --- a/dev/tufkeys/snapshot +++ /dev/null @@ -1 +0,0 @@ 
-97074b6eccb9d5920192f231249c33dc@@@@100000@@@@3965a3500a3de4f6a052f9f26efd2714affae878daec6df53c50fe20fb3b5732@@@@dbc2d164f88ea8d77b093db20475db0c@@@@d6db7159767d46f6f255a2184b2615df892f067ca0861dba18ccc5b6b7dce95d5d2b377d0d8424d39fb0b337e9bd086a6405804a55e7c8d591f090dc55d38c0820a6bc2ddd27078e1bc482fa4f19a66941cf023680d032da70a8ac6bb5c9fc7454a273d014ef569f7a79f89145c29ade6a5f56752a9e4b5f3a4ee0426680b3d1921a962a6edee5c77661439f7bc95674fbc009ecf903c20fbac786e94164bc43df1fb8536bc6666ce098cfad411c852154e4dd7dda0b9046cd4310d7ae9b822be3cb7b62e66e73718424743e2297b091d2cd8cfce108f1f6a2e17ad382655c517d8131da3a72ac1f2ae8cd760705bd17ec5ff6834c86e8f08fa804152774146a3bda063ae29a01be00723b96e3370d9f046a1287ad4ccaa007cc016d59bf935268cb8b7946bb5b5480962137ed6cb449df43aa83d3d086e4b10e7c33110847c9bc1ee2c24ebbd66e989c6e28c52ac28d \ No newline at end of file diff --git a/dev/tufkeys/snapshot.pub b/dev/tufkeys/snapshot.pub deleted file mode 100644 index 92093748d91e..000000000000 --- a/dev/tufkeys/snapshot.pub +++ /dev/null @@ -1 +0,0 @@ -{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "c19d2c8532667d7784f5b33f5bdccd529b52c5d55754b4ef989a261bf1695431"}} \ No newline at end of file diff --git a/dev/tufkeys/targets b/dev/tufkeys/targets deleted file mode 100644 index a36285e32f8e..000000000000 --- a/dev/tufkeys/targets +++ /dev/null @@ -1 +0,0 @@ 
-e25efe7833ec8682ff0d09366d264fe5@@@@100000@@@@ecb082633529fc0d392b3d0c8f36972423c91060bbfc1903a8e390deeb650817@@@@3f116f9b022d8e1e3959e99ee800f5cd@@@@45defac0e6ef65003645fd34e3a39a47579e6da578529fe425bd9798e6007cb901d4003e866068a853e17faeba365367f63ead34fac88bca0938f308e2556913c8f9ed56892262704b0a7dcb25bedf61fcd8c72af6eaa4b8630936b704f2aae364313211a32d3bac84d88273fd2e2ee48f1c3d7bef8de74a800124f469bacad916ed3d1242b2c691548b68d3e890a8ef19d3e56817a94e1e508136a8f6cd4d5af4012a355e874bfe446504b75991187f76f521dc3b589d1eae126d978e1e7b268688e1992729cb820f5057352e6c916af4ce448bf0a26d1cf52968a5398c6c516cae831f7f75a7885489fae79fb3a9f8faae0dc32ceaf17106df7834ffc4e28589df0d886a08f51751d2c15999d538197a1e392647ffc81a314e87f936e2087762f0d6004a862789e27021e11e489818320da3bd8801f208deb0b4aa4d1f6319712729bb716c71837ac229d216180ef4 \ No newline at end of file diff --git a/dev/tufkeys/targets.pub b/dev/tufkeys/targets.pub deleted file mode 100644 index fe0f800e39af..000000000000 --- a/dev/tufkeys/targets.pub +++ /dev/null @@ -1 +0,0 @@ -{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "715ea559e2fa80a2bad3c50836c55ce33d256fa7ad5468931cc76a8a10e86d37"}} \ No newline at end of file diff --git a/dev/tufkeys/timestamp b/dev/tufkeys/timestamp deleted file mode 100644 index a77ed1a42473..000000000000 --- a/dev/tufkeys/timestamp +++ /dev/null @@ -1 +0,0 @@ 
-7a59984187faa4893ceba030f5c5b4a7@@@@100000@@@@7ec97a52365194434c8294e50ceb454222194670104eeacce30846222cdd2756@@@@20d9afcaebb214eb40b3827d6730f318@@@@7763c7a7fd2d9897ad9951149bbd00ced57d9a68339ce0b1a1dbfc7cbb1defdd6d2a87178c140a9de2180e58616f6004a53f384356743b3aad3d54d073e70d8a6c9420ee9e6880405d969f59ddac072235740259868af52a9c0d4582ce3cd0435e9dc0138f0571b02629721f11308e5a6fb26f75d2bd9d298da3bfaad4ec6aaa80d0d81a2e22690d8416b188e132b87bc5ef89d1460e4f7b2c12dac555ad99212b30c73d5f3ef30a77e0eb2f0f82e88c869ef585a8707267be76619c7a3e430563be1d7d1073ed1706bea7d5232e784674c104920d4f44382beeb8d07cfe0d9f5ec0812497d48d2f8b71634a2ae29ca9bf2732d4e0c3216a62b318d11dfc1608f0c9f9e8eff142b45cce4ac7bc91c4967fcd9b3b5484bf016375100391321c54e6a3da85cfb99c6e429da2a586ea15d2f09e2f7acb3e5358bef10f0116b9f3da2aa2c49f9fb07ffcaea4677144bc1969 \ No newline at end of file diff --git a/dev/tufkeys/timestamp.pub b/dev/tufkeys/timestamp.pub deleted file mode 100644 index cece0173a650..000000000000 --- a/dev/tufkeys/timestamp.pub +++ /dev/null @@ -1 +0,0 @@ -{"keytype": "ed25519", "scheme": "ed25519", "keyid_hash_algorithms": ["sha256", "sha512"], "keyval": {"public": "5e12fa7b474cb886cdc0cd14c5017b09b3759125ab7e6985bd9c5953063de333"}} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 869bfb5e3cef..5fe5fc2e3ffa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ volumes: simple: packages: sponsorlogos: + tuf_metadata: vault: services: @@ -79,6 +80,7 @@ services: # The :z option fixes permission issues with SELinux by setting a # permissive security context. 
- ./dev:/opt/warehouse/src/dev:z + - ./dev/tufkeys:/opt/warehouse/src/dev/tufkeys:z - ./docs:/opt/warehouse/src/docs:z - ./warehouse:/opt/warehouse/src/warehouse:z - ./tests:/opt/warehouse/src/tests:z @@ -87,6 +89,7 @@ services: - packages:/var/opt/warehouse/packages - sponsorlogos:/var/opt/warehouse/sponsorlogos - simple:/var/opt/warehouse/simple + - tuf_metadata:/var/opt/warehouse/tuf_metadata - ./bin:/opt/warehouse/src/bin:z - ./requirements:/opt/warehouse/src/requirements:z ports: @@ -103,6 +106,7 @@ services: - packages:/var/opt/warehouse/packages - sponsorlogos:/var/opt/warehouse/sponsorlogos - simple:/var/opt/warehouse/simple + - tuf_metadata:/var/opt/warehouse/metadata ports: - "9001:9001" @@ -115,6 +119,7 @@ services: volumes: - ./dev:/opt/warehouse/src/dev:z - ./warehouse:/opt/warehouse/src/warehouse:z + - tuf_metadata:/var/opt/warehouse/tuf_metadata env_file: dev/environment environment: C_FORCE_ROOT: "1" diff --git a/requirements/main.in b/requirements/main.in index 32c8c1e08149..87ea11ca4d20 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -49,6 +49,7 @@ requests requests-aws4auth redis>=2.8.0,<5.0.0 rfc3986 +securesystemslib sentry-sdk setuptools sqlalchemy[asyncio]>=0.9,<1.5.0 # https://github.com/pypa/warehouse/pull/9228 @@ -58,7 +59,7 @@ stdlib-list structlog transaction trove-classifiers -tuf==0.15.0 +tuf==1.0.0 typeguard webauthn>=1.0.0,<2.0.0 whitenoise diff --git a/requirements/main.txt b/requirements/main.txt index 38e12e02e76c..474ba09d7a08 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1056,6 +1056,7 @@ requests==2.27.1 \ # google-cloud-storage # premailer # requests-aws4auth + # tuf requests-aws4auth==1.1.1 \ --hash=sha256:c0883346ce30b5018903a67da88df72f73ff06e1a320845bba9cd85e811ba0ba \ --hash=sha256:dfd9f930ffde48a756b72b55698a8522875ea6358dcffbcc44a66700ace31783 @@ -1075,7 +1076,12 @@ s3transfer==0.5.2 \ sentry-sdk==1.5.8 \ --hash=sha256:32af1a57954576709242beb8c373b3dbde346ac6bd616921def29d68846fb8c3 
\ --hash=sha256:38fd16a92b5ef94203db3ece10e03bdaa291481dd7e00e77a148aa0302267d47 - # via -r requirements/main.in +securesystemslib==0.22.0 \ + --hash=sha256:2f58ca1ee30fde5401300fe3b3841adcf7b4369674247fa63b258e07e1f52fd2 \ + --hash=sha256:c3fc41ac32fe8bc9744b89e6ce2ebca45f4417ca737beb766a41c6cb21935662 + # via + # -r requirements/main.in + # tuf six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 @@ -1175,6 +1181,10 @@ trove-classifiers==2022.3.17 \ --hash=sha256:378273657cb76c751bf5270d9a748f569839bdac5b11a21195b37fcd1ee7e43f \ --hash=sha256:a706e93248afa7dcecd35c61b70b698a7437475cc5a03697245fc20425e29730 # via -r requirements/main.in +tuf==1.0.0 \ + --hash=sha256:6d967f992d494678b684a6f5f03d948130e2d9956310c7af74fa2f0296e8ec4d \ + --hash=sha256:ec74150954d56e206fa72f3b0234ea802649370e528dea5a83e507bd408fda82 + # via -r requirements/main.in typeguard==2.13.3 \ --hash=sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4 \ --hash=sha256:5e3e3be01e887e7eafae5af63d1f36c849aaa94e3a0112097312aabfa16284f1 diff --git a/tests/conftest.py b/tests/conftest.py index 71ed72bcc434..def3c1c8dd1f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -227,8 +227,9 @@ def app_config(database): "docs.backend": "warehouse.packaging.services.LocalDocsStorage", "sponsorlogos.backend": "warehouse.admin.services.LocalSponsorLogoStorage", "mail.backend": "warehouse.email.services.SMTPEmailSender", + "tuf.storage_backend": "warehouse.tuf.services.LocalStorageService", "tuf.key_backend": "warehouse.tuf.services.LocalKeyService", - "tuf.repo_backend": "warehouse.tuf.services.LocalRepositoryService", + "tuf.repository_backend": "warehouse.tuf.services.RepositoryService", "malware_check.backend": ( "warehouse.malware.services.PrinterMalwareCheckService" ), diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 
68cb6400e7a5..c82da6ba7c40 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -403,7 +403,6 @@ def __init__(self): ), ] assert configurator_obj.add_static_view.calls == [ - pretend.call("tuf", "warehouse:tuf/dist/metadata.staged/"), pretend.call("static", "warehouse:static/dist/", cache_max_age=315360000), ] assert configurator_obj.add_cache_buster.calls == [ diff --git a/tests/unit/test_routes.py b/tests/unit/test_routes.py index 634d0a927eb9..5ebcbefadc78 100644 --- a/tests/unit/test_routes.py +++ b/tests/unit/test_routes.py @@ -26,6 +26,7 @@ def __init__(self): settings={ "docs.url": docs_route_url, "files.url": "https://files.example.com/packages/{path}", + "tuf.url": "https://files.example.com/metadata/{path}", } ) if warehouse: @@ -337,6 +338,7 @@ def add_policy(name, filename): domain=warehouse, ), pretend.call("packaging.file", "https://files.example.com/packages/{path}"), + pretend.call("tuf.metadata", "https://files.example.com/metadata/{path}"), pretend.call("ses.hook", "/_/ses-hook/", domain=warehouse), pretend.call("rss.updates", "/rss/updates.xml", domain=warehouse), pretend.call("rss.packages", "/rss/packages.xml", domain=warehouse), @@ -425,7 +427,6 @@ def add_policy(name, filename): view_kw={"has_translations": True}, ), ] - assert config.add_redirect.calls == [ pretend.call("/sponsor/", "/sponsors/", domain=warehouse), pretend.call("/u/{username}/", "/user/{username}/", domain=warehouse), @@ -440,6 +441,11 @@ def add_policy(name, filename): "https://files.example.com/packages/{path}", domain=warehouse, ), + pretend.call( + "/metadata/{path:.*}", + "https://files.example.com/metadata/{path}", + domain=warehouse, + ), ] assert config.add_pypi_action_route.calls == [ diff --git a/tests/unit/tuf/__init__.py b/tests/unit/tuf/__init__.py new file mode 100644 index 000000000000..164f68b09175 --- /dev/null +++ b/tests/unit/tuf/__init__.py @@ -0,0 +1,11 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may 
not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/tuf/test_services.py b/tests/unit/tuf/test_services.py new file mode 100644 index 000000000000..0b724c119184 --- /dev/null +++ b/tests/unit/tuf/test_services.py @@ -0,0 +1,54 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pretend + +from zope.interface.verify import verifyClass + +from warehouse.tuf.interfaces import IKeyService +from warehouse.tuf.services import LocalKeyService + + +class TestLocalLocalKeyService: + def test_verify_service(self): + assert verifyClass(IKeyService, LocalKeyService) + + def test_create_service(self): + request = pretend.stub( + registry=pretend.stub(settings={"tuf.key.path": "/tuf/key/path/"}) + ) + service = LocalKeyService.create_service(None, request) + assert service._key_path == "/tuf/key/path/" + + def test_basic_init(self, db_request): + service = LocalKeyService("/opt/warehouse/src/dev/tufkeys", db_request) + assert service._key_path == "/opt/warehouse/src/dev/tufkeys" + + def test_get_private_key(self, db_request, monkeypatch): + service = LocalKeyService("/opt/warehouse/src/dev/tufkeys", db_request) + + expected_priv_key_dict = { + "keytype": "ed25519", + "scheme": "ed25519", + "keyval": {"public": "720a9a588deefd5...4d08984e87bfc5a18f34618e438434c7"}, + "keyid": "2de4eb9afe9fb73...2235d3418bd63f4214d3ba7d23b516f23e", + "keyid_hash_algorithms": ["sha256", "sha512"], + } + db_request.registry.settings["tuf.root.secret"] = "tuf.root.secret" + monkeypatch.setattr( + "warehouse.tuf.services.import_ed25519_privatekey_from_file", + lambda *a, **kw: expected_priv_key_dict, + ) + + root_keyid = service.get("root", "private") + + assert root_keyid == [expected_priv_key_dict] diff --git a/warehouse/cli/tuf.py b/warehouse/cli/tuf.py index 9350759e62e8..744f06f73ce4 100644 --- a/warehouse/cli/tuf.py +++ b/warehouse/cli/tuf.py @@ -10,46 +10,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import datetime - import click -from tuf import repository_tool +from securesystemslib.exceptions import StorageError +from securesystemslib.interface import generate_and_write_ed25519_keypair from warehouse.cli import warehouse -from warehouse.config import Environment -from warehouse.tuf import utils -from warehouse.tuf.constants import BIN_N_COUNT, TOPLEVEL_ROLES, Role - - -def _make_backsigned_fileinfo_from_file(file): - return utils.make_fileinfo(file, custom={"backsigned": True}) - - -def _key_service(config): - key_service_class = config.maybe_dotted(config.registry.settings["tuf.key_backend"]) - return key_service_class.create_service(None, config) - - -def _repository_service(config): - repo_service_class = config.maybe_dotted( - config.registry.settings["tuf.repo_backend"] - ) - return repo_service_class.create_service(None, config) - - -def _set_expiration_for_role(config, role_obj, role_name): - # If we're initializing TUF for development purposes, give - # every role a long expiration time so that developers don't have to - # continually re-initialize it. 
- if config.registry.settings["warehouse.env"] == Environment.development: - role_obj.expiration = datetime.datetime.now() + datetime.timedelta( - seconds=config.registry.settings["tuf.development_metadata_expiry"] - ) - else: - role_obj.expiration = datetime.datetime.now() + datetime.timedelta( - seconds=config.registry.settings[f"tuf.{role_name}.expiry"] - ) +from warehouse.packaging.utils import render_simple_detail +from warehouse.tuf.tasks import ( + add_hashed_targets as _add_hashed_targets, + bump_bin_n_roles as _bump_bin_n_roles, + bump_snapshot as _bump_snapshot, + init_repository as _init_repository, + init_targets_delegation as _init_targets_delegation, +) @warehouse.group() # pragma: no-branch @@ -59,118 +33,139 @@ def tuf(): """ -@tuf.command() +@tuf.group() +def dev(): + """ + TUF Development purposes commands + """ + + +@dev.command() @click.pass_obj @click.option("--name", "name_", help="The name of the TUF role for this keypair") @click.option("--path", "path_", help="The basename of the Ed25519 keypair to generate") def keypair(config, name_, path_): """ - Generate a new TUF keypair, for development purposes. + Generate a new TUF keypair. """ - - repository_tool.generate_and_write_ed25519_keypair( - path_, password=config.registry.settings[f"tuf.{name_}.secret"] - ) + password = config.registry.settings[f"tuf.{name_}.secret"] + generate_and_write_ed25519_keypair(password, filepath=path_) -@tuf.command() +@dev.command() @click.pass_obj -def new_repo(config): +def init_repo(config): """ - Initialize the TUF repository from scratch, including a brand new root. + Initialize a new TUF repository if it does not exist. 
""" + try: + request = config.task(_init_repository).get_request() + config.task(_init_repository).run(request) + except FileExistsError as err: + raise click.ClickException(str(err)) - repository = repository_tool.create_new_repository( - config.registry.settings["tuf.repo.path"] - ) + click.echo("Repository Initialization finished.") - key_service = _key_service(config) - for role in TOPLEVEL_ROLES: - role_obj = getattr(repository, role) - role_obj.threshold = config.registry.settings[f"tuf.{role}.threshold"] - _set_expiration_for_role(config, role_obj, role) - pubkeys = key_service.pubkeys_for_role(role) - privkeys = key_service.privkeys_for_role(role) - if len(pubkeys) < role_obj.threshold or len(privkeys) < role_obj.threshold: - raise click.ClickException( - f"Unable to initialize TUF repo ({role} needs {role_obj.threshold} keys" - ) - - for pubkey in pubkeys: - role_obj.add_verification_key(pubkey) +@dev.command() +@click.pass_obj +def bump_snapshot(config): + """ + Bump Snapshot metadata. + """ + request = config.task(_bump_snapshot).get_request() + config.task(_bump_snapshot).run(request) + click.echo("Snapshot bump finished.") - for privkey in privkeys: - role_obj.load_signing_key(privkey) - repository.mark_dirty(TOPLEVEL_ROLES) - repository.writeall( - consistent_snapshot=True, - ) +@dev.command() +@click.pass_obj +def bump_bin_n_roles(config): + """ + Bump delegated targets roles (BIN-N). + """ + request = config.task(_bump_bin_n_roles).get_request() + config.task(_bump_bin_n_roles).run(request) + click.echo("BIN-N roles (hash bins) bump finished.") -@tuf.command() +@dev.command() @click.pass_obj -def build_targets(config): +def init_delegations(config): """ + Create delegated targets roles (BINS and BIN-N). + Given an initialized (but empty) TUF repository, create the delegated targets role (bins) and its hashed bin delegations (each bin-n). 
""" + request = config.task(_init_targets_delegation).get_request() + try: + config.task(_init_targets_delegation).run(request) + except (FileExistsError, StorageError) as err: + raise click.ClickException(str(err)) - repo_service = _repository_service(config) - repository = repo_service.load_repository() + click.echo("BINS and BIN-N roles targets delegation finished.") - # Load signing keys. We do this upfront for the top-level roles. - key_service = _key_service(config) - for role in ["snapshot", "targets", "timestamp"]: - role_obj = getattr(repository, role) - [role_obj.load_signing_key(k) for k in key_service.privkeys_for_role(role)] +@dev.command() +@click.pass_obj +def add_all_packages(config): + """ + Collect every PyPI package and add as targets. - # NOTE: TUF normally does delegations by path patterns (i.e., globs), but PyPI - # doesn't store its uploads on the same logical host as the TUF repository. - # The last parameter to `delegate` is a special sentinel for this. - repository.targets.delegate( - Role.BINS.value, key_service.pubkeys_for_role(Role.BINS.value), ["*"] - ) - bins_role = repository.targets(Role.BINS.value) - _set_expiration_for_role(config, bins_role, Role.BINS.value) + Collect the "paths" for every PyPI package and add as targets.These are + packages already in existence, so we'll add some additional data to their targets to + indicate that we're back-signing them. 
+ """ + from warehouse.db import Session + from warehouse.packaging.models import File - for privkey in key_service.privkeys_for_role(Role.BINS.value): - bins_role.load_signing_key(privkey) + request = config.task(_add_hashed_targets).get_request() + db = Session(bind=request.registry["sqlalchemy.engine"]) - bins_role.delegate_hashed_bins( - [], - key_service.pubkeys_for_role(Role.BIN_N.value), - BIN_N_COUNT, - ) + targets = list() + for file in db.query(File).all(): + hashes = {"blake2b-256": file.blake2_256_digest} + targetinfo = dict() + targetinfo["length"] = file.size + targetinfo["hashes"] = hashes + targetinfo["custom"] = {"backsigned": True} + targets.append({"info": targetinfo, "path": file.path}) - dirty_roles = ["snapshot", "targets", "timestamp", Role.BINS.value] - for bin_n_role in bins_role.delegations: - _set_expiration_for_role(config, bin_n_role, Role.BIN_N.value) - dirty_roles.append(bin_n_role.rolename) + config.task(_add_hashed_targets).run(request, targets) - for privkey in key_service.privkeys_for_role(Role.BIN_N.value): - for bin_n_role in bins_role.delegations: - bin_n_role.load_signing_key(privkey) - # Collect the "paths" for every PyPI package. These are packages already in - # existence, so we'll add some additional data to their targets to - # indicate that we're back-signing them. +@dev.command() +@click.pass_obj +def add_all_indexes(config): + """ + Collect every PyPI project Index and add as targets. 
+ """ from warehouse.db import Session - from warehouse.packaging.models import File - - db = Session(bind=config.registry["sqlalchemy.engine"]) - for file in db.query(File).all(): - fileinfo = _make_backsigned_fileinfo_from_file(file) - bins_role.add_target_to_bin( - file.path, - number_of_bins=BIN_N_COUNT, - fileinfo=fileinfo, + from warehouse.packaging.models import Project + + request = config.task(_add_hashed_targets).get_request() + request.db = Session(bind=request.registry["sqlalchemy.engine"]) + + targets = list() + for project in request.db.query(Project).all(): + try: + simple_detail = render_simple_detail(project, request, store=True) + except OSError as err: + raise click.ClickException(str(err)) + + if simple_detail.get("content_hash") is None: + continue + hashes = {"blake2b-256": simple_detail.get("content_hash")} + targetinfo = dict() + targetinfo["hashes"] = hashes + targetinfo["length"] = simple_detail.get("length") + targetinfo["custom"] = {"backsigned": True} + targets.append( + { + "info": targetinfo, + "path": f"{project.normalized_name}/{project.normalized_name}.html", + } ) - repository.mark_dirty(dirty_roles) - repository.writeall( - consistent_snapshot=True, - use_existing_fileinfo=True, - ) + config.task(_add_hashed_targets).run(request, targets) diff --git a/warehouse/config.py b/warehouse/config.py index b7c9714a1a23..969b2713b19b 100644 --- a/warehouse/config.py +++ b/warehouse/config.py @@ -242,7 +242,7 @@ def configure(settings=None): maybe_set_compound(settings, "malware_check", "backend", "MALWARE_CHECK_BACKEND") maybe_set_compound(settings, "tuf", "key_backend", "TUF_KEY_BACKEND") maybe_set_compound(settings, "tuf", "storage_backend", "TUF_STORAGE_BACKEND") - maybe_set_compound(settings, "tuf", "repo_backend", "TUF_REPO_BACKEND") + maybe_set_compound(settings, "tuf", "repository_backend", "TUF_REPOSITORY_BACKEND") # Pythondotorg integration settings maybe_set(settings, "pythondotorg.host", "PYTHONDOTORG_HOST", default="python.org") @@ 
-515,10 +515,6 @@ def configure(settings=None): # Register TUF support for package integrity config.include(".tuf") - # Serve the TUF metadata files. - # TODO: This should be routed to the TUF GCS bucket. - config.add_static_view("tuf", "warehouse:tuf/dist/metadata.staged/") - # Configure redirection support config.include(".redirects") diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index a0790b8fefc6..3187bce7ccec 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -63,7 +63,7 @@ Release, Role, ) -from warehouse.tuf.interfaces import IRepositoryService +from warehouse.packaging.tasks import update_bigquery_release_files from warehouse.utils import http, readme ONE_MB = 1 * 1024 * 1024 @@ -1471,8 +1471,56 @@ def file_upload(request): }, ) - repository = request.find_service(IRepositoryService) - repository.add_target(file_) + # We are flushing the database requests so that we + # can access the server default values when initiating celery + # tasks. 
+ request.db.flush() + + # Push updates to BigQuery + dist_metadata = { + "metadata_version": form["metadata_version"].data, + "name": form["name"].data, + "version": form["version"].data, + "summary": form["summary"].data, + "description": form["description"].data, + "author": form["author"].data, + "description_content_type": form["description_content_type"].data, + "author_email": form["author_email"].data, + "maintainer": form["maintainer"].data, + "maintainer_email": form["maintainer_email"].data, + "license": form["license"].data, + "keywords": form["keywords"].data, + "classifiers": form["classifiers"].data, + "platform": form["platform"].data, + "home_page": form["home_page"].data, + "download_url": form["download_url"].data, + "requires_python": form["requires_python"].data, + "pyversion": form["pyversion"].data, + "filetype": form["filetype"].data, + "comment": form["comment"].data, + "requires": form["requires"].data, + "provides": form["provides"].data, + "obsoletes": form["obsoletes"].data, + "requires_dist": form["requires_dist"].data, + "provides_dist": form["provides_dist"].data, + "obsoletes_dist": form["obsoletes_dist"].data, + "requires_external": form["requires_external"].data, + "project_urls": form["project_urls"].data, + "filename": file_data.filename, + "python_version": file_data.python_version, + "packagetype": file_data.packagetype, + "comment_text": file_data.comment_text, + "size": file_data.size, + "has_signature": file_data.has_signature, + "md5_digest": file_data.md5_digest, + "sha256_digest": file_data.sha256_digest, + "blake2_256_digest": file_data.blake2_256_digest, + "path": file_data.path, + "uploaded_via": file_data.uploaded_via, + "upload_time": file_data.upload_time, + } + if not request.registry.settings.get("warehouse.release_files_table") is None: + request.task(update_bigquery_release_files).delay(dist_metadata) # Log a successful upload metrics.increment("warehouse.upload.ok", tags=[f"filetype:{form.filetype.data}"]) 
diff --git a/warehouse/packaging/utils.py b/warehouse/packaging/utils.py index f321217f546f..e12bbd965201 100644 --- a/warehouse/packaging/utils.py +++ b/warehouse/packaging/utils.py @@ -39,6 +39,9 @@ def _simple_detail(project, request): def render_simple_detail(project, request, store=False): context = _simple_detail(project, request) + if len(context.get("files")) == 0: + return {"content_hash": None, "path": None, "length": None} + env = request.registry.queryUtility(IJinja2Environment, name=".jinja2") template = env.get_template("templates/legacy/api/simple/detail.html") content = template.render(**context, request=request) @@ -56,7 +59,6 @@ def render_simple_detail(project, request, store=False): with tempfile.NamedTemporaryFile() as f: f.write(content.encode("utf-8")) f.flush() - storage.store( simple_detail_path, f.name, @@ -66,6 +68,7 @@ def render_simple_detail(project, request, store=False): "hash": content_hash, }, ) + length = os.path.getsize(f.name) storage.store( os.path.join(project.normalized_name, "index.html"), f.name, @@ -75,5 +78,4 @@ def render_simple_detail(project, request, store=False): "hash": content_hash, }, ) - - return (content_hash, simple_detail_path) + return {"content_hash": content_hash, "path": simple_detail_path, "length": length} diff --git a/warehouse/routes.py b/warehouse/routes.py index e9d6006af7ed..4d05b6db692c 100644 --- a/warehouse/routes.py +++ b/warehouse/routes.py @@ -17,7 +17,7 @@ def includeme(config): # Forklift is properly split out into it's own project. warehouse = config.get_settings().get("warehouse.domain") files_url = config.get_settings()["files.url"] - + metadata_url = config.get_settings()["tuf.url"] # Simple Route for health checks. 
config.add_route("health", "/_health/") @@ -324,6 +324,7 @@ def includeme(config): domain=warehouse, ) config.add_route("packaging.file", files_url) + config.add_route("tuf.metadata", metadata_url) # SES Webhooks config.add_route("ses.hook", "/_/ses-hook/", domain=warehouse) @@ -441,6 +442,7 @@ def includeme(config): ) config.add_redirect("/pypi/", "/", domain=warehouse) config.add_redirect("/packages/{path:.*}", files_url, domain=warehouse) + config.add_redirect("/metadata/{path:.*}", metadata_url, domain=warehouse) # Legacy Action Redirects config.add_pypi_action_redirect("rss", "/rss/updates.xml", domain=warehouse) diff --git a/warehouse/tuf/README.md b/warehouse/tuf/README.md new file mode 100644 index 000000000000..9a1c10b15686 --- /dev/null +++ b/warehouse/tuf/README.md @@ -0,0 +1,242 @@ +# General TUF Warehouse implementation Notes + +## Current Warehouse and tools (pip, twine, WebUI) flow investigation + +### twine + 1. POST request ``/simple/{project}`` and file name to Warehouse + 2. Warehouse proceeds with validations + 3. Warehouse uses the ``forklift.legacy.file_upload()`` and writes to the + ``db`` and ``Storage[/packages/{blake2b_256/XX/YY}/{filename}]`` + + +### PyPI WebUI + + 1. from the ``manage.views.ManageProjectRelease()`` request to remove a + release version using the ``utils.project.remove_project()`` + 2. The file is deleted from ``db``, but not from the `Storage[/packages]` + +### PIP +Using different commands ``pip `` + +#### index + 1. Request ``/simple/{project}`` + 2. Warehouse dynamically renders the index + ``legacy.api.simple_detail()`` -> ``packaging.utils._simple_detail()`` + if the project exists. + +#### download + 1. Call ``pip index`` + 2. Look for the latest version in the simple index and request from + ``Storage[/packages/{blake2b_256/XX/YY}/{filename}]`` + +#### install + 1. Call ``pip index`` + 2. Call ``pip install`` + 3. Look into the dependencies + 4. 
Loop over the dependencies + ```mermaid + sequenceDiagram + participant pip + participant warehouse + pip->>warehouse: simple/ + warehouse-->>pip: 404, simple index not found + warehouse->>pip: 200, simple index + loop look in the index + pip->>pip: Get latest version or specific version + end + pip->>warehouse: Get specific version /packages/{blake2b_256/XX/YY}/ + warehouse-->pip: 404, not found + warehouse->>pip: 200, + pip->>pip: Looking for dependencies + ``` + + +## General flows on Warehouse +```mermaid + flowchart TD + + subgraph pip["pip "] + download + index + install + end + PyPI[PyPI WebUI] + twine + + subgraph warehouse + request["request /simple/{project} dynamic (transversal)"] + subgraph forklift + legacy.file_upload["legacy.file_upload()"] + end + subgraph legacy + api.simple_detail["api.simple_detail()"] + end + subgraph manage + views.ManageProjectRelease + end + subgraph utils + project.remove_project["project.remove_project()"] + end + subgraph packaging + utils._simple_detail["utils._simple_detail()"] + utils.render_simple_index["utils.render_simple_index()"] + end + end + + db[(Database)] + simple[("[local, SaaS]\n/simple/{project}/index.html\n/simple//.html")] + packages[("[local, SaaS]\n/packages/{blake2b_256/XX/YY}/")] + + + download--1-->request + download--2-->packages + install--1-->request + install--2-->packages + index-->request + twine-->request + PyPI-->views.ManageProjectRelease + request-->legacy.file_upload + views.ManageProjectRelease-->project.remove_project + legacy.file_upload--->db + legacy.file_upload--->packages + project.remove_project-->db + request-->api.simple_detail + api.simple_detail-->utils._simple_detail + utils.render_simple_index-.->simple + + + linkStyle 0,2,4,12,13 stroke:blue; + linkStyle 1,3 stroke:green; + linkStyle 5,7,9,10 stroke:yellow; + linkStyle 6,8,11 stroke:red; + style utils.render_simple_index fill:purple + style db fill:black,stroke:grey + style packages fill:black,stroke:grey + style 
simple fill:purple,stroke:grey +``` + +- [PR 8586](https://github.com/pypa/warehouse/pull/8586), which +enables the persistent index for Simple Details, was recently merged. + +## TUF WIP + +This work refactors the [Draft PR](https://github.com/pypa/warehouse/pull/7488) by +@woodruffw, to build a new repository tool on top of the Python-TUF Metadata API, and +use it instead of the Python-TUF repository tool that was deprecated in v1.0.0. + +**Note to reviewer** + +The current implementation has some development-only components, and lacks a few services for full PEP 458 compliance as well as extensive tests. However, it should qualify for a review of the overall architecture and flow (see details in 'Overview' below). Components and functionality that are planned for subsequent PRs are listed in 'Next steps' below. + +### Overview + + ```mermaid + classDiagram + direction LR + class MetadataRepository { + <> + +storage_backend + +key_backend + initialize() + load_role() + bump_role_version() + timestamp_bump_version() + snapshot_bump_version() + snapshot_update_meta() + delegate_targets_roles() + add_targets() + } + class `tuf.interfaces` { + zope.interface.Interface + IKeyService(Interface) + IStorageService(Interface) + IRepositoryService(Interface) + } + class `tuf.services` { + IKeyService + IRepositoryService + IStorageService + LocalKeyService(IKeyService) + LocalStorageService(IStorageService) + RepositoryService(IRepositoryService) + } + class `tuf.tasks` { + init_repository + init_targets_delegation + bump_snapshot + bump_bin_n_roles + add_hashed_targets + } + + class `cli.tuf`{ + dev keypair + dev init-repo + dev init-delegations + dev add-all-packages + dev add-all-indexes + dev bump-snapshot + dev bump-bin-n-roles + } + + + `tuf.services` <|-- `tuf.interfaces` + `tuf.services` --* MetadataRepository + `tuf.tasks` -- `tuf.services` + `cli.tuf` -- `tuf.tasks` + warehouse -- `cli.tuf` + warehouse -- `tuf.tasks` + ``` + +#### warehouse.tuf.repository + +- 
``MetadataRepository`` implements a custom TUF metadata repository tool on top of +the new Python-TUF Metadata API to create and maintain (update, sign, sync with storage) TUF metadata for Warehouse. + + +#### warehouse.tuf.services + +- ``LocalKeyService`` provides a local file storage backend for TUF role keys used by the repository tool (development only!!). +- ``LocalStorageService`` provides a local file storage backend for TUF role metadata used by the repository tool. +- ``RepositoryService`` provides methods for common Warehouse-TUF tasks, using the repository tool. + +#### warehouse.tuf.tasks + +Defines common Warehouse-TUF tasks that use the `RepositoryService` for +- bootstrapping a metadata repository (`init_repository`, `init_targets_delegation`), +- updating metadata upon package upload (`add_hashed_targets`) +- scheduled metadata updates (`bump_bin_n_roles`, `bump_snapshot`) + +#### warehouse.cli.tuf + +Defines development commands for bootstrapping a TUF metadata repository (`keypair`, `init_repo`, `init_delegations`), backsigning existing packages and simple index pages (`add_all_packages`, `add_all_indexes`), and for manually triggering scheduled tasks (`bump_bin_n_roles`, `bump_snapshot`). CLI calls go through `warehouse.cli.tasks`, to take advantage of the Celery/Redis queue. 
+ + +### Next steps: + +- [ ] Polish the new Warehouse metadata repository tool based on review feedback +- [ ] PRs to implement TUF in the Warehouse request flow + - upload target file + - delete target file + - tasks for refreshing indexes/projects +- [ ] Tests + + +## Using the Warehouse development environment for TUF + +Follow the official Warehouse getting-started guide up to [``make initdb``](https://warehouse.pypa.io/development/getting-started.html#) + +```shell +$ make inittuf +``` + +The metadata is available at http://localhost:9001/metadata/ + +You can also upload a file to Warehouse and add the targets using the CLI: +- Create a user [using the Web UI](https://warehouse.pypa.io/development/getting-started.html#viewing-warehouse-in-a-browser) +- Validate the [email](https://warehouse.pypa.io/development/email.html) +- Upload a file using ``twine`` + +```shell +docker-compose run --rm web python -m warehouse tuf dev add-all-packages +docker-compose run --rm web python -m warehouse tuf dev add-all-indexes +``` diff --git a/warehouse/tuf/__init__.py b/warehouse/tuf/__init__.py index 8a1878f0e2c4..a82a44734ba5 100644 --- a/warehouse/tuf/__init__.py +++ b/warehouse/tuf/__init__.py @@ -13,7 +13,8 @@ from celery.schedules import crontab from warehouse.tuf.interfaces import IKeyService, IRepositoryService, IStorageService -from warehouse.tuf.tasks import bump_bin_ns, bump_snapshot +from warehouse.tuf.repository import SPEC_VERSION +from warehouse.tuf.tasks import bump_bin_n_roles, bump_snapshot def includeme(config): @@ -24,7 +25,7 @@ def includeme(config): "tuf.root.expiry": 31536000, "tuf.snapshot.threshold": 1, "tuf.snapshot.expiry": 86400, - "tuf.targets.threshold": 1, + "tuf.targets.threshold": 2, "tuf.targets.expiry": 31536000, "tuf.timestamp.threshold": 1, "tuf.timestamp.expiry": 86400, @@ -38,7 +39,7 @@ def includeme(config): # An amended version of the PEP should be published, at which point # this note can be removed. 
"tuf.bin-n.expiry": 604800, - "tuf.spec_version": "1.0.0", + "tuf.spec_version": SPEC_VERSION, } ) @@ -52,11 +53,11 @@ def includeme(config): storage_service_class.create_service, IStorageService ) - repo_service_class = config.maybe_dotted( - config.registry.settings["tuf.repo_backend"] + repository_service_class = config.maybe_dotted( + config.registry.settings["tuf.repository_backend"] ) config.register_service_factory( - repo_service_class.create_service, IRepositoryService + repository_service_class.create_service, IRepositoryService ) # Per PEP 458: The snapshot and timestamp metadata expire every 24 hours. @@ -68,4 +69,4 @@ def includeme(config): # note in tuf/__init__.py. # We conservatively bump all delegated bins at least once daily. config.add_periodic_task(crontab(minute=0, hour="*/6"), bump_snapshot) - config.add_periodic_task(crontab(minute=0, hour=0), bump_bin_ns) + config.add_periodic_task(crontab(minute=0, hour=0), bump_bin_n_roles) diff --git a/warehouse/tuf/constants.py b/warehouse/tuf/constants.py index 0f42a1162629..3ed4c8fbfa05 100644 --- a/warehouse/tuf/constants.py +++ b/warehouse/tuf/constants.py @@ -23,13 +23,6 @@ class Role(enum.Enum): BIN_N: str = "bin-n" -TOPLEVEL_ROLES = [ - Role.ROOT.value, - Role.SNAPSHOT.value, - Role.TARGETS.value, - Role.TIMESTAMP.value, -] - HASH_ALGORITHM = "blake2b" TUF_REPO_LOCK = "tuf-repo" diff --git a/warehouse/tuf/hash_bins.py b/warehouse/tuf/hash_bins.py new file mode 100644 index 000000000000..9e7fb43de251 --- /dev/null +++ b/warehouse/tuf/hash_bins.py @@ -0,0 +1,90 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib + +from typing import Iterator, List, Tuple + + +class HashBins: + """ + Hash Bins management + + This code is inspired on python-tuf repo examples for hash bins + """ + + def __init__(self, number_of_bins: int) -> None: + """ + Hash Bins + + Args: + number_of_bins: number of bins + """ + self.number_of_bins = number_of_bins + # The prefix length is the number of digits in the hexadecimal representation + # (see 'x' in Python Format Specification) of the number of bins minus one + # (counting starts at zero), i.e. ... + self.prefix_len = len(f"{(self.number_of_bins - 1):x}") # ... 2. + # Compared to decimal, hexadecimal numbers can express higher numbers + # with fewer digits and thus further decrease metadata sizes. With the + # above prefix length of 2 we can represent at most 256 prefixes, i.e. + # 00, 01, ..., ff. + self.number_of_prefixes = 16**self.prefix_len + # If the number of bins is a power of two, hash prefixes are evenly + # distributed over all bins, which allows to calculate the uniform size + # of 8, where each bin is responsible for a range of 8 prefixes, i.e. + # 00-07, 08-0f, ..., f8-ff. + self.bin_size = self.number_of_prefixes // self.number_of_bins + + def _bin_name(self, low: int, high: int) -> str: + """ + Generates a bin name according to the hash prefixes the bin serves. + + The name is either a single hash prefix for bin size 1, or a range of hash + prefixes otherwise. The prefix length is needed to zero-left-pad the + hex representation of the hash prefix for uniform bin name lengths. 
+ """ + if low == high: + return f"{low:0{self.prefix_len}x}" + + return f"{low:0{self.prefix_len}x}-{high:0{self.prefix_len}x}" + + def generate(self) -> Iterator[Tuple[str, List[str]]]: + """Returns generator for bin names and hash prefixes per bin.""" + # Iterate over the total number of hash prefixes in 'bin size'-steps to + # generate bin names and a list of hash prefixes served by each bin. + for low in range(0, self.number_of_prefixes, self.bin_size): + high = low + self.bin_size - 1 + bin_name = self._bin_name(low, high) + hash_prefixes = [] + for prefix in range(low, low + self.bin_size): + hash_prefixes.append(f"{prefix:0{self.prefix_len}x}") + + yield bin_name, hash_prefixes + + def get_delegate(self, file_path: str) -> str: + """ + Gets the delegated role name bin based on the target file path. + + Args: + file_path + + Returns: + bin name low-high + """ + hasher = hashlib.sha256() + hasher.update(file_path.encode("utf-8")) + target_name_hash = hasher.hexdigest() + prefix = int(target_name_hash[: self.prefix_len], 16) + low = prefix - (prefix % self.bin_size) + high = low + self.bin_size - 1 + return self._bin_name(low, high) diff --git a/warehouse/tuf/interfaces.py b/warehouse/tuf/interfaces.py index 7a4e4915abab..2bab97fc495e 100644 --- a/warehouse/tuf/interfaces.py +++ b/warehouse/tuf/interfaces.py @@ -20,15 +20,8 @@ def create_service(context, request): created. """ - def pubkeys_for_role(rolename): - """ - Return a list of (TUF-formatted) public keys for the given TUF role. - """ - - def privkeys_for_role(rolename): - """ - Return a list of (TUF-formatted) private keys for the given TUF role. - """ + def get(rolename, key_type): + """Return a key from specific rolename""" class IStorageService(Interface): @@ -38,9 +31,22 @@ def create_service(context, request): created. """ - def get_backend(): + def get(rolename, version): + """ + Return metadata from specific role name, optionally specific version. 
+ """ + + def put(file_object, filename): + """ + Stores file object with a specific filename. + + An alias to store() to be compatible with + ``tuf.api.metadata.StorageBackendInterface`` + """ + + def store(file_object, filename): """ - Return an implementation of `securesystemslib.storage.StorageBackendInterface`. + Stores file object with a specific filename. """ @@ -51,18 +57,49 @@ def create_service(context, request): created. """ - def load_repository(): + def init_repository(): + """ + Initializes a Metadata Repository from scratch, including a new root. + """ + + def init_targets_delegation(): + """ + Delegate targets role bins further delegates to the bin-n roles, + which sign for all distribution files belonging to registered PyPI + projects. """ - Return a TUF Repository object for direct manipulation of the underlying - repository. - NOTE: The Repository object returned from this method cannot be manipulated - safely by multiple tasks or threads, especially. It should only be used during - TUF initialization or offline maintenance tasks. + def bump_snapshot(): + """ + Bump the Snapshot Metadata Role """ - def add_target(file, backsigned=False): + def bump_bin_n_roles(): + """ + Bump all BIN-N delegate roles Metadata """ - Given a warehouse.packaging.models.File, add it to the TUF - repository. + + def add_hashed_targets(targets): + """ + Add hashed Targets + + Args: + targets: list of dictionaries with file ``info`` and ``path``. + + ``info`` contains a dict with ``length``, ``hashes`` and optionally a + ``custom`` nested dictionary. 
+ ``path`` file path + + Example: + ``` + [ + { + "info": { + "hashes": {"blake2b-256": file.blake2_256_digest}, + "length": 256, + "custom": {"key": "value"}, + }, + "path": "/xx/yy/file.tar.gz" + } + ] """ diff --git a/warehouse/tuf/repository.py b/warehouse/tuf/repository.py new file mode 100644 index 000000000000..f919ef0f936a --- /dev/null +++ b/warehouse/tuf/repository.py @@ -0,0 +1,490 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, List, Optional + +from securesystemslib.exceptions import StorageError +from securesystemslib.signer import SSlibSigner +from tuf.api.metadata import ( + SPECIFICATION_VERSION, + TOP_LEVEL_ROLE_NAMES, + DelegatedRole, + Delegations, + Key, + Metadata, + MetaFile, + Role, + Root, + Snapshot, + StorageBackendInterface, + TargetFile, + Targets, + Timestamp, +) +from tuf.api.serialization.json import JSONSerializer + +from warehouse.tuf.interfaces import IKeyService + +SPEC_VERSION = ".".join(SPECIFICATION_VERSION) + + +@dataclass +class RolesPayload: + """ + Container for various role data. + + This includes data that can be assigned to any role (``expiration`` and + ``threshold``), data that can only be assigned to delegating roles + (``delegation_role``, ``paths``, ``path_hash_prefixes`` and the keyids or public + portions of ``keys``), and data that is used to sign any roles (private portion of + ``keys``). 
+ + """ + + expiration: datetime + threshold: int + keys: List[Dict[str, Any]] + delegation_role: Optional[str] = None + paths: Optional[List[str]] = None + path_hash_prefixes: Optional[List[str]] = None + + +@dataclass +class TargetsPayload: + """ + Container for target files info, suitable for targets metadata. + """ + + fileinfo: Dict[str, Any] + path: str + + +class MetadataRepository: + """ + TUF metadata repository abstraction to create and maintain role metadata. + """ + + def __init__( + self, + storage_backend: StorageBackendInterface, + key_backend: IKeyService, + ): + self.storage_backend: StorageBackendInterface = storage_backend + self.key_backend: IKeyService = key_backend + + @property + def is_initialized(self) -> bool: + """ + Repository state based on metadata availability in storage. + """ + try: + if any( + role + for role in TOP_LEVEL_ROLE_NAMES + if isinstance(self.load_role(role), Metadata) + ): + return True + except StorageError as err: + if "Can't open" in str(err): + return False + + return False + + def _create_delegated_targets_roles( + self, + delegator_metadata: Metadata, + snapshot_metadata: Optional[Metadata[Snapshot]], + delegate_role_parameters: List[RolesPayload], + ) -> Metadata[Snapshot]: + """ + Creates delegated targets roles metadata and updates delegator and snapshot. 
+ """ + if not snapshot_metadata: + snapshot_metadata = self.load_role(Snapshot.type) + + for role_parameter in delegate_role_parameters: + rolename = role_parameter.delegation_role + try: + if self.load_role(rolename): + raise FileExistsError(f"Role {rolename} already exists.") + except StorageError: + pass + + delegated_role = DelegatedRole( + name=rolename, + keyids=[key["keyid"] for key in role_parameter.keys], + threshold=role_parameter.threshold, + terminating=None, + paths=role_parameter.paths, + path_hash_prefixes=role_parameter.path_hash_prefixes, + ) + + if delegator_metadata.signed.delegations is None: + delegation = self._build_delegations( + rolename, delegated_role, role_parameter.keys + ) + delegator_metadata.signed.delegations = delegation + else: + delegator_metadata.signed.delegations.roles[rolename] = delegated_role + + targets = Targets(1, SPEC_VERSION, role_parameter.expiration, {}, None) + role_metadata = Metadata(targets, {}) + + for key in role_parameter.keys: + delegator_metadata.signed.add_key( + rolename, Key.from_securesystemslib_key(key) + ) + role_metadata.sign(SSlibSigner(key), append=True) + + self._store(rolename, role_metadata) + snapshot_metadata = self.snapshot_update_meta( + rolename, role_metadata.signed.version, snapshot_metadata + ) + + return snapshot_metadata + + def _filename(self, rolename: str, version: int) -> str: + """ + Builds metadata filename with passed role name and metadata version. + """ + if rolename == Timestamp.type: + filename = f"{rolename}.json" + else: + filename = f"{version}.{rolename}.json" + + return filename + + def _store(self, rolename: str, metadata: Metadata) -> None: + """ + Writes role metadata to storage using the configured backend. 
+ """ + filename = self._filename(rolename, metadata.signed.version) + metadata.to_file(filename, JSONSerializer(), self.storage_backend) + + def _build_delegations( + self, rolename: str, delegated_role: DelegatedRole, keys: List[Dict[str, Any]] + ) -> Delegations: + """ + Returns ``Delegations`` object assigning passed keys and roles information. + """ + return Delegations( + keys={key["keyid"]: Key.from_securesystemslib_key(key) for key in keys}, + roles={rolename: delegated_role}, + ) + + def initialize( + self, payload: Dict[str, RolesPayload], store: Optional[bool] + ) -> Dict[str, Metadata]: + """ + Initializes metadata repository with basic top-level role metadata. + + Args: + payload: Initial per-role infos to populate metadata. + store: Indicates whether metadata should be written to storage. + + Raises: + FileExistsError: Repository is already initialized. + ValueError: Not enough signing keys for the signature threshold of a role. + + Returns: + Dictionary of role names as keys and metadata objects as values. 
+ ``Dict[str, Metadata]`` + """ + + top_level_roles_metadata = dict() + if self.is_initialized: + raise FileExistsError("Metadata already exists in the Storage Service") + + targets = Targets(1, SPEC_VERSION, payload[Targets.type].expiration, {}, None) + targets_metadata = Metadata(targets, {}) + top_level_roles_metadata[Targets.type] = targets_metadata + + meta = {"targets.json": MetaFile(targets.version)} + snapshot = Snapshot(1, SPEC_VERSION, payload[Snapshot.type].expiration, meta) + snapshot_metadata = Metadata(snapshot, {}) + top_level_roles_metadata[Snapshot.type] = snapshot_metadata + + snapshot_meta = MetaFile(snapshot.version) + timestamp = Timestamp( + 1, SPEC_VERSION, payload[Timestamp.type].expiration, snapshot_meta + ) + timestamp_metadata = Metadata(timestamp, {}) + top_level_roles_metadata[Timestamp.type] = timestamp_metadata + + roles = { + role_name: Role([], payload[role_name].threshold) + for role_name in TOP_LEVEL_ROLE_NAMES + } + root = Root(1, SPEC_VERSION, payload[Root.type].expiration, {}, roles, True) + + # Sign all top level roles metadata + signers = dict() + for role in TOP_LEVEL_ROLE_NAMES: + if payload[role].threshold > len(payload[role].keys): + raise ValueError( + f"Role {role} has missing Key(s) " + f"to match to defined threshold {payload[role].threshold}." + ) + + for key in payload[role].keys: + root.add_key(role, Key.from_securesystemslib_key(key)) + + signers[role] = { + key["keyid"]: SSlibSigner(key) for key in payload[role].keys + } + + root_metadata = Metadata(root, {}) + top_level_roles_metadata[Root.type] = root_metadata + for role in signers: + for signer in signers[role].values(): + top_level_roles_metadata[role].sign(signer, append=True) + + if store: + self._store(role, top_level_roles_metadata[role]) + + return top_level_roles_metadata + + def load_role(self, rolename: str) -> Metadata: + """ + Loads latest version of metadata for rolename using configured storage backend. 
+ + NOTE: The storage backend is expected to translate rolenames to filenames. + + Returns: + Role metadata + ``tuf.api.metadata.Metadata`` + """ + return Metadata.from_file(rolename, None, self.storage_backend) + + def delegate_targets_roles( + self, + payload: Dict[str, List[RolesPayload]], + ) -> Metadata[Snapshot]: + """ + Performs targets delegation for delegator-to-delegates items in passed payload. + + Creates new basic delegate metadata, configures delegation in delegator metadata + and bumps its version, and updates snapshot metadata accordingly. + + Args: + payload: Dictionary of delegator role names as keys and lists of + per-delegate info to populate the delegate metadata. + + Raises: + FileExistsError: Delegate metadata already exists. + + Returns: + Updated snapshot metadata + ``tuf.api.metadata.Metadata[Snapshot]`` + """ + + snapshot_metadata = self.load_role(Snapshot.type) + for delegator, delegate_role_parameters in payload.items(): + delegator_metadata = self.load_role(delegator) + snapshot_metadata = self._create_delegated_targets_roles( + delegator_metadata, snapshot_metadata, delegate_role_parameters + ) + delegator_metadata = self.bump_role_version( + rolename=delegator, + role_metadata=delegator_metadata, + role_expires=delegator_metadata.signed.expires, + key_rolename=None, + store=True, + ) + snapshot_metadata = self.snapshot_update_meta( + delegator, delegator_metadata.signed.version, snapshot_metadata + ) + + return snapshot_metadata + + def bump_role_version( + self, + rolename: str, + role_metadata: Metadata, + role_expires: datetime, + key_rolename: Optional[str] = None, + store: Optional[bool] = False, + ) -> Metadata: + """ + Bumps metadata version by one and assigns new expiration date for passed role. + + Args: + rolename: Used to associate signing key and (optionally) store metadata. + role_metadata: Role metadata to be bumped. + role_expires: New role expiration date. 
+ key_rolename: Used to associate a signing key by a name other than rolename. + store: Indicates whether metadata should be written to storage. + + Returns: + Updated metadata + ``tuf.api.metadata.Metadata`` + """ + if key_rolename: + key_rolename = key_rolename + else: + key_rolename = rolename + role_metadata.signed.expires = role_expires + role_metadata.signed.version += 1 + key_rolename_keys = self.key_backend.get(key_rolename, "private") + for key in key_rolename_keys: + role_metadata.sign(SSlibSigner(key), append=True) + + if store: + self._store(rolename, role_metadata) + + return role_metadata + + def timestamp_bump_version( + self, + snapshot_version: int, + timestamp_expires: datetime, + store: bool = False, + ) -> Metadata[Timestamp]: + """ + Bumps timestamp metadata version by one and assigns new expiration date. + + Args: + snapshot_version: New snapshot version for timestamp meta field. + timestamp_expires: New timestamp expiration date. + store: Indicates whether metadata should be written to storage. + + Returns: + Updated timestamp metadata + ``tuf.api.metadata.Metadata[Timestamp]`` + """ + timestamp_metadata = self.load_role(Timestamp.type) + timestamp_metadata.signed.version += 1 + timestamp_metadata.signed.expires = timestamp_expires + timestamp_metadata.signed.snapshot_meta = MetaFile(version=snapshot_version) + timestamp_keys = self.key_backend.get(Timestamp.type, "private") + for key in timestamp_keys: + timestamp_metadata.sign(SSlibSigner(key), append=True) + + if store: + self._store(Timestamp.type, timestamp_metadata) + + return timestamp_metadata + + def snapshot_bump_version( + self, + snapshot_expires: datetime, + snapshot_metadata: Optional[Metadata[Snapshot]] = None, + store: Optional[bool] = False, + ) -> Metadata[Snapshot]: + """ + Bumps snapshot metadata version by one and assigns new expiration date. + + Args: + snapshot_expires: New snapshot expiration date. + snapshot_metadata: Snapshot metadata to be bumped. 
If not passed, snapshot + metadata is loaded from storage. + store: Indicates whether updated snapshot metadata should be written to + storage. + + Returns: + Updated snapshot metadata + ``tuf.api.metadata.Metadata[Snapshot]`` + """ + if snapshot_metadata is None: + snapshot_metadata = self.load_role(Snapshot.type) + + snapshot_metadata.signed.version += 1 + snapshot_metadata.signed.expires = snapshot_expires + snapshot_keys = self.key_backend.get(Snapshot.type, "private") + for key in snapshot_keys: + snapshot_metadata.sign(SSlibSigner(key), append=True) + + if store is True: + self._store(Snapshot.type, snapshot_metadata) + + return snapshot_metadata + + def snapshot_update_meta( + self, + meta_role_name: str, + meta_role_version: int, + snapshot_metadata: Optional[Metadata[Snapshot]] = None, + ) -> Metadata[Snapshot]: + """ + Adds targets metadata information to snapshot metadata. + + Args: + meta_role_name: Targets metadata name to be added to snapshot. + meta_role_version: Targets metadata version to be added to snapshot. + snapshot_metadata: Snapshot metadata to be updated. If not passed, snapshot + metadata is loaded from storage. + + Return: + Updated snapshot metadata + ``tuf.api.metadata.Metadata[Snapshot]`` + """ + if snapshot_metadata is None: + snapshot_metadata = self.load_role(Snapshot.type) + + snapshot_metadata.signed.meta[f"{meta_role_name}.json"] = MetaFile( + version=meta_role_version + ) + + return snapshot_metadata + + def add_targets( + self, + payload: Dict[str, List[TargetsPayload]], + key_rolename: str, + ) -> Metadata[Snapshot]: + """ + Adds target files info to targets metadata and updates snapshot. + + The targets metadata is loaded from storage, assigned the passed target files + info, has its version incremented by one, and is signed and written back + to storage. Snapshot, also loaded from storage, is updated with the new targets + metadata versions. + + NOTE: Snapshot metadata version is not updated. 
+ + Args: + payload: Dictionary of targets role names as keys and lists of target file + info objects. + key_rolename: Targets metadata signing key in key storage. + + Returns: + Updated snapshot metadata + ``tuf.api.metadata.Metadata[Snapshot]`` + """ + snapshot_metadata = self.load_role(Snapshot.type) + + for rolename, targets in payload.items(): + role_metadata = self.load_role(rolename) + for target in targets: + target_file = TargetFile.from_dict(target.fileinfo, target.path) + role_metadata.signed.targets[target.path] = target_file + + role_metadata.signed.version += 1 + role_keys = self.key_backend.get(key_rolename, "private") + for key in role_keys: + role_metadata.sign(SSlibSigner(key), append=True) + + self._store(rolename, role_metadata) + role_metadata = self.bump_role_version( + rolename=rolename, + role_metadata=role_metadata, + role_expires=role_metadata.signed.expires, + key_rolename=key_rolename, + store=True, + ) + snapshot_metadata = self.snapshot_update_meta( + rolename, role_metadata.signed.version, snapshot_metadata + ) + + return snapshot_metadata diff --git a/warehouse/tuf/services.py b/warehouse/tuf/services.py index 3b39951d2b64..54a48c30212d 100644 --- a/warehouse/tuf/services.py +++ b/warehouse/tuf/services.py @@ -11,15 +11,31 @@ # limitations under the License. 
+import datetime +import glob import os.path +import shutil import warnings -from tuf import repository_tool +from contextlib import contextmanager + +from securesystemslib.exceptions import StorageError +from securesystemslib.interface import ( + import_ed25519_privatekey_from_file, + import_ed25519_publickey_from_file, +) from zope.interface import implementer +from warehouse.config import Environment +from warehouse.tuf.constants import BIN_N_COUNT, Role +from warehouse.tuf.hash_bins import HashBins from warehouse.tuf.interfaces import IKeyService, IRepositoryService, IStorageService -from warehouse.tuf.tasks import add_target -from warehouse.tuf.utils import GCSBackend, LocalBackend, make_fileinfo +from warehouse.tuf.repository import ( + TOP_LEVEL_ROLE_NAMES, + MetadataRepository, + RolesPayload, + TargetsPayload, +) class InsecureKeyWarning(UserWarning): @@ -28,6 +44,12 @@ class InsecureKeyWarning(UserWarning): @implementer(IKeyService) class LocalKeyService: + """ + A service to read public and private TUF role keys as local files for development. + + NOTE: Do not use in production! + """ + def __init__(self, key_path, request): warnings.warn( "LocalKeyService is intended only for use in development, you " @@ -42,79 +64,344 @@ def __init__(self, key_path, request): def create_service(cls, context, request): return cls(request.registry.settings["tuf.key.path"], request) - def pubkeys_for_role(self, rolename): - pubkey_path = os.path.join(self._key_path, f"tuf.{rolename}.pub") - return [repository_tool.import_ed25519_publickey_from_file(pubkey_path)] + def get(self, rolename, key_type): + """ + Returns Key objects for passed TUF role name from configured TUF key path. Key + type is one of 'public' or 'private'. 
+ """ + if key_type == "private": + privkey_path = os.path.join(self._key_path, f"{rolename}*") + role_keys = glob.glob(privkey_path) + keys_sslib = [ + import_ed25519_privatekey_from_file( + key, self._request.registry.settings[f"tuf.{rolename}.secret"] + ) + for key in role_keys + if "pub" not in key + ] + elif key_type == "public": + pubkey_path = os.path.join(self._key_path, f"{rolename}*.pub") + role_keys = glob.glob(pubkey_path) + keys_sslib = [import_ed25519_publickey_from_file(key) for key in role_keys] + else: + raise ValueError(f"invalid key_type '{key_type}'") - def privkeys_for_role(self, rolename): - privkey_path = os.path.join(self._key_path, f"tuf.{rolename}") - return [ - repository_tool.import_ed25519_privatekey_from_file( - privkey_path, - password=self._request.registry.settings[f"tuf.{rolename}.secret"], - ) - ] + return keys_sslib @implementer(IStorageService) class LocalStorageService: - def __init__(self, request): - self._store = LocalBackend(request) + """ + A storage service with methods to read and write TUF role metadata as local files. + """ + + def __init__(self, repo_path): + self._repo_path = repo_path @classmethod def create_service(cls, context, request): - return cls(request) + return cls( + request.registry.settings["tuf.repo.path"], + ) - def get_backend(self): - return self._store + @contextmanager + def get(self, role, version=None): + """ + Yields TUF role metadata file object for the passed role name, from the + configured TUF repo path, optionally at the passed version (latest if None). 
+ """ + if role == Role.TIMESTAMP.value: + filename = os.path.join(self._repo_path, f"{role}.json") + else: + if version is None: + filenames = glob.glob(os.path.join(self._repo_path, f"*.{role}.json")) + versions = [ + int(name.split("/")[-1].split(".", 1)[0]) for name in filenames + ] + try: + version = max(versions) + except ValueError: + version = 1 -@implementer(IStorageService) -class GCSStorageService: - def __init__(self, request): - self._store = GCSBackend(request) + filename = os.path.join(self._repo_path, f"{version}.{role}.json") + if not os.path.isfile(filename): + filename = os.path.join(self._repo_path, f"{role}.json") - @classmethod - def create_service(cls, context, request): - return cls(request) + file_object = None + try: + file_object = open(filename, "rb") + yield file_object + except OSError: + raise StorageError(f"Can't open {filename}") + finally: + if file_object is not None: + file_object.close() + + def put(self, file_object, filename): + """ + Writes passed file object to configured TUF repo path using the passed filename. + """ + file_path = os.path.join(self._repo_path, filename) + if not file_object.closed: + file_object.seek(0) + + try: + with open(file_path, "wb") as destination_file: + shutil.copyfileobj(file_object, destination_file) + destination_file.flush() + os.fsync(destination_file.fileno()) + except OSError: + raise StorageError(f"Can't write file {filename}") - def get_backend(self): - return self._store + def store(self, file_object, filename): + self.put(file_object, filename) @implementer(IRepositoryService) -class LocalRepositoryService: - def __init__(self, repo_path, executor): - self._repo_path = repo_path - self._executor = executor +class RepositoryService: + """ + A repository service to create and maintain TUF role metadata. 
+ """ + + def __init__(self, storage_service, key_service, request): + self._storage_backend = storage_service + self._key_storage_backend = key_service + self._request = request @classmethod def create_service(cls, context, request): - return cls( - request.registry.settings["tuf.repo.path"], - request.task(add_target).delay, + """ + Creates a new repository service object configuring services to read and write + TUF role metadata (``IStorageService``) and to read public and private keys + (``IKeyService``). + """ + storage_service = request.find_service(IStorageService) + key_service = request.find_service(IKeyService) + return cls(storage_service, key_service, request) + + def _get_hash_bins(self): + """ + Returns a 'hash bin delegation' management object. + """ + if self._request.registry.settings["warehouse.env"] == Environment.development: + number_of_bins = 32 + else: + number_of_bins = BIN_N_COUNT + + return HashBins(number_of_bins) + + def _make_fileinfo(self, file, custom=None): + """ + Returns a TUF-compliant 'fileinfo' dictionary suitable for targets metadata. + + The optional 'custom' kwarg can be used for additional metadata about target + files (e.g., to indicate backsigning). + """ + hashes = {"blake2b-256": file.blake2_256_digest} + fileinfo = dict() + fileinfo["length"] = file.size + fileinfo["hashes"] = hashes + if custom: + fileinfo["custom"] = custom + + return fileinfo + + def _set_expiration_for_role(self, role_name): + """ + Returns a metadata expiration date (now + role-specific interval). + """ + # In a development environment metadata expires less frequently so that + # developers don't have to continually re-initialize it. 
+ if self._request.registry.settings["warehouse.env"] == Environment.development: + return datetime.datetime.now().replace(microsecond=0) + datetime.timedelta( + seconds=self._request.registry.settings[ + "tuf.development_metadata_expiry" + ] + ) + else: + return datetime.datetime.now().replace(microsecond=0) + datetime.timedelta( + seconds=self._request.registry.settings[f"tuf.{role_name}.expiry"] + ) + + def init_repository(self): + """ + Creates TUF top-level role metadata (root, targets, snapshot, timestamp). + + The metadata is populated with configured expiration times, signature thresholds + and verification keys, and signed and persisted using the configured key and + storage services. + + FIXME: In production 'root' and 'targets' roles require offline singing keys, + which may not be available at the time of initializing this metadata. + """ + metadata_repository = MetadataRepository( + self._storage_backend, self._key_storage_backend ) - def load_repository(self): - return repository_tool.load_repository(self._repo_path) + if metadata_repository.is_initialized: + raise FileExistsError("TUF Metadata Repository files already exists.") - def add_target(self, file, custom=None): - fileinfo = make_fileinfo(file, custom=custom) - self._executor(file.path, fileinfo) + top_roles_payload = dict() + for role in TOP_LEVEL_ROLE_NAMES: + top_roles_payload[role] = RolesPayload( + expiration=self._set_expiration_for_role(role), + threshold=self._request.registry.settings[f"tuf.{role}.threshold"], + keys=self._key_storage_backend.get(role, "private"), + ) + metadata_repository.initialize(top_roles_payload, True) -@implementer(IRepositoryService) -class GCSRepositoryService: - def __init__(self, executor, request): - self._store = GCSBackend(request) + def init_targets_delegation(self): + """ + Creates TUF metadata for hash bin delegated targets roles (bins, bin-n). 
- @classmethod - def create_service(cls, context, request): - return cls(request.task(add_target).delay, request) + Metadata is created for one 'bins' role and a configured number of 'bin-n' + roles. It is populated with configured expiration times, signature thresholds + and verification keys, and signed and persisted using the configured key and + storage services. - def load_repository(self): - return repository_tool.load_repository("tuf", storage_backend=self._store) + FIXME: In production the 'bins' role requires an offline singing key, which may + not be available at the time of initializing this metadata. + """ + hash_bins = self._get_hash_bins() + metadata_repository = MetadataRepository( + self._storage_backend, self._key_storage_backend + ) - def add_target(self, file, custom=None): - fileinfo = make_fileinfo(file, custom=custom) - self._executor(file.path, fileinfo) + # Top-level 'targets' role delegates trust for all target files to 'bins' role. + delegate_roles_payload = dict() + delegate_roles_payload["targets"] = list() + delegate_roles_payload["targets"].append( + RolesPayload( + expiration=self._set_expiration_for_role(Role.BINS.value), + threshold=self._request.registry.settings[ + f"tuf.{Role.BINS.value}.threshold" + ], + keys=self._key_storage_backend.get(Role.BINS.value, "private"), + delegation_role=Role.BINS.value, + paths=["*/*", "*/*/*/*"], + ) + ) + # The 'bins' role delegates trust for target files to 'bin-n' roles based on + # target file path hash prefixes. 
+ delegate_roles_payload[Role.BINS.value] = list() + for bin_n_name, bin_n_hash_prefixes in hash_bins.generate(): + delegate_roles_payload[Role.BINS.value].append( + RolesPayload( + expiration=self._set_expiration_for_role(Role.BIN_N.value), + threshold=self._request.registry.settings[ + f"tuf.{Role.BIN_N.value}.threshold" + ], + keys=self._key_storage_backend.get(Role.BIN_N.value, "private"), + delegation_role=bin_n_name, + path_hash_prefixes=bin_n_hash_prefixes, + ) + ) + + snapshot_metadata = metadata_repository.delegate_targets_roles( + delegate_roles_payload, + ) + self.bump_snapshot(snapshot_metadata) + + def bump_snapshot(self, snapshot_metadata=None): + """ + Bumps version and expiration date of TUF 'snapshot' role metadata. + + The version number is incremented by one, the expiration date renewed using a + configured expiration interval, and the metadata is signed and persisted using + the configured key and storage services. + + Bumping 'snapshot' transitively bumps the 'timestamp' role. + """ + metadata_repository = MetadataRepository( + self._storage_backend, self._key_storage_backend + ) + + if snapshot_metadata is None: + snapshot_metadata = metadata_repository.load_role(Role.SNAPSHOT.value) + + snapshot_metadata = metadata_repository.snapshot_bump_version( + snapshot_expires=self._set_expiration_for_role(Role.SNAPSHOT.value), + snapshot_metadata=snapshot_metadata, + store=True, + ) + + metadata_repository.timestamp_bump_version( + snapshot_version=snapshot_metadata.signed.version, + timestamp_expires=self._set_expiration_for_role(Role.TIMESTAMP.value), + store=True, + ) + + def bump_bin_n_roles(self): + """ + Bumps version and expiration date of 'bin-n' role metadata (multiple). + + The version numbers are incremented by one, the expiration dates are renewed + using a configured expiration interval, and the metadata is signed and persisted + using the configured key and storage services. 
+ + Bumping 'bin-n' transitively bumps 'snapshot' and 'timestamp' roles. + """ + + # 1. Grab metadata Repository + metadata_repository = MetadataRepository( + self._storage_backend, self._key_storage_backend + ) + + # 2. Load Snapshot role. + snapshot_metadata = metadata_repository.load_role(Role.SNAPSHOT.value) + + # 3. Fore each delegated hashed bin target role, bump and update Snapshot + hash_bins = self._get_hash_bins() + for bin_n_name, _ in hash_bins.generate(): + role_metadata = metadata_repository.load_role(bin_n_name) + metadata_repository.bump_role_version( + rolename=bin_n_name, + role_metadata=role_metadata, + role_expires=self._set_expiration_for_role(Role.BINS.value), + key_rolename=Role.BIN_N.value, + store=True, + ) + + snapshot_metadata = metadata_repository.snapshot_update_meta( + bin_n_name, role_metadata.signed.version, snapshot_metadata + ) + + # 4. Bump Snapshot with updated targets (bin-n) metadata + self.bump_snapshot(snapshot_metadata) + + def add_hashed_targets(self, targets): + """ + Update 'bin-n' roles metadata, assigning each passed target to the correct bin. + + Assignment is based on the hash prefix of the target file path. All metadata is + signed and persisted using the configured key and storage services. + + Updating 'bin-n' transitively bumps 'snapshot' and 'timestamp'. 
+ """ + hash_bins = self._get_hash_bins() + + targets_payload = dict() + for target in targets: + fileinfo = target.get("info") + filepath = target.get("path") + delegated_role_bin_name = hash_bins.get_delegate(filepath) + target_file = TargetsPayload(fileinfo, filepath) + if targets_payload.get(delegated_role_bin_name) is None: + targets_payload[delegated_role_bin_name] = list() + + targets_payload[delegated_role_bin_name].append(target_file) + + metadata_repository = MetadataRepository( + self._storage_backend, self._key_storage_backend + ) + + snapshot_metadata = metadata_repository.add_targets( + targets_payload, + Role.BIN_N.value, + ) + # TODO: Should we renew expiration date of 'timestamp' *and* 'snapshot' here? + # PEP 458 'Producing Consistent Snapshots' only mentions 'timestamp'. + # https://www.python.org/dev/peps/pep-0458/#producing-consistent-snapshots + self.bump_snapshot(snapshot_metadata) diff --git a/warehouse/tuf/tasks.py b/warehouse/tuf/tasks.py index b83ef2889960..241004d9be06 100644 --- a/warehouse/tuf/tasks.py +++ b/warehouse/tuf/tasks.py @@ -9,315 +9,50 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from datetime import timedelta - import redis -from securesystemslib.util import get_file_hashes -from tuf.api import metadata - from warehouse.tasks import task -from warehouse.tuf import utils -from warehouse.tuf.constants import HASH_ALGORITHM, TUF_REPO_LOCK, Role -from warehouse.tuf.interfaces import IKeyService, IStorageService +from warehouse.tuf.constants import TUF_REPO_LOCK +from warehouse.tuf.services import IRepositoryService @task(bind=True, ignore_result=True, acks_late=True) def bump_snapshot(task, request): - """ - Re-signs the TUF snapshot role, incrementing its version and renewing its - expiration period. - - Bumping the snapshot transitively bumps the timestamp role. 
- """ r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"]) with r.lock(TUF_REPO_LOCK): - # Bumping the snapshot role involves the following steps: - # 1. First, we grab our key and storage services. We'll use the former - # for signing operations, and the latter to read and write individual - # metadata files to and from the repository without loading the entire - # repo. - # 2. Using our storage service, we fetch the timestamp metadata, which - # is always at `timestamp.json`. We load it using the `Timestamp` model - # provided by the TUF API. - # 3. Using the snapshot version stored in the current `Timestamp`, we fetch - # `{VERSION}.snapshot.json` and load it using the `Snapshot` model - # provided by the TUF API. - # 4. We call `utils.bump_metadata()` and `Snapshot.sign()` to bump - # and re-sign the current snapshot. - # 5. We call `Snapshot.to_json_file()` with `{VERSION + 1}.snapshot.json`, - # where `{VERSION + 1}` is the incremented snapshot version. - # 6. We call `Timestamp.update()` on the loaded timestamp, giving it the - # incremented snapshot version as well as the serialized length and - # BLAKE2B hash of the serialized form. - # 7. We call `utils.bump_metadata()` and `Timestamp.sign()` to bump - # and re-sign the current timestamp. - # 8. We call `Timestamp.to_json_file()`, writing to `timestamp.json`. - # - # Each of the steps is labeled below for clarity. - - # 1. Service retrieval. - storage_service = request.find_service(IStorageService) - key_service = request.find_service(IKeyService) - - storage_backend = storage_service.get_backend() - - # 2. Timestamp retrieval and loading. - timestamp = metadata.Metadata.from_json_file("timestamp.json", storage_backend) - - # 3. Snapshot retrieval and loading. - snapshot = utils.find_snapshot(timestamp.signed, storage_backend) - - # 4. Snapshot bumping and versioning. 
- utils.bump_metadata( - snapshot.signed, - timedelta(seconds=request.registry.settings["tuf.snapshot.expiry"]), - ) - for key in key_service.privkeys_for_role(Role.SNAPSHOT.value): - snapshot.sign(key) - - # 5. Writing the updated snapshot back to the repository. - snapshot_filename = f"{snapshot.signed.version}.snapshot.json" - snapshot.to_json_file(snapshot_filename, storage_backend) - - # 6. Timestamp updating. - timestamp.signed.update( - snapshot.signed.version, - len(snapshot.to_json().encode()), - get_file_hashes( - snapshot_filename, - hash_algorithms=[HASH_ALGORITHM], - storage_backend=storage_backend, - ), - ) - - # 7. Timestamp bumping. - utils.bump_metadata( - timestamp.signed, - timedelta(seconds=request.registry.settings["tuf.timestamp.expiry"]), - ) - for key in key_service.privkeys_for_role(Role.TIMESTAMP.value): - timestamp.sign(key) - - # 8. Writing the updated timestamp back to the repository. - timestamp.to_json_file("timestamp.json", storage_backend) + repository_service = request.find_service(IRepositoryService) + repository_service.bump_snapshot() @task(bind=True, ignore_result=True, acks_late=True) -def bump_bin_ns(task, request): +def bump_bin_n_roles(task, request): r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"]) with r.lock(TUF_REPO_LOCK): - # Bumping all of the delegated bin roles in the TUF repository involves - # the following steps: - # 1. Grab key and storage services. - # 2. Fetch timestamp. - # 3. Fetch snapshot using timestamp. - # 4. For each delegated target in the snapshot, fetch its current version, - # bump, re-sign, and write back to the repo. Update the snapshot to - # match the bumped version. - # 5. Bump and re-sign the snapshot. - # 6. Write the snapshot back. - # 7. Bump and re-sign the timestamp. - # 8. Write the timestamp back. - - # 1. Service retrieval. 
- storage_service = request.find_service(IStorageService) - key_service = request.find_service(IKeyService) - - storage_backend = storage_service.get_backend() - - # 2. Timestamp retrieval and loading. - timestamp = metadata.Metadata.from_json_file("timestamp.json", storage_backend) - - # 3. Snapshot retrieval and loading. - snapshot = utils.find_snapshot(timestamp.signed, storage_backend) - - # Target iteration: skip the top-level targets role. - for role_name, role_info in snapshot.signed.meta.items(): - if role_name == "targets.json": - continue - - role_version = role_info["version"] - delegated_bin_filename = f"{role_version}.{role_name}" - - # Load the delegated bin. - delegated_bin = metadata.Metadata.from_json_file( - delegated_bin_filename, storage_backend - ) - - # Bump and sign the delegated bin. - utils.bump_metadata( - delegated_bin.signed, - timedelta(seconds=request.registry.settings["tuf.bin-n.expiry"]), - ) - - for key in key_service.privkeys_for_role(Role.BIN_N.value): - delegated_bin.sign(key) + repository_service = request.find_service(IRepositoryService) + repository_service.bump_bin_n_roles() - # Write-back. - delegated_bin.to_json_file(delegated_bin_filename, storage_backend) - # Update the snapshot with this updated target's version. - # TODO: Ideally we'd use snapshot.update here, but that takes - # the role name without .json on the end. But role_name here - # has that suffix. Annoying. - snapshot.meta[role_name]["version"] = delegated_bin.signed.version - - # Bump and sign the snapshot. - utils.bump_metadata( - snapshot.signed, - timedelta(seconds=request.registry.settings["tuf.snapshot.expiry"]), - ) - - for key in key_service.privkeys_for_role(Role.SNAPSHOT.value): - snapshot.sign(key) - - # Write-back. - snapshot_filename = f"{snapshot.signed.version}.snapshot.json" - snapshot.to_json_file(snapshot_filename, storage_backend) - - # Bump and sign the timestamp. 
- utils.bump_metadata( - timestamp.signed, - timedelta(seconds=request.registry.settings["tuf.timestamp.expiry"]), - ) - timestamp.signed.update( - snapshot.signed.version, - len(snapshot.to_json().encode()), - get_file_hashes( - snapshot_filename, - hash_algorithms=[HASH_ALGORITHM], - storage_backend=storage_backend, - ), - ) - - for key in key_service.privkeys_for_role(Role.TIMESTAMP.value): - timestamp.sign(key) - - # Write-back. - timestamp.to_json_file("timestamp.json", storage_backend) +@task(bind=True, ignore_result=True, acks_late=True) +def init_repository(task, request): + repository_service = request.find_service(IRepositoryService) + repository_service.init_repository() @task(bind=True, ignore_result=True, acks_late=True) -def add_target(task, request, filepath, fileinfo): +def init_targets_delegation(task, request): r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"]) with r.lock(TUF_REPO_LOCK): - # Adding a target to the TUF repository involves the following steps: - # 1. First, we grab our key and storage services. We'll use the former - # for signing operations, and the latter to read and write individual - # metadata files to and from the repository without loading the entire - # repo. - # 2. Using our storage service, we fetch the timestamp metadata, which - # is always at `timestamp.json`. We load it using the `Timestamp` model - # provided by the TUF API. - # 3. Using the snapshot version stored in the current `Timestamp`, we fetch - # `{VERSION}.snapshot.json` and load it using the `Snapshot` model - # provided by the TUF API. - # 4. Using the target's name (`filepath`), we determine the name of the - # delegated bin that it'll end up in. We use that delegated bin name to - # index into the `Snapshot` model and get the current version for that bin. - # Then, we fetch `{VERSION}.{BIN}.json` and load it using the `Targets` - # model provided by the TUF API. - # 5. 
We update the delegated bin, bumping its version, expiration, and - # adding our new target to it. - # 6. We call `Targets.sign()` on the loaded bin, giving it the bin-n - # signing key from our key service. - # 7. We call `Targets.to_json_file()` with `{VERSION + 1}.{BIN}.json` - # as the filepath, where `{VERSION + 1}` is the incremented version - # of the previous delegated bin version. - # 8. We update the snapshot, bumping its version, expiration, and giving - # it our new delegated bin version. - # 9. We call `Snapshot.sign()` on the loaded snapshot, giving it the - # snapshot signing key from our key service. - # 10. We call `Snapshot.to_json_file()` with `{VERSION + 1}.snapshot.json`, - # where `{VERSION + 1}` is the incremented version of the previous - # snapshot version. - # 11. We update the timestamp, bumping its version, expiration, and giving - # it our new snapshot version and integrity information. - # 12. We call `Timestamp.sign()` on the loaded timestamp, giving it the - # timestamp signing key from our key service. - # 13. We call `Timestamp.to_json_file()`, writing to `timestamp.json`. - # - # Each of the steps is labeled below for clarity. - - # 1. Service retrieval. - storage_service = request.find_service(IStorageService) - key_service = request.find_service(IKeyService) - - storage_backend = storage_service.get_backend() - - # 2. Timestamp retrieval and loading. - timestamp = metadata.Metadata.from_json_file("timestamp.json", storage_backend) - - # 3. Snapshot retrieval and loading. - snapshot = utils.find_snapshot(timestamp.signed, storage_backend) + repository_service = request.find_service(IRepositoryService) + repository_service.init_targets_delegation() - # 4. Delegated bin retrieval and loading. - delegated_bin_name, delegated_bin = utils.find_delegated_bin( - filepath, snapshot.signed, storage_backend - ) - # 5. Updating the delegated bin. 
- utils.bump_metadata( - delegated_bin.signed, - timedelta(seconds=request.registry.settings["tuf.bin-n.expiry"]), - ) - delegated_bin.signed.update(filepath, fileinfo) - - # 6. Signing the updated delegated bin metadata. - for key in key_service.privkeys_for_role(Role.BIN_N.value): - delegated_bin.sign(key) - - # 7. Writing the updated delegated bin back to the TUF repository. - delegated_bin.to_json_file( - f"{delegated_bin.signed.version}.{delegated_bin_name}.json", storage_backend - ) - - # 8. Updating the snapshot. - # TODO(ww): Fill in length and hashes? - utils.bump_metadata( - snapshot.signed, - timedelta(seconds=request.registry.settings["tuf.snapshot.expiry"]), - ) - snapshot.signed.update(f"{delegated_bin_name}.json", delegated_bin.signed.version) - - # 9. Signing the updated snapshot metadata. - for key in key_service.privkeys_for_role(Role.SNAPSHOT.value): - snapshot.sign(key) - - # 10. Writing the updated snapshot back to the TUF repository. - snapshot_filename = f"{snapshot.signed.version}.snapshot.json" - snapshot.to_json_file( - snapshot_filename, - storage_backend, - ) - - # 11. Updating the timestamp. - # NOTE(ww): Calling get_file_hashes here causes us to round-trip - # through the object store just to compute our snapshot's hash. - # Maybe add a function to securesystemslib that does the digest - # calculation on a string/bytes. - utils.bump_metadata( - timestamp.signed, - timedelta(seconds=request.registry.settings["tuf.timestamp.expiry"]), - ) - timestamp.signed.update( - snapshot.signed.version, - len(snapshot.to_json().encode()), - get_file_hashes( - snapshot_filename, - hash_algorithms=[HASH_ALGORITHM], - storage_backend=storage_backend, - ), - ) - - # 12. Signing the updated timestamp metadata. 
- for key in key_service.privkeys_for_role(Role.TIMESTAMP.value): - timestamp.sign(key) +@task(bind=True, ignore_result=True, acks_late=True) +def add_hashed_targets(task, request, targets): + r = redis.StrictRedis.from_url(request.registry.settings["celery.scheduler_url"]) - # 13. Writing the updated timestamp back to the TUF repository. - timestamp.to_json_file("timestamp.json", storage_backend) + with r.lock(TUF_REPO_LOCK): + repository_service = request.find_service(IRepositoryService) + repository_service.add_hashed_targets(targets) diff --git a/warehouse/tuf/utils.py b/warehouse/tuf/utils.py deleted file mode 100644 index 8c6ae0c99abe..000000000000 --- a/warehouse/tuf/utils.py +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os - -from contextlib import contextmanager -from io import BytesIO - -import tuf.formats -import tuf.repository_lib - -from google.cloud.exceptions import GoogleCloudError, NotFound -from securesystemslib.exceptions import StorageError -from securesystemslib.storage import FilesystemBackend, StorageBackendInterface -from tuf.api import metadata - -from warehouse.tuf.constants import BIN_N_COUNT - - -def make_fileinfo(file, custom=None): - """ - Given a warehouse.packaging.models.File, create a TUF-compliant - "fileinfo" dictionary suitable for addition to a delegated bin. - - The optional "custom" kwarg can be used to supply additional custom - metadata (e.g., metadata for indicating backsigning). 
- """ - hashes = {"blake2b": file.blake2_256_digest} - fileinfo = tuf.formats.make_targets_fileinfo(file.size, hashes, custom=custom) - - return fileinfo - - -def bump_metadata(metadata, delta): - """ - Given a tuf.api.metadata.Signed, bump its version and expiration (with the given - timedelta). - """ - metadata.bump_version() - metadata.bump_expiration(delta=delta) - - -def find_snapshot(timestamp, storage_backend): - """ - Given a tuf.api.metadata.Timestamp model, return the Metadata container - for the consistent snapshot that it references. - """ - snapshot_version = timestamp.meta["snapshot.json"]["version"] - - return metadata.Metadata.from_json_file( - f"{snapshot_version}.snapshot.json", storage_backend - ) - - -def find_delegated_bin(filepath, snapshot, storage_backend): - """ - Given a new target filepath and a tuf.api.metadata.Snapshot model, - return a tuple of the bin name and tup.api.metadata.Metadata container for - the consistent delegated targets bin that the target belongs in. - """ - - # TODO: This probably isn't using the right hash function. 
- filepath_hash = tuf.repository_lib.get_target_hash(filepath) - bin_name = tuf.repository_lib.find_bin_for_target_hash(filepath_hash, BIN_N_COUNT) - bin_version = snapshot.meta[f"{bin_name}.json"]["version"] - - return bin_name, metadata.Metadata.from_json_file( - f"{bin_version}.{bin_name}.json", storage_backend - ) - - -class LocalBackend(StorageBackendInterface): - def __init__(self, request): - self._filesystem_backend = FilesystemBackend() - self._repo_path = os.path.join( - request.registry.settings["tuf.repo.path"], "metadata.staged" - ) - - def get(self, filepath): - return self._filesystem_backend.get(os.path.join(self._repo_path, filepath)) - - def put(self, fileobj, filepath): - return self._filesystem_backend.put( - fileobj, os.path.join(self._repo_path, filepath) - ) - - def remove(self, filepath): - return self._filesystem_backend.remove(os.path.join(self._repo_path, filepath)) - - def getsize(self, filepath): - return self._filesystem_backend.getsize(os.path.join(self._repo_path, filepath)) - - def create_folder(self, filepath): - return self._filesystem_backend.create_folder( - os.path.join(self._repo_path, filepath) - ) - - def list_folder(self, filepath): - return self._filesystem_backend.list_folder( - os.path.join(self._repo_path, filepath) - ) - - -class GCSBackend(StorageBackendInterface): - def __init__(self, request): - self._client = request.find_service(name="gcloud.gcs") - # NOTE: This needs to be created. - self._bucket = self._client.get_bucket(request.registry.settings["tuf.bucket"]) - - @contextmanager - def get(self, filepath): - try: - contents = self._bucket.blob(filepath).download_as_string() - yield BytesIO(contents) - except NotFound as e: - raise StorageError(f"{filepath} not found") - - def put(self, fileobj, filepath): - try: - blob = self._bucket.blob(filepath) - # NOTE(ww): rewind=True reflects the behavior of the securesystemslib - # implementation of StorageBackendInterface, which seeks to the file start. 
- # I'm not sure it's actually required. - blob.upload_from_file(fileobj, rewind=True) - except GoogleCloudError: - # TODO: expose details of the underlying error in the message here? - raise StorageError(f"couldn't store to {filepath}") - - def remove(self, filepath): - try: - self._bucket.blob(filepath).delete() - except NotFound: - raise StorageError(f"{filepath} not found") - - def getsize(self, filepath): - blob = self._bucket.get_blob(filepath) - - if blob is None: - raise StorageError(f"{filepath} not found") - - return blob.size - - def create_folder(self, filepath): - if not filepath: - return - - if not filepath.endswith("/"): - filepath = f"{filepath}/" - - try: - blob = self._bucket.blob(filepath) - blob.upload_from_string(b"") - except GoogleCloudError as e: - raise StorageError(f"couldn't create folder: {filepath}") - - def list_folder(self, filepath): - if not filepath.endswith("/"): - filepath = f"{filepath}/" - - # NOTE: The `nextPageToken` appears to be required due to an implementation detail leak. - # See https://github.com/googleapis/google-cloud-python/issues/7875 - blobs = self._client.list_blobs( - self._bucket, prefix=filepath, fields="items(name),nextPageToken" - ) - return [blob.name for blob in blobs]