Skip to content

Commit

Permalink
feat(backup): Support import decryption
Browse files Browse the repository at this point in the history
This is the follow-up to #58015, adding the corresponding
`--decrypt_with` flag to decrypt tarballs at import time.

Closes getsentry/team-ospo#207
  • Loading branch information
azaslavsky committed Oct 16, 2023
1 parent 52947aa commit 73aa12b
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 187 deletions.
55 changes: 7 additions & 48 deletions src/sentry/backup/exports.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
from __future__ import annotations

import io
import tarfile
from typing import BinaryIO, Type

import click
from cryptography.fernet import Fernet
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from django.db.models.base import Model

from sentry.backup.dependencies import (
Expand All @@ -17,7 +12,7 @@
get_model_name,
sorted_dependencies,
)
from sentry.backup.helpers import Filter
from sentry.backup.helpers import Filter, create_encrypted_export_tarball
from sentry.backup.scopes import ExportScope
from sentry.services.hybrid_cloud.import_export.model import (
RpcExportError,
Expand Down Expand Up @@ -47,7 +42,7 @@ def __init__(self, context: RpcExportError) -> None:


def _export(
dest,
dest: BinaryIO,
scope: ExportScope,
*,
encrypt_with: BinaryIO | None = None,
Expand Down Expand Up @@ -151,47 +146,11 @@ def get_exporter_for_model(model: Type[Model]):
dest_wrapper.detach()
return

# Generate a new DEK (data encryption key), and use that DEK to encrypt the JSON being exported.
pem = encrypt_with.read()
data_encryption_key = Fernet.generate_key()
backup_encryptor = Fernet(data_encryption_key)
encrypted_json_export = backup_encryptor.encrypt(json.dumps(json_export).encode("utf-8"))

# Encrypt the newly minted DEK using symmetric public key encryption.
dek_encryption_key = serialization.load_pem_public_key(pem, default_backend())
sha256 = hashes.SHA256()
mgf = padding.MGF1(algorithm=sha256)
oaep_padding = padding.OAEP(mgf=mgf, algorithm=sha256, label=None)
encrypted_dek = dek_encryption_key.encrypt(data_encryption_key, oaep_padding) # type: ignore

# Generate a tarball with 3 files:
#
# 1. The DEK we minted, name "data.key".
# 2. The public key we used to encrypt that DEK, named "key.pub".
# 3. The exported JSON data, encrypted with that DEK, named "export.json".
#
# The upshot: to decrypt the exported JSON data, you need the plaintext (decrypted) DEK. But to
# decrypt the DEK, you need the private key associated with the included public key, which
# you've hopefully kept in a safe, trusted location.
#
# Note that the supplied file names are load-bearing - ex, changing to `data.key` to `foo.key`
# risks breaking assumptions that the decryption side will make on the other end!
tar_buffer = io.BytesIO()
with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
json_info = tarfile.TarInfo("export.json")
json_info.size = len(encrypted_json_export)
tar.addfile(json_info, fileobj=io.BytesIO(encrypted_json_export))
key_info = tarfile.TarInfo("data.key")
key_info.size = len(encrypted_dek)
tar.addfile(key_info, fileobj=io.BytesIO(encrypted_dek))
pub_info = tarfile.TarInfo("key.pub")
pub_info.size = len(pem)
tar.addfile(pub_info, fileobj=io.BytesIO(pem))
dest.write(tar_buffer.getvalue())
dest.write(create_encrypted_export_tarball(json_export, encrypt_with).getvalue())


def export_in_user_scope(
dest,
dest: BinaryIO,
*,
encrypt_with: BinaryIO | None = None,
user_filter: set[str] | None = None,
Expand All @@ -217,7 +176,7 @@ def export_in_user_scope(


def export_in_organization_scope(
dest,
dest: BinaryIO,
*,
encrypt_with: BinaryIO | None = None,
org_filter: set[str] | None = None,
Expand All @@ -244,7 +203,7 @@ def export_in_organization_scope(


def export_in_config_scope(
dest,
dest: BinaryIO,
*,
encrypt_with: BinaryIO | None = None,
indent: int = 2,
Expand All @@ -269,7 +228,7 @@ def export_in_config_scope(


def export_in_global_scope(
dest,
dest: BinaryIO,
*,
encrypt_with: BinaryIO | None = None,
indent: int = 2,
Expand Down
113 changes: 112 additions & 1 deletion src/sentry/backup/helpers.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
from __future__ import annotations

import io
import tarfile
from datetime import datetime, timedelta, timezone
from enum import Enum
from functools import lru_cache
from typing import Generic, NamedTuple, Type, TypeVar
from typing import BinaryIO, Generic, NamedTuple, Type, TypeVar

from cryptography.fernet import Fernet
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models

from sentry.backup.scopes import RelocationScope
from sentry.utils import json

# Django apps we take care to never import or export from.
EXCLUDED_APPS = frozenset(("auth", "contenttypes", "fixtures"))
Expand All @@ -27,6 +34,110 @@ def default(self, obj):
return super().default(obj)


def create_encrypted_export_tarball(
    json_export: json.JSONData, encrypt_with: BinaryIO
) -> io.BytesIO:
    """
    Encrypt `json_export` and package it into an in-memory tarball.

    `encrypt_with` is a readable binary stream holding a PEM-encoded public key. The returned
    buffer contains an uncompressed tar archive with exactly three members, in this order:

        1. "export.json": the serialized JSON, encrypted with a freshly generated DEK (data
           encryption key).
        2. "data.key": that DEK, itself encrypted with the supplied public key.
        3. "key.pub": the public key PEM, byte-for-byte as read from `encrypt_with`.

    The upshot: to read the export you need the plaintext DEK, and to recover the DEK you need
    the private key matching the bundled public key, which should live somewhere safe.

    The member names are load-bearing: the decryption side looks entries up by these exact names,
    so renaming any of them breaks the import path.

    The returned buffer's position is left at the end of the archive; callers should use
    `getvalue()` or `seek(0)` before reading from it.
    """

    public_pem = encrypt_with.read()

    # Mint a one-off DEK and use it to symmetrically encrypt the serialized export.
    dek = Fernet.generate_key()
    ciphertext = Fernet(dek).encrypt(json.dumps(json_export).encode("utf-8"))

    # Wrap the DEK with the caller's public key (asymmetric encryption, OAEP over SHA-256).
    wrapping_key = serialization.load_pem_public_key(public_pem, default_backend())
    digest = hashes.SHA256()
    wrapped_dek = wrapping_key.encrypt(  # type: ignore
        dek,
        padding.OAEP(mgf=padding.MGF1(algorithm=digest), algorithm=digest, label=None),
    )

    # Assemble the archive in memory; member order matches the docstring above.
    members = (
        ("export.json", ciphertext),
        ("data.key", wrapped_dek),
        ("key.pub", public_pem),
    )
    archive = io.BytesIO()
    with tarfile.open(fileobj=archive, mode="w") as tar:
        for member_name, payload in members:
            entry = tarfile.TarInfo(member_name)
            entry.size = len(payload)
            tar.addfile(entry, fileobj=io.BytesIO(payload))

    return archive


def decrypt_encrypted_tarball(tarball: BinaryIO, decrypt_with: BinaryIO) -> str:
    """
    Decrypt an encrypted export tarball into an in-memory JSON string.

    A tarball encrypted by a call to `_export` with `encrypt_with` set (see
    `create_encrypted_export_tarball`) has some specific properties: it holds exactly the files
    "export.json" (the encrypted payload), "data.key" (the encrypted data encryption key, or DEK)
    and "key.pub" (the public key used to encrypt the DEK). This function handles all of those.

    `tarball` is a readable binary stream over the archive, and `decrypt_with` is a readable
    binary stream holding the unencrypted, PEM-encoded private key matching "key.pub".

    Returns the decrypted export as a string.

    Raises `ValueError` if the archive contains an unexpected file, is missing a required one, or
    if the bundled public key does not belong to the supplied private key. Errors raised by the
    underlying `tarfile` and `cryptography` calls propagate unchanged.
    """

    export = None
    encrypted_dek = None
    public_key_pem = None
    private_key_pem = decrypt_with.read()

    with tarfile.open(fileobj=tarball, mode="r") as tar:
        for member in tar.getmembers():
            # Only regular files carry data we care about; other entry types are skipped.
            if not member.isfile():
                continue

            file = tar.extractfile(member)
            if file is None:
                raise ValueError(f"Could not extract file for {member.name}")

            content = file.read()
            if member.name == "export.json":
                export = content.decode("utf-8")
            elif member.name == "data.key":
                encrypted_dek = content
            elif member.name == "key.pub":
                public_key_pem = content
            else:
                raise ValueError(f"Unknown tarball entity {member.name}")

    if export is None or encrypted_dek is None or public_key_pem is None:
        raise ValueError("A required file was missing from the encrypted tarball")

    # Compare the public and private key, to ensure that they are a match. The bundled PEM is
    # whatever bytes the exporter was handed, so it may differ from our own serialization in line
    # endings, trailing whitespace, or PEM flavor. Parse it and compare canonical encodings rather
    # than raw bytes, so that a genuinely matching key pair is never rejected.
    private_key = serialization.load_pem_private_key(
        private_key_pem,
        password=None,
        backend=default_backend(),
    )
    generated_public_key_pem = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    bundled_public_key = serialization.load_pem_public_key(public_key_pem, default_backend())
    bundled_public_key_pem = bundled_public_key.public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    if bundled_public_key_pem != generated_public_key_pem:
        raise ValueError(
            "The public key does not match that generated by the `decrypt_with` private key."
        )

    # Decrypt the DEK, then use it to decrypt the underlying JSON. The padding parameters must
    # mirror the ones used on the encryption side.
    decrypted_dek = private_key.decrypt(  # type: ignore
        encrypted_dek,
        padding.OAEP(
            mgf=padding.MGF1(algorithm=hashes.SHA256()),
            algorithm=hashes.SHA256(),
            label=None,
        ),
    )
    decryptor = Fernet(decrypted_dek)
    return decryptor.decrypt(export).decode("utf-8")


def get_final_derivations_of(model: Type) -> set[Type]:
"""A "final" derivation of the given `model` base class is any non-abstract class for the
"sentry" app with `BaseModel` as an ancestor. Top-level calls to this class should pass in
Expand Down
Loading

0 comments on commit 73aa12b

Please sign in to comment.