Skip to content

Commit

Permalink
DSR 3.0: First Class Tasks + Parallelization (#4760)
Browse files Browse the repository at this point in the history
Adds the new DSR 3.0 Scheduler which adds first class Request Tasks and supports running tasks within Privacy Requests in parallel. 

Support for DSR 2.0 (Dask scheduler, run in memory, in sequence) maintained for a limited time.
  • Loading branch information
pattisdr authored Apr 24, 2024
1 parent d108ebe commit 8fc3749
Show file tree
Hide file tree
Showing 158 changed files with 11,287 additions and 5,014 deletions.
44 changes: 44 additions & 0 deletions .fides/db_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,8 @@ dataset:
- name: privacyrequest
data_categories: []
fields:
- name: access_result_urls
data_categories: [user]
- name: cancel_reason
data_categories: [system.operations]
- name: canceled_at
Expand All @@ -1289,6 +1291,8 @@ dataset:
data_categories: [system.operations]
- name: external_id
data_categories: [system.operations]
- name: filtered_final_upload
data_categories: [user]
- name: finished_processing_at
data_categories: [system.operations]
- name: id
Expand Down Expand Up @@ -2109,3 +2113,43 @@ dataset:
data_categories: [ system.operations ]
- name: is_eligible
data_categories: [ system.operations ]

data_categories: [system.operations]
- name: requesttask
fields:
- name: access_data
data_categories: [user]
- name: action_type
data_categories: [system]
- name: all_descendant_tasks
data_categories: [system]
- name: collection
data_categories: [system]
- name: collection_address
data_categories: [system]
- name: collection_name
data_categories: [system]
- name: consent_sent
data_categories: [system]
- name: created_at
data_categories: [system]
- name: data_for_erasures
data_categories: [user]
- name: dataset_name
data_categories: [system]
- name: downstream_tasks
data_categories: [system]
- name: id
data_categories: [system]
- name: privacy_request_id
data_categories: [system]
- name: rows_masked
data_categories: [system]
- name: status
data_categories: [system]
- name: traversal_details
data_categories: [system]
- name: updated_at
data_categories: [system]
- name: upstream_tasks
data_categories: [system]
1 change: 1 addition & 0 deletions .fides/fides.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ task_retry_backoff = 1
subject_identity_verification_required = false
task_retry_count = 0
task_retry_delay = 1
use_dsr_3_0 = false

[admin_ui]
enabled = true
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The types of changes are:
## [Unreleased](https://github.com/ethyca/fides/compare/2.34.0...main)

### Added
- Added DSR 3.0 Scheduling which supports running DSR's in parallel with first-class request tasks [#4760](https://github.com/ethyca/fides/pull/4760)
- Added carets to collapsible sections in the overlay modal [#4793](https://github.com/ethyca/fides/pull/4793)
- Added erasure support for OpenWeb [#4735](https://github.com/ethyca/fides/pull/4735)
- Added support for configuration of pre-approval webhooks [#4795](https://github.com/ethyca/fides/pull/4795)
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ module = [
"jose.*",
"jwt.*",
"multidimensional_urlencode.*",
"networkx.*",
"okta.*",
"pandas.*",
"plotly.*",
Expand All @@ -65,7 +66,7 @@ module = [
"twilio.*",
"uvicorn.*",
"validators.*",
"pygtrie.*"
"pygtrie.*",
]
ignore_missing_imports = true

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ multidimensional_urlencode==0.0.4
nh3==0.2.15
okta==2.7.0
openpyxl==3.0.9
networkx==3.1 # added to help with privacy preference data migration
networkx==3.1
packaging==23.0
pandas==1.4.3
paramiko==3.4.0
Expand Down
133 changes: 133 additions & 0 deletions src/fides/api/alembic/migrations/versions/55bedede956d_requesttask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""requesttask
Revision ID: 55bedede956d
Revises: 6cfd59e7920a
Create Date: 2024-04-04 04:12:25.332952
"""
import sqlalchemy as sa
import sqlalchemy_utils
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "55bedede956d"
down_revision = "6cfd59e7920a"
branch_labels = None
depends_on = None


def upgrade():
op.create_table(
"requesttask",
sa.Column("id", sa.String(length=255), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=True,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=True,
),
sa.Column("privacy_request_id", sa.String(), nullable=True),
sa.Column("collection_address", sa.String(), nullable=False),
sa.Column("dataset_name", sa.String(), nullable=False),
sa.Column("collection_name", sa.String(), nullable=False),
sa.Column("action_type", sa.String(), nullable=False),
sa.Column("status", sa.String(), nullable=False),
sa.Column(
"upstream_tasks", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
sa.Column(
"downstream_tasks", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
sa.Column(
"all_descendant_tasks",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
sa.Column(
"access_data",
sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType(),
nullable=True,
),
sa.Column(
"data_for_erasures",
sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType(),
nullable=True,
),
sa.Column("rows_masked", sa.Integer(), nullable=True),
sa.Column("consent_sent", sa.Boolean(), nullable=True),
sa.Column("collection", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column(
"traversal_details", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
sa.ForeignKeyConstraint(
["privacy_request_id"], ["privacyrequest.id"], ondelete="SET NULL"
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
op.f("ix_requesttask_action_type"), "requesttask", ["action_type"], unique=False
)
op.create_index(
op.f("ix_requesttask_collection_address"),
"requesttask",
["collection_address"],
unique=False,
)
op.create_index(
op.f("ix_requesttask_collection_name"),
"requesttask",
["collection_name"],
unique=False,
)
op.create_index(
op.f("ix_requesttask_dataset_name"),
"requesttask",
["dataset_name"],
unique=False,
)
op.create_index(op.f("ix_requesttask_id"), "requesttask", ["id"], unique=False)
op.create_index(
op.f("ix_requesttask_privacy_request_id"),
"requesttask",
["privacy_request_id"],
unique=False,
)
op.create_index(
op.f("ix_requesttask_status"), "requesttask", ["status"], unique=False
)
op.add_column(
"privacyrequest",
sa.Column(
"filtered_final_upload",
sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType(),
nullable=True,
),
)
op.add_column(
"privacyrequest",
sa.Column(
"access_result_urls",
sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType(),
nullable=True,
),
)


def downgrade():
op.drop_column("privacyrequest", "access_result_urls")
op.drop_column("privacyrequest", "filtered_final_upload")
op.drop_index(op.f("ix_requesttask_status"), table_name="requesttask")
op.drop_index(op.f("ix_requesttask_privacy_request_id"), table_name="requesttask")
op.drop_index(op.f("ix_requesttask_id"), table_name="requesttask")
op.drop_index(op.f("ix_requesttask_dataset_name"), table_name="requesttask")
op.drop_index(op.f("ix_requesttask_collection_name"), table_name="requesttask")
op.drop_index(op.f("ix_requesttask_collection_address"), table_name="requesttask")
op.drop_index(op.f("ix_requesttask_action_type"), table_name="requesttask")
op.drop_table("requesttask")
Loading

0 comments on commit 8fc3749

Please sign in to comment.