From 6415ff0cdd063cde43f7a1834efa5f03b15cbf54 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Thu, 5 Dec 2024 16:21:10 +1100 Subject: [PATCH 1/5] Only show unpublished superseding score set to the users who have permissions to view it. The current version to other users will only be the published one. --- src/mavedb/lib/validation/urn_re.py | 4 + src/mavedb/routers/score_sets.py | 152 +++++++++++++++------------- 2 files changed, 87 insertions(+), 69 deletions(-) diff --git a/src/mavedb/lib/validation/urn_re.py b/src/mavedb/lib/validation/urn_re.py index 2ae7358e..09663d5a 100644 --- a/src/mavedb/lib/validation/urn_re.py +++ b/src/mavedb/lib/validation/urn_re.py @@ -8,6 +8,10 @@ MAVEDB_TMP_URN_PATTERN = r"tmp:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" MAVEDB_TMP_URN_RE = re.compile(MAVEDB_TMP_URN_PATTERN) +# Old temp URN +MAVEDB_OLD_TMP_URN_PATTERN = r"^tmp:[A-Za-z0-9]{16}$" +MAVEDB_OLD_TMP_URN_RE = re.compile(MAVEDB_OLD_TMP_URN_PATTERN) + # Experiment set URN MAVEDB_EXPERIMENT_SET_URN_PATTERN = rf"urn:{MAVEDB_URN_NAMESPACE}:\d{{{MAVEDB_EXPERIMENT_SET_URN_DIGITS}}}" MAVEDB_EXPERIMENT_SET_URN_RE = re.compile(MAVEDB_EXPERIMENT_SET_URN_PATTERN) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 353ee1ab..beeb0b73 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -41,6 +41,7 @@ search_score_sets as _search_score_sets, refresh_variant_urns, ) +from mavedb.lib.validation import urn_re from mavedb.lib.taxonomies import find_or_create_taxonomy from mavedb.lib.urns import ( generate_experiment_set_urn, @@ -64,7 +65,7 @@ async def fetch_score_set_by_urn( - db, urn: str, user: Optional[UserData], owner_or_contributor: Optional[UserData], only_published: bool + db, urn: str, user: Optional[UserData], owner_or_contributor: Optional[UserData], only_published: bool ) -> Optional[ScoreSet]: """ Fetch one score set by URN, ensuring that the user has read permission. @@ -103,6 +104,17 @@ async def fetch_score_set_by_urn( raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found") assert_permission(user, item, Action.READ) + if( + item + and item.superseding_score_set + and not owner_or_contributor + and ( + urn_re.MAVEDB_OLD_TMP_URN_RE.fullmatch(item.superseding_score_set.urn) + or urn_re.MAVEDB_TMP_URN_RE.fullmatch(item.superseding_score_set.urn) + ) + ): + item.superseding_score_set = None + return item @@ -128,9 +140,9 @@ def search_score_sets(search: ScoreSetsSearch, db: Session = Depends(deps.get_db response_model=list[score_set.ShortScoreSet], ) def search_my_score_sets( - search: ScoreSetsSearch, # = Body(..., embed=True), - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(require_current_user), + search: ScoreSetsSearch, # = Body(..., embed=True), + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user), ) -> Any: """ Search score sets created by the current user.. @@ -146,10 +158,10 @@ def search_my_score_sets( response_model_exclude_none=True, ) async def show_score_set( - *, - urn: str, - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(get_current_user), + *, + urn: str, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(get_current_user), ) -> Any: """ Fetch a single score set by URN. 
@@ -165,17 +177,17 @@ async def show_score_set( 200: { "content": {"text/csv": {}}, "description": """Variant scores in CSV format, with four fixed columns (accession, hgvs_nt, hgvs_pro,""" - """ and hgvs_splice), plus score columns defined by the score set.""", + """ and hgvs_splice), plus score columns defined by the score set.""", } }, ) def get_score_set_scores_csv( - *, - urn: str, - start: int = Query(default=None, description="Start index for pagination"), - limit: int = Query(default=None, description="Number of variants to return"), - db: Session = Depends(deps.get_db), - user_data: Optional[UserData] = Depends(get_current_user), + *, + urn: str, + start: int = Query(default=None, description="Start index for pagination"), + limit: int = Query(default=None, description="Number of variants to return"), + db: Session = Depends(deps.get_db), + user_data: Optional[UserData] = Depends(get_current_user), ) -> Any: """ Return scores from a score set, identified by URN, in CSV format. @@ -219,17 +231,17 @@ def get_score_set_scores_csv( 200: { "content": {"text/csv": {}}, "description": """Variant counts in CSV format, with four fixed columns (accession, hgvs_nt, hgvs_pro,""" - """ and hgvs_splice), plus score columns defined by the score set.""", + """ and hgvs_splice), plus score columns defined by the score set.""", } }, ) async def get_score_set_counts_csv( - *, - urn: str, - start: int = Query(default=None, description="Start index for pagination"), - limit: int = Query(default=None, description="Number of variants to return"), - db: Session = Depends(deps.get_db), - user_data: Optional[UserData] = Depends(get_current_user), + *, + urn: str, + start: int = Query(default=None, description="Start index for pagination"), + limit: int = Query(default=None, description="Number of variants to return"), + db: Session = Depends(deps.get_db), + user_data: Optional[UserData] = Depends(get_current_user), ) -> Any: """ Return counts from a score set, identified by URN, in CSV format. @@ -272,10 +284,10 @@ async def get_score_set_counts_csv( response_model=list[mapped_variant.MappedVariant], ) def get_score_set_mapped_variants( - *, - urn: str, - db: Session = Depends(deps.get_db), - user_data: Optional[UserData] = Depends(get_current_user), + *, + urn: str, + db: Session = Depends(deps.get_db), + user_data: Optional[UserData] = Depends(get_current_user), ) -> Any: """ Return mapped variants from a score set, identified by URN. @@ -293,10 +305,10 @@ def get_score_set_mapped_variants( mapped_variants = ( db.query(MappedVariant) - .filter(ScoreSet.urn == urn) - .filter(ScoreSet.id == Variant.score_set_id) - .filter(Variant.id == MappedVariant.variant_id) - .all() + .filter(ScoreSet.urn == urn) + .filter(ScoreSet.id == Variant.score_set_id) + .filter(Variant.id == MappedVariant.variant_id) + .all() ) if not mapped_variants: @@ -316,10 +328,10 @@ def get_score_set_mapped_variants( response_model_exclude_none=True, ) async def create_score_set( - *, - item_create: score_set.ScoreSetCreate, - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(require_current_user_with_email), + *, + item_create: score_set.ScoreSetCreate, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user_with_email), ) -> Any: """ Create a score set. 
@@ -461,9 +473,10 @@ async def create_score_set( for identifier in item_create.primary_publication_identifiers or [] ] publication_identifiers = [ - await find_or_create_publication_identifier(db, identifier.identifier, identifier.db_name) - for identifier in item_create.secondary_publication_identifiers or [] - ] + primary_publication_identifiers + await find_or_create_publication_identifier(db, identifier.identifier, + identifier.db_name) + for identifier in item_create.secondary_publication_identifiers or [] + ] + primary_publication_identifiers # create a temporary `primary` attribute on each of our publications that indicates # to our association proxy whether it is a primary publication or not @@ -603,13 +616,13 @@ async def create_score_set( response_model_exclude_none=True, ) async def upload_score_set_variant_data( - *, - urn: str, - counts_file: Optional[UploadFile] = File(None), - scores_file: UploadFile = File(...), - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(require_current_user_with_email), - worker: ArqRedis = Depends(deps.get_worker), + *, + urn: str, + counts_file: Optional[UploadFile] = File(None), + scores_file: UploadFile = File(...), + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user_with_email), + worker: ArqRedis = Depends(deps.get_worker), ) -> Any: """ Upload scores and variant count files for a score set, and initiate processing these files to @@ -660,12 +673,12 @@ async def upload_score_set_variant_data( "/score-sets/{urn}", response_model=score_set.ScoreSet, responses={422: {}}, response_model_exclude_none=True ) async def update_score_set( - *, - urn: str, - item_update: score_set.ScoreSetUpdate, - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(require_current_user_with_email), - worker: ArqRedis = Depends(deps.get_worker), + *, + urn: str, + item_update: score_set.ScoreSetUpdate, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user_with_email), + worker: ArqRedis = Depends(deps.get_worker), ) -> Any: """ Update a score set. 
@@ -722,9 +735,10 @@ async def update_score_set( for identifier in item_update.primary_publication_identifiers or [] ] publication_identifiers = [ - await find_or_create_publication_identifier(db, identifier.identifier, identifier.db_name) - for identifier in item_update.secondary_publication_identifiers or [] - ] + primary_publication_identifiers + await find_or_create_publication_identifier(db, identifier.identifier, + identifier.db_name) + for identifier in item_update.secondary_publication_identifiers or [] + ] + primary_publication_identifiers # create a temporary `primary` attribute on each of our publications that indicates # to our association proxy whether it is a primary publication or not @@ -863,15 +877,15 @@ async def update_score_set( if item.variants: assert item.dataset_columns is not None score_columns = [ - "hgvs_nt", - "hgvs_splice", - "hgvs_pro", - ] + item.dataset_columns["score_columns"] + "hgvs_nt", + "hgvs_splice", + "hgvs_pro", + ] + item.dataset_columns["score_columns"] count_columns = [ - "hgvs_nt", - "hgvs_splice", - "hgvs_pro", - ] + item.dataset_columns["count_columns"] + "hgvs_nt", + "hgvs_splice", + "hgvs_pro", + ] + item.dataset_columns["count_columns"] scores_data = pd.DataFrame( variants_to_csv_rows(item.variants, columns=score_columns, dtype="score_data") @@ -914,10 +928,10 @@ async def update_score_set( @router.delete("/score-sets/{urn}", responses={422: {}}) async def delete_score_set( - *, - urn: str, - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(require_current_user), + *, + urn: str, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user), ) -> Any: """ Delete a score set. @@ -952,10 +966,10 @@ async def delete_score_set( response_model_exclude_none=True, ) def publish_score_set( - *, - urn: str, - db: Session = Depends(deps.get_db), - user_data: UserData = Depends(require_current_user), + *, + urn: str, + db: Session = Depends(deps.get_db), + user_data: UserData = Depends(require_current_user), ) -> Any: """ Publish a score set. From 7fc2addedeb312a5cbd6e5951ffbd696f9b3d02c Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 13 Dec 2024 16:34:20 +1100 Subject: [PATCH 2/5] Modify function and add superseding score set tests. 
--- src/mavedb/routers/score_sets.py | 13 ++--- tests/routers/test_score_set.py | 90 ++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 10 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index beeb0b73..ade5b35a 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -29,7 +29,7 @@ logging_context, save_to_logging_context, ) -from mavedb.lib.permissions import Action, assert_permission +from mavedb.lib.permissions import Action, assert_permission, has_permission from mavedb.lib.score_sets import ( csv_data_to_df, find_meta_analyses_for_experiment_sets, @@ -104,15 +104,8 @@ async def fetch_score_set_by_urn( raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found") assert_permission(user, item, Action.READ) - if( - item - and item.superseding_score_set - and not owner_or_contributor - and ( - urn_re.MAVEDB_OLD_TMP_URN_RE.fullmatch(item.superseding_score_set.urn) - or urn_re.MAVEDB_TMP_URN_RE.fullmatch(item.superseding_score_set.urn) - ) - ): + + if item.superseding_score_set and not has_permission(user, item.superseding_score_set, Action.READ).permitted: item.superseding_score_set = None return item diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index 1b64683f..942de1a3 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -1557,3 +1557,93 @@ def test_can_modify_metadata_for_score_set_with_inactive_license(session, client assert response.status_code == 200 response_data = response.json() assert ("title", response_data["title"]) == ("title", "Update title") + +######################################################################################################################## +# Supersede score set +######################################################################################################################## + +def test_create_superseding_score_set(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + publish_score_set_response = client.post(f"/api/v1/score-sets/{score_set['urn']}/publish") + assert publish_score_set_response.status_code == 200 + published_score_set = publish_score_set_response.json() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] + score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] + superseding_score_set_response = client.post("/api/v1/score-sets/", json=score_set_post_payload) + assert superseding_score_set_response.status_code == 200 + +def test_can_view_unpublished_superseding_score_set(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + unpublished_score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") + assert publish_score_set_response.status_code == 200 + published_score_set = publish_score_set_response.json() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] + score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] + 
superseding_score_set_response = client.post("/api/v1/score-sets/", json=score_set_post_payload) + assert superseding_score_set_response.status_code == 200 + superseding_score_set = superseding_score_set_response.json() + score_set_response = client.get(f"/api/v1/score-sets/{published_score_set['urn']}") + score_set = score_set_response.json() + assert score_set_response.status_code == 200 + assert score_set["urn"] == superseding_score_set["supersededScoreSet"]["urn"] + assert score_set["supersedingScoreSet"]["urn"] == superseding_score_set["urn"] + +def test_cannot_view_others_unpublished_superseding_score_set(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + unpublished_score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") + assert publish_score_set_response.status_code == 200 + published_score_set = publish_score_set_response.json() + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set_post_payload["experimentUrn"] = published_score_set["experiment"]["urn"] + score_set_post_payload["supersededScoreSetUrn"] = published_score_set["urn"] + superseding_score_set_response = client.post("/api/v1/score-sets/", json=score_set_post_payload) + assert superseding_score_set_response.status_code == 200 + superseding_score_set = superseding_score_set_response.json() + change_ownership(session, superseding_score_set["urn"], ScoreSetDbModel) + score_set_response = client.get(f"/api/v1/score-sets/{published_score_set['urn']}") + score_set = score_set_response.json() + assert score_set_response.status_code == 200 + assert score_set["urn"] == superseding_score_set["supersededScoreSet"]["urn"] + # Other users can't view the unpublished superseding score set. + assert "supersedingScoreSet" not in score_set + +def test_can_view_others_published_superseding_score_set(session, data_provider, client, setup_router_db, data_files): + experiment = create_experiment(client) + unpublished_score_set = create_seq_score_set_with_variants( + client, session, data_provider, experiment["urn"], data_files / "scores.csv" + ) + publish_score_set_response = client.post(f"/api/v1/score-sets/{unpublished_score_set['urn']}/publish") + assert publish_score_set_response.status_code == 200 + published_score_set = publish_score_set_response.json() + + superseding_score_set = create_seq_score_set_with_variants( + client, + session, + data_provider, + published_score_set["experiment"]["urn"], + data_files / "scores.csv", + update={"supersededScoreSetUrn": published_score_set["urn"]}, + ) + published_superseding_score_set_response = client.post(f"/api/v1/score-sets/{superseding_score_set['urn']}/publish") + assert published_superseding_score_set_response.status_code == 200 + published_superseding_score_set = published_superseding_score_set_response.json() + + change_ownership(session, published_superseding_score_set["urn"], ScoreSetDbModel) + + score_set_response = client.get(f"/api/v1/score-sets/{published_score_set['urn']}") + assert score_set_response.status_code == 200 + score_set = score_set_response.json() + assert score_set["urn"] == published_superseding_score_set["supersededScoreSet"]["urn"] + # Other users can view published superseding score set. 
+    assert score_set["supersedingScoreSet"]["urn"] == published_superseding_score_set["urn"]

From 131e3b13e980d5d86fbbad772551cb28b41c194a Mon Sep 17 00:00:00 2001
From: EstelleDa
Date: Fri, 13 Dec 2024 16:39:31 +1100
Subject: [PATCH 3/5] Remove unnecessary import.

---
 src/mavedb/routers/score_sets.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py
index ade5b35a..e90de6fd 100644
--- a/src/mavedb/routers/score_sets.py
+++ b/src/mavedb/routers/score_sets.py
@@ -41,7 +41,6 @@
     search_score_sets as _search_score_sets,
     refresh_variant_urns,
 )
-from mavedb.lib.validation import urn_re
 from mavedb.lib.taxonomies import find_or_create_taxonomy
 from mavedb.lib.urns import (
     generate_experiment_set_urn,

From 882ae306b7af1cbfe07f90c44543fd1420279a03 Mon Sep 17 00:00:00 2001
From: EstelleDa
Date: Fri, 20 Dec 2024 17:33:25 +1100
Subject: [PATCH 4/5] Fix listing of score sets in an experiment when their
 superseding score sets are not yet published. The score set search code is
 not fixed yet.

---
 src/mavedb/routers/experiments.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py
index cea7209e..b1e83ce7 100644
--- a/src/mavedb/routers/experiments.py
+++ b/src/mavedb/routers/experiments.py
@@ -169,8 +169,34 @@ def get_experiment_score_sets(
         .filter(~ScoreSet.superseding_score_set.has())
         .all()
     )
+    superseding_score_sets = (
+        db.query(ScoreSet)
+        .filter(ScoreSet.experiment_id == experiment.id)
+        .filter(ScoreSet.superseding_score_set.has())
+        .all()
+    )
+
+    updated_score_set_result = []
+    for s in score_set_result:
+        current_version = s
+        while current_version:
+            if current_version.superseded_score_set:
+                if not has_permission(user_data, current_version, Action.READ).permitted:
+                    current_version = next(
+                        (sup for sup in superseding_score_sets if sup.urn == current_version.superseded_score_set.urn),
+                        None
+                    )
+                else:
+                    break
+            else:
+                break
+        if current_version:
+            updated_score_set_result.append(current_version)
+        else:
+            updated_score_set_result.append(s)
+
     score_set_result[:] = [
-        score_set for score_set in score_set_result if has_permission(user_data, score_set, Action.READ).permitted
+        score_set for score_set in updated_score_set_result if has_permission(user_data, score_set, Action.READ).permitted
     ]
 
     if not score_set_result:

From c7165f0df2b9959fceaeb87d8ea40eaecbe2bacc Mon Sep 17 00:00:00 2001
From: EstelleDa
Date: Fri, 20 Dec 2024 17:54:47 +1100
Subject: [PATCH 5/5] Add a typed next_version variable so that poetry run
 mypy src/ passes.

---
 src/mavedb/routers/experiments.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mavedb/routers/experiments.py b/src/mavedb/routers/experiments.py
index b1e83ce7..e7dc858a 100644
--- a/src/mavedb/routers/experiments.py
+++ b/src/mavedb/routers/experiments.py
@@ -182,10 +182,15 @@ def get_experiment_score_sets(
         while current_version:
             if current_version.superseded_score_set:
                 if not has_permission(user_data, current_version, Action.READ).permitted:
-                    current_version = next(
+                    next_version: Optional[ScoreSet] = next(
                         (sup for sup in superseding_score_sets if sup.urn == current_version.superseded_score_set.urn),
                         None
                     )
+                    # Assign to a typed next_version first so that poetry run mypy src/ passes
+                    if next_version:
+                        current_version = next_version
+                    else:
+                        break
                 else:
                     break
             else:
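
The fallback that patches 4 and 5 implement inline in get_experiment_score_sets
amounts to the sketch below. It is illustrative only and not part of the series:
the helper name visible_version is assumed, and it follows the
ScoreSet.superseded_score_set relationship directly, whereas the patch resolves
earlier versions from a pre-fetched list of the experiment's superseded score sets.

from mavedb.lib.permissions import Action, has_permission


def visible_version(tip, user_data):
    """Walk back from a version-chain tip to the newest score set the user may read.

    `tip` is a ScoreSet with no superseding score set; `user_data` may be None for
    anonymous requests, as in the router.
    """
    current = tip
    while current is not None:
        if has_permission(user_data, current, Action.READ).permitted:
            return current  # newest version this user is allowed to see
        # Fall back to the version this one supersedes (None at the start of the chain).
        current = current.superseded_score_set
    # Nothing on the chain is readable: return the tip; the router's final permission
    # filter then drops it from the listing, matching the patch's behaviour.
    return tip

Applied to each score set returned by the first query (those with no superseding
score set), this should produce the same listing the patches build in
updated_score_set_result.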