Skip to content

Commit

Permalink
Move around
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 11, 2024
1 parent b374f44 commit dc09054
Show file tree
Hide file tree
Showing 14 changed files with 42 additions and 36 deletions.
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "mavemap"
description = "Map MaveDB scoresets to reference sequences"
name = "dcd-mapping"
description = "Map MaveDB scoresets to VRS objects"
authors = [
{name = "Alex Handler Wagner", email = "[email protected]"},
{name = "Jeremy Arbesfeld", email = "[email protected]"},
Expand Down Expand Up @@ -39,7 +39,7 @@ dependencies = [
]

[project.scripts]
dcd-map = "mavemap.cli:cli"
dcd-map = "dcd_mapping.cli:cli"

[project.optional-dependencies]
tests = [
Expand Down
File renamed without changes.
15 changes: 11 additions & 4 deletions src/mavemap/align.py → src/dcd_mapping/align.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
"""Align MaveDB target sequences to a human reference genome."""
"""Align MaveDB target sequences to a human reference genome.
Outstanding items/encountered errors
------------------------------------
"""
import logging
import subprocess
import uuid
Expand All @@ -11,14 +16,14 @@
from cool_seq_tool.schemas import Strand
from gene.database.database import click

from mavemap.lookup import get_chromosome_identifier, get_gene_location
from mavemap.resources import (
from dcd_mapping.lookup import get_chromosome_identifier, get_gene_location
from dcd_mapping.resources import (
LOCAL_STORE_PATH,
get_cached_blat_output,
get_mapping_tmp_dir,
get_ref_genome_file,
)
from mavemap.schemas import (
from dcd_mapping.schemas import (
AlignmentResult,
GeneLocation,
ScoresetMetadata,
Expand Down Expand Up @@ -145,6 +150,8 @@ def _get_blat_output(
# TODO: the notebooks handle errors here by retrying with different BLAT
# argument configurations -- investigate, and refer to the older code if this
# comes up. Ideally, we should form correct queries up front instead of
# running alignment attempts that fail.
output = read_blat(out_file.absolute(), "blat-psl")

# clean up
Expand Down
4 changes: 2 additions & 2 deletions src/mavemap/cli.py → src/dcd_mapping/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import click

from mavemap.main import map_scoreset_urn
from dcd_mapping.main import map_scoreset_urn

_logger = logging.getLogger(__name__)

Expand All @@ -31,7 +31,7 @@ def cli(urn: str, debug: bool) -> None:
:param debug: if True, enable debug logging
""" # noqa: D301
logging.basicConfig(
filename="mavemap.log",
filename="dcd-mapping.log",
format="%(asctime)s %(levelname)s:%(message)s",
level=logging.INFO,
force=True,
Expand Down
2 changes: 1 addition & 1 deletion src/mavemap/lookup.py → src/dcd_mapping/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from gene.query import QueryHandler
from gene.schemas import SourceName

from mavemap.schemas import GeneLocation, ManeDescription, ScoresetMetadata
from dcd_mapping.schemas import GeneLocation, ManeDescription, ScoresetMetadata

_logger = logging.getLogger(__name__)

Expand Down
18 changes: 5 additions & 13 deletions src/mavemap/main.py → src/dcd_mapping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

import click

from mavemap.align import AlignmentError, align
from mavemap.resources import (
from dcd_mapping.align import AlignmentError, align
from dcd_mapping.resources import (
ResourceAcquisitionError,
get_scoreset_metadata,
get_scoreset_records,
)
from mavemap.schemas import ScoreRow, ScoresetMetadata
from mavemap.transcripts import TxSelectError, select_transcript
from mavemap.vrs_map import VrsMapError, vrs_map
from dcd_mapping.schemas import ScoreRow, ScoresetMetadata
from dcd_mapping.transcripts import TxSelectError, select_transcript
from dcd_mapping.vrs_map import VrsMapError, vrs_map

_logger = logging.getLogger(__name__)

Expand All @@ -33,11 +33,6 @@ async def map_scoreset(
_logger.error(f"Alignment failed for scoreset {metadata.urn}")
return None

print("Alignment result:") # TODO remove these print calls
print(alignment_result)

breakpoint()

try:
transcript = await select_transcript(
metadata, records, alignment_result, silent
Expand All @@ -46,9 +41,6 @@ async def map_scoreset(
_logger.error(f"Transcript selection failed for scoreset {metadata.urn}")
return None

print("Transcript:")
print(transcript)

try:
_ = vrs_map(metadata, transcript, records)
except VrsMapError:
Expand Down
6 changes: 4 additions & 2 deletions src/mavemap/resources.py → src/dcd_mapping/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
from pydantic import ValidationError
from tqdm import tqdm

from mavemap.schemas import ReferenceGenome, ScoreRow, ScoresetMetadata, UniProtRef
from dcd_mapping.schemas import ReferenceGenome, ScoreRow, ScoresetMetadata, UniProtRef

_logger = logging.getLogger(__name__)


LOCAL_STORE_PATH = Path(
os.environ.get("MAVEDB_STORAGE_DIR", Path.home() / ".local" / "share" / "mavemap")
os.environ.get(
"MAVEDB_STORAGE_DIR", Path.home() / ".local" / "share" / "dcd-mapping"
)
)
if not LOCAL_STORE_PATH.exists():
LOCAL_STORE_PATH.mkdir(exist_ok=True, parents=True)
Expand Down
File renamed without changes.
13 changes: 9 additions & 4 deletions src/mavemap/transcripts.py → src/dcd_mapping/transcripts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
"""Select best reference sequence."""
"""Select best reference sequence.
Outstanding questions/confusion
-------------------------------
* ``urn:mavedb:00000097-n-1``: unable to find any matching transcripts
"""
import logging
import re
from typing import List, Optional
Expand All @@ -9,7 +14,7 @@
from cool_seq_tool.schemas import TranscriptPriority
from gene.database.database import click

from mavemap.lookup import (
from dcd_mapping.lookup import (
get_chromosome_identifier,
get_gene_symbol,
get_mane_transcripts,
Expand All @@ -19,7 +24,7 @@
get_transcripts,
get_uniprot_sequence,
)
from mavemap.schemas import (
from dcd_mapping.schemas import (
AlignmentResult,
ManeDescription,
ScoreRow,
Expand Down Expand Up @@ -217,7 +222,7 @@ def _offset_target_sequence(metadata: ScoresetMetadata, records: List[ScoreRow])
amino_acids_by_position = {}
for protein_change in protein_change_list:
if protein_change == "_sy" or protein_change == "_wt":
raise ValueError
continue
if ";" in protein_change:
protein_changes = protein_change[1:-1].split(";")
else:
Expand Down
4 changes: 2 additions & 2 deletions src/mavemap/vrs_map.py → src/dcd_mapping/vrs_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
)
from ga4gh.vrs.normalize import normalize

from mavemap.lookup import hgvs_to_vrs
from mavemap.schemas import (
from dcd_mapping.lookup import hgvs_to_vrs
from dcd_mapping.schemas import (
AlignmentResult,
ScoreRow,
ScoresetMetadata,
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from mavemap.schemas import AlignmentResult, ScoresetMetadata, TxSelectResult
from dcd_mapping.schemas import AlignmentResult, ScoresetMetadata, TxSelectResult


@pytest.fixture(scope="module")
Expand Down
Empty file added tests/fixtures/vrs_map.json
Empty file.
2 changes: 1 addition & 1 deletion tests/unit/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest
from cool_seq_tool.schemas import Strand

from mavemap.align import align
from dcd_mapping.align import align


def test_align_src_catalytic_domain(scoreset_metadata_fixture):
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/test_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import pytest

from mavemap.resources import get_scoreset_records
from mavemap.schemas import AlignmentResult, ScoresetMetadata, TxSelectResult
from mavemap.transcripts import select_transcript
from dcd_mapping.resources import get_scoreset_records
from dcd_mapping.schemas import AlignmentResult, ScoresetMetadata, TxSelectResult
from dcd_mapping.transcripts import select_transcript


@pytest.mark.asyncio(scope="module")
Expand Down

0 comments on commit dc09054

Please sign in to comment.