Skip to content

Commit

Permalink
Anvil parser (#208)
Browse files Browse the repository at this point in the history
* Release/2.8.0 (#198)

* Bumping version

* support for extracting dug elements from graph (#197)

* support for extracting dug elements from graph

* adding flag for enabling dug element extraction from graph

* adding new config for node_to dug element parsing

* adding more parameters to crawler to enable configuration of element extraction logic

* add tests

* add tests for crawler

Co-authored-by: Yaphetkg <[email protected]>

* Update _version.py

* Update _version.py

updating version for final push to master

* Update factory.py

Adding more comments

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>

* Release/v2.9.0 (#201)

* Bumping version

* support for extracting dug elements from graph (#197)

* support for extracting dug elements from graph

* adding flag for enabling dug element extraction from graph

* adding new config for node_to dug element parsing

* adding more parameters to crawler to enable configuration of element extraction logic

* add tests

* add tests for crawler

Co-authored-by: Yaphetkg <[email protected]>

* Display es scores (#199)

* Include ES scores in variable results

* Round ES score to 6

* Update _version.py (#200)

* Update _version.py

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>
Co-authored-by: Ginnie Hench <[email protected]>

* anvil parser

* bump number of files test

* Update dbgap_parser.py

* Update anvil_dbgap_parser.py

change to AnVIL

* Update test_parsers.py

update test

Co-authored-by: Carl Schreep <[email protected]>
Co-authored-by: Yaphetkg <[email protected]>
Co-authored-by: Ginnie Hench <[email protected]>
  • Loading branch information
4 people authored May 2, 2022
1 parent 447d325 commit ef43201
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/dug/core/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .scicrunch_parser import SciCrunchParser
from .topmed_tag_parser import TOPMedTagParser
from .topmed_csv_parser import TOPMedCSVParser
from .anvil_dbgap_parser import AnvilDbGaPParser

logger = logging.getLogger('dug')

Expand All @@ -22,6 +23,7 @@ def define_parsers(parser_dict: Dict[str, Parser]):
parser_dict["topmedtag"] = TOPMedTagParser()
parser_dict["topmedcsv"] = TOPMedCSVParser()
parser_dict["scicrunch"] = SciCrunchParser()
parser_dict["anvil"] = AnvilDbGaPParser()


class ParserNotFoundException(Exception):
Expand Down
6 changes: 6 additions & 0 deletions src/dug/core/parsers/anvil_dbgap_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .dbgap_parser import DbGaPParser


class AnvilDbGaPParser(DbGaPParser):
    """DbGaP parser variant whose element type label is "AnVIL"."""

    def _get_element_type(self) -> str:
        """Return the element type label for this parser ("AnVIL")."""
        return "AnVIL"
5 changes: 4 additions & 1 deletion src/dug/core/parsers/dbgap_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ def parse_study_name_from_filename(filename: str):
return match.group(1)
return None

def _get_element_type(self):
return "DbGaP"

def __call__(self, input_file: InputFile) -> List[Indexable]:
logger.debug(input_file)
tree = ET.parse(input_file)
Expand All @@ -41,7 +44,7 @@ def __call__(self, input_file: InputFile) -> List[Indexable]:
elem = DugElement(elem_id=f"{variable.attrib['id']}.p{participant_set}",
name=variable.find('name').text,
desc=variable.find('description').text.lower(),
elem_type="DbGaP",
elem_type=self._get_element_type(),
collection_id=f"{study_id}.p{participant_set}",
collection_name=study_name)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="./datadict_v2.xsl"?><data_table id="pht009987.v1" study_id="phs001547.v1" participant_set="1" date_created="Tue Mar 17 09:38:19 2020"><description/><variable id="phv00427626.v1"><name>SUBJECT_ID</name><description>Subject ID</description><type>string</type></variable><variable id="phv00427627.v1"><name>CONSENT</name><description>Consent group as determined by DAC</description><type>encoded value</type><value code="1">Health/Medical/Biomedical (NPU) (HMB-NPU)</value></variable><variable id="phv00427628.v1"><name>AFFECTION_STATUS</name><description>Case control status of the subject for atrial fibrillation (AF)</description><type>encoded value</type><value code="1">Case</value></variable></data_table>
2 changes: 1 addition & 1 deletion tests/integration/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_filesystem_loader():
filepath=TEST_DATA_DIR,
)
files = list(targets)
assert len(files) == 9
assert len(files) == 10

with pytest.raises(ValueError):
targets = load_from_filesystem(
Expand Down
11 changes: 10 additions & 1 deletion tests/integration/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dug.core.parsers import DbGaPParser, NIDAParser, TOPMedTagParser, SciCrunchParser
from dug.core.parsers import DbGaPParser, NIDAParser, TOPMedTagParser, SciCrunchParser, AnvilDbGaPParser
from tests.integration.conftest import TEST_DATA_DIR

def test_dbgap_parse_study_name_from_filename():
Expand Down Expand Up @@ -61,3 +61,12 @@ def test_topmed_tag_parser():
for element in elements:
assert element.name != element.id
assert element.description != element.id


def test_anvil_parser():
    """Parse the AnVIL sample data dictionary and check count and type."""
    data_dict_path = TEST_DATA_DIR / "phs001547.v1.pht009987.v1.TOPMed_CCDG_GENAF_Subject.data_dict.xml"
    anvil_parser = AnvilDbGaPParser()
    parsed = anvil_parser(str(data_dict_path))
    # The fixture is expected to produce exactly three elements.
    assert len(parsed) == 3
    for item in parsed:
        assert item.type == "AnVIL"

0 comments on commit ef43201

Please sign in to comment.