From 1a714e13fcece720b495f9b505aa36c407ba9f7e Mon Sep 17 00:00:00 2001 From: YaphetKG <45075777+YaphetKG@users.noreply.github.com> Date: Tue, 24 Jan 2023 12:37:55 -0500 Subject: [PATCH] Release 2.9.6 (#272) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep 
<schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script 
for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Initial Kaniko build. * Move version file definition. * Quote env vars. * Update env vars. * Update env vars. * Update env vars. * env var changes. * env var changes. * env var changes. * env var changes. 
* Update DOCKER_IMAGE var. * Update DOCKER_IMAGE var in kaniko cmd. * Update kaniko destination line. * Update kaniko destination line. * More variable madness. * Programmatically remove quotes from version tag. * dug dump concepts api created and tested (#229) Co-authored-by: Nathan Braswell <nbraswell@rti.org> * Update _version.py (#234) * Version changes + separate build and publish. * Semantic versioning prep. * Add develop and master versioning and tagging. * Bump version. * Revert version to dug format. * Ncpi index fix (#232) * Renamed anvil to ncpi * Update ncpi datasets catalog * Modified script to download NCPI datasets into platform subfolders * Updated NCPI integration dataset * Removed unused variable * Removed ncpi top level folder to spread results among subfolders * Change output dir to data instead of ncpi subdir * Moved NCPI subdirs into main data folder for ingest as per Yaphet's request Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> * Add github creds env var. * Fix version typo. 
* Initial commit * Reduce ephemeral storage limits and requests * More parsers (#248) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * consolidate dbgap format parser in single file , adds crdc and kfdrc parsers * adding tests * bump version * parser when versions of studies are > 9 * test for version * fix long text issues, and encoding errors * nltk initialization * change nltk approach for sliding window Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Master to develop sync (#262) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * 
support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Release/2.9.1 (#205) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling 
dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: 
Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Release/2.9.1 Renames SPARC datasets as SPARC instead of dbgap Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> * Update _version.py (#206) * Release/2.9.2 (#209) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from 
graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep 
<schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * 
support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> * Release 2.9.3 (#244) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing 
* adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for 
extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config 
for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Initial Kaniko build. * Move version file definition. * Quote env vars. * Update env vars. * Update env vars. * Update env vars. * env var changes. * env var changes. * env var changes. * env var changes. * Update DOCKER_IMAGE var. * Update DOCKER_IMAGE var in kaniko cmd. * Update kaniko destination line. * Update kaniko destination line. * More variable madness. * Programmatically remove quotes from version tag. * dug dump concepts api created and tested (#229) Co-authored-by: Nathan Braswell <nbraswell@rti.org> * Update _version.py (#234) * Version changes + separate build and publish. * Semantic versioning prep. * Add develop and master versioning and tagging. 
* Ncpi index fix (#232) * Renamed anvil to ncpi * Update ncpi datasets catalog * Modified script to download NCPI datasets into platform subfolders * Updated NCPI integration dataset * Removed unused variable * Removed ncpi top level folder to spread results among subfolders * Change output dir to data instead of ncpi subdir * Moved NCPI subdirs into main data folder for ingest as per Yaphet's request Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> * Release/2.9.4 (#260) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: 
Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction 
from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add 
tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Initial Kaniko build. * Move version file definition. * Quote env vars. * Update env vars. * Update env vars. * Update env vars. * env var changes. * env var changes. * env var changes. * env var changes. * Update DOCKER_IMAGE var. * Update DOCKER_IMAGE var in kaniko cmd. * Update kaniko destination line. * Update kaniko destination line. * More variable madness. * Programmatically remove quotes from version tag. * dug dump concepts api created and tested (#229) Co-authored-by: Nathan Braswell <nbraswell@rti.org> * Update _version.py (#234) * Version changes + separate build and publish. * Semantic versioning prep. * Add develop and master versioning and tagging. * Bump version. * Revert version to dug format. * Ncpi index fix (#232) * Renamed anvil to ncpi * Update ncpi datasets catalog * Modified script to download NCPI datasets into platform subfolders * Updated NCPI integration dataset * Removed unused variable * Removed ncpi top level folder to spread results among subfolders * Change output dir to data instead of ncpi subdir * Moved NCPI subdirs into main data folder for ingest as per Yaphet's request Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> * Add github creds env var. * Fix version typo. 
* Initial commit * Reduce ephemeral storage limits and requests * More parsers (#248) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * consolidate dbgap format parser in single file , adds crdc and kfdrc parsers * adding tests * bump version * parser when versions of studies are > 9 * test for version * fix long text issues, and encoding errors * nltk initialization * change nltk approach for sliding window Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * update version * remove cruft from merge Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg 
<yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Hoid <tylerlcheek@gmail.com> * version bump Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Hoid <tylerlcheek@gmail.com> * Sprint (#264) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug 
element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Release/2.9.1 (#205) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element 
parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench 
<vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Release/2.9.1 Renames SPARC datasets as SPARC instead of dbgap Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> * Update _version.py (#206) * Release/2.9.2 (#209) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * 
adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg 
<yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py 
change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> * Release 2.9.3 (#244) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration 
to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some 
refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg 
<yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Initial Kaniko build. * Move version file definition. * Quote env vars. * Update env vars. * Update env vars. * Update env vars. * env var changes. * env var changes. * env var changes. * env var changes. * Update DOCKER_IMAGE var. * Update DOCKER_IMAGE var in kaniko cmd. * Update kaniko destination line. * Update kaniko destination line. * Moree variable madness. * Programatically remove quotes from version tag. * dug dump concepts api created and tested (#229) Co-authored-by: Nathan Braswell <nbraswell@rti.org> * Update _version.py (#234) * Version changes + separate build and publish. * Semantic versioning prep. * Add develop and master versioning and tagging. * Ncpi index fix (#232) * Renamed anvil to ncpi * Update ncpi datasets catalog * Modified script to download NCPI datasets into platform subfolders * Updated NCPI integration dataset * Removed unused variable * Removed ncpi top level folder to spread results among subfolders * Change output dir to data instead of ncpi subdir * Moved NCPI subdirs into main data folder for ingest as per Yaphet's request Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> * Release/2.9.4 (#260) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for 
node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Dev version bump (#202) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Attribute mapping from node to dug element (#203) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for 
extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * adding more config options for node extraction * some refactoring Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Changed DbGaP to SPARC in the scicrunch parser (#204) * Anvil (#207) * Added updated anvil dataset catalog * Added script for downloading all anvil data dicts * Added current anvil data dictionaries to data folder to be used for indexing * Anvil parser (#208) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more 
parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * anvil parser * bump number of files test * Update dbgap_parser.py * Update anvil_dbgap_parser.py change to AnVIL * Update test_parsers.py update test Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * Initial Kaniko build. * Move version file definition. * Quote env vars. * Update env vars. * Update env vars. * Update env vars. * env var changes. * env var changes. * env var changes. * env var changes. * Update DOCKER_IMAGE var. * Update DOCKER_IMAGE var in kaniko cmd. * Update kaniko destination line. * Update kaniko destination line. * Moree variable madness. * Programatically remove quotes from version tag. * dug dump concepts api created and tested (#229) Co-authored-by: Nathan Braswell <nbraswell@rti.org> * Update _version.py (#234) * Version changes + separate build and publish. 
* Semantic versioning prep. * Add develop and master versioning and tagging. * Bump version. * Revert version to dug format. * Ncpi index fix (#232) * Renamed anvil to ncpi * Update ncpi datasets catalog * Modified script to download NCPI datasets into platform subfolders * Updated NCPI integration dataset * Removed unused variable * Removed ncpi top level folder to spread results among subfolders * Change output dir to data instead of ncpi subdir * Moved NCPI subdirs into main data folder for ingest as per Yaphet's request Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> * Add github creds env var. * Fix version typo. * Initial commit * Reduce ephemeral storage limits and requests * More parsers (#248) * Release/2.8.0 (#198) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Update _version.py * Update _version.py updating version for final push to master * Update factory.py Adding more comments Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Release/v2.9.0 (#201) * Bumping version * support for extracting dug elements from graph (#197) * support for extracting dug elements from graph * adding flag for enabling dug element extraction from graph * adding new config for node_to dug element parsing * adding more parameters to crawler to able configuration to element extraction logic * add tests * add tests for crawler Co-authored-by: Yaphetkg <yaphetkg@renci.org> * Display es scores (#199) * Include ES scores in variable results * Round ES score to 6 * Update _version.py (#200) * Update _version.py Co-authored-by: Carl Schreep <schreepc@renci.org> 
Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * consolidate dbgap format parser in single file , adds crdc and kfdrc parsers * adding tests * bump version * parser when versions of studies are > 9 * test for version * fix long text issues, and encoding errors * nltk initialization * change nltk approach for sliding window Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> * update version * remove cruft from merge Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Hoid <tylerlcheek@gmail.com> * sprint parser * adding parser to plugin manager * sprint form names parse from file name * moving sprint to sprint parser * remove old import * empty desc Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Hoid <tylerlcheek@gmail.com> * Coverage and Makefile improvements (#267) * Update Makefile so 
PYTHONPATH isn't necessary * Add PYTHONPATH as an exported var in Makefile * Remove echo * feat: Use buildAndPush() from pipeline-utils * feat: Add .githooks and commit-msg hook * fix: Add CCV variable (oops) * doc: Update README * Bacpac parser (#270) * adding bacpac parser * bump version * remove unused import Co-authored-by: Yaphetkg <yaphetkg@renci.org> * adding dug installer in makefile (#271) * bump version for release * Update src/dug/core/parsers/bacpac_parser.py Co-authored-by: Gaurav Vaidya <gaurav@ggvaidya.com> Co-authored-by: Carl Schreep <schreepc@renci.org> Co-authored-by: Yaphetkg <yaphetkg@renci.org> Co-authored-by: Ginnie Hench <vhench@rti.org> Co-authored-by: Howard Lander <howard@renci.org> Co-authored-by: Alex Waldrop <39315592+alexwaldrop@users.noreply.github.com> Co-authored-by: Charles Bennett <cnbennett3@gmail.com> Co-authored-by: Nathaniel Braswell <braswent6@gmail.com> Co-authored-by: Nathan Braswell <nbraswell@rti.org> Co-authored-by: cnbennett3 <53792237+cnbennett3@users.noreply.github.com> Co-authored-by: Alex Waldrop <awaldrop@RTI-108292.rti.ns> Co-authored-by: Hoid <tylerlcheek@gmail.com> Co-authored-by: Gaurav Vaidya <gaurav@ggvaidya.com> --- .coveragerc | 9 +++ .githooks/commit-msg | 81 +++++++++++++++++++ .githooks/commit_types.txt | 4 + Dockerfile | 1 + Jenkinsfile | 49 +++++------ Makefile | 14 +++- README.md | 2 + src/dug/_version.py | 2 +- src/dug/core/parsers/__init__.py | 4 +- src/dug/core/parsers/bacpac_parser.py | 48 +++++++++++ src/dug/core/parsers/dbgap_parser.py | 1 + src/dug/core/parsers/sprint_parser.py | 3 +- .../data/bacpac_baseline_do_measures.xml | 17 ++++ tests/integration/test_parsers.py | 14 +++- 14 files changed, 212 insertions(+), 37 deletions(-) create mode 100644 .coveragerc create mode 100755 .githooks/commit-msg create mode 100644 .githooks/commit_types.txt create mode 100644 src/dug/core/parsers/bacpac_parser.py create mode 100644 tests/integration/data/bacpac_baseline_do_measures.xml diff --git 
a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..6deec938 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,9 @@ +[run] +include = + src/** + +omit = + **/__init__.py + **/_version.py + src/dug/config.py + src/dug/hookspecs.py \ No newline at end of file diff --git a/.githooks/commit-msg b/.githooks/commit-msg new file mode 100755 index 00000000..06310310 --- /dev/null +++ b/.githooks/commit-msg @@ -0,0 +1,81 @@ +#!/bin/bash + +# set -xe + +script_name=`basename "$0"` + +# Make sure stdin is open--it's sometimes open when we come in here +exec 0< /dev/tty + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +# Get user's original commit text. +orig_msg=$(awk '!/^#/{print}' $1); + +# Test for commit type/breaking change info already in commit text. +# This can occur if the user entered command using "-m" and the +# pre-commmit hook already handled getting this info. + +rgx="^(feat:|fix:|test:|doc:|Merge).*" +if grep -E $rgx <<< "$orig_msg" > /dev/null 2>&1; then + echo "$script_name: Commit message passes commmit-type check." + echo "--------------------------------------------------------------------------------" + echo "$orig_msg" + echo "--------------------------------------------------------------------------------" + exit 0 +fi + +echo +echo 'Please select the type of your commmit from the following list:' +echo +nl $SCRIPT_DIR/commit_types.txt | awk '{ print $1, $2 }' +echo +count=$(wc -l $SCRIPT_DIR/commit_types.txt | awk '{ print $1 }') +n=0 +while true; do + read -p 'Select option: ' n + # If $n is an integer between one and $count... 
+ if [[ "$n" -eq $n ]] && + [[ "$n" -gt 0 ]] && + [[ "$n" -le "$count" ]]; then + break + fi +done + +commit_type="$(sed -n "${n}p" $SCRIPT_DIR/commit_types.txt | awk '{ print $2 }')" +echo +yesno="n" +if [ $commit_type == "feat" ] || [ $commit_type == "fix" ]; then + while true; do + read -p 'Is this a breaking change [y/n]: ' yn + yesno="$(tr [A-Z] [a-z] <<< "$yn")" + if [ "$yesno" == "y" ] || [ "$yesno" == "yes" ] || \ + [ "$yesno" == "n" ] || [ "$yesno" == "no" ]; then + break + fi + done +fi + +breaking_commit=0 +if [ "$yesno" == "y" ] || [ "$yesno" == "yes" ]; then + breaking_commit=1 +fi + +NL=$'\n' +if [ "$breaking_commit" -eq 1 ]; then + # Explicitly state it's a breaking change + msg="$msg${NL}${NL}BREAKING CHANGE" +fi + +# Prepend commit type to original commit text. +new_msg="${commit_type}: $orig_msg"; +echo $new_msg > $1; + +echo +echo "Updated message:" +echo +echo "--------------------------------------------------------------------------------" +echo "$new_msg" +echo "--------------------------------------------------------------------------------" +echo +exit 0 diff --git a/.githooks/commit_types.txt b/.githooks/commit_types.txt new file mode 100644 index 00000000..406b8f74 --- /dev/null +++ b/.githooks/commit_types.txt @@ -0,0 +1,4 @@ +feature feat +fix fix +test test +documentation doc diff --git a/Dockerfile b/Dockerfile index c6547593..9b1c97de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,7 @@ COPY --chown=$USER . 
dug/ WORKDIR $HOME/dug RUN make install +RUN make install.dug # Run it ENTRYPOINT dug \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index 7449a309..66e89a50 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,5 +1,7 @@ library 'pipeline-utils@master' +CCV = "" + pipeline { agent { kubernetes { @@ -56,30 +58,33 @@ spec: GITHUB_CREDS = credentials("${env.GITHUB_CREDS_ID_STR}") REGISTRY = "${env.REGISTRY}" REG_OWNER="helxplatform" - REG_APP="dug" + REPO_NAME="dug" COMMIT_HASH="${sh(script:"git rev-parse --short HEAD", returnStdout: true).trim()}" - VERSION_FILE="src/dug/_version.py" - VERSION="${sh(script:'awk \'{ print $3 }\' src/dug/_version.py | xargs', returnStdout: true).trim()}" - IMAGE_NAME="${REGISTRY}/${REG_OWNER}/${REG_APP}" - TAG1="$BRANCH_NAME" - TAG2="$COMMIT_HASH" - TAG3="$VERSION" - TAG4="latest" + IMAGE_NAME="${REGISTRY}/${REG_OWNER}/${REPO_NAME}" } stages { stage('Build') { steps { script { + container(name: 'go', shell: '/bin/bash') { + if (BRANCH_NAME.equals("master")) { + CCV = go.ccv() + } + } container(name: 'kaniko', shell: '/busybox/sh') { - kaniko.build("./Dockerfile", ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2", "$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"]) + def tagsToPush = ["$IMAGE_NAME:$BRANCH_NAME", "$IMAGE_NAME:$COMMIT_HASH"] + if (CCV != null && !CCV.trim().isEmpty() && BRANCH_NAME.equals("master")) { + tagsToPush.add("$IMAGE_NAME:$CCV") + tagsToPush.add("$IMAGE_NAME:latest") + } else if (BRANCH_NAME.equals("develop")) { + def now = new Date() + def currTimestamp = now.format("yyyy-MM-dd'T'HH.mm'Z'", TimeZone.getTimeZone('UTC')) + tagsToPush.add("$IMAGE_NAME:$currTimestamp") + } + kaniko.buildAndPush("./Dockerfile", tagsToPush) } } } - post { - always { - archiveArtifacts artifacts: 'image.tar', onlyIfSuccessful: true - } - } } stage('Test') { steps { @@ -88,21 +93,5 @@ spec: ''' } } - stage('Publish') { - steps { - script { - container(name: 'crane', shell: '/busybox/sh') { - def imageTagsPushAlways = 
["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2"] - def imageTagsPushForDevelopBranch = ["$IMAGE_NAME:$TAG3"] - def imageTagsPushForMasterBranch = ["$IMAGE_NAME:$TAG4"] - image.publish( - imageTagsPushAlways, - imageTagsPushForDevelopBranch, - imageTagsPushForMasterBranch - ) - } - } - } - } } } diff --git a/Makefile b/Makefile index 170d70c4..c93fd217 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ DOCKER_OWNER = helxplatform DOCKER_APP = dug DOCKER_TAG = ${VERSION} DOCKER_IMAGE = ${DOCKER_OWNER}/${DOCKER_APP}:$(DOCKER_TAG) +export PYTHONPATH = $(shell echo ${PWD})/src .DEFAULT_GOAL = help @@ -15,6 +16,11 @@ DOCKER_IMAGE = ${DOCKER_OWNER}/${DOCKER_APP}:$(DOCKER_TAG) help: @grep -E '^#[a-zA-Z\.\-]+:.*$$' $(MAKEFILE_LIST) | tr -d '#' | awk 'BEGIN {FS = ": "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' +init: + git --version + echo "Please make sure your git version is greater than 2.9.0. If it's not, this command will fail." + git config --local core.hooksPath .githooks/ + #clean: Remove old build artifacts and installed packages clean: rm -rf build @@ -27,13 +33,19 @@ clean: install: ${PYTHON} -m pip install --upgrade pip ${PYTHON} -m pip install -r requirements.txt + +#install.dug: Install dug as a library to the current Python environment. +install.dug: ${PYTHON} -m pip install . #test: Run all tests test: # ${PYTHON} -m flake8 src ${PYTHON} -m pytest --doctest-modules src - ${PYTHON} -m pytest tests + coverage run -m pytest tests + +coverage: + coverage report #build: Build Docker image build: diff --git a/README.md b/README.md index 4647f686..33741e37 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ To achieve this, we annotate study metadata with terms from [biomedical ontologi ## Quickstart +NOTE: You must run `make init` once you've cloned the repo to enable the commit-msg git hook so that conventional commits will apply automatically. + To install Dug in your environment , run `make install`. 
Alternatively, ```shell diff --git a/src/dug/_version.py b/src/dug/_version.py index 933fa29c..c1467c65 100644 --- a/src/dug/_version.py +++ b/src/dug/_version.py @@ -1 +1 @@ -__version__ = "2.9.5" +__version__ = "2.9.6" diff --git a/src/dug/core/parsers/__init__.py b/src/dug/core/parsers/__init__.py index fa60c1c4..8143d508 100644 --- a/src/dug/core/parsers/__init__.py +++ b/src/dug/core/parsers/__init__.py @@ -10,6 +10,7 @@ from .topmed_tag_parser import TOPMedTagParser from .topmed_csv_parser import TOPMedCSVParser from .sprint_parser import SPRINTParser +from .bacpac_parser import BACPACParser logger = logging.getLogger('dug') @@ -28,6 +29,7 @@ def define_parsers(parser_dict: Dict[str, Parser]): parser_dict["crdc"] = CRDCDbGaPParser() parser_dict["kfdrc"] = KFDRCDbGaPParser() parser_dict["sprint"] = SPRINTParser() + parser_dict["bacpac"] = BACPACParser() class ParserNotFoundException(Exception): @@ -46,4 +48,4 @@ def get_parser(hook, parser_name) -> Parser: err_msg = f"Cannot find parser of type '{parser_name}'\n" \ f"Supported parsers: {', '.join(available_parsers.keys())}" logger.error(err_msg) - raise ParserNotFoundException(err_msg) \ No newline at end of file + raise ParserNotFoundException(err_msg) diff --git a/src/dug/core/parsers/bacpac_parser.py b/src/dug/core/parsers/bacpac_parser.py new file mode 100644 index 00000000..ff2709d2 --- /dev/null +++ b/src/dug/core/parsers/bacpac_parser.py @@ -0,0 +1,48 @@ +import logging +from typing import List +from xml.etree import ElementTree as ET + +from dug import utils as utils +from ._base import DugElement, FileParser, Indexable, InputFile + +logger = logging.getLogger('dug') + + +class BACPACParser(FileParser): + # Class for parsing BACPAC data dictionaries in dbGaP XML format into a set of Dug Elements. 
+ + @staticmethod + def parse_study_name_from_filename(filename: str): + # Parse the form name from the xml filename + return filename.split('/')[-1].replace('.xml', '') + + def __call__(self, input_file: InputFile) -> List[Indexable]: + logger.debug(input_file) + tree = ET.parse(input_file) + root = tree.getroot() + study_id = root.attrib['study_id'] + + # Parse study name from file handle + study_name = self.parse_study_name_from_filename(str(input_file)) + + if study_name is None: + err_msg = f"Unable to parse BACPAC Form name from data dictionary: {input_file}!" + logger.error(err_msg) + raise IOError(err_msg) + + elements = [] + for variable in root.iter('variable'): + description = variable.find('description').text or "" + elem = DugElement(elem_id=f"{variable.attrib['id']}", + name=variable.find('name').text, + desc=description.lower(), + elem_type="BACPAC", + collection_id=f"{study_id}", + collection_name=study_name) + + # Add to set of variables + logger.debug(elem) + elements.append(elem) + + # You don't actually create any concepts + return elements diff --git a/src/dug/core/parsers/dbgap_parser.py b/src/dug/core/parsers/dbgap_parser.py index 49fa92c6..b01432c4 100644 --- a/src/dug/core/parsers/dbgap_parser.py +++ b/src/dug/core/parsers/dbgap_parser.py @@ -72,3 +72,4 @@ def _get_element_type(self): class KFDRCDbGaPParser(DbGaPParser): def _get_element_type(self): return "Kids First" + diff --git a/src/dug/core/parsers/sprint_parser.py b/src/dug/core/parsers/sprint_parser.py index 7917ed76..43568e0f 100644 --- a/src/dug/core/parsers/sprint_parser.py +++ b/src/dug/core/parsers/sprint_parser.py @@ -10,7 +10,7 @@ class SPRINTParser(FileParser): - # Class for parsers NIDA Data dictionary into a set of Dug Elements + # Class for parsers SPRINT Data dictionary into a set of Dug Elements @staticmethod def parse_study_name_from_filename(filename: str): @@ -41,7 +41,6 @@ def __call__(self, input_file: InputFile) -> List[Indexable]: collection_id=f"{study_id}", 
collection_name=study_name) - # Create NIDA links as study/variable actions # Add to set of variables logger.debug(elem) elements.append(elem) diff --git a/tests/integration/data/bacpac_baseline_do_measures.xml b/tests/integration/data/bacpac_baseline_do_measures.xml new file mode 100644 index 00000000..6beaebcf --- /dev/null +++ b/tests/integration/data/bacpac_baseline_do_measures.xml @@ -0,0 +1,17 @@ +<data_table id="bacpac_baseline_do_measures" study_id="bacpac_baseline_do_measures" study_name="bacpac_baseline_do_measures"> + <variable id="record_id.demographic_and_baseline_characteristic_core_data_elements"> + <name>record_id.demographic_and_baseline_characteristic_core_data_elements</name> + <description>Record ID</description> + <type>nan</type> + </variable> + <variable id="dob"> + <name>dob</name> + <description>Date of birth:</description> + <type>text</type> + </variable> + <variable id="age"> + <name>age</name> + <description>Age:</description> + <type>text</type> + </variable> +</data_table> \ No newline at end of file diff --git a/tests/integration/test_parsers.py b/tests/integration/test_parsers.py index 9b689d0c..a22d00c7 100644 --- a/tests/integration/test_parsers.py +++ b/tests/integration/test_parsers.py @@ -1,5 +1,5 @@ from dug.core.parsers import DbGaPParser, NIDAParser, TOPMedTagParser, SciCrunchParser, AnvilDbGaPParser,\ - CRDCDbGaPParser, KFDRCDbGaPParser, SPRINTParser + CRDCDbGaPParser, KFDRCDbGaPParser, SPRINTParser, BACPACParser from tests.integration.conftest import TEST_DATA_DIR def test_dbgap_parse_study_name_from_filename(): @@ -107,4 +107,14 @@ def test_sprint_parser(): def test_sprint_parser_form_name(): filename = "/opt/***/share/data/dug/input_files/sprint/adolescent_sleep_wake_scale_short_form_aswssf.xml" - assert SPRINTParser.parse_study_name_from_filename(filename) == "adolescent_sleep_wake_scale_short_form_aswssf" \ No newline at end of file + assert SPRINTParser.parse_study_name_from_filename(filename) == 
"adolescent_sleep_wake_scale_short_form_aswssf" + +def test_bacpac_parser(): + parser = BACPACParser() + parse_file = str(TEST_DATA_DIR / "bacpac_baseline_do_measures.xml") + elements = parser(parse_file) + assert len(elements) == 3 + for element in elements: + assert element.type == "BACPAC" + element_names = [e.name for e in elements] + assert "record_id.demographic_and_baseline_characteristic_core_data_elements" in element_names \ No newline at end of file