From 9f6a2370aea873336820eaf8e2251a5446694464 Mon Sep 17 00:00:00 2001 From: YaphetKG <45075777+YaphetKG@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:47:30 -0400 Subject: [PATCH] Release merge (#109) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update _version.py (#86) * Update _version.py * Rti merge (#84) * roger cli preped for Merge Deploy * Update Makefile to work with python env * Update redisgraph-bulk-loader to fix issue with loading MODULE LIST * Revert "Update redisgraph-bulk-loader to fix issue with loading MODULE LIST" This reverts commit 7baf7efa725caac77e5501e948f545a0f4b20e3d. * Finalized dev deployment of dug inside Catapult Merge, deployment yamls, code changes and configurations * updated to reflect the Dug-Api updates to FastAPI * adding multi label redis by removing 'biolink:' on nodes, edges cannot be fixed after update so they need to be solved either by changing TranQl AND Plater or forking bulk-redisgraph to allow for colons to be added in the edges * Working multi label redis nodes w/ no biolink label * Latest code changes to deploy working Roger in Merge * biolink data move to '.' separator * updates to include new dug fixes, upgraded redis-bulk-loader and made changes to for biolink variables to specify it's domain with a 'biolink.' * adding test roger code * removed helm deployments * change docker owner * remove core.py * remove dup dev config * redis graph is not directly used removing cruft * remove print statement * remove logging files * update requriemtns * update requriemtns * add redis graph.py * fix import error for logger * adding es scheme and ca_path config * adding es scheme and ca_path config * adding debug code * removing debug * adding nodes args * adding biolink. * adding biolink. 
* Update requirements.txt * Update .gitignore * Update dug_utils.py Handle Error when curie not found in validate * Update __init__.py * Update config.yaml * Update dev-config.yaml * Update docker-compose.yaml * fixed docker-compose * adding back postgres volume to docker compose * env correction , docker compose updates --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent * adding v5.0 * cde-links branch * pin linkml * Update config.yaml collection_action to action * pop total items before result * print extracted elements * Update requirements.txt * Keep edge provenance (#94) * Update kgx.py * Update kgx.py * Update kgx.py can't delete edge keys while looping over them. * just collect then update * Update requirements.txt (#93) * Pipeline parameterize restructure (#95) * roger cli preped for Merge Deploy * Update Makefile to work with python env * Update redisgraph-bulk-loader to fix issue with loading MODULE LIST * Revert "Update redisgraph-bulk-loader to fix issue with loading MODULE LIST" This reverts commit 7baf7efa725caac77e5501e948f545a0f4b20e3d. * Finalized dev deployment of dug inside Catapult Merge, deployment yamls, code changes and configurations * updated to reflect the Dug-Api updates to FastAPI * adding multi label redis by removing 'biolink:' on nodes, edges cannot be fixed after update so they need to be solved either by changing TranQl AND Plater or forking bulk-redisgraph to allow for colons to be added in the edges * Working multi label redis nodes w/ no biolink label * Latest code changes to deploy working Roger in Merge * biolink data move to '.' separator * updates to include new dug fixes, upgraded redis-bulk-loader and made changes to for biolink variables to specify it's domain with a 'biolink.' 
* adding test roger code * removed helm deployments * change docker owner * remove core.py * remove dup dev config * redis graph is not directly used removing cruft * remove print statement * remove logging files * update requriemtns * update requriemtns * add redis graph.py * fix import error for logger * adding es scheme and ca_path config * adding es scheme and ca_path config * Parameterized annotate tasks with input_data_path and output_data_path * adding debug code * removing debug * adding nodes args * adding biolink. * adding biolink. * Parameterized annotate tasks with input_data_path and output_data_path (#85) * adding lakefs changes to roger-2.0 * point avalon to vg1 branch * change avalon dep * update airflow * fix avalon tag typo * update jenkins to tag version on main branch only * update jenkins to tag version * update jenkins to tag version * psycopg2 installation * add cncf k8s req * use airflow non-slim * simplified for testing * simplified for testing * change dag name * Erroneous parameter passed, should not be None * adding pre-exec * adding pre-exec * adding pre-exec * typo preexec * typo preexec * fix context * get files from repo * get files from repo * get files from repo * get files from repo * First shot at moving pipeline into base class and implementing. 
Anvil pipeline not complete * Syntax fix, docker image version bump to airflow 2.7.2-python3.11 * update storage dir * update remove dir code * update remove dir code * remote path to * * fix input dir for annotators * fix input dir for annotators * fix input dir for annotators * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * adding branch info on lakefs config * callback push to branch * back to relative import * reformat temp branch name based on unique task id * add logging * add logging * convert posix path to str for avalon * add extra / to root path * New dag created using DugPipeline subclasses * EmptyOperator imported from wrong place * import and syntax fixes * utterly silly syntax error * Added anvil to default input data sets for testing purposes * adding / to local path * commit meta task args empty string * add merge logic * add merge logic * upstream task dir pull for downstream task * Switched from subdag to taskgroup because latest Airflow depricated subdag * Added BACPAC pipeline object * Temporarily ignoring configuration variable for enabled datasets for testing * Passed dag in to create task group to see if it helps dag errors * Fixed silly syntax error * adding input / output dir params for make kgx * Trying different syntax to make taskgroups work. * adding input / output dir params for make kgx * Parsing, syntax, pylint fixes * adding input / output dir params for make kgx * Added pipeline name to task group name to ensure uniqueness * oops, moved something out of scope. Fixed * Filled out pipeline with methods from dug_utils. 
Needs data path changes * Finished implementing input_data_path and output_data_path handling, pylint cleanup * Update requirements.txt * adding toggle to avoid sending config obj * adding toggle to avoid sending config obj * disable to string for test * control pipelines for testing * add self to anvil get files * add log stream to make it available * typo fix * correcting branch id * adding source repo * adding source repo * patch name-resolver response * no pass input repo and branch , if not overriden to pre-exec * no pass input repo and branch , if not overriden to pre-exec * no pass input repo and branch , if not overriden to pre-exec * dug pipeline edit * recurisvely find recursively * recurisvely find recursively * setup output path for crawling * all task functions should have input and output params * adding annotation as upstream for validate index * revamp create task , and task wrapper * add validate concepts index task * adding concept validation * add index_variables task as dependecy for validate concepts * add index_variables task as dependecy for validate concepts * await client exist * await client exist * concepts not getting picked up for indexing * concepts not getting picked up for indexing * fix search elements * converting annotation output to json * json format annotation outputs * adding support for json format elements and concepts read * json back to dug objects * fixing index valriables with json objects * indetation and new line for better change detection :? * indetation and new line for better change detection * treat dictionary concepts as dictionary * read concepts json as a dict * concepts files are actually file paths * debug message * make output jsonable * clear up dir after commit , and delete unmerged branch even if no changes * don`t clear indexes, parallel dataset processing will be taxed * memory leak? * memory leak? * memory leak? 
* dumping pickles to debug locally * find out why concepts are being added to every other element * find out why concepts are being added to every other element * pointless shuffle 🤷‍♂️ * revert back in time * back to sanitize dug * output just json for annotation * adding jsonpickle * jsonpickle 🥒 * unpickle for index * unpickle for validate index * crawling fixes * crawling fixes * crawling validation fixes * fix index concepts * fix makekgx * adding other bdc pipelines * adding pipeline paramters to be able to configure per instance * fix * add input dataset for pipelines * Adding README to document how to create data set-specific pipelines * catchup on base.py * Added dbgap and nida pipelines * fix import errors * annotator modules added by passing config val (#90) * annotator modules added by passing config val * fix merge conflict * following same pattern as parsers , modify configs * fix to dug config method * fix old dug pipeline for backward compatiblity * correct default annotator type * reflective changes * typo extra quotes * annotator type not being picked up from config * remove annotate simple , log env value for lakefs enabled * testing lakefs off * add more logging * add more logging * post init for config to parse to boolean * put back task calls * revert some changes * adding new pipeline * lakefs io support for merge task * fix name * add io params for kg tasks * wire up i/o paths for merge * fix variable name * print files * few debug logs * few debug logs * treat path as path not str * few debug logs * some fixes * logging edge files * bug fix knowledge has edge * re-org graph structure * adding pathing for other tasks * pagenation logic fix for avalon * update lakefs client code * fix glob for get kgx files * fix up get merged objects * send down fake commit id for metadata * working on edges schema * bulk create nodes I/O * find schema file * bulk create edges I/O * bulk create edges I/O * bulk load io * no outputs for final tasks * add 
recursive glob * fix globbing * oops * delete dags * pin dug to latest release * cruft cleanup * re-org kgx config * add support for multiple initial repos * fix comma * create dir to download to * swap branch and repo * clean up dirs * fix up other pipeline 👌 --------- Co-authored-by: YaphetKG * Add heal parsers (#96) * annotator modules added by passing config val * fix merge conflict * following same pattern as parsers , modify configs * fix to dug config method * fix old dug pipeline for backward compatiblity * correct default annotator type * reflective changes * typo extra quotes * annotator type not being picked up from config * remove annotate simple , log env value for lakefs enabled * testing lakefs off * add more logging * add more logging * post init for config to parse to boolean * put back task calls * revert some changes * adding new pipeline * lakefs io support for merge task * fix name * add io params for kg tasks * wire up i/o paths for merge * fix variable name * print files * few debug logs * few debug logs * treat path as path not str * few debug logs * some fixes * logging edge files * bug fix knowledge has edge * re-org graph structure * adding pathing for other tasks * pagenation logic fix for avalon * update lakefs client code * fix glob for get kgx files * fix up get merged objects * send down fake commit id for metadata * working on edges schema * bulk create nodes I/O * find schema file * bulk create edges I/O * bulk create edges I/O * bulk load io * no outputs for final tasks * add recursive glob * fix globbing * oops * delete dags * pin dug to latest release * cruft cleanup * re-org kgx config * add support for multiple initial repos * fix comma * create dir to download to * swap branch and repo * clean up dirs * fix up other pipeline 👌 * add remaining pipelines * adding ctn parser * change merge strategy * merge init fix * debug dir * fix topmed file read * fix topmed file read * return file names as strings * topmed kgx builder 
custom * topmed kgx builder custom * add skip * get files pattern recursive * version pin avalon * pin dug --------- Co-authored-by: braswent * Add heal parsers (#97) * annotator modules added by passing config val * fix merge conflict * following same pattern as parsers , modify configs * fix to dug config method * fix old dug pipeline for backward compatiblity * correct default annotator type * reflective changes * typo extra quotes * annotator type not being picked up from config * remove annotate simple , log env value for lakefs enabled * testing lakefs off * add more logging * add more logging * post init for config to parse to boolean * put back task calls * revert some changes * adding new pipeline * lakefs io support for merge task * fix name * add io params for kg tasks * wire up i/o paths for merge * fix variable name * print files * few debug logs * few debug logs * treat path as path not str * few debug logs * some fixes * logging edge files * bug fix knowledge has edge * re-org graph structure * adding pathing for other tasks * pagenation logic fix for avalon * update lakefs client code * fix glob for get kgx files * fix up get merged objects * send down fake commit id for metadata * working on edges schema * bulk create nodes I/O * find schema file * bulk create edges I/O * bulk create edges I/O * bulk load io * no outputs for final tasks * add recursive glob * fix globbing * oops * delete dags * pin dug to latest release * cruft cleanup * re-org kgx config * add support for multiple initial repos * fix comma * create dir to download to * swap branch and repo * clean up dirs * fix up other pipeline 👌 * add remaining pipelines * adding ctn parser * change merge strategy * merge init fix * debug dir * fix topmed file read * fix topmed file read * return file names as strings * topmed kgx builder custom * topmed kgx builder custom * add skip * get files pattern recursive * version pin avalon * pin dug --------- Co-authored-by: braswent * Radx pipeline 
(#99) * point to large download * fix schema path * debug bulk input dir * fix schema read * fix schema read * fix schema read * commenting steup dir for test * adding logs * fix path stuff * add commented stuff back in * testing radx parser * adding parser * skip indexing vars with no id * adding indexes as part of bulk loader paramters * fix id index cli arg * fix local cli * dug latest --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent Co-authored-by: Michael T. Bacon Co-authored-by: Michael T Bacon <110547969+mbacon-renci@users.noreply.github.com> * pin avalon * deleted jenkins and added workflows * unlinked helx-actions * testing paths * testing again * d * tests * commented out pytest * try again for bandit * commented out bandit * changed dag to dags * Added fixes * Bagel (#103) * bump dug version * adding bdc new pipelines * adding curesc * adding bagel config * add test parser * add score threshold * point to dug develop * Dbgap programs (#104) * bump dug version * adding bdc new pipelines * adding curesc * fix up merge conflict * adding bagel config parse to bool * Dev sync main (#106) * release sync (#100) * Update _version.py (#86) * Update _version.py * Rti merge (#84) * roger cli preped for Merge Deploy * Update Makefile to work with python env * Update redisgraph-bulk-loader to fix issue with loading MODULE LIST * Revert "Update redisgraph-bulk-loader to fix issue with loading MODULE LIST" This reverts commit 7baf7efa725caac77e5501e948f545a0f4b20e3d. 
* Finalized dev deployment of dug inside Catapult Merge, deployment yamls, code changes and configurations * updated to reflect the Dug-Api updates to FastAPI * adding multi label redis by removing 'biolink:' on nodes, edges cannot be fixed after update so they need to be solved either by changing TranQl AND Plater or forking bulk-redisgraph to allow for colons to be added in the edges * Working multi label redis nodes w/ no biolink label * Latest code changes to deploy working Roger in Merge * biolink data move to '.' separator * updates to include new dug fixes, upgraded redis-bulk-loader and made changes to for biolink variables to specify it's domain with a 'biolink.' * adding test roger code * removed helm deployments * change docker owner * remove core.py * remove dup dev config * redis graph is not directly used removing cruft * remove print statement * remove logging files * update requriemtns * update requriemtns * add redis graph.py * fix import error for logger * adding es scheme and ca_path config * adding es scheme and ca_path config * adding debug code * removing debug * adding nodes args * adding biolink. * adding biolink. * Update requirements.txt * Update .gitignore * Update dug_utils.py Handle Error when curie not found in validate * Update __init__.py * Update config.yaml * Update dev-config.yaml * Update docker-compose.yaml * fixed docker-compose * adding back postgres volume to docker compose * env correction , docker compose updates --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent * adding v5.0 * cde-links branch * pin linkml * Update config.yaml collection_action to action * pop total items before result * print extracted elements * Update requirements.txt * Keep edge provenance (#94) * Update kgx.py * Update kgx.py * Update kgx.py can't delete edge keys while looping over them. 
* just collect then update * Update requirements.txt (#93) * Pipeline parameterize restructure (#95) * roger cli preped for Merge Deploy * Update Makefile to work with python env * Update redisgraph-bulk-loader to fix issue with loading MODULE LIST * Revert "Update redisgraph-bulk-loader to fix issue with loading MODULE LIST" This reverts commit 7baf7efa725caac77e5501e948f545a0f4b20e3d. * Finalized dev deployment of dug inside Catapult Merge, deployment yamls, code changes and configurations * updated to reflect the Dug-Api updates to FastAPI * adding multi label redis by removing 'biolink:' on nodes, edges cannot be fixed after update so they need to be solved either by changing TranQl AND Plater or forking bulk-redisgraph to allow for colons to be added in the edges * Working multi label redis nodes w/ no biolink label * Latest code changes to deploy working Roger in Merge * biolink data move to '.' separator * updates to include new dug fixes, upgraded redis-bulk-loader and made changes to for biolink variables to specify it's domain with a 'biolink.' * adding test roger code * removed helm deployments * change docker owner * remove core.py * remove dup dev config * redis graph is not directly used removing cruft * remove print statement * remove logging files * update requriemtns * update requriemtns * add redis graph.py * fix import error for logger * adding es scheme and ca_path config * adding es scheme and ca_path config * Parameterized annotate tasks with input_data_path and output_data_path * adding debug code * removing debug * adding nodes args * adding biolink. * adding biolink. 
* Parameterized annotate tasks with input_data_path and output_data_path (#85) * adding lakefs changes to roger-2.0 * point avalon to vg1 branch * change avalon dep * update airflow * fix avalon tag typo * update jenkins to tag version on main branch only * update jenkins to tag version * update jenkins to tag version * psycopg2 installation * add cncf k8s req * use airflow non-slim * simplified for testing * simplified for testing * change dag name * Erroneous parameter passed, should not be None * adding pre-exec * adding pre-exec * adding pre-exec * typo preexec * typo preexec * fix context * get files from repo * get files from repo * get files from repo * get files from repo * First shot at moving pipeline into base class and implementing. Anvil pipeline not complete * Syntax fix, docker image version bump to airflow 2.7.2-python3.11 * update storage dir * update remove dir code * update remove dir code * remote path to * * fix input dir for annotators * fix input dir for annotators * fix input dir for annotators * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * kwargs to task * adding branch info on lakefs config * callback push to branch * back to relative import * reformat temp branch name based on unique task id * add logging * add logging * convert posix path to str for avalon * add extra / to root path * New dag created using DugPipeline subclasses * EmptyOperator imported from wrong place * import and syntax fixes * utterly silly syntax error * Added anvil to default input data sets for testing purposes * adding / to local path * commit meta task args empty string * add merge logic * add merge logic * upstream task dir pull for downstream task * Switched from subdag to taskgroup because latest Airflow depricated subdag * Added BACPAC pipeline object * Temporarily ignoring configuration variable for enabled datasets for testing * 
Passed dag in to create task group to see if it helps dag errors * Fixed silly syntax error * adding input / output dir params for make kgx * Trying different syntax to make taskgroups work. * adding input / output dir params for make kgx * Parsing, syntax, pylint fixes * adding input / output dir params for make kgx * Added pipeline name to task group name to ensure uniqueness * oops, moved something out of scope. Fixed * Filled out pipeline with methods from dug_utils. Needs data path changes * Finished implementing input_data_path and output_data_path handling, pylint cleanup * Update requirements.txt * adding toggle to avoid sending config obj * adding toggle to avoid sending config obj * disable to string for test * control pipelines for testing * add self to anvil get files * add log stream to make it available * typo fix * correcting branch id * adding source repo * adding source repo * patch name-resolver response * no pass input repo and branch , if not overriden to pre-exec * no pass input repo and branch , if not overriden to pre-exec * no pass input repo and branch , if not overriden to pre-exec * dug pipeline edit * recurisvely find recursively * recurisvely find recursively * setup output path for crawling * all task functions should have input and output params * adding annotation as upstream for validate index * revamp create task , and task wrapper * add validate concepts index task * adding concept validation * add index_variables task as dependecy for validate concepts * add index_variables task as dependecy for validate concepts * await client exist * await client exist * concepts not getting picked up for indexing * concepts not getting picked up for indexing * fix search elements * converting annotation output to json * json format annotation outputs * adding support for json format elements and concepts read * json back to dug objects * fixing index valriables with json objects * indetation and new line for better change detection :? 
* indetation and new line for better change detection * treat dictionary concepts as dictionary * read concepts json as a dict * concepts files are actually file paths * debug message * make output jsonable * clear up dir after commit , and delete unmerged branch even if no changes * don`t clear indexes, parallel dataset processing will be taxed * memory leak? * memory leak? * memory leak? * dumping pickles to debug locally * find out why concepts are being added to every other element * find out why concepts are being added to every other element * pointless shuffle 🤷‍♂️ * revert back in time * back to sanitize dug * output just json for annotation * adding jsonpickle * jsonpickle 🥒 * unpickle for index * unpickle for validate index * crawling fixes * crawling fixes * crawling validation fixes * fix index concepts * fix makekgx * adding other bdc pipelines * adding pipeline paramters to be able to configure per instance * fix * add input dataset for pipelines * Adding README to document how to create data set-specific pipelines * catchup on base.py * Added dbgap and nida pipelines * fix import errors * annotator modules added by passing config val (#90) * annotator modules added by passing config val * fix merge conflict * following same pattern as parsers , modify configs * fix to dug config method * fix old dug pipeline for backward compatiblity * correct default annotator type * reflective changes * typo extra quotes * annotator type not being picked up from config * remove annotate simple , log env value for lakefs enabled * testing lakefs off * add more logging * add more logging * post init for config to parse to boolean * put back task calls * revert some changes * adding new pipeline * lakefs io support for merge task * fix name * add io params for kg tasks * wire up i/o paths for merge * fix variable name * print files * few debug logs * few debug logs * treat path as path not str * few debug logs * some fixes * logging edge files * bug fix knowledge has 
edge * re-org graph structure * adding pathing for other tasks * pagenation logic fix for avalon * update lakefs client code * fix glob for get kgx files * fix up get merged objects * send down fake commit id for metadata * working on edges schema * bulk create nodes I/O * find schema file * bulk create edges I/O * bulk create edges I/O * bulk load io * no outputs for final tasks * add recursive glob * fix globbing * oops * delete dags * pin dug to latest release * cruft cleanup * re-org kgx config * add support for multiple initial repos * fix comma * create dir to download to * swap branch and repo * clean up dirs * fix up other pipeline 👌 --------- Co-authored-by: YaphetKG * Add heal parsers (#96) * annotator modules added by passing config val * fix merge conflict * following same pattern as parsers , modify configs * fix to dug config method * fix old dug pipeline for backward compatiblity * correct default annotator type * reflective changes * typo extra quotes * annotator type not being picked up from config * remove annotate simple , log env value for lakefs enabled * testing lakefs off * add more logging * add more logging * post init for config to parse to boolean * put back task calls * revert some changes * adding new pipeline * lakefs io support for merge task * fix name * add io params for kg tasks * wire up i/o paths for merge * fix variable name * print files * few debug logs * few debug logs * treat path as path not str * few debug logs * some fixes * logging edge files * bug fix knowledge has edge * re-org graph structure * adding pathing for other tasks * pagenation logic fix for avalon * update lakefs client code * fix glob for get kgx files * fix up get merged objects * send down fake commit id for metadata * working on edges schema * bulk create nodes I/O * find schema file * bulk create edges I/O * bulk create edges I/O * bulk load io * no outputs for final tasks * add recursive glob * fix globbing * oops * delete dags * pin dug to latest 
release * cruft cleanup * re-org kgx config * add support for multiple initial repos * fix comma * create dir to download to * swap branch and repo * clean up dirs * fix up other pipeline 👌 * add remaining pipelines * adding ctn parser * change merge strategy * merge init fix * debug dir * fix topmed file read * fix topmed file read * return file names as strings * topmed kgx builder custom * topmed kgx builder custom * add skip * get files pattern recursive * version pin avalon * pin dug --------- Co-authored-by: braswent * Add heal parsers (#97) * annotator modules added by passing config val * fix merge conflict * following same pattern as parsers , modify configs * fix to dug config method * fix old dug pipeline for backward compatiblity * correct default annotator type * reflective changes * typo extra quotes * annotator type not being picked up from config * remove annotate simple , log env value for lakefs enabled * testing lakefs off * add more logging * add more logging * post init for config to parse to boolean * put back task calls * revert some changes * adding new pipeline * lakefs io support for merge task * fix name * add io params for kg tasks * wire up i/o paths for merge * fix variable name * print files * few debug logs * few debug logs * treat path as path not str * few debug logs * some fixes * logging edge files * bug fix knowledge has edge * re-org graph structure * adding pathing for other tasks * pagenation logic fix for avalon * update lakefs client code * fix glob for get kgx files * fix up get merged objects * send down fake commit id for metadata * working on edges schema * bulk create nodes I/O * find schema file * bulk create edges I/O * bulk create edges I/O * bulk load io * no outputs for final tasks * add recursive glob * fix globbing * oops * delete dags * pin dug to latest release * cruft cleanup * re-org kgx config * add support for multiple initial repos * fix comma * create dir to download to * swap branch and repo * clean up 
dirs * fix up other pipeline 👌 * add remaining pipelines * adding ctn parser * change merge strategy * merge init fix * debug dir * fix topmed file read * fix topmed file read * return file names as strings * topmed kgx builder custom * topmed kgx builder custom * add skip * get files pattern recursive * version pin avalon * pin dug --------- Co-authored-by: braswent * Radx pipeline (#99) * point to large download * fix schema path * debug bulk input dir * fix schema read * fix schema read * fix schema read * commenting steup dir for test * adding logs * fix path stuff * add commented stuff back in * testing radx parser * adding parser * skip indexing vars with no id * adding indexes as part of bulk loader paramters * fix id index cli arg * fix local cli * dug latest --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent Co-authored-by: Michael T. Bacon Co-authored-by: Michael T Bacon <110547969+mbacon-renci@users.noreply.github.com> * pin avalon --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent Co-authored-by: Howard Lander Co-authored-by: Michael T. Bacon Co-authored-by: Michael T Bacon <110547969+mbacon-renci@users.noreply.github.com> * remove jenkins file * bump apache version * revert airflow version --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent Co-authored-by: Howard Lander Co-authored-by: Michael T. 
Bacon Co-authored-by: Michael T Bacon <110547969+mbacon-renci@users.noreply.github.com> * Updated docker image to 2.10.2 * fix kgx path error * fix kgx path error * Trying out the slim apache images to reduce vulnerability footprint * Building general package update into dockerfile * Trying more rigorous dist-upgrade to try to get rid of vulnerabilities that won't quit * dist-upgrade also needs -y flag * Rolling back from slim image * Fixed apt-get syntax error * Reverting develop to 2.7.2 for stability's sake, will revisit after upgrades finish * Update Dockerfile * Update Dockerfile * Removed post-install package cleanup * Update requirements.txt * test merge issue * fix merge issue --------- Co-authored-by: Nathan Braswell Co-authored-by: esurface Co-authored-by: braswent Co-authored-by: Howard Lander Co-authored-by: Michael T. Bacon Co-authored-by: Michael T Bacon <110547969+mbacon-renci@users.noreply.github.com> Co-authored-by: Patrick Hachicho Co-authored-by: Patrick hachicho <105758539+pchachicho@users.noreply.github.com> --- dags/roger/core/storage.py | 3 +++ dags/roger/tasks.py | 4 +++- requirements.txt | 5 +++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dags/roger/core/storage.py b/dags/roger/core/storage.py index c1a3e80..e3c9697 100644 --- a/dags/roger/core/storage.py +++ b/dags/roger/core/storage.py @@ -167,6 +167,9 @@ def merge_path(name, path: Path=None): if not os.path.exists(ROGER_DATA_DIR / 'merge'): os.makedirs(ROGER_DATA_DIR / 'merge') return str(ROGER_DATA_DIR / 'merge' / name) + if not os.path.exists(path): + os.makedirs(path) + return str(path.joinpath(name)) def merged_objects(file_type, path=None): diff --git a/dags/roger/tasks.py b/dags/roger/tasks.py index f545114..678f713 100755 --- a/dags/roger/tasks.py +++ b/dags/roger/tasks.py @@ -176,7 +176,8 @@ def avalon_commit_callback(context: DagContext, **kwargs): branch=temp_branch_name, repo=repo, # @TODO figure out how to pass real commit id here - commit_id=branch + 
commit_id=branch, + source_branch_name=branch ) # see what changes are going to be pushed from this branch to main branch @@ -205,6 +206,7 @@ def avalon_commit_callback(context: DagContext, **kwargs): repository=repo, branch=temp_branch_name ) + logger.info(f"deleted temp branch {temp_branch_name}") logger.info(f"deleting local dir {local_path}") files_to_clean = glob.glob(local_path + '**', recursive=True) + [local_path] diff --git a/requirements.txt b/requirements.txt index 3a0ee22..2c9dbbf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,11 +4,12 @@ elasticsearch==8.5.2 flatten-dict jsonpickle redisgraph-bulk-loader==0.12.3 +setuptools>=66 pytest PyYAML -git+https://github.com/helxplatform/dug@2.13.2 +git+https://github.com/helxplatform/dug@2.13.4 orjson kg-utils==0.0.6 bmt==1.1.0 -git+https://github.com/helxplatform/avalon.git@v1.0.1 +git+https://github.com/helxplatform/avalon.git@v1.1.0 linkml-runtime==1.6.0