diff --git a/.github/scripts/test_sgkit_bgen.py b/.github/scripts/test_sgkit_bgen.py index 013db753d..1d0a334c0 100644 --- a/.github/scripts/test_sgkit_bgen.py +++ b/.github/scripts/test_sgkit_bgen.py @@ -4,7 +4,7 @@ if __name__ == "__main__": urllib.request.urlretrieve( - "https://github.com/pystatgen/sgkit/raw/main/sgkit/tests/io/bgen/data/example.bgen", + "https://github.com/sgkit-dev/sgkit/raw/main/sgkit/tests/io/bgen/data/example.bgen", "example.bgen", ) ds = read_bgen("example.bgen") diff --git a/.github/scripts/test_sgkit_plink.py b/.github/scripts/test_sgkit_plink.py index 0b8c7b184..053e5a7ab 100644 --- a/.github/scripts/test_sgkit_plink.py +++ b/.github/scripts/test_sgkit_plink.py @@ -5,7 +5,7 @@ if __name__ == "__main__": for ext in (".bed", ".bim", ".fam"): urllib.request.urlretrieve( - f"https://github.com/pystatgen/sgkit/raw/main/sgkit/tests/io/plink/data/plink_sim_10s_100v_10pmiss{ext}", + f"https://github.com/sgkit-dev/sgkit/raw/main/sgkit/tests/io/plink/data/plink_sim_10s_100v_10pmiss{ext}", f"plink_sim_10s_100v_10pmiss{ext}", ) ds = read_plink(path="plink_sim_10s_100v_10pmiss") diff --git a/.github/scripts/test_sgkit_vcf.py b/.github/scripts/test_sgkit_vcf.py index 33f0f3ba3..949ff7bd2 100644 --- a/.github/scripts/test_sgkit_vcf.py +++ b/.github/scripts/test_sgkit_vcf.py @@ -7,7 +7,7 @@ if __name__ == "__main__": for ext in (".gz", ".gz.tbi"): urllib.request.urlretrieve( - f"https://github.com/pystatgen/sgkit/raw/main/sgkit/tests/io/vcf/data/sample.vcf{ext}", + f"https://github.com/sgkit-dev/sgkit/raw/main/sgkit/tests/io/vcf/data/sample.vcf{ext}", f"sample.vcf{ext}", ) vcf_to_zarr("sample.vcf.gz", "out") diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 5f0eef317..59b18d823 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -7,14 +7,14 @@ on: env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} - BENCHMARKS_REPO: pystatgen/sgkit-benchmarks-asv + BENCHMARKS_REPO: 
sgkit-dev/sgkit-benchmarks-asv ASV_CONFIG: benchmarks/asv.conf.json MACHINE_NAME: github-actions # to identify github actions machine as hostname changes everytime jobs: build: # This workflow only runs on the origin org - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -62,7 +62,7 @@ jobs: run: | cd ~/$BENCHMARKS_REPO git add . - git config --global user.email "project@pystatgen.org" + git config --global user.email "project@sgkit-dev.org" git config --global user.name "sgkit benchmark bot" git commit -m "Update benchmarks" git push origin main diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 63598530f..fead4649a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,7 +10,7 @@ on: jobs: build: # Scheduled runs only on the origin org - if: (github.event_name == 'schedule' && github.repository_owner == 'pystatgen') || (github.event_name != 'schedule') + if: (github.event_name == 'schedule' && github.repository_owner == 'sgkit-dev') || (github.event_name != 'schedule') runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/check-docs.yml b/.github/workflows/check-docs.yml index 1f4b00289..3c29b0b53 100644 --- a/.github/workflows/check-docs.yml +++ b/.github/workflows/check-docs.yml @@ -10,7 +10,7 @@ on: jobs: build: # Scheduled runs only on the origin org - if: (github.event_name == 'schedule' && github.repository_owner == 'pystatgen') || (github.event_name != 'schedule') + if: (github.event_name == 'schedule' && github.repository_owner == 'sgkit-dev') || (github.event_name != 'schedule') runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2c1b1b723..3217acaa4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -28,7 +28,7 @@ jobs: make html SPHINXOPTS="-W --keep-going -n" - name: Commit documentation changes 
to gh-pages branch run: | - git clone https://github.com/pystatgen/sgkit.git --branch gh-pages --single-branch gh-pages + git clone https://github.com/sgkit-dev/sgkit.git --branch gh-pages --single-branch gh-pages mkdir -p gh-pages/latest cp -r docs/_build/html/* gh-pages/latest cd gh-pages diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index 4965a1870..1d92123ab 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -10,7 +10,7 @@ on: jobs: build: # This workflow only runs on the origin org - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml index 71b940d70..ed44811de 100644 --- a/.github/workflows/validation.yml +++ b/.github/workflows/validation.yml @@ -10,7 +10,7 @@ on: jobs: validation_suite: # This workflow only runs on the origin org - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index bdb0fd108..a1f16203b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -13,7 +13,7 @@ on: jobs: build: # This workflow only runs on the origin org - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' runs-on: ubuntu-latest strategy: matrix: @@ -40,7 +40,7 @@ jobs: unix-test: # This workflow only runs on the origin org - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' needs: ['build'] strategy: matrix: @@ -72,7 +72,7 @@ jobs: # Windows doesn't support vcf windows-test: # This workflow only runs on the origin org - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' runs-on: windows-latest needs: ['build'] strategy: @@ -100,7 +100,7 @@ jobs: python 
sgkit-copy/.github/scripts/test_sgkit_plink.py pypi-upload: - if: github.repository_owner == 'pystatgen' + if: github.repository_owner == 'sgkit-dev' runs-on: ubuntu-latest needs: ['unix-test', 'windows-test'] steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 725f31f30..2df069474 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -10,7 +10,7 @@ on: jobs: win_build: # Scheduled runs only on the origin org - if: (github.event_name == 'schedule' && github.repository_owner == 'pystatgen') || (github.event_name != 'schedule') + if: (github.event_name == 'schedule' && github.repository_owner == 'sgkit-dev') || (github.event_name != 'schedule') runs-on: windows-latest strategy: matrix: diff --git a/.mergify.yml b/.mergify.yml index 0c58a820d..b47535faf 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -6,7 +6,7 @@ queue_rules: - status-success=build (3.10) - status-success=build (3.11) - status-success=win_build (3.9) - - approved-reviews-by=@pystatgen/committers + - approved-reviews-by=@sgkit-dev/committers - "#approved-reviews-by>=1" - label=auto-merge @@ -18,7 +18,7 @@ pull_request_rules: - status-success=build (3.10) - status-success=build (3.11) - status-success=win_build (3.9) - - approved-reviews-by=@pystatgen/committers + - approved-reviews-by=@sgkit-dev/committers - "#approved-reviews-by>=1" - label=auto-merge actions: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b672bed25..3760123b1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,3 @@ All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome. -For general information on how to contribute see https://pystatgen.github.io/sgkit/latest/contributing.html. +For general information on how to contribute see https://sgkit-dev.github.io/sgkit/latest/contributing.html. 
diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 9df1703a2..0e96b94f3 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -1 +1 @@ -Please see our [code of conduct](https://github.com/pystatgen/.github/blob/master/CODE_OF_CONDUCT.md) for more information. \ No newline at end of file +Please see our [code of conduct](https://github.com/sgkit-dev/.github/blob/master/CODE_OF_CONDUCT.md) for more information. \ No newline at end of file diff --git a/README.md b/README.md index aa10ee76c..c181d5a44 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,17 @@ # sgkit: Statistical genetics toolkit in Python -[![Build status](https://github.com/pystatgen/sgkit/workflows/Build/badge.svg?branch=main)](https://github.com/pystatgen/sgkit/actions?query=workflow%3A%22Build%22+branch%3Amain) -[![Windows build status](https://github.com/pystatgen/sgkit/workflows/Windows/badge.svg?branch=main)](https://github.com/pystatgen/sgkit/actions?query=workflow%3A%22Windows%22+branch%3Amain) -[![Documentation status](https://github.com/pystatgen/sgkit/workflows/Docs/badge.svg?branch=main)](https://pystatgen.github.io/sgkit/) -[![Validation status](https://github.com/pystatgen/sgkit/workflows/Validation/badge.svg?branch=main)](https://github.com/pystatgen/sgkit/actions?query=workflow%3A%22Validation%22+branch%3Amain) -[![Upstream status](https://github.com/pystatgen/sgkit/workflows/Upstream/badge.svg?branch=main)](https://github.com/pystatgen/sgkit/actions?query=workflow%3A%22Upstream%22+branch%3Amain) -[![asv](https://img.shields.io/badge/Benchmarked%20by-asv-green.svg?style=flat)](https://pystatgen.github.io/sgkit-benchmarks-asv/) +[![Build status](https://github.com/sgkit-dev/sgkit/workflows/Build/badge.svg?branch=main)](https://github.com/sgkit-dev/sgkit/actions?query=workflow%3A%22Build%22+branch%3Amain) +[![Windows build status](https://github.com/sgkit-dev/sgkit/workflows/Windows/badge.svg?branch=main)](https://github.com/sgkit-dev/sgkit/actions?query=workflow%3A%22Windows%22+branch%3Amain) 
+[![Documentation status](https://github.com/sgkit-dev/sgkit/workflows/Docs/badge.svg?branch=main)](https://sgkit-dev.github.io/sgkit/) +[![Validation status](https://github.com/sgkit-dev/sgkit/workflows/Validation/badge.svg?branch=main)](https://github.com/sgkit-dev/sgkit/actions?query=workflow%3A%22Validation%22+branch%3Amain) +[![Upstream status](https://github.com/sgkit-dev/sgkit/workflows/Upstream/badge.svg?branch=main)](https://github.com/sgkit-dev/sgkit/actions?query=workflow%3A%22Upstream%22+branch%3Amain) +[![asv](https://img.shields.io/badge/Benchmarked%20by-asv-green.svg?style=flat)](https://sgkit-dev.github.io/sgkit-benchmarks-asv/) [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](http://numfocus.org) Sgkit is a Python package that provides a variety of analytical genetics methods through the use of general-purpose frameworks such as [Xarray](http://xarray.pydata.org/en/stable/), [Pandas](https://pandas.pydata.org/docs/), [Dask](https://docs.dask.org/en/latest/) and [Zarr](https://zarr.readthedocs.io/en/stable/). -For more information on using sgkit, see the [documentation](https://pystatgen.github.io/sgkit/). +For more information on using sgkit, see the [documentation](https://sgkit-dev.github.io/sgkit/). [//]: # (numfocus-fiscal-sponsor-attribution) diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index d914977fc..632f87d21 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -7,7 +7,7 @@ "project": "sgkit", // The project's homepage - "project_url": "https://pystatgen.github.io/sgkit/", + "project_url": "https://sgkit-dev.github.io/sgkit/", // The URL or local path of the source code repository for the // project being benchmarked @@ -55,7 +55,7 @@ //"install_timeout": 600, // the base URL to show a commit for the project. 
- "show_commit_url": "http://github.com/pystatgen/sgkit/commit/", + "show_commit_url": "http://github.com/sgkit-dev/sgkit/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. diff --git a/docs/_static/switcher.json b/docs/_static/switcher.json index 665560cb3..e05639402 100644 --- a/docs/_static/switcher.json +++ b/docs/_static/switcher.json @@ -1,37 +1,37 @@ [ { "version": "latest", - "url": "https://pystatgen.github.io/sgkit/latest/" + "url": "https://sgkit-dev.github.io/sgkit/latest/" }, { "name": "0.7.0 (stable)", "version": "0.7.0", - "url": "https://pystatgen.github.io/sgkit/0.7.0/" + "url": "https://sgkit-dev.github.io/sgkit/0.7.0/" }, { "name": "0.6.0", "version": "0.6.0", - "url": "https://pystatgen.github.io/sgkit/0.6.0/" + "url": "https://sgkit-dev.github.io/sgkit/0.6.0/" }, { "name": "0.5.0", "version": "0.5.0", - "url": "https://pystatgen.github.io/sgkit/0.5.0/" + "url": "https://sgkit-dev.github.io/sgkit/0.5.0/" }, { "version": "0.4.0", - "url": "https://pystatgen.github.io/sgkit/0.4.0/" + "url": "https://sgkit-dev.github.io/sgkit/0.4.0/" }, { "version": "0.3.0", - "url": "https://pystatgen.github.io/sgkit/0.3.0/" + "url": "https://sgkit-dev.github.io/sgkit/0.3.0/" }, { "version": "0.2.0a1", - "url": "https://pystatgen.github.io/sgkit/0.2.0a1/" + "url": "https://sgkit-dev.github.io/sgkit/0.2.0a1/" }, { "version": "0.1.0a1", - "url": "https://pystatgen.github.io/sgkit/0.1.0a1/" + "url": "https://sgkit-dev.github.io/sgkit/0.1.0a1/" } ] \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index fe8027343..f6e44a876 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -66,8 +66,8 @@ ] extlinks = { - "issue": ("https://github.com/pystatgen/sgkit/issues/%s", "GH %s"), - "pr": ("https://github.com/pystatgen/sgkit/pull/%s", "PR %s"), + "issue": ("https://github.com/sgkit-dev/sgkit/issues/%s", "GH %s"), + "pr": ("https://github.com/sgkit-dev/sgkit/pull/%s", "PR %s"), 
"user": ("https://github.com/%s", "%s"), } @@ -96,7 +96,7 @@ def filter(self, record: pylogging.LogRecord) -> bool: html_context = dict( display_github=False, # Integrate GitHub - github_user="pystatgen", # Username + github_user="sgkit-dev", # Username github_repo="sgkit", # Repo name github_version="main", # Version conf_py_path="/docs/", # Path in the checkout to the docs root @@ -142,7 +142,7 @@ def filter(self, record: pylogging.LogRecord) -> bool: # https://pydata-sphinx-theme.readthedocs.io/en/latest/user_guide/configuring.html html_theme_options = { - "github_url": "https://github.com/pystatgen/sgkit", + "github_url": "https://github.com/sgkit-dev/sgkit", "logo": { "image_light": "sgkit_trnsprnt.png", "image_dark": "sgkit_blue_trnsprnt.png", diff --git a/docs/contributing.rst b/docs/contributing.rst index 804ee2876..03ccb5ecc 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -31,20 +31,20 @@ Conversation about *sgkit* happens in the following places: Discussions on GitHub Discussions (and previously the forum) tend to be about higher-level themes, and statistical genetics in general. Coding details should be discussed on GitHub issues and pull requests. -.. _`GitHub Issue Tracker`: https://github.com/pystatgen/sgkit/issues -.. _`GitHub Discussions`: https://github.com/pystatgen/sgkit/discussions -.. _`Python for Statistical Genetics forum`: https://discourse.pystatgen.org/ +.. _`GitHub Issue Tracker`: https://github.com/sgkit-dev/sgkit/issues +.. _`GitHub Discussions`: https://github.com/sgkit-dev/sgkit/discussions +.. _`Python for Statistical Genetics forum`: https://discourse.sgkit-dev.org/ Code repositories ----------------- Code and documentation for *sgkit* is maintained in a few git repositories hosted on the -GitHub ``pystatgen`` organization, https://github.com/pystatgen. This includes the primary +GitHub ``sgkit-dev`` organization, https://github.com/sgkit-dev. 
This includes the primary repository and several other repositories for different components. A non-exhaustive list follows: -* https://github.com/pystatgen/sgkit: The main code repository containing the +* https://github.com/sgkit-dev/sgkit: The main code repository containing the data representations (in Xarray), algorithms, and most documentation Git and GitHub can be challenging at first. Fortunately good materials exist @@ -65,7 +65,7 @@ then check out the `"good first issue" label`_, which contains issues that are g for starting developers. Generally, familiarity with Python, NumPy, and some parallel computing (Dask) are assumed. -.. _`"good first issue" label`: https://github.com/pystatgen/sgkit/labels/good%20first%20issue +.. _`"good first issue" label`: https://github.com/sgkit-dev/sgkit/labels/good%20first%20issue Before starting work, make sure there is an issue covering the feature or bug you plan to produce a pull request for. Assign the issue to yourself to indicate that @@ -77,7 +77,7 @@ Development environment Download code ~~~~~~~~~~~~~ -Make a fork of the main `sgkit repository `_ and +Make a fork of the main `sgkit repository `_ and clone the fork:: git clone https://github.com//sgkit @@ -243,7 +243,7 @@ dependency, or add a new one, then don't forget to change both files. We try to After a release, the release manager will update the corresponding dependencies in the `conda-forge feedstock `_. -There is a `GitHub Action `_ that runs every night +There is a `GitHub Action `_ that runs every night against the main branches of our key upstream dependencies. This is useful for finding any breaking changes that would affect *sgkit*, so we can report or try to fix the problem before the upstream library is released. 
@@ -253,7 +253,7 @@ Build dependencies are listed in ``requirements-dev.txt`` and ``requirements-doc Contributing to documentation ----------------------------- -*sgkit* uses Sphinx_ for documentation, hosted at https://pystatgen.github.io/sgkit/. +*sgkit* uses Sphinx_ for documentation, hosted at https://sgkit-dev.github.io/sgkit/. Documentation is maintained in the RestructuredText markup language (``.rst`` files) in ``docs``. The documentation consists both of prose and API documentation. @@ -288,8 +288,8 @@ configuration file. The benchmarks should be written in the ``benchmarks/`` directory. For more information on different types of benchmarks have a look at the ``asv`` documentation here: https://asv.readthedocs.io/en/stable/writing_benchmarks.html#writing-benchmarks -The results of benchmarks are uploaded to benchmarks repository: https://github.com/pystatgen/sgkit-benchmarks-asv -via Github Actions. They can be seen on the static site here: https://pystatgen.github.io/sgkit-benchmarks-asv +The results of benchmarks are uploaded to benchmarks repository: https://github.com/sgkit-dev/sgkit-benchmarks-asv +via Github Actions. They can be seen on the static site here: https://sgkit-dev.github.io/sgkit-benchmarks-asv You can run the benchmark suite locally with:: @@ -341,7 +341,7 @@ Pull requests will be reviewed by a project maintainer. All changes to *sgkit* r approval by at least one maintainer. We use `mergify `_ to automate PR flow. A project -`committer `_ (reviewer) can decide +`committer `_ (reviewer) can decide to automatically merge a PR by labeling it with ``auto-merge``, and then when the PR gets at least one approval from a committer and a clean build it will get merged automatically. 
@@ -357,22 +357,22 @@ Dataset subclassing Debates on whether or not we should use Xarray objects directly or put them behind a layer of encapsulation: -- https://github.com/pystatgen/sgkit/pull/16#issuecomment-657725092 -- https://github.com/pystatgen/sgkit/pull/78#issuecomment-669878845 +- https://github.com/sgkit-dev/sgkit/pull/16#issuecomment-657725092 +- https://github.com/sgkit-dev/sgkit/pull/78#issuecomment-669878845 Dataset API typing ~~~~~~~~~~~~~~~~~~ Discussions around bringing stricter array type enforcement into the API: -- https://github.com/pystatgen/sgkit/issues/43 -- https://github.com/pystatgen/sgkit/pull/124 -- https://github.com/pystatgen/sgkit/pull/276 +- https://github.com/sgkit-dev/sgkit/issues/43 +- https://github.com/sgkit-dev/sgkit/pull/124 +- https://github.com/sgkit-dev/sgkit/pull/276 Dataset variables ~~~~~~~~~~~~~~~~~ -Naming conventions for variables: https://github.com/pystatgen/sgkit/issues/295 +Naming conventions for variables: https://github.com/sgkit-dev/sgkit/issues/295 Delayed invariant checks ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -380,25 +380,25 @@ Delayed invariant checks Discussions on how to run sanity checks on arrays efficiently and why those checks would be useful if they were possible (they are not possible currently w/ Dask): -- https://github.com/pystatgen/sgkit/issues/61 +- https://github.com/sgkit-dev/sgkit/issues/61 - https://github.com/dask/dask/issues/97 Mixed ploidy ~~~~~~~~~~~~ -Proposal for handling mixed ploidy: https://github.com/pystatgen/sgkit/issues/243 +Proposal for handling mixed ploidy: https://github.com/sgkit-dev/sgkit/issues/243 Numba guvectorize usage ~~~~~~~~~~~~~~~~~~~~~~~ Learning how to use ``guvectorize`` effectively: -- https://github.com/pystatgen/sgkit/pull/114 -- https://github.com/pystatgen/sgkit/pull/348 +- https://github.com/sgkit-dev/sgkit/pull/114 +- https://github.com/sgkit-dev/sgkit/pull/348 API namespace ~~~~~~~~~~~~~ Sgkit controls API namespace via init files. 
To accommodate for mypy and docstrings we include both imports and ``__all__`` declaration. More on this decision in the issue: -https://github.com/pystatgen/sgkit/issues/251 +https://github.com/sgkit-dev/sgkit/issues/251 diff --git a/docs/examples/gwas_tutorial.ipynb b/docs/examples/gwas_tutorial.ipynb index 1d720db0f..ad8fee564 100644 --- a/docs/examples/gwas_tutorial.ipynb +++ b/docs/examples/gwas_tutorial.ipynb @@ -13,7 +13,7 @@ "id": "written-product", "metadata": {}, "source": [ - "This notebook is an [sgkit](https://pystatgen.github.io/sgkit) port of Hail's [GWAS Tutorial](https://nbviewer.jupyter.org/github/tomwhite/sgkit/blob/86753e814c6d56982b6950ec3de727f3b1bfad7d/docs/examples/01-genome-wide-association-study.ipynb), which demonstrates how to run a genome-wide SNP association test. Readers are encouraged to read the Hail tutorial alongside this one for more background, and to see the motivation behind some of the steps.\n", + "This notebook is an [sgkit](https://sgkit-dev.github.io/sgkit) port of Hail's [GWAS Tutorial](https://nbviewer.jupyter.org/github/tomwhite/sgkit/blob/86753e814c6d56982b6950ec3de727f3b1bfad7d/docs/examples/01-genome-wide-association-study.ipynb), which demonstrates how to run a genome-wide SNP association test. Readers are encouraged to read the Hail tutorial alongside this one for more background, and to see the motivation behind some of the steps.\n", "\n", "_Note that some of the results do not exactly match the output from Hail. Also, since sgkit is still a 0.x release, its API is still subject to non-backwards compatible changes._" ] @@ -97,7 +97,7 @@ "id": "elementary-college", "metadata": {}, "source": [ - "Next, [convert it to Zarr](https://pystatgen.github.io/sgkit/latest/user_guide.html#converting-genetic-data-to-zarr), stored on the local filesystem in a directory called _1kg.zarr_." 
+ "Next, [convert it to Zarr](https://sgkit-dev.github.io/sgkit/latest/user_guide.html#converting-genetic-data-to-zarr), stored on the local filesystem in a directory called _1kg.zarr_." ] }, { @@ -3536,7 +3536,7 @@ "source": [ "QC is the process of filtering out poor quality data before running an analysis. This is usually an iterative process.\n", "\n", - "The [`sample_stats`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.sample_stats.html) function in sgkit computes a collection of useful metrics for each sample and stores them in new variables. (The Hail equivalent is `sample_qc`.)\n", + "The [`sample_stats`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.sample_stats.html) function in sgkit computes a collection of useful metrics for each sample and stores them in new variables. (The Hail equivalent is `sample_qc`.)\n", "\n", "Here's the dataset _before_ running `sample_stats`." ] @@ -5878,7 +5878,7 @@ "sample_dp_mean = dp.mean(dim=\"variants\")\n", "sample_dp_mean.attrs[\"long_name\"] = \"Mean Sample DP\"\n", "ds[\"sample_dp_mean\"] = sample_dp_mean # add new data array to dataset\n", - "# Following does not work with recent versions of xarray, see https://github.com/pystatgen/sgkit/issues/934\n", + "# Following does not work with recent versions of xarray, see https://github.com/sgkit-dev/sgkit/issues/934\n", "#ds.plot.scatter(x=\"sample_dp_mean\", y=\"sample_call_rate\", size=8, s=10);" ] }, @@ -5988,7 +5988,7 @@ "id": "banner-structure", "metadata": {}, "source": [ - "Variant QC is similar. This time we use the [`variant_stats`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variant_stats.html) function, but we won't do any filtering on these variables." + "Variant QC is similar. This time we use the [`variant_stats`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variant_stats.html) function, but we won't do any filtering on these variables." 
] }, { @@ -13117,7 +13117,7 @@ "ds_pca[\"sample_pca_projection_0\"] = ds_pca.sample_pca_projection[:,0]\n", "ds_pca[\"sample_pca_projection_1\"] = ds_pca.sample_pca_projection[:,1]\n", "ds_pca[\"sample_pca_projection_2\"] = ds_pca.sample_pca_projection[:,2]\n", - "# Following does not work with recent versions of xarray, see https://github.com/pystatgen/sgkit/issues/934\n", + "# Following does not work with recent versions of xarray, see https://github.com/sgkit-dev/sgkit/issues/934\n", "#ds_pca.plot.scatter(x=\"sample_pca_projection_0\", y=\"sample_pca_projection_1\", hue=\"SuperPopulation\", size=8, s=10);" ] }, diff --git a/docs/examples/relatedness_tutorial.ipynb b/docs/examples/relatedness_tutorial.ipynb index 9d1efb26e..2588a3b2f 100644 --- a/docs/examples/relatedness_tutorial.ipynb +++ b/docs/examples/relatedness_tutorial.ipynb @@ -8,7 +8,7 @@ "source": [ "# Relatedness Tutorial\n", "\n", - "This notebook demonstrates a variety of approaches for calculating relationship matrices in [sgkit](https://pystatgen.github.io/sgkit).\n", + "This notebook demonstrates a variety of approaches for calculating relationship matrices in [sgkit](https://sgkit-dev.github.io/sgkit).\n", "It covers the following topics:\n", "\n", "- Working with pedigree data\n", @@ -293,7 +293,7 @@ "metadata": {}, "source": [ "Next we convert the pandas dataframe to an xarray data array.\n", - "This is referred to as the [`parent_id`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.parent_id_spec.html#sgkit.variables.parent_id_spec) matrix in sgkit and has dimensions named \"samples\" and \"parents\"." + "This is referred to as the [`parent_id`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.parent_id_spec.html#sgkit.variables.parent_id_spec) matrix in sgkit and has dimensions named \"samples\" and \"parents\"." 
] }, { @@ -718,7 +718,7 @@ "Finally we create a Dataset.\n", "We will call this dataset `ped` to indicate that it primarily contains the pedigree data, and to differentiate it from a dataset containing genomic data that will be created later in this notebook.\n", "\n", - "In addition to adding the `parent_id` matrix to the dataset, we will also create a [`sample_id`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.sample_id_spec.html#sgkit.variables.sample_id_spec) array which is expected by many sgkit methods.\n", + "In addition to adding the `parent_id` matrix to the dataset, we will also create a [`sample_id`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.sample_id_spec.html#sgkit.variables.sample_id_spec) array which is expected by many sgkit methods.\n", "Fortunately the `sample_id` array is identical to the \"samples\" coordinate of the `parent_id` matrix." ] }, @@ -1148,7 +1148,7 @@ "id": "fb7e0036-30f7-4ceb-b38b-b37765b2c5ad", "metadata": {}, "source": [ - "Pedigree methods in sgkit use these variables to generate the [`parent`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.parent_spec.html) matrix which has the same shape as `parent_id` but contains the integer *indices* of each parent with `-1` indicating unknown parents.\n", + "Pedigree methods in sgkit use these variables to generate the [`parent`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.parent_spec.html) matrix which has the same shape as `parent_id` but contains the integer *indices* of each parent with `-1` indicating unknown parents.\n", "It is important to note that adding, removing or re-ordering samples in the dataset will invalidate these indices and they should be re-created using the `parent_indices` method." 
] }, @@ -1584,7 +1584,7 @@ "These expectations are based on some simplifying assumptions, the most important of which are that the pedigree is correct and that the founding individuals of the pedigree are *equally unrelated* from one another.\n", "Even when these assumptions hold, pedigree based estimates of relatedness will be slightly inaccurate as they ignore variance around the expectation caused by Mendelian sampling. \n", "\n", - "Using our pedigree dataset, we can calculate the additive relationship matrix (ARM) using the [`predigree_kinship`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.pedigree_kinship.html) method." + "Using our pedigree dataset, we can calculate the additive relationship matrix (ARM) using the [`pedigree_kinship`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.pedigree_kinship.html) method." ] }, { @@ -1607,7 +1607,7 @@ "id": "dfa3b663-a22b-495f-bd09-d3876afd94c0", "metadata": {}, "source": [ - "This returns a dataset with variables [`stat_pedigree_kinship`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.stat_pedigree_kinship_spec.html) and [`stat_pedigree_relationship`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.stat_pedigree_relationship_spec.html)." + "This returns a dataset with variables [`stat_pedigree_kinship`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.stat_pedigree_kinship_spec.html) and [`stat_pedigree_relationship`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.stat_pedigree_relationship_spec.html)."
] }, { @@ -2555,7 +2555,7 @@ "source": [ "Genomic estimation of relatedness attempts to estimate the *realized* relationships among individuals based upon shared alleles.\n", "To calculate genomic relationship estimates, we first need to read in the genomic marker data and construct a new dataset.\n", - "sgkit includes [IO methods](https://pystatgen.github.io/sgkit/latest/api.html#io-imports-and-exports) for several common datatype including BGEN, PLINK and VCF.\n", + "sgkit includes [IO methods](https://sgkit-dev.github.io/sgkit/latest/api.html#io-imports-and-exports) for several common datatype including BGEN, PLINK and VCF.\n", "However, it is also possible to load data from other formats by manually constructing the required data arrays.\n", "\n", "The pig SNP data used in this tutorial is provided as a simple text file of SNP dosages.\n", @@ -2804,7 +2804,7 @@ "Loading the data results in a matrix of SNP dosages (sometimes referred to as a SNP-matrix).\n", "Rows of the matrix correspond to individuals using the same identifiers as in the pedigree, and columns correspond to marker loci.\n", "\n", - "To work with this data in sgkit, we will translate it into the [`call_dosage`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.call_dosage_spec.html) variable.\n", + "To work with this data in sgkit, we will translate it into the [`call_dosage`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.call_dosage_spec.html) variable.\n", "This requires that we transpose the matrix so that rows correspond to variant loci and columns correspond to samples. 
" ] }, @@ -3674,7 +3674,7 @@ "id": "d831cae4-6ac6-44fb-95c0-79103a13d22d", "metadata": {}, "source": [ - "The estimate is then calculated with the [`genomic_relationship`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.genomic_relationship.html) method which returns a matrix called [`stat_genomic_relationship`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.stat_genomic_relationship_spec.html).\n", + "The estimate is then calculated with the [`genomic_relationship`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.genomic_relationship.html) method which returns a matrix called [`stat_genomic_relationship`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.stat_genomic_relationship_spec.html).\n", "However, we will rename this matrix to `\"stat_VanRaden_GRM_naive\"` for clarity." ] }, @@ -3993,9 +3993,9 @@ "id": "2201475d-f107-4cd5-98f4-dfd901b0d1f0", "metadata": {}, "source": [ - "We can calculate the Weir-Goudet estimator using the [`Weir_Goudet_beta`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.Weir_Goudet_beta.html) method.\n", + "We can calculate the Weir-Goudet estimator using the [`Weir_Goudet_beta`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.Weir_Goudet_beta.html) method.\n", "This method uses sample allele-frequencies rather than dosage because it can be applied to multi-allelic data.\n", - "Therefore, we first convert the `call_dosage` variable to the [`call_allele_frequency`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.variables.call_allele_frequency_spec.html) variable which holds per sample allele frequencies for both the reference and alternate alleles." + "Therefore, we first convert the `call_dosage` variable to the [`call_allele_frequency`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.variables.call_allele_frequency_spec.html) variable which holds per sample allele frequencies for both the reference and alternate alleles." 
] }, { @@ -4202,7 +4202,7 @@ "\n", "However, the scaling factors $\\tau$ and $\\omega$ (Martini et al 2018) can be used to fine-tune the influence of $A$ and $G$ in which case $H_{22}$ may differ from $G$ (the use of these scaling factors are beyond the scope of this tutorial).\n", "\n", - "An H-matrix can be calculated using the [`hybrid_relationship`](https://pystatgen.github.io/sgkit/latest/generated/sgkit.hybrid_relationship.html) method.\n", + "An H-matrix can be calculated using the [`hybrid_relationship`](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.hybrid_relationship.html) method.\n", "\n", "*Note that we use the chunk method to ensure a single chunk along each axis. Due to the array size, dask will automatically chunk these matrix into uneven sizes resulting in an error raised during matrix inversion*" ] diff --git a/docs/how_do_i.rst b/docs/how_do_i.rst index 66912a17d..0aa9c1d9e 100644 --- a/docs/how_do_i.rst +++ b/docs/how_do_i.rst @@ -87,7 +87,7 @@ Set an index on the dataset, then call :meth:`xarray.Dataset.sel`: ds.set_index(variants=("variant_contig", "variant_position")).sel(variants=(0, slice(2, 4))) -An API to make this easier is under discussion. Please add your requirements to https://github.com/pystatgen/sgkit/pull/658. +An API to make this easier is under discussion. Please add your requirements to https://github.com/sgkit-dev/sgkit/pull/658. Get the list of samples? ------------------------ diff --git a/docs/news/introducing_sgkit.md b/docs/news/introducing_sgkit.md index 0a0080f8c..203df4228 100644 --- a/docs/news/introducing_sgkit.md +++ b/docs/news/introducing_sgkit.md @@ -7,12 +7,12 @@ author: hammer --- ``` -The sgkit team is pleased to announce the release of [sgkit 0.5.0](https://github.com/pystatgen/sgkit/releases/tag/0.5.0)! 
This release adds support for the [VCF Zarr specification](https://github.com/pystatgen/vcf-zarr-spec), which describes an encoding of VCF data in chunked-columnar form using the [Zarr format](https://zarr.readthedocs.io/en/stable/). +The sgkit team is pleased to announce the release of [sgkit 0.5.0](https://github.com/sgkit-dev/sgkit/releases/tag/0.5.0)! This release adds support for the [VCF Zarr specification](https://github.com/sgkit-dev/vcf-zarr-spec), which describes an encoding of VCF data in chunked-columnar form using the [Zarr format](https://zarr.readthedocs.io/en/stable/). With this release, we also introduce our news page, where we will announce future releases and provide other relevant updates for the `sgkit` project. -Oxford and Related Sciences began collaborating in early 2020 on `sgkit` as a successor to the popular [scikit-allel](https://github.com/cggh/scikit-allel) library. We’ve worked closely with third-party library authors to read and write data stored in VCF ([cyvcf2](https://github.com/brentp/cyvcf2)), BGEN ([cbgen](https://github.com/limix/cbgen)), and PLINK ([bed_reader](https://github.com/fastlmm/bed-reader)) files. 
We’ve designed an [Xarray](https://github.com/pydata/xarray)-based [data model](https://pystatgen.github.io/sgkit/latest/getting_started.html#data-structures) and implemented many common methods from statistical and population genetics, including variant and sample [quality control](https://pystatgen.github.io/sgkit/latest/examples/gwas_tutorial.html#quality-control), [kinship analysis](https://pystatgen.github.io/sgkit/latest/generated/sgkit.pc_relate.html#sgkit-pc-relate), genome-wide [selection scans](https://pystatgen.github.io/sgkit/latest/generated/sgkit.Garud_H.html), and genome-wide [association analyses](https://pystatgen.github.io/sgkit/latest/generated/sgkit.gwas_linear_regression.html), as well as a [novel implementation](https://pystatgen.github.io/sgkit/latest/generated/sgkit.regenie.html#sgkit-regenie) of the recently developed [REGENIE algorithm](https://github.com/rgcgithub/regenie). +Oxford and Related Sciences began collaborating in early 2020 on `sgkit` as a successor to the popular [scikit-allel](https://github.com/cggh/scikit-allel) library. We’ve worked closely with third-party library authors to read and write data stored in VCF ([cyvcf2](https://github.com/brentp/cyvcf2)), BGEN ([cbgen](https://github.com/limix/cbgen)), and PLINK ([bed_reader](https://github.com/fastlmm/bed-reader)) files. 
We’ve designed an [Xarray](https://github.com/pydata/xarray)-based [data model](https://sgkit-dev.github.io/sgkit/latest/getting_started.html#data-structures) and implemented many common methods from statistical and population genetics, including variant and sample [quality control](https://sgkit-dev.github.io/sgkit/latest/examples/gwas_tutorial.html#quality-control), [kinship analysis](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.pc_relate.html#sgkit-pc-relate), genome-wide [selection scans](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.Garud_H.html), and genome-wide [association analyses](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.gwas_linear_regression.html), as well as a [novel implementation](https://sgkit-dev.github.io/sgkit/latest/generated/sgkit.regenie.html#sgkit-regenie) of the recently developed [REGENIE algorithm](https://github.com/rgcgithub/regenie). `sgkit` was accepted as a [NumFOCUS Sponsored Project](https://numfocus.org/project/sgkit) in 2021, and we now have developers in the US, the UK, and New Zealand. -If you think sgkit might be useful for your project, please don't hesitate to file an [issue](https://github.com/pystatgen/sgkit/issues) or start a [discussion](https://github.com/pystatgen/sgkit/discussions) with questions and feedback! +If you think sgkit might be useful for your project, please don't hesitate to file an [issue](https://github.com/sgkit-dev/sgkit/issues) or start a [discussion](https://github.com/sgkit-dev/sgkit/discussions) with questions and feedback! diff --git a/docs/user_guide.rst b/docs/user_guide.rst index f5292e23b..f9b84eef7 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -217,7 +217,7 @@ Merge can be used to rename output variables too. 
ds.merge(sg.count_variant_alleles(ds, merge=False).rename(variant_allele_count="my_variant_allele_count")) Note that there is a limitation where intermediate variables (``call_allele_count`` in this case) -are not returned if ``merge=False``. See https://github.com/pystatgen/sgkit/issues/405. +are not returned if ``merge=False``. See https://github.com/sgkit-dev/sgkit/issues/405. .. _python_interop: diff --git a/setup.cfg b/setup.cfg index 179dce270..f1473e357 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,14 @@ [metadata] name = sgkit author = sgkit Developers -author_email = project@pystatgen.org +author_email = project@sgkit-dev.org license = Apache description = Statistical genetics toolkit long_description_content_type=text/x-rst long_description = **sgkit** is an open source project for analyzing and manipulating genetic variation data. -url = https://github.com/pystatgen/sgkit +url = https://github.com/sgkit-dev/sgkit classifiers = Development Status :: 3 - Alpha License :: OSI Approved :: Apache Software License diff --git a/sgkit/io/vcfzarr_reader.py b/sgkit/io/vcfzarr_reader.py index 225a7bdaf..cff8353f8 100644 --- a/sgkit/io/vcfzarr_reader.py +++ b/sgkit/io/vcfzarr_reader.py @@ -53,7 +53,7 @@ def read_scikit_allel_vcfzarr( from a Zarr file created using scikit-allel's ``vcf_to_zarr`` function. This allows conversion from scikit-allel's Zarr format to sgkit's - `VCF Zarr <https://github.com/pystatgen/vcf-zarr-spec>`_ format. + `VCF Zarr <https://github.com/sgkit-dev/vcf-zarr-spec>`_ format. Since ``vcf_to_zarr`` does not preserve phasing information, there is no :data:`sgkit.variables.call_genotype_phased_spec` variable in the resulting dataset. 
@@ -339,7 +339,7 @@ def concat_zarrs_optimized( # copy variables that are to be rechunked # NOTE: that this uses _to_zarr function defined here that is needed to avoid # race conditions between writing the array contents and its metadata - # see https://github.com/pystatgen/sgkit/pull/486 + # see https://github.com/sgkit-dev/sgkit/pull/486 for var in vars_to_rechunk: dtype = None if fix_strings and var in {"variant_id", "variant_allele"}: diff --git a/sgkit/stats/pca.py b/sgkit/stats/pca.py index e8775cbd4..5a2fea566 100644 --- a/sgkit/stats/pca.py +++ b/sgkit/stats/pca.py @@ -322,7 +322,7 @@ def pca( def count_call_alternate_alleles(ds: Dataset, merge: bool = True) -> Dataset: - # TODO: Add to public API (https://github.com/pystatgen/sgkit/issues/282) + # TODO: Add to public API (https://github.com/sgkit-dev/sgkit/issues/282) AC = count_call_alleles(ds)["call_allele_count"] AC = AC[..., 1:].sum(dim="alleles").astype("int16") AC = AC.where(~ds.call_genotype_mask.any(dim="ploidy"), AC.dtype.type(-1)) diff --git a/sgkit/tests/io/plink/test_plink_reader.py b/sgkit/tests/io/plink/test_plink_reader.py index b891e9fea..ad31b3da6 100644 --- a/sgkit/tests/io/plink/test_plink_reader.py +++ b/sgkit/tests/io/plink/test_plink_reader.py @@ -10,10 +10,10 @@ # for 10 samples, 100 variants, and genotype calls # that are missing in ~10% of cases. # TODO: document and move code to central location -# (cf. https://github.com/pystatgen/sgkit-plink/pull/20#discussion_r466907811) +# (cf. 
https://github.com/sgkit-dev/sgkit-plink/pull/20#discussion_r466907811) example_dataset_1 = "plink_sim_10s_100v_10pmiss" -# This data was generated by following https://github.com/pystatgen/sgkit/issues/947 +# This data was generated by following https://github.com/sgkit-dev/sgkit/issues/947 example_dataset_2 = "example" diff --git a/sgkit/tests/io/vcf/test_vcf_scikit_allel.py b/sgkit/tests/io/vcf/test_vcf_scikit_allel.py index 131da41cb..fce7c9b8f 100644 --- a/sgkit/tests/io/vcf/test_vcf_scikit_allel.py +++ b/sgkit/tests/io/vcf/test_vcf_scikit_allel.py @@ -135,12 +135,12 @@ def test_DP_field(shared_datadir, tmpdir): "vcf_file,allel_exclude_fields,sgkit_exclude_fields,max_alt_alleles", [ # Excluding AA here because of pad-vs-missing data in sckit-allel strings - # https://github.com/pystatgen/sgkit/issues/1195 + # https://github.com/sgkit-dev/sgkit/issues/1195 ("sample.vcf.gz", ["AA"], ["INFO/AA"], 3), ("mixed.vcf.gz", None, None, 3), # exclude PL since it has Number=G, which is not yet supported # Excluding PGT and PID here because of pad-vs-missing data in sckit-allel strings - # https://github.com/pystatgen/sgkit/issues/1195 + # https://github.com/sgkit-dev/sgkit/issues/1195 # increase max_alt_alleles since scikit-allel does not truncate genotype calls ( "CEUTrio.20.21.gatk3.4.g.vcf.bgz", diff --git a/sgkit/tests/test_ld.py b/sgkit/tests/test_ld.py index c6848d04d..3fb08b01a 100644 --- a/sgkit/tests/test_ld.py +++ b/sgkit/tests/test_ld.py @@ -158,7 +158,7 @@ def ld_prune_args(draw): @settings(max_examples=50, deadline=None, phases=PHASES_NO_SHRINK) @example(args=(np.array([[1, 1], [1, 1]], dtype="uint8"), 1, 1, 0.0, -1)) @pytest.mark.skip( - reason="Hypothesis generates failures that need investigation: https://github.com/pystatgen/sgkit/issues/864" + reason="Hypothesis generates failures that need investigation: https://github.com/sgkit-dev/sgkit/issues/864" ) def test_vs_skallel(args): x, size, step, threshold, chunks = args diff --git 
a/sgkit/tests/test_pc_relate.py b/sgkit/tests/test_pc_relate.py index b1f3a8997..8c7e5c7bb 100644 --- a/sgkit/tests/test_pc_relate.py +++ b/sgkit/tests/test_pc_relate.py @@ -113,7 +113,7 @@ def test_pc_relate__values_within_range() -> None: def test_pc_relate__parent_child_relationship() -> None: - # Eric's source: https://github.com/pystatgen/sgkit/pull/228#discussion_r487436876 + # Eric's source: https://github.com/sgkit-dev/sgkit/pull/228#discussion_r487436876 # Create a dataset that is 2/3 founders and 1/3 progeny seed = 1 diff --git a/sgkit/tests/test_pedigree.py b/sgkit/tests/test_pedigree.py index 9cd6e89ee..bcffdb023 100644 --- a/sgkit/tests/test_pedigree.py +++ b/sgkit/tests/test_pedigree.py @@ -1322,7 +1322,7 @@ def test_pedigree_inverse_kinship__raise_on_half_founder(): @pytest.mark.skipif( sys.version_info >= (3, 11), - reason="Fails on Python 3.11, due to Numba error, see https://github.com/pystatgen/sgkit/pull/1080", + reason="Fails on Python 3.11, due to Numba error, see https://github.com/sgkit-dev/sgkit/pull/1080", ) def test_pedigree_inverse_kinship__raise_on_singular_kinship_matrix(): ds = sg.simulate_genotype_call_dataset(n_variant=1, n_sample=4, n_ploidy=4, seed=1) diff --git a/sgkit/tests/test_regenie.py b/sgkit/tests/test_regenie.py index 39e761405..325a15f5d 100644 --- a/sgkit/tests/test_regenie.py +++ b/sgkit/tests/test_regenie.py @@ -322,7 +322,7 @@ def test_regenie__glow_comparison(ndarray_type: str, datadir: Path) -> None: check_simulation_result(datadir, config, run, xp) -@pytest.mark.xfail(reason="See https://github.com/pystatgen/sgkit/issues/456") +@pytest.mark.xfail(reason="See https://github.com/sgkit-dev/sgkit/issues/456") def test_regenie__no_loco_with_one_contig(): # LOCO is not possible with a single contig ds = simulate_regression_dataset( diff --git a/sgkit/tests/test_utils.py b/sgkit/tests/test_utils.py index 533d54a42..78bbcc53d 100644 --- a/sgkit/tests/test_utils.py +++ b/sgkit/tests/test_utils.py @@ -225,7 +225,7 @@ def 
test_max_str_len__invalid_dtype(): max_str_len(np.array([1])) -# track failure in https://github.com/pystatgen/sgkit/issues/890 +# track failure in https://github.com/sgkit-dev/sgkit/issues/890 def test_max_str_len__dask_failure(): pytest.importorskip("dask", minversion="2022.8") with pytest.raises(Exception): diff --git a/validation/gwas/method/pc_relate/Dockerfile b/validation/gwas/method/pc_relate/Dockerfile index dd8aba380..07a168827 100644 --- a/validation/gwas/method/pc_relate/Dockerfile +++ b/validation/gwas/method/pc_relate/Dockerfile @@ -1,7 +1,7 @@ FROM rstudio/r-base:4.0-focal # Note: We freeze versions because we want point in time validation -# See: https://github.com/pystatgen/sgkit/pull/228 +# See: https://github.com/sgkit-dev/sgkit/pull/228 RUN apt-get update \ && apt-get install python3 python3-pip git pkg-config -y \ diff --git a/validation/gwas/method/regenie/sgkit_zarr.py b/validation/gwas/method/regenie/sgkit_zarr.py index a923b36a1..5eaa24b3c 100644 --- a/validation/gwas/method/regenie/sgkit_zarr.py +++ b/validation/gwas/method/regenie/sgkit_zarr.py @@ -20,7 +20,7 @@ def run(dataset: str, dataset_dir="data/dataset"): zarr_path = dataset_dir / dataset / "genotypes.zarr.zip" ds = read_plink(path=plink_path, bim_sep="\t", fam_sep="\t") # Pre-compute string lengths until this is done: - # https://github.com/pystatgen/sgkit-plink/issues/12 + # https://github.com/sgkit-dev/sgkit-plink/issues/12 ds = ds.compute() logger.info(f"Loaded dataset {dataset}:") logger.info("\n" + str(ds)) diff --git a/validation/gwas/method/regenie_loco_regression/GlowGR_continuous.ipynb b/validation/gwas/method/regenie_loco_regression/GlowGR_continuous.ipynb index 7cb6054f4..28d305d5e 100644 --- a/validation/gwas/method/regenie_loco_regression/GlowGR_continuous.ipynb +++ b/validation/gwas/method/regenie_loco_regression/GlowGR_continuous.ipynb @@ -45,8 +45,8 @@ "outputs": [], "source": [ "# Download the zip file mentioned in the link below to target directory and modify the 
path below as needed:\n", - "# https://github.com/pystatgen/sgkit/issues/622\n", - "# Direct link: https://github.com/pystatgen/sgkit/files/6779496/regenie_simulations.zip\n", + "# https://github.com/sgkit-dev/sgkit/issues/622\n", + "# Direct link: https://github.com/sgkit-dev/sgkit/files/6779496/regenie_simulations.zip\n", "dsdir = Path('../../../../sgkit/tests/test_regenie/dataset/sim_sm_02')\n", "\n", "genotypes_vcf = dsdir / 'genotypes.bed'\n",