
Community #105

Merged (30 commits, Dec 15, 2022)
Commits
39713ec
move the whole PGGB into a subworkflow
subwaystation Dec 12, 2022
48b172f
we can run with an outside FASTA
subwaystation Dec 12, 2022
bcbd323
elegance
subwaystation Dec 12, 2022
d55b3f8
community skeleton
subwaystation Dec 12, 2022
ef26895
community wfmash map
subwaystation Dec 12, 2022
36fd52f
refactor wfmashMap into a subworkflow
subwaystation Dec 13, 2022
8683da7
we have our FASTA communities!
subwaystation Dec 13, 2022
a0ba63e
step by step
subwaystation Dec 13, 2022
b5c62e7
stuck in the middle with you
subwaystation Dec 14, 2022
8b6a7bb
before seqwish
subwaystation Dec 14, 2022
fb226ca
HOLIDAYgit add subworkflows/*!
subwaystation Dec 14, 2022
91a6139
let's cheat
subwaystation Dec 14, 2022
c156bca
hmm
subwaystation Dec 14, 2022
5370b6e
fix fai,gzi paths
subwaystation Dec 14, 2022
c93c9b9
remove unecessary .view()
subwaystation Dec 14, 2022
fff9e58
fix vg_deconstruct issues
subwaystation Dec 14, 2022
461b2d6
text communities and a more recent NXFv
subwaystation Dec 14, 2022
106fe2d
EOL
subwaystation Dec 14, 2022
a204f7c
remove dangling .view()
subwaystation Dec 14, 2022
0b668eb
update test parameters for faster runtime
subwaystation Dec 14, 2022
b00a322
update test parameters for faster runtime
subwaystation Dec 14, 2022
f893c02
test new CI setup
subwaystation Dec 15, 2022
7a659ed
maybe we can't go latest
subwaystation Dec 15, 2022
c5eff66
la vie est fantastique
subwaystation Dec 15, 2022
77440ef
we want the matrix across parameters
subwaystation Dec 15, 2022
bf7ab00
force older Nextflow version
subwaystation Dec 15, 2022
fa85add
squeeze it!
subwaystation Dec 15, 2022
ce0da3e
fix --wfmash_only bug
subwaystation Dec 15, 2022
490f54e
subworkflow squeeze is ready :)
subwaystation Dec 15, 2022
a426db6
update help text and online parameter docs
subwaystation Dec 15, 2022
271 changes: 251 additions & 20 deletions .github/workflows/ci.yml
@@ -8,19 +8,102 @@ on:
release:
types: [published]

env:
NXF_ANSI_LOG: false

jobs:
test:
name: Run workflow tests
name: Run pipeline with test data
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Nextflow versions: check pipeline minimum and current latest
NXF_VER:
- "20.10.0"
- "22.04.5"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Check if Dockerfile or Conda environment changed
uses: technote-space/get-diff-action@v4
with:
FILES: |
Dockerfile
environment.yml

- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/pangenome:dev

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/pangenome:dev
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
run: |
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12

no_viz_no_layout:
name: Run pipeline without graph visualizations or graph layouts
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Parameter sets to test
parameters:
- "--no_viz"
- "--no_layout"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Check if Dockerfile or Conda environment changed
uses: technote-space/get-diff-action@v4
with:
FILES: |
Dockerfile
environment.yml

- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/pangenome:dev

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/pangenome:dev
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
run: |
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 ${{ matrix.parameters }}

vg_deconstruct:
name: Run pipeline with vg deconstruct parameter
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
env:
NXF_VER: ${{ matrix.nxf_ver }}
NXF_ANSI_LOG: false
strategy:
matrix:
# Nextflow versions: check pipeline minimum and current latest
nxf_ver: ['20.10.0', '21.04.1', '21.10.3']
parameters:
- "--vcf_spec \"gi|568815561:#,gi|568815567:#\""
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
@@ -43,22 +126,170 @@ jobs:
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
env:
CAPSULE_LOG: none
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
run: |
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 ${{ matrix.parameters }}

smoothxg:
name: Run pipeline with smoothxg parameters
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Parameter sets to test
parameters:
- "--smoothxg_write_maf --smoothxg_poa_length 100,200,300 --smoothxg_run_abpoa --smoothxg_run_global_poa"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Check if Dockerfile or Conda environment changed
uses: technote-space/get-diff-action@v4
with:
FILES: |
Dockerfile
environment.yml

- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/pangenome:dev

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/pangenome:dev
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
run: |
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 ${{ matrix.parameters }}

wfmash_chunks:
name: Run pipeline with wfmash chunk parameter
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Parameter sets to test
parameters:
- "--wfmash_chunks 2"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Check if Dockerfile or Conda environment changed
uses: technote-space/get-diff-action@v4
with:
FILES: |
Dockerfile
environment.yml

- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/pangenome:dev

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/pangenome:dev
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
run: |
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 ${{ matrix.parameters }}

wfmash_only:
name: Run only the wfmash part of the pipeline
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Parameter sets to test
parameters:
- "--wfmash_only"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Check if Dockerfile or Conda environment changed
uses: technote-space/get-diff-action@v4
with:
FILES: |
Dockerfile
environment.yml

- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/pangenome:dev

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/pangenome:dev
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
run: |
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 ${{ matrix.parameters }}

communities:
name: Run the pipeline with the communities parameter
# Only run on push if this is the nf-core dev branch (merged PRs)
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/pangenome') }}
runs-on: ubuntu-latest
strategy:
matrix:
# Parameter sets to test
parameters:
- "--communities --squeeze_gfa"
steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Check if Dockerfile or Conda environment changed
uses: technote-space/get-diff-action@v4
with:
FILES: |
Dockerfile
environment.yml

- name: Build new docker image
if: env.MATCHED_FILES
run: docker build --no-cache . -t nfcore/pangenome:dev

- name: Pull docker image
if: ${{ !env.MATCHED_FILES }}
run: |
docker pull nfcore/pangenome:dev
docker tag nfcore/pangenome:dev nfcore/pangenome:dev

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
# We also test basic visualization and reporting options here
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 --no_viz --no_layout
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 --smoothxg consensus_spec 10,100,1000
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 --vcf_spec "gi|568815561:#,gi|568815567:#"
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 --smoothxg_write_maf
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 --wfmash_chunks 2
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 --wfmash_only
NXF_VER=22.04.5 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_haplotypes 12 ${{ matrix.parameters }}
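The CI jobs above all share one shape: a per-job `parameters` matrix whose entries are appended to the same base invocation, `nextflow run … -profile test,docker --n_haplotypes 12`, so each matrix entry becomes an independent pipeline run. A minimal Python sketch of that expansion (illustrative only — not part of the PR; the matrix values are copied from the `ci.yml` diff above):

```python
# Illustrative sketch: how the per-job `parameters` matrices in ci.yml
# expand into concrete `nextflow run` command lines, one run per entry.
BASE = "NXF_VER=22.04.5 nextflow run . -profile test,docker --n_haplotypes 12"

# One key per CI job; values copied from the matrices in the diff above.
MATRIX = {
    "no_viz_no_layout": ["--no_viz", "--no_layout"],
    "smoothxg": ["--smoothxg_write_maf --smoothxg_poa_length 100,200,300 "
                 "--smoothxg_run_abpoa --smoothxg_run_global_poa"],
    "wfmash_chunks": ["--wfmash_chunks 2"],
    "wfmash_only": ["--wfmash_only"],
    "communities": ["--communities --squeeze_gfa"],
}

def expand(matrix):
    """Return one full command line per (job, parameter-set) combination."""
    return [f"{BASE} {params}".strip()
            for job in matrix
            for params in matrix[job]]
```

This mirrors why the jobs were split: GitHub Actions fails a whole job on the first failing step, so one job per parameter set keeps the runs independent and parallel.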
2 changes: 2 additions & 0 deletions Dockerfile
@@ -10,6 +10,8 @@ RUN apt-get update \
&& apt-get clean -y && rm -rf /var/lib/apt/lists/*

COPY bin/split_approx_mappings_in_chunks.py /
COPY bin/paf2net.py /
COPY bin/net2communities.py /

# Install miniconda
RUN wget \
83 changes: 83 additions & 0 deletions bin/net2communities.py
@@ -0,0 +1,83 @@
import argparse

# Create the parser and add arguments
parser = argparse.ArgumentParser(
description="Detects communities by applying the Leiden algorithm (Traag et al., 2018).",
epilog='Author: Andrea Guarracino (https://github.com/AndreaGuarracino)'
)
parser.add_argument('-e', '--edge-list', dest='edge_list', help="edge list representing the pairs of sequences mapped in the network", required=True)
parser.add_argument('-w', '--edge-weights', dest='edge_weights', help="list of edge weights", required=True)
parser.add_argument('-n', '--vertice-names', dest='vertice_names', help="'id to sequence name' map", required=True)
parser.add_argument('--output-prefix', dest='output_prefix', default="", help="prefix to add to the output filenames")
parser.add_argument('--accurate-detection', dest='accurate', default=False, action='store_true', help="accurate community detection (slower)")
parser.add_argument('--plot', dest='plot', default=False, action='store_true', help="plot the network, coloring by community and labeling with contig/scaffold names (it assumes PanSN naming)")

# Parse and print the results
args = parser.parse_args()


import igraph as ig

# Read weights
weight_list = [float(x) for x in open(args.edge_weights).read().strip().split('\n')]

# Read the edge list and initialize the network
g = ig.read(filename=args.edge_list, format='edgelist', directed=False)

# Detect the communities
partition = g.community_leiden(
objective_function='modularity',
n_iterations=120 if args.accurate else 60, # -1 would indicate to iterate until convergence
weights=weight_list
)

# Slower implementation
# import leidenalg as la
# partition = la.find_partition(
# g,
# la.ModularityVertexPartition,
# n_iterations=-1 if args.accurate else 30, # -1 indicates to iterate until convergence
# weights=weight_list,
# seed=42
# )

print(f'Detected {len(partition)} communities.')

# Write the communities
id_2_name_dict = {}
with open(args.vertice_names) as f:
for line in f:
id, name = line.strip().split(' ')

id_2_name_dict[int(id)] = name

output_prefix = args.output_prefix if args.output_prefix else args.edge_weights

for id_community, id_members in enumerate(partition):
with open(f'{output_prefix}.community.{id_community}.txt', 'w') as fw:
for id in id_members:
fw.write(f'{id_2_name_dict[id]}\n')

# Write the plot
if args.plot:
print('Plotting on PDF')

# Take contig names (it assumes PanSN naming)
name_list = [x.split(' ')[-1].split('#')[-1] for x in id_2_name_dict.values()]

# Scale edge widths to roughly 0-5
max_weight = max(weight_list) / 5.0

ig.plot(
partition,
target = f'{output_prefix}.communities.pdf',
vertex_size=50,
#vertex_color=['blue', 'red', 'green', 'yellow'],
vertex_label=name_list,
vertex_label_size=20,
#vertex_label_color='black',
edge_width=[x/max_weight for x in weight_list],
#edge_color=['black', 'grey'],
bbox=(2000, 2000),
margin=100
)
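The output step of `net2communities.py` maps each vertex id back to its sequence name via the `--vertice-names` file and writes one `<prefix>.community.<i>.txt` file per community. A stdlib-only sketch of that step (the partition is passed in as plain id lists here; the real script obtains it from igraph's `community_leiden`):

```python
import pathlib

def write_communities(names_text, partition, output_prefix):
    """Write one <output_prefix>.community.<i>.txt file per community.

    names_text: contents of the '--vertice-names' file ("id name" per line).
    partition:  iterable of communities, each a list of vertex ids
                (hardcoded here for illustration; the real script gets
                this from igraph's community_leiden).
    Returns the list of written file paths.
    """
    id_2_name = {}
    for line in names_text.strip().splitlines():
        idx, name = line.strip().split(' ')
        id_2_name[int(idx)] = name

    paths = []
    for i, members in enumerate(partition):
        path = pathlib.Path(f"{output_prefix}.community.{i}.txt")
        path.write_text("".join(f"{id_2_name[m]}\n" for m in members))
        paths.append(path)
    return paths
```

With names following PanSN conventions (e.g. `sampleA#1#chr1`), each output file lists the sequences belonging to one community, which downstream steps can use to partition the input FASTA.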