-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from simonlabcode/featurecount
bam2bakR version 3.0.0
- Loading branch information
Showing
55 changed files
with
783,024 additions
and
1,182 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
name: Tests | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
- featurecount | ||
pull_request: | ||
branches_ignore: [] | ||
|
||
jobs: | ||
formatting: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout with submodules | ||
uses: actions/checkout@v3 | ||
with: | ||
submodules: recursive | ||
fetch-depth: 0 | ||
- name: Formatting | ||
uses: github/super-linter@v5 | ||
env: | ||
VALIDATE_ALL_CODEBASE: false | ||
DEFAULT_BRANCH: featurecount | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
VALIDATE_SNAKEMAKE_SNAKEFMT: true | ||
linting: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Linting | ||
uses: snakemake/[email protected] | ||
with: | ||
directory: .test | ||
snakefile: workflow/Snakefile | ||
args: "--configfile .test/configs/star_config.yaml --lint" | ||
|
||
run-workflow: | ||
runs-on: ubuntu-latest | ||
needs: | ||
- linting | ||
- formatting | ||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v3 | ||
- name: Test workflow (star) | ||
uses: snakemake/[email protected] | ||
with: | ||
directory: .test | ||
snakefile: workflow/Snakefile | ||
args: "--configfile .test/configs/star_config.yaml --use-conda --show-failed-logs --cores 2 --conda-cleanup-pkgs cache" | ||
- name: Test workflow (bam) | ||
uses: snakemake/[email protected] | ||
with: | ||
directory: .test | ||
snakefile: workflow/Snakefile | ||
args: "--configfile .test/configs/bam2bakR.yaml --use-conda --show-failed-logs --cores 2 --conda-cleanup-pkgs cache" |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
usage: | ||
mandatory-flags: # optional definition of additional flags | ||
desc: # describe your flags here in a few sentences (they will be inserted below the example commands) | ||
flags: # put your flags here | ||
software-stack-deployment: # definition of software deployment method (at least one of conda, singularity, or singularity+conda) | ||
conda: true # whether pipeline works with --use-conda | ||
singularity: true # whether pipeline works with --use-singularity | ||
singularity+conda: true # whether pipeline works with --use-singularity --use-conda | ||
report: false # add this to confirm that the workflow allows to use 'snakemake --report report.zip' to generate a report containing all results and explanations |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
##### Parameters you will likely have to change ##### | ||
|
||
# Run bam2bakR only? If true, will expect paths to bam files as input and alignment steps will be skipped. If false, | ||
# paths to directories containing fastq files will be expected as input. | ||
bam2bakr: True | ||
|
||
# path to directory containing fastq files if bam2bakr is False | ||
# path to bam files if bam2bakr is True | ||
## example of what samples could look like for bam input | ||
samples: | ||
WT_1: data/bams/WT_replicate_1.bam | ||
WT_2: data/bams/WT_replicate_2.bam | ||
WT_ctl: data/bams/WT_nos4U.bam | ||
|
||
|
||
## example of what samples will look like for fastq input | ||
# samples: | ||
# WT_1: data/fastq/WT_1 | ||
# WT_2: data/fastq/WT_2 | ||
# WT_ctl: data/fastq/WT_ctl | ||
# KO_1: data/fastq/KO_1 | ||
# KO_2: data/fastq/KO_2 | ||
# KO_ctl: data/fastq/KO_ctl | ||
|
||
# location of annotation gtf file | ||
annotation: data/annotation/GRCh38.gtf | ||
|
||
# -s4U control sample IDs | ||
control_samples: ['WT_ctl', 'KO_ctl'] | ||
|
||
# location of genome fasta file | ||
genome_fasta: data/genome/GRCh38.fa | ||
|
||
##### Parameters that are always relevant ##### | ||
|
||
# Format of reads | ||
FORMAT: "PE" # (PE, SE, NU) | ||
# [SE - single end reads] | ||
# [NU - including non-unique] (not tested) | ||
|
||
|
||
# Strandedness of reads | ||
strandedness: "R" # (F, R); F means read 1 represents original RNA sequence (vs. its reverse complement). | ||
|
||
|
||
# Type of browser tracks to generate | ||
mut_tracks: "TC" # ("TC", "GA", "TC,GA") | ||
|
||
# Minimum base quality to call mutation | ||
minqual: 40 | ||
|
||
# String common to spike-in gene_ids in annotation gtf | ||
# If you have no spike-ins, then this should be "\"\"", i.e., an empty string ("") | ||
spikename: "\"\"" | ||
|
||
# If True, tracks will be normalized | ||
normalize: True | ||
|
||
# Are you using the Windows subsystem for linux? 0 = Yes, 1 = No | ||
WSL: 1 | ||
|
||
# Are there jI and jM tags in your bam file? | ||
remove_tags: False | ||
|
||
|
||
##### Parameters that are only relevant if bam2bakr is False ##### | ||
|
||
# location of hisat2 indices (directory containing indices) | ||
HISAT2: data/hisat2/grch38_tran/ | ||
|
||
# location of hisat3n indices (path to and common prefix of all .ht2 files) | ||
HISAT_3N: data/hisat_3n/GRCh38 | ||
|
||
# location of STAR indices | ||
STAR_index: data/star/ | ||
|
||
# If TRUE, hisat-3n will be used for alignment | ||
use_hisat3n: False | ||
|
||
# If TRUE STAR will be used for alignment; if hisat-3n is also TRUE, hisat-3n will be used and not STAR | ||
use_star: False | ||
|
||
# If TRUE, index will be built for star and directory will be created at path set in STAR_index parameter | ||
build_star: False | ||
|
||
# If use_hisat3n TRUE, then this specifies path to hisat_3n executable | ||
hisat3n_path: hisat-3n | ||
# If hisat-3n is on your PATH, this will just be hisat-3n | ||
|
||
# Add a 'chr' to each chromosome number during alignment. [Useful when aliner index is number-based, but GTF annotation is chr-based] | ||
chr_tag: True | ||
|
||
# Are you on Yale's HPC where hisat-3n can be loaded as a module using Lmod system? | ||
Yale: False | ||
|
||
# Are you using a flattened annotation from FlatStacks? | ||
flattened: False | ||
|
||
# code specifying adapters to be trimmed that will be passed to cutadapt | ||
adapter: "-a AGATCGGAAGAGC -A AGATCGGAAGAGC" | ||
|
||
# Optional code to pass to cutadapt | ||
cutadapt_extra: "--minimum-length 20" | ||
# No additional parameters are passed, so this can include any of cutadapt's optional parameters | ||
|
||
# Extra parameters to be passed to STAR | ||
star_extra: "--outFilterMismatchNmax 20" | ||
# Already passing: "--outSAMtype BAM SortedByCoordinate --outSAMattributes NH HI AS NM MD --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile config["annotation"] | ||
# Including any of the already set parameters will yield an error. | ||
|
||
# Extra parameters to be passed to Hisat2 | ||
hisat2_extra: "--mp 1,0" | ||
# No additional parameters are passed, so this can include any of hisat2's optional parameters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
####### GENERAL INFORMATION ABOUT THIS CONFIG ####### | ||
# | ||
# This config file allows you to specify a number of important pieces of information that | ||
# the fastq2EZbakR pipeline will require to run. It also allows you to set optional parameters | ||
# for all tools that fastq2EZbakR makes use of. | ||
# | ||
# File paths can either be absolute (e.g., ~/path/to/file/or/directory) or relative | ||
# to the directory in which you are calling the pipeline from (e.g., data/fastq/WT_1 in the | ||
# example samples entry means to look in the data directory present in the directory | ||
# where you called `snakemake` to run the pipeline). | ||
# | ||
####### PARAMETERS YOU NEED TO SET ####### | ||
|
||
# Run bam2bakR only? If true, will expect paths to bam files as input and alignment steps will be skipped. If false, | ||
# paths to directories containing fastq files will be expected as input. | ||
bam2bakr: False | ||
|
||
## Paths to data to process | ||
# path to directory containing fastq files if bam2bakr is False | ||
# fastq files can be either gzipped or unzipped | ||
# Each set of fastq files must be in a different directory | ||
# path to bam files if bam2bakr is True | ||
samples: | ||
WT_1: data/WT1 | ||
WT_2: data/WT2 | ||
WT_ctl: data/WTctl | ||
|
||
## example of what samples will look like for fastq input | ||
# samples: | ||
# WT_1: data/fastq/WT_1 | ||
# WT_2: data/fastq/WT_2 | ||
# WT_ctl: data/fastq/WT_ctl | ||
# KO_1: data/fastq/KO_1 | ||
# KO_2: data/fastq/KO_2 | ||
# KO_ctl: data/fastq/KO_ctl | ||
|
||
# location of annotation gtf file | ||
annotation: data/annotation/genome.gtf | ||
|
||
# -s4U control sample IDs | ||
control_samples: ['WT_ctl'] | ||
|
||
# location of genome fasta file | ||
genome_fasta: data/genome/genome.fasta | ||
|
||
##### Parameters that are always relevant ##### | ||
|
||
# Format of reads | ||
FORMAT: "PE" # (PE, SE, NU) | ||
# [SE - single end reads] | ||
# [NU - including non-unique] (not tested) | ||
|
||
|
||
# Strandedness of reads | ||
strandedness: "R" # (F, R); F means read 1 represents original RNA sequence (vs. its reverse complement). | ||
|
||
|
||
# Type of browser tracks to generate | ||
mut_tracks: "TC" # ("TC", "GA", "TC,GA") | ||
|
||
# Minimum base quality to call mutation | ||
minqual: 40 | ||
|
||
# String common to spike-in gene_ids in annotation gtf | ||
# If you have no spike-ins, then this should be "\"\"", i.e., an empty string ("") | ||
spikename: "\"\"" | ||
|
||
# If True, tracks will be normalized | ||
normalize: True | ||
|
||
# Are you using the Windows subsystem for linux? 0 = Yes, 1 = No | ||
WSL: 1 | ||
|
||
# Are there jI and jM tags in your bam file? | ||
remove_tags: False | ||
|
||
|
||
##### Parameters that are only relevant if bam2bakr is False ##### | ||
|
||
# location of hisat2 indices (directory containing indices) | ||
HISAT2: data/hisat2/ | ||
|
||
# location of hisat3n indices (path to and common prefix of all .ht2 files) | ||
HISAT_3N: data/hisat_3n/GRCh38 | ||
|
||
# location of STAR indices | ||
STAR_index: data/star/ | ||
|
||
# If TRUE, hisat-3n will be used for alignment | ||
use_hisat3n: False | ||
|
||
# If TRUE STAR will be used for alignment; if hisat-3n is also TRUE, hisat-3n will be used and not STAR | ||
use_star: True | ||
|
||
# If TRUE, index will be built for star and directory will be created at path set in STAR_index parameter | ||
build_star: True | ||
|
||
# If use_hisat3n TRUE, then this specifies path to hisat_3n executable | ||
hisat3n_path: hisat-3n | ||
# If hisat-3n is on your PATH, this will just be hisat-3n | ||
|
||
# Add a 'chr' to each chromosome number during alignment. [Useful when aliner index is number-based, but GTF annotation is chr-based] | ||
chr_tag: True | ||
|
||
# Are you on Yale's HPC where hisat-3n can be loaded as a module using Lmod system? | ||
Yale: False | ||
|
||
# Are you using a flattened annotation from FlatStacks? | ||
flattened: False | ||
|
||
# code specifying adapters to be trimmed that will be passed to cutadapt | ||
adapter: "-a AGATCGGAAGAGC -A AGATCGGAAGAGC" | ||
|
||
# Optional code to pass to cutadapt | ||
cutadapt_extra: "--minimum-length 20" | ||
# No additional parameters are passed, so this can include any of cutadapt's optional parameters | ||
|
||
# Extra parameters to be passed to STAR | ||
star_extra: "--outFilterMismatchNmax 20" | ||
# Already passing: "--outSAMtype BAM SortedByCoordinate --outSAMattributes NH HI AS NM MD --quantMode TranscriptomeSAM GeneCounts --sjdbGTFfile config["annotation"] | ||
# Including any of the already set parameters will yield an error. | ||
|
||
# Extra parameters to be passed to Hisat2 | ||
hisat2_extra: "--mp 1,0" | ||
# No additional parameters are passed, so this can include any of hisat2's optional parameters | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.