generated from snakemake-workflows/snakemake-workflow-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: auto creation of sample sheet (#8)
* feat: auto creation of sample sheet * formatting
- Loading branch information
1 parent
8218b99
commit 9c148c5
Showing
6 changed files
with
99 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1 @@ | ||
sample_name,fq1,fq2 | ||
115_L001,/groups/ds/resistance_cefiderocol/data/illumina/115_L001_R1.fastq.gz,/groups/ds/resistance_cefiderocol/data/illumina/115_L001_R2.fastq.gz | ||
139_L001,/groups/ds/resistance_cefiderocol/data/illumina/139_L001_R1.fastq.gz,/groups/ds/resistance_cefiderocol/data/illumina/139_L001_R2.fastq.gz | ||
I15566-L1,/projects/pig-muenster/Metagenome_Kiel/fastq/I15566-L1_R1.fastq.gz,/projects/pig-muenster/Metagenome_Kiel/fastq/I15566-L1_R2.fastq.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Runs scripts/create_sample_sheet.py on the configured fastq directory.
# The script writes the sample sheet to the file listed under `input`.
rule create_sample_sheet:
    input:
        "config/pep/samples.csv",
    log:
        "logs/create_sample_sheet.log",
    params:
        inpath=config["sample-sheet"]["data-path"],
        renaming=config["sample-sheet"]["rename-sample-files"],
    conda:
        "../envs/python.yaml"
    script:
        "../scripts/create_sample_sheet.py"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,60 @@ | ||
import os | ||
import re | ||
path="/groups/ds/metagenomes/231218_Miseq/" | ||
outfile="config/pep/samples_231218.csv" | ||
import sys | ||
|
||
## send everything written to stderr to the rule's log file
sys.stderr = open(snakemake.log[0], "w")

## values handed over by the Snakemake rule
sample_csv = snakemake.input[0]
inpath = snakemake.params.inpath
renaming = snakemake.params.renaming
|
||
|
||
def rename_fastqs(path):
    """Collect sample names from the fastq files in *path*, optionally renaming them.

    Names such as ``sampleID_S40_L001_R1_001.fastq.gz`` are reduced to
    ``sampleID_R1.fastq.gz``. The sample name is everything in front of the
    ``_R1``/``_R2`` suffix of that reduced name. Files are renamed on disk
    only when the module-level ``renaming`` flag is truthy.

    Args:
        path: Directory that contains the ``.fastq.gz`` files. A trailing
            slash is optional.

    Returns:
        List of unique sample names in order of first appearance (files are
        processed in sorted order). ``"Undetermined"`` is left out.

    Raises:
        FileNotFoundError: If *path* contains no ``.fastq.gz`` file.
    """
    # Sorted so the sample order is reproducible; os.listdir() gives no
    # ordering guarantee.
    fastqs = sorted(
        file for file in os.listdir(path) if file.endswith(".fastq.gz")
    )
    if not fastqs:
        message = (
            "There are no fastq files in the directory. "
            f"Have you used the correct path: {path}?"
        )
        print(f"Error: {message}")
        raise FileNotFoundError(message)

    if renaming:
        print(
            "Renaming fastq files, e.g. from sampleID_S40_L001_R1_001.fastq.gz to sampleID_R1.fastq.gz"
        )
    else:
        print("Fastq files will not be renamed")

    samples = []
    for fastq in fastqs:
        ## renaming from e.g. sampleID_S40_L001_R1_001.fastq.gz to sampleID_R1.fastq.gz
        fastq_new = re.sub(r"_S\d{0,2}_L001", "", fastq)
        fastq_new = re.sub(r"_001\.fastq", ".fastq", fastq_new)

        # Files without an _R1/_R2 suffix (e.g. index reads) cannot be
        # assigned to a sample; skip them instead of crashing on None.
        match = re.search(r"(.*)_R[1-2]\.fastq\.gz$", fastq_new)
        if match is None:
            print(f"Skipping {fastq}: name does not end with _R1/_R2.fastq.gz")
            continue

        sample = match.group(1)
        if sample not in samples and sample != "Undetermined":
            samples.append(sample)

        # os.replace overwrites an existing target like `mv`, without building
        # a shell command from file names.
        if renaming and fastq_new != fastq:
            os.replace(os.path.join(path, fastq), os.path.join(path, fastq_new))

    return samples
|
||
|
||
def write_sample_sheet(samples, path, outfile):
    """Write a paired-end sample sheet in CSV format.

    The file starts with the header ``sample_name,fq1,fq2`` followed by one
    row per sample pointing at ``<sample>_R1.fastq.gz`` and
    ``<sample>_R2.fastq.gz`` inside *path*.

    Args:
        samples: Iterable of sample names.
        path: Directory holding the fastq files. A trailing slash is optional.
        outfile: Path of the CSV file to write; existing content is replaced.
    """
    with open(outfile, "w") as sheet:
        sheet.write("sample_name,fq1,fq2\n")

        for sample in samples:
            # Join directory and file name properly; plain string formatting
            # here previously put a space inside each path.
            fq1 = os.path.join(path, f"{sample}_R1.fastq.gz")
            fq2 = os.path.join(path, f"{sample}_R2.fastq.gz")
            sheet.write(f"{sample},{fq1},{fq2}\n")
|
||
|
||
# Collect the sample names from the configured fastq directory (renaming the
# files when requested) and write them to the sample sheet.
samples = rename_fastqs(inpath)
write_sample_sheet(samples, inpath, sample_csv)