diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7adb289..4a3799d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -13,7 +13,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.8.17' + python-version: '3.10.5' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 7deb7c4..9c82f36 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8.17] + python-version: [3.10.5, 3.11] steps: - uses: actions/checkout@v3 diff --git a/.gitignore b/.gitignore index 32b0a13..3701049 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,38 @@ -cromwell-executions/ -cromwell-workflow-logs/ + +# built files __pycache__/ +build/ .idea/ .pyc -.env .DS_Store *.egg-info/ pip-wheel-metadata/ -.coverage -.mypy_cache/ dist/ -venv/ + +# env & IDE +.env .python-version +.mypy_cache/ +.vscode/ +venv/ + +# tests +.coverage +.nextflow/ +cromwell-executions/ +cromwell-workflow-logs/ +sample_data/ +local_data/ + +# translation files +.janis/ +output/ +translated*/ +testout/ +*.log +involucro + +# misc files +test.py +ubu* +temp.txt \ No newline at end of file diff --git a/configs/nextflow.config.BwaAlignment b/configs/nextflow.config.BwaAlignment new file mode 100644 index 0000000..fe17e18 --- /dev/null +++ b/configs/nextflow.config.BwaAlignment @@ -0,0 +1,97 @@ +docker.enabled = true + +params { + + // OUTPUT DIRECTORY + outdir = './outputs' + + // INPUTS + fastqs = [ + [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/NA12878-BRCA1_R1.fastq.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/NA12878-BRCA1_R2.fastq.gz' + ], + ] + + // FastaWithIndexes + reference = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.amb', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.ann', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.bwt', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.dict', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.fai', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.pac', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.sa', + ] + + // CompressedIndexedVCF + snps_dbsnp = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + snps_1000gp = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + known_indels = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz', + 
'/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + mills_indels = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz.tbi' + ] + + adapter_file = '/home/grace/work/pp/translation/janis-assistant/local_data/data/adapter_list.txt' + contaminant_file = '/home/grace/work/pp/translation/janis-assistant/local_data/data/contaminant_list.txt' + sample_name = 'NA12878-BRCA1' + allele_freq_threshold = 0.05 + min_mapping_qual = null + filter = null + align_and_sort_sortsam_tmp_dir = './tmp' + + // PROCESSES + getfastqc_adapters.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_BwaAlignmentAndQC/templates/ParseFastqcAdapters.py' + calculate_performancesummary_genomefile.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_BwaAlignmentAndQC/templates/GenerateGenomeFileForBedtoolsCoverage.py' + fastqc.cpus = 1 + fastqc.memory = 8 + + // SUBWORKFLOW: ALIGN_AND_SORT + align_and_sort.five_prime_adapter_read1 = [ + // list values here + ] + align_and_sort.five_prime_adapter_read2 = [ + // list values here + ] + align_and_sort.cutadapt_quality_cutoff = 15 + align_and_sort.cutadapt_minimum_length = 50 + align_and_sort.bwamem_mark_shorter_splits = true + align_and_sort.sortsam_sort_order = 'coordinate' + align_and_sort.sortsam_create_index = true + align_and_sort.sortsam_validation_stringency = 'SILENT' + align_and_sort.sortsam_max_records_in_ram = 5000000 + align_and_sort.cutadapt.cpus = 5 + align_and_sort.cutadapt.memory = 4 + align_and_sort.bwamem.cpus = 16 + align_and_sort.bwamem.memory = 16 + align_and_sort.sortsam.cpus = 1 + align_and_sort.sortsam.memory = 8 + + // SUBWORKFLOW: MERGE_AND_MARKDUPS + merge_and_markdups.create_index = true + merge_and_markdups.max_records_in_ram = 5000000 + merge_and_markdups.merge_sam_files_use_threading = true + merge_and_markdups.merge_sam_files_validation_stringency = 'SILENT' + merge_and_markdups.merge_sam_files.cpus = 4 + merge_and_markdups.merge_sam_files.memory = 8 + merge_and_markdups.mark_duplicates.cpus = 4 + merge_and_markdups.mark_duplicates.memory = 8 + +} diff --git a/configs/nextflow.config.BwaAlignmentAndQC b/configs/nextflow.config.BwaAlignmentAndQC new file mode 100644 index 0000000..093867c --- /dev/null +++ b/configs/nextflow.config.BwaAlignmentAndQC @@ -0,0 +1,104 @@ +docker.enabled = true + +params { + + // OUTPUT DIRECTORY + outdir = './outputs' + + // INPUTS + fastqs = [ + [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/NA12878-BRCA1_R1.fastq.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/NA12878-BRCA1_R2.fastq.gz' + ], + ] + + // FastaWithIndexes + reference = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.amb', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.ann', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.bwt', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.dict', + 
'/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.fai', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.pac', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.sa', + ] + + // CompressedIndexedVCF + snps_dbsnp = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + snps_1000gp = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + known_indels = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + mills_indels = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz.tbi' + ] + + adapter_file = '/home/grace/work/pp/translation/janis-assistant/local_data/data/adapter_list.txt' + contaminant_file = '/home/grace/work/pp/translation/janis-assistant/local_data/data/contaminant_list.txt' + sample_name = 'NA12878-BRCA1' + allele_freq_threshold = 0.05 + min_mapping_qual = null + filter = null + align_and_sort_sortsam_tmp_dir = './tmp' + + // PROCESSES + getfastqc_adapters.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_BwaAlignmentAndQC/templates/ParseFastqcAdapters.py' + calculate_performancesummary_genomefile.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_BwaAlignmentAndQC/templates/GenerateGenomeFileForBedtoolsCoverage.py' + fastqc.cpus = 1 + fastqc.memory = 8 + + // SUBWORKFLOW: ALIGN_AND_SORT + align_and_sort.five_prime_adapter_read1 = [ + // list values here + ] + align_and_sort.five_prime_adapter_read2 = [ + // list values here + ] + align_and_sort.cutadapt_quality_cutoff = 15 + align_and_sort.cutadapt_minimum_length = 50 + align_and_sort.bwamem_mark_shorter_splits = true + align_and_sort.sortsam_sort_order = 'coordinate' + align_and_sort.sortsam_create_index = true + align_and_sort.sortsam_validation_stringency = 'SILENT' + align_and_sort.sortsam_max_records_in_ram = 5000000 + align_and_sort.cutadapt.cpus = 5 + align_and_sort.cutadapt.memory = 4 + align_and_sort.bwamem.cpus = 16 + align_and_sort.bwamem.memory = 16 + align_and_sort.sortsam.cpus = 1 + align_and_sort.sortsam.memory = 8 + + // SUBWORKFLOW: MERGE_AND_MARKDUPS + merge_and_markdups.create_index = true + merge_and_markdups.max_records_in_ram = 5000000 + merge_and_markdups.merge_sam_files_use_threading = true + merge_and_markdups.merge_sam_files_validation_stringency = 'SILENT' + merge_and_markdups.merge_sam_files.cpus = 4 + merge_and_markdups.merge_sam_files.memory = 8 + merge_and_markdups.mark_duplicates.cpus = 4 + merge_and_markdups.mark_duplicates.memory = 8 + + // SUBWORKFLOW: PERFORMANCE_SUMMARY + performance_summary.samtoolsview_do_not_output_alignments_with_bits_set = '0x400' + 
performance_summary.performancesummary_genome = true + performance_summary.gatk4collectinsertsizemetrics.cpus = 1 + performance_summary.gatk4collectinsertsizemetrics.memory = 8 + performance_summary.bedtoolsgenomecoveragebed.memory = 8 + +} diff --git a/configs/nextflow.config.wgsgermline b/configs/nextflow.config.wgsgermline new file mode 100644 index 0000000..c68e7a5 --- /dev/null +++ b/configs/nextflow.config.wgsgermline @@ -0,0 +1,182 @@ +docker.enabled = true + +params { + + // OUTPUT DIRECTORY + outdir = './outputs' + + // INPUTS + fastqs = [ + [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/NA12878-BRCA1_R1.fastq.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/NA12878-BRCA1_R2.fastq.gz' + ], + ] + + // FastaWithIndexes + reference = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.amb', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.ann', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.bwt', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.dict', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.fai', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.pac', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.chr17.fasta.sa', + ] + + // CompressedIndexedVCF + snps_dbsnp = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + snps_1000gp = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + known_indels = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + mills_indels = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz.tbi' + ] + + adapter_file = '/home/grace/work/pp/translation/janis-assistant/local_data/data/adapter_list.txt' + contaminant_file = '/home/grace/work/pp/translation/janis-assistant/local_data/data/contaminant_list.txt' + + gatk_intervals = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/BRCA1.hg38.bed' + ] + vardict_intervals = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/BRCA1.hg38.split-intervals.bed' + ] + // BedTABIX + strelka_intervals = [ + '/home/grace/work/pp/translation/janis-assistant/local_data/data/BRCA1.hg38.bed.gz', + '/home/grace/work/pp/translation/janis-assistant/local_data/data/BRCA1.hg38.bed.gz.tbi' + ] + + gridss_blacklist = 
'/home/grace/work/pp/translation/janis-assistant/local_data/data/consensusBlacklist.hg38.chr17.bed' + sample_name = 'NA12878-BRCA1' + allele_freq_threshold = 0.05 + min_mapping_qual = null + filter = null + align_and_sort_sortsam_tmp_dir = './tmp' + combine_variants_type = 'germline' + combine_variants_columns = ['AC', 'AN', 'AF', 'AD', 'DP', 'GT'] + + // PROCESSES + getfastqc_adapters.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/ParseFastqcAdapters.py' + calculate_performancesummary_genomefile.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateGenomeFileForBedtoolsCoverage.py' + generate_gatk_intervals.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateIntervalsByChromosome.py' + generate_manta_config.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateMantaConfig.py' + generate_vardict_headerlines.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateVardictHeaderLines.py' + fastqc.cpus = 1 + fastqc.memory = 8 + vc_gridss.cpus = 8 + vc_gridss.memory = 31 + vc_gatk_sort_combined.cpus = 1 + vc_gatk_sort_combined.memory = 8 + vc_vardict_sort_combined.cpus = 1 + vc_vardict_sort_combined.memory = 8 + combine_variants.memory = 8 + combined_sort.cpus = 1 + combined_sort.memory = 8 + + // SUBWORKFLOW: ALIGN_AND_SORT + align_and_sort.five_prime_adapter_read1 = [] // list values here + align_and_sort.five_prime_adapter_read2 = [] // list values here + align_and_sort.cutadapt_quality_cutoff = 15 + align_and_sort.cutadapt_minimum_length = 50 + align_and_sort.bwamem_mark_shorter_splits = true + align_and_sort.sortsam_sort_order = 'coordinate' + align_and_sort.sortsam_create_index = true + align_and_sort.sortsam_validation_stringency = 'SILENT' + align_and_sort.sortsam_max_records_in_ram = 5000000 + align_and_sort.cutadapt.cpus = 5 + align_and_sort.cutadapt.memory = 4 + align_and_sort.bwamem.cpus = 16 + align_and_sort.bwamem.memory = 16 + align_and_sort.sortsam.cpus = 1 + align_and_sort.sortsam.memory = 8 + + // SUBWORKFLOW: BQSR + bqsr.base_recalibrator.cpus = 1 + bqsr.base_recalibrator.memory = 16 + bqsr.apply_bqsr.cpus = 1 + bqsr.apply_bqsr.memory = 8 + + // SUBWORKFLOW: COMBINED_ADDBAMSTATS + combined_addbamstats.samtoolsmpileup_count_orphans = true + combined_addbamstats.samtoolsmpileup_nobaq = true + combined_addbamstats.samtoolsmpileup_minbq = 0 + combined_addbamstats.samtoolsmpileup_max_depth = 10000 + combined_addbamstats.addbamstats_type = 'germline' + + // SUBWORKFLOW: MERGE_AND_MARKDUPS + merge_and_markdups.create_index = true + merge_and_markdups.max_records_in_ram = 5000000 + merge_and_markdups.merge_sam_files_use_threading = true + merge_and_markdups.merge_sam_files_validation_stringency = 'SILENT' + merge_and_markdups.merge_sam_files.cpus = 4 + merge_and_markdups.merge_sam_files.memory = 8 + merge_and_markdups.mark_duplicates.cpus = 4 + merge_and_markdups.mark_duplicates.memory = 8 + + // SUBWORKFLOW: PERFORMANCE_SUMMARY + performance_summary.samtoolsview_do_not_output_alignments_with_bits_set = '0x400' + performance_summary.performancesummary_genome = true + performance_summary.gatk4collectinsertsizemetrics.cpus = 1 + performance_summary.gatk4collectinsertsizemetrics.memory = 8 + performance_summary.bedtoolsgenomecoveragebed.memory = 8 + + // SUBWORKFLOW: VC_GATK + vc_gatk.haplotype_caller_pair_hmm_implementation = 'LOGLESS_CACHING' + 
vc_gatk.split_bam.memory = 4 + vc_gatk.haplotype_caller.cpus = 1 + vc_gatk.haplotype_caller.memory = 8 + vc_gatk.splitnormalisevcf.cpus = 1 + vc_gatk.splitnormalisevcf.memory = 8 + + // SUBWORKFLOW: VC_STRELKA + vc_strelka.is_exome = null + vc_strelka.strelka_config = null + vc_strelka.filterpass_remove_filetered_all = true + vc_strelka.filterpass_recode = true + vc_strelka.filterpass_recode_infoall = true + vc_strelka.manta.cpus = 4 + vc_strelka.manta.memory = 4 + vc_strelka.strelka.cpus = 4 + vc_strelka.strelka.memory = 4 + vc_strelka.splitnormalisevcf.cpus = 1 + vc_strelka.splitnormalisevcf.memory = 8 + + // SUBWORKFLOW: VC_VARDICT + vc_vardict.vardict_vcf_format = true + vc_vardict.vardict_chrom_column = 1 + vc_vardict.vardict_reg_start_col = 2 + vc_vardict.vardict_gene_end_col = 3 + vc_vardict.vardict_threads = 4 + vc_vardict.compressvcf_stdout = true + vc_vardict.filterpass_remove_filetered_all = true + vc_vardict.filterpass_recode = true + vc_vardict.filterpass_recode_infoall = true + vc_vardict.vardict.cpus = 4 + vc_vardict.vardict.memory = 8 + vc_vardict.annotate.cpus = 1 + vc_vardict.annotate.memory = 8 + vc_vardict.splitnormalisevcf.cpus = 1 + vc_vardict.splitnormalisevcf.memory = 8 + vc_vardict.trim.cpus = 1 + vc_vardict.trim.memory = 1 + + +} diff --git a/configs/nextflow.config.wgsgermline.server b/configs/nextflow.config.wgsgermline.server new file mode 100644 index 0000000..135f30e --- /dev/null +++ b/configs/nextflow.config.wgsgermline.server @@ -0,0 +1,182 @@ +docker.enabled = true + +params { + + // OUTPUT DIRECTORY + outdir = './outputs' + + // INPUTS + fastqs = [ + [ + '/home/ubuntu/data/NA12878-BRCA1_R1.fastq.gz', + '/home/ubuntu/data/NA12878-BRCA1_R2.fastq.gz' + ], + ] + + // FastaWithIndexes + reference = [ + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta.amb', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta.ann', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta.bwt', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.dict', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta.fai', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta.pac', + '/home/ubuntu/data/Homo_sapiens_assembly38.chr17.fasta.sa', + ] + + // CompressedIndexedVCF + snps_dbsnp = [ + '/home/ubuntu/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz', + '/home/ubuntu/data/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + snps_1000gp = [ + '/home/ubuntu/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz', + '/home/ubuntu/data/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + known_indels = [ + '/home/ubuntu/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz', + '/home/ubuntu/data/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz.tbi' + ] + + // CompressedIndexedVCF + mills_indels = [ + '/home/ubuntu/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz', + '/home/ubuntu/data/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz.tbi' + ] + + adapter_file = '/home/ubuntu/data/adapter_list.txt' + contaminant_file = '/home/ubuntu/data/contaminant_list.txt' + + gatk_intervals = [ + '/home/ubuntu/data/BRCA1.hg38.bed' + ] + vardict_intervals = [ + '/home/ubuntu/data/BRCA1.hg38.split-intervals.bed' + ] + // BedTABIX + strelka_intervals = [ + '/home/ubuntu/data/BRCA1.hg38.bed.gz', + '/home/ubuntu/data/BRCA1.hg38.bed.gz.tbi' + ] + + gridss_blacklist = '/home/ubuntu/data/consensusBlacklist.hg38.chr17.bed' + sample_name = 
'NA12878-BRCA1' + allele_freq_threshold = 0.05 + min_mapping_qual = null + filter = null + align_and_sort_sortsam_tmp_dir = './tmp' + combine_variants_type = 'germline' + combine_variants_columns = ['AC', 'AN', 'AF', 'AD', 'DP', 'GT'] + + // PROCESSES + getfastqc_adapters.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/ParseFastqcAdapters.py' + calculate_performancesummary_genomefile.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateGenomeFileForBedtoolsCoverage.py' + generate_gatk_intervals.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateIntervalsByChromosome.py' + generate_manta_config.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateMantaConfig.py' + generate_vardict_headerlines.code_file = '/home/grace/work/pp/translation/janis-assistant/translated_wgsgermline/templates/GenerateVardictHeaderLines.py' + fastqc.cpus = 1 + fastqc.memory = 8 + vc_gridss.cpus = 8 + vc_gridss.memory = 31 + vc_gatk_sort_combined.cpus = 1 + vc_gatk_sort_combined.memory = 8 + vc_vardict_sort_combined.cpus = 1 + vc_vardict_sort_combined.memory = 8 + combine_variants.memory = 8 + combined_sort.cpus = 1 + combined_sort.memory = 8 + + // SUBWORKFLOW: ALIGN_AND_SORT + align_and_sort.five_prime_adapter_read1 = [] // list values here + align_and_sort.five_prime_adapter_read2 = [] // list values here + align_and_sort.cutadapt_quality_cutoff = 15 + align_and_sort.cutadapt_minimum_length = 50 + align_and_sort.bwamem_mark_shorter_splits = true + align_and_sort.sortsam_sort_order = 'coordinate' + align_and_sort.sortsam_create_index = true + align_and_sort.sortsam_validation_stringency = 'SILENT' + align_and_sort.sortsam_max_records_in_ram = 5000000 + align_and_sort.cutadapt.cpus = 5 + align_and_sort.cutadapt.memory = 4 + align_and_sort.bwamem.cpus = 16 + align_and_sort.bwamem.memory = 16 + align_and_sort.sortsam.cpus = 1 + align_and_sort.sortsam.memory = 8 + + // SUBWORKFLOW: BQSR + bqsr.base_recalibrator.cpus = 1 + bqsr.base_recalibrator.memory = 16 + bqsr.apply_bqsr.cpus = 1 + bqsr.apply_bqsr.memory = 8 + + // SUBWORKFLOW: COMBINED_ADDBAMSTATS + combined_addbamstats.samtoolsmpileup_count_orphans = true + combined_addbamstats.samtoolsmpileup_nobaq = true + combined_addbamstats.samtoolsmpileup_minbq = 0 + combined_addbamstats.samtoolsmpileup_max_depth = 10000 + combined_addbamstats.addbamstats_type = 'germline' + + // SUBWORKFLOW: MERGE_AND_MARKDUPS + merge_and_markdups.create_index = true + merge_and_markdups.max_records_in_ram = 5000000 + merge_and_markdups.merge_sam_files_use_threading = true + merge_and_markdups.merge_sam_files_validation_stringency = 'SILENT' + merge_and_markdups.merge_sam_files.cpus = 4 + merge_and_markdups.merge_sam_files.memory = 8 + merge_and_markdups.mark_duplicates.cpus = 4 + merge_and_markdups.mark_duplicates.memory = 8 + + // SUBWORKFLOW: PERFORMANCE_SUMMARY + performance_summary.samtoolsview_do_not_output_alignments_with_bits_set = '0x400' + performance_summary.performancesummary_genome = true + performance_summary.gatk4collectinsertsizemetrics.cpus = 1 + performance_summary.gatk4collectinsertsizemetrics.memory = 8 + performance_summary.bedtoolsgenomecoveragebed.memory = 8 + + // SUBWORKFLOW: VC_GATK + vc_gatk.haplotype_caller_pair_hmm_implementation = 'LOGLESS_CACHING' + vc_gatk.split_bam.memory = 4 + vc_gatk.haplotype_caller.cpus = 1 + vc_gatk.haplotype_caller.memory = 8 + 
vc_gatk.splitnormalisevcf.cpus = 1 + vc_gatk.splitnormalisevcf.memory = 8 + + // SUBWORKFLOW: VC_STRELKA + vc_strelka.is_exome = null + vc_strelka.strelka_config = null + vc_strelka.filterpass_remove_filetered_all = true + vc_strelka.filterpass_recode = true + vc_strelka.filterpass_recode_infoall = true + vc_strelka.manta.cpus = 4 + vc_strelka.manta.memory = 4 + vc_strelka.strelka.cpus = 4 + vc_strelka.strelka.memory = 4 + vc_strelka.splitnormalisevcf.cpus = 1 + vc_strelka.splitnormalisevcf.memory = 8 + + // SUBWORKFLOW: VC_VARDICT + vc_vardict.vardict_vcf_format = true + vc_vardict.vardict_chrom_column = 1 + vc_vardict.vardict_reg_start_col = 2 + vc_vardict.vardict_gene_end_col = 3 + vc_vardict.vardict_threads = 4 + vc_vardict.compressvcf_stdout = true + vc_vardict.filterpass_remove_filetered_all = true + vc_vardict.filterpass_recode = true + vc_vardict.filterpass_recode_infoall = true + vc_vardict.vardict.cpus = 4 + vc_vardict.vardict.memory = 8 + vc_vardict.annotate.cpus = 1 + vc_vardict.annotate.memory = 8 + vc_vardict.splitnormalisevcf.cpus = 1 + vc_vardict.splitnormalisevcf.memory = 8 + vc_vardict.trim.cpus = 1 + vc_vardict.trim.memory = 1 + + +} diff --git a/janis_assistant/__init__.py b/janis_assistant/__init__.py index 73fced5..19b282c 100644 --- a/janis_assistant/__init__.py +++ b/janis_assistant/__init__.py @@ -11,3 +11,6 @@ from janis_assistant.management.workflowmanager import WorkflowManager, TaskStatus from janis_assistant.validation import ValidationRequirements from janis_assistant.__meta__ import __version__ + +import collections +collections.Callable = collections.abc.Callable diff --git a/janis_assistant/__meta__.py b/janis_assistant/__meta__.py index 2d4166b..9328593 100644 --- a/janis_assistant/__meta__.py +++ b/janis_assistant/__meta__.py @@ -1,4 +1,4 @@ -__version__ = "v0.12.1" +__version__ = "v0.13.0" DOCS_URL = "https://janis.readthedocs.io" GITHUB_URL = "https://github.com/PMCC-BioinformaticsCore/janis-assistant" ISSUE_URL = "https://github.com/PMCC-BioinformaticsCore/janis-assistant/issues/new" diff --git a/janis_assistant/cli.py b/janis_assistant/cli.py index 01846b6..1ad6b66 100644 --- a/janis_assistant/cli.py +++ b/janis_assistant/cli.py @@ -3,15 +3,16 @@ import os.path import json from time import sleep -from typing import Optional, Tuple, List +from typing import Optional, Tuple import ruamel.yaml import tabulate -from janis_assistant.management.workflowmanager import WorkflowManager +from janis_assistant.management.workflowmanager import WorkflowManager from janis_assistant.management.envvariables import EnvVariables from janis_core import InputQualityType, HINTS, HintEnum, SupportedTranslation, Tool +from janis_core.ingestion import SupportedIngestion from janis_core.utils.logger import Logger, LogLevel from janis_assistant.__meta__ import DOCS_URL @@ -27,6 +28,7 @@ from janis_assistant.data.enums.taskstatus import TaskStatus from janis_assistant.main import ( + ingest, translate, generate_inputs, cleanup, @@ -42,7 +44,6 @@ parse_additional_arguments, parse_dict, get_file_from_searchname, - fully_qualify_filename, dict_to_yaml_string, ) @@ -94,7 +95,7 @@ def process_args(sysargs=None): ) add_translate_args( subparsers.add_parser( - "translate", help="Translate a janis workflow to CWL, WDL, or Nextflow" + "translate", help="Translate a janis workflow to CWL, WDL or Nextflow" ) ) add_inputs_args( @@ -285,42 +286,94 @@ def add_cleanup_args(parser): return parser -def add_translate_args(parser): - parser.add_argument("workflow", help="Path to workflow") +def 
add_translate_args(parser: argparse.ArgumentParser): + """ + intended syntax + fmt1: janis translate [OPTIONS] --from cwl --to nextflow infile.cwl [longform] + """ + ### --- MANDATORY ARGS --- ### + parser.add_argument( + "infile", + help="Path to input file", + ) parser.add_argument( - "translation", - help="language to translate to", + "--from", + help="Language of infile. Will be autodetected if not supplied", + choices=SupportedIngestion.all(), + type=str + ) + parser.add_argument( + "--to", + help="Language to translate to.", choices=SupportedTranslation.all(), + type=str ) - parser.add_argument("-c", "--config", help="Path to config file") + + ### --- OPTIONAL ARGS --- ### + + # translation features parser.add_argument( - "--name", - help="If you have multiple workflows in your file, you may want to " - "help Janis out to select the right workflow to run", + "--mode", + help="Translate mode (default: regular). Controls extent of tool translation\n\ + - skeleton: ignores inputs which aren't used in workflow. no CLI command generation.\n\ + - regular: ignores inputs which aren't used in workflow. \n\ + - extended: full translation of all inputs & CLI command", + type=str, + choices=["skeleton", "regular", "extended"], + default="regular" + ) + # parser.add_argument( + # "--no-comments", + # help="don't provide info comments in output translation", + # default=False, + # action="store_true" + # ) + parser.add_argument( + "--galaxy-build-images", + action="store_true", + help="Requires docker. \nFor Galaxy Tool Wrappers with multiple software requirements, build a local container image containing all requirements.\nAdds ~2-10 mins per affected Galaxy Wrapper. " ) parser.add_argument( - "-o", - "--output-dir", - help="output directory to write output to (default=stdout)", + "--galaxy-no-image-cache", + help="Turns off galaxy container image cache. Cache stores previously identified containers suitable for different tool wrappers so that quay.io API calls are unnecessary for previously parsed tools.", + action="store_true", ) parser.add_argument( - "--no-cache", - help="Force re-download of workflow if remote", + "--galaxy-no-wrapper-cache", + help="Turns off galaxy tool downloads cache. 
Cache stores local copies of tool.xml files so they don't have to be re-downloaded if they have been downloaded before.", action="store_true", ) + # accessory files & directories + parser.add_argument( + "-o", + "--output-dir", + help="Output directory to write output to (default: translated).", + type=str, + default="translated" + ) + parser.add_argument( + "-c", + "--config", + help="Path to config file" + ) + parser.add_argument( + "--name", + help="Specifies the name of the workflow/tool to translate, in case where infile has multiple such objects.", + ) parser.add_argument( "--resources", action="store_true", help="Add resource overrides into inputs file (eg: runtime_cpu / runtime_memory)", ) - parser.add_argument( - "--toolbox", help="Only look for tools in the toolbox", action="store_true" + "--toolbox", + help="Only look for tools in the toolbox", + action="store_true" ) + # workflow inputs inputargs = parser.add_argument_group("Inputs") - inputargs.add_argument( "-i", "--inputs", @@ -334,6 +387,7 @@ def add_translate_args(parser): action="append", ) + # hints hint_args = parser.add_argument_group("hints") for HintType in HINTS: if issubclass(HintType, HintEnum): @@ -341,21 +395,20 @@ def add_translate_args(parser): "--hint-" + HintType.key(), choices=HintType.symbols() ) + # containers container_args = parser.add_argument_group("container related args") container_args.add_argument( - "--allow-empty-container", + "--disallow-empty-container", action="store_true", - help="Some tools you use may not include a container, this would usually (and intentionally) cause an error. " - "Including this flag will disable this check, and empty containers can be used.", + help="Some tools you use may not include a container, this would usually (and intentionally) cause an error." + "Including this flag will check that all tools have a container." ) - container_args.add_argument( "--container-override", help="Override a tool's container by specifying a new container. This argument should be specified in the " "following (comma separated) format: t1=v1,t2=v2. Eg toolid=container/override:version,toolid2=.", ) - container_args.add_argument( "--skip-digest-lookup", action="store_true", @@ -1235,38 +1288,85 @@ def parse_container_override_format(container_override): return co -def do_translate(args): - jc = JanisConfiguration.initial_configuration(args.config) - +def do_translate(args: argparse.Namespace): + # setup + # (ensure all parameters are ready for ingest & translate) + # JanisConfiguration holds settings related to janis-assistant, not janis-core translate + # settings in janis should be a singleton module so they are globally available. + # this would be time consuming, so will avoid for now. will just pass things as arguments. 
+ # - GH + jc = JanisConfiguration.initial_configuration(args.config) container_override = parse_container_override_format(args.container_override) - + source_fmt = _get_source_fmt(args) + dest_fmt = _get_dest_fmt(args) + inputs = args.inputs if args.inputs else None hints = { - k[5:]: v - for k, v in vars(args).items() + k[5:]: v for k, v in vars(args).items() if k.startswith("hint_") and v is not None } + hints = hints if hints else None - inputs = args.inputs or [] - # the args.extra_inputs parameter are inputs that we MUST match - # we'll need to parse them manually and then pass them to fromjanis as requiring a match - # required_inputs = parse_additional_arguments(args.extra_inputs) + # ingest + internal_model = ingest( + infile=args.infile, + format=source_fmt, + galaxy_build_images=args.galaxy_build_images, + galaxy_no_image_cache=args.galaxy_no_image_cache, + galaxy_no_wrapper_cache=args.galaxy_no_wrapper_cache + ) + # translate translate( config=jc, - tool=args.workflow, - translation=args.translation, + tool=internal_model, + dest_fmt=dest_fmt, + mode=args.mode, name=args.name, output_dir=args.output_dir, - force=args.no_cache, - allow_empty_container=args.allow_empty_container, + inputs=inputs, + hints=hints, + allow_empty_container=not args.disallow_empty_container, container_override=container_override, skip_digest_lookup=args.skip_digest_lookup, skip_digest_cache=args.skip_digest_cache, - inputs=inputs, recipes=args.recipe, - hints=hints, - ) - + render_comments=False + ) + +def _get_source_fmt(args: argparse.Namespace) -> str: + # user supplied fmt + fmt: Optional[str] = None + for key, val in args._get_kwargs(): # workaround for '--from' name: usually a python error. + if key == 'from' and val is not None: + fmt = val + break + # auto-detect fmt + ext_map = { + '.cwl': 'cwl', # any cwl file + '.py': 'janis', # any janis file + '.xml': 'galaxy', # galaxy tool + '.ga': 'galaxy' # galaxy workflow + } + if not fmt: + name, ext = os.path.splitext(args.infile) + if ext in ext_map: + fmt = ext_map[ext] + # guard + if not fmt: + raise ValueError(f"unknown source language for {args.infile}. please specify with '--from'") + return fmt + +def _get_dest_fmt(args: argparse.Namespace) -> str: + # user supplied fmt (mandatory) + fmt: Optional[str] = None + for key, val in args._get_kwargs(): # workaround for '--from' name: usually a python error. + if key in ['to', 'dest_language'] and val is not None: + fmt = val + break + # guard + if not fmt: + raise ValueError(f"unsupplied dest language for {args.infile}. 
please specify with '--to'") + return fmt def do_cleanup(args): cleanup() diff --git a/janis_assistant/engines/nextflow/main.py b/janis_assistant/engines/nextflow/main.py index 7e48077..705e6cd 100644 --- a/janis_assistant/engines/nextflow/main.py +++ b/janis_assistant/engines/nextflow/main.py @@ -14,7 +14,7 @@ from janis_core import LogLevel from janis_core.types.data_types import is_python_primitive from janis_core.utils.logger import Logger -from janis_core.translations import nfgen, NextflowTranslator +from janis_core.translations import NextflowTranslator from janis_assistant.data.models.outputs import WorkflowOutputModel from janis_assistant.data.models.run import RunModel from janis_assistant.data.models.workflowjob import RunJobModel diff --git a/janis_assistant/main.py b/janis_assistant/main.py index 55bf6f0..40d5643 100644 --- a/janis_assistant/main.py +++ b/janis_assistant/main.py @@ -9,11 +9,10 @@ import os import sys import time -import subprocess from datetime import datetime from inspect import isclass +from typing import Optional, Type, Tuple, Any from textwrap import dedent -from typing import Optional, Dict, Union, Type, List, Tuple import janis_core as j from janis_assistant.data.enums import TaskStatus @@ -35,6 +34,8 @@ from janis_assistant.utils.batchrun import BatchRunRequirements from janis_core import InputQualityType, Tool, DynamicWorkflow, LogLevel, JanisShed +from janis_core import ingestion +from janis_core import translations import janis_assistant.templates as janistemplates from janis_assistant.data.models.preparedjob import PreparedJob @@ -42,8 +43,8 @@ from janis_assistant.management.configmanager import ConfigManager from janis_assistant.management.configuration import ( JanisConfiguration, - EnvVariables, stringify_dict_keys_or_return_value, + EnvVariables, JanisConfigurationEnvironment, DatabaseTypeToUse, JanisConfigurationCromwell, @@ -66,10 +67,10 @@ def run_with_outputs( - tool: Union[j.CommandTool, j.Workflow], - inputs: Dict[str, any], + tool: j.CommandTool | j.Workflow, + inputs: dict[str, Any], output_dir: str, - config: JanisConfiguration = None, + config: Optional[JanisConfiguration] = None, engine: Optional[str] = None, workflow_reference: Optional[str] = None, ): @@ -144,7 +145,7 @@ def run_with_outputs( def resolve_tool( - tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow, Type[j.Workflow]], + tool: str | j.CommandTool | Type[j.CommandTool] | j.Workflow | Type[j.Workflow], name=None, from_toolshed=False, force=False, @@ -208,22 +209,53 @@ def resolve_tool( raise Exception("Couldn't find tool with name: " + str(tool)) +def ingest( + infile: str, + format: str, + galaxy_build_images: bool=False, + galaxy_no_image_cache: bool=False, + galaxy_no_wrapper_cache: bool=False + ) -> str | j.CommandTool | j.Workflow: + """ + orchestrator of ingest process. + used translate() below as a guide. + currently just ingests the supplied file. + mainly a placeholder for more extensive logic in the future. + future extensions (related to ingesting a whole folder) + - gathering & loading files + - ingesting input dict + - ingesting script files + - ingesting tools (held in external file - like in a 'tools' or 'processes' subfolder) + """ + # if janis, just return the path. 
+ # all file loading etc handled in translate() (resolve_tool()) + if format == 'janis': + return infile + else: + return ingestion.ingest( + infile, + format, + galaxy_build_images, +# galaxy_no_image_cache, +# galaxy_no_wrapper_cache, + ) + def translate( config: JanisConfiguration, - tool: Union[str, j.CommandTool, Type[j.CommandTool], j.Workflow, Type[j.Workflow]], - translation: str, - name: str = None, - hints: Optional[Dict[str, str]] = None, - output_dir: Optional[str] = None, - inputs: Union[str, dict] = None, - allow_empty_container=False, - container_override=None, - skip_digest_lookup=False, - skip_digest_cache=False, - recipes: List[str] = None, - **kwargs, + tool: str | j.CommandTool | j.Workflow, + dest_fmt: str, + mode: Optional[str]=None, + name: Optional[str]=None, + output_dir: str='translated', + inputs: Optional[str | dict[str, Any]]=None, + hints: Optional[dict[str, str]]=None, + allow_empty_container: Optional[bool]=True, + container_override: Optional[bool]=None, + skip_digest_lookup: Optional[bool]=False, + skip_digest_cache: Optional[bool]=False, + recipes: Optional[list[str]]=None, + render_comments: bool=True, ): - toolref, _ = resolve_tool(tool, name, from_toolshed=True) if not toolref: @@ -252,37 +284,44 @@ def translate( skip_digest_cache=skip_digest_cache, ) - if isinstance(toolref, j.WorkflowBase): - wfstr, _, _ = toolref.translate( - translation, - to_console=False, - to_disk=bool(output_dir), - export_path=output_dir or "./{language}", - hints=hints, - additional_inputs=inputsdict, - allow_empty_container=allow_empty_container, - container_override=container_overrides, - ) - elif isinstance(toolref, (j.CommandTool, j.CodeTool)): - wfstr = toolref.translate( - translation=translation, - to_console=False, - to_disk=bool(output_dir), - export_path=output_dir or "./{language}", - allow_empty_container=allow_empty_container, - container_override=container_overrides, - ) - - else: - name = toolref.__name__ if isclass(toolref) else toolref.__class__.__name__ - raise Exception("Unsupported tool type: " + name) - - print(wfstr, file=sys.stdout) - return wfstr + translations.translate( + entity=toolref, + dest_fmt=dest_fmt, + mode=mode, + + # file io + to_disk=True, + export_path=output_dir, + should_zip=None, # TODO create cli args? + to_console=None, # TODO create cli args? + tool_to_console=None, # TODO create cli args? + write_inputs_file=None, # TODO create cli args? + source_files=None, # TODO create cli args? + + # inputs + additional_inputs=inputsdict if inputsdict else None, + hints=hints, + + # containers + with_container=None, # TODO create cli arg? + allow_empty_container=allow_empty_container, + container_override=container_overrides if container_overrides else None, + + # resouces + with_resource_overrides=None, # TODO create cli arg? + merge_resources=None, # TODO create cli arg? + max_cores=None, # TODO create cli arg? + max_mem=None, # TODO create cli arg? + max_duration=None, # TODO create cli arg? + + # misc + render_comments=render_comments, + should_validate=None, # TODO create cli arg? 
+ ) def spider_tool( - tool: Union[str, j.CommandTool, j.Workflow], + tool: str | j.CommandTool | j.Workflow, name=None, force=False, only_toolbox=False, @@ -312,15 +351,15 @@ def spider_tool( def generate_inputs( jc: JanisConfiguration, - tool: Union[str, j.CommandTool, j.Workflow], + tool: str | j.CommandTool | j.Workflow, all=False, name=None, force=False, additional_inputs=None, with_resources=False, - quality_type: List[InputQualityType] = None, - recipes: List[str] = None, - hints: dict = None, + quality_type: Optional[list[InputQualityType]] = None, + recipes: Optional[list[str]] = None, + hints: Optional[dict[str, Any]] = None, ): toolref, _ = resolve_tool(tool, name, from_toolshed=True, force=force) inputsdict = None @@ -358,7 +397,7 @@ def generate_inputs( class InitArgParser(argparse.ArgumentParser): def __init__( - self, templatename, schema: List[TemplateInput], description: str = None + self, templatename, schema: list[TemplateInput], description: Optional[str] = None ): super().__init__(f"janis init {templatename}", description=description) # self.add_usage( @@ -504,9 +543,9 @@ def get_config(): def run_from_jobfile( - workflow: Union[str, j.Tool, Type[j.Tool]], + workflow: str | j.Tool | Type[j.Tool], jobfile: PreparedJob, - engine: Union[str, Engine, None] = None, + engine: Optional[str | Engine] = None, wait: bool = False, # specific engine args cromwell_jar: Optional[str] = None, @@ -583,17 +622,17 @@ def run_from_jobfile( def prepare_job( - tool: Union[str, j.Tool, Type[j.Tool]], + tool: str | j.Tool | Type[j.Tool], # workflow search options workflow_reference: Optional[str], # if this is None, no jobfile will be written jc: JanisConfiguration, engine: Optional[str], batchrun_reqs: Optional[BatchRunRequirements], validation_reqs: Optional[ValidationRequirements], - hints: Optional[Dict[str, str]], + hints: Optional[dict[str, str]], output_dir: Optional[str], execution_dir: Optional[str], - inputs: Union[str, dict], + inputs: str | dict, required_inputs: dict, watch, max_cores, @@ -611,9 +650,9 @@ def prepare_job( skip_digest_lookup, skip_digest_cache, run_prepare_processing, - db_type: DatabaseTypeToUse = None, - source_hints: List[str] = None, - post_run_script: str = None, + db_type: Optional[DatabaseTypeToUse] = None, + source_hints: Optional[list[str]] = None, + post_run_script: Optional[str] = None, localise_all_files: bool = False, ): @@ -793,7 +832,7 @@ def get_filescheme_from_fs(fs, **kwargs): raise Exception(f"Couldn't initialise filescheme with unrecognised type: '{fs}'") -def abort_wids(sids: List[str], wait=True): +def abort_wids(sids: list[str], wait=True): cm = ConfigManager(db_path=None) for sid in sids: try: diff --git a/janis_assistant/management/workflowmanager.py b/janis_assistant/management/workflowmanager.py index 8db7d91..99da3c7 100644 --- a/janis_assistant/management/workflowmanager.py +++ b/janis_assistant/management/workflowmanager.py @@ -41,7 +41,7 @@ from janis_core.operators.operator import Operator from janis_core.translations import get_translator, CwlTranslator from janis_core.translations.translationbase import TranslatorBase -from janis_core.translations.wdl import apply_secondary_file_format_to_filename +from janis_core.utils.secondary import apply_secondary_file_format_to_filename from janis_assistant.data.enums import TaskStatus, ProgressKeys from janis_assistant.data.models.joblabel import JobLabelModel diff --git a/janis_assistant/tests/data/__init__.py b/janis_assistant/tests/data/__init__.py new file mode 100644 index 
0000000..e69de29 diff --git a/janis_assistant/tests/data/cwl/fastqc.cwl b/janis_assistant/tests/data/cwl/fastqc.cwl new file mode 100644 index 0000000..909c065 --- /dev/null +++ b/janis_assistant/tests/data/cwl/fastqc.cwl @@ -0,0 +1,50 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool + +doc: | + Run fastqc on raw reads in FASTQ format (single or paired end) or aligned reads in BAM. + +hints: + ResourceRequirement: + coresMin: 1 + ramMin: 5000 + DockerRequirement: + dockerPull: quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1 + +baseCommand: "fastqc" +arguments: + - valueFrom: $(runtime.outdir) + prefix: "-o" + - valueFrom: "--noextract" + +inputs: + fastq1: + type: File? + inputBinding: + position: 1 + fastq2: + type: File? + inputBinding: + position: 2 + bam: + type: File? + inputBinding: + position: 1 + +outputs: + fastqc_zip: + doc: all data e.g. figures + type: + type: array + items: File + outputBinding: + glob: "*_fastqc.zip" + fastqc_html: + doc: html report showing results from zip + type: + type: array + items: File + outputBinding: + glob: "*_fastqc.html" + \ No newline at end of file diff --git a/janis_assistant/tests/data/cwl/fastqc2.cwl b/janis_assistant/tests/data/cwl/fastqc2.cwl new file mode 100644 index 0000000..8a437a9 --- /dev/null +++ b/janis_assistant/tests/data/cwl/fastqc2.cwl @@ -0,0 +1,169 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool + +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1 + SoftwareRequirement: + packages: + fastqc: + specs: [ "http://identifiers.org/biotools/fastqc" ] + version: [ "0.11.9--hdfd78af_1", "0.11.9" ] + +inputs: + + reads_file: + type: File + inputBinding: + position: 50 + doc: | + Input bam,sam,bam_mapped,sam_mapped or fastq file + + format_enum: + type: + - "null" + - type: enum + name: "format" + symbols: ['bam','sam','bam_mapped','sam_mapped','fastq'] + inputBinding: + position: 6 + prefix: '--format' + doc: | + Bypasses the normal sequence file format detection and + forces the program to use the specified format. Valid + formats are bam,sam,bam_mapped,sam_mapped and fastq + + threads: + type: int? + inputBinding: + position: 7 + prefix: '--threads' + doc: | + Specifies the number of files which can be processed + simultaneously. Each thread will be allocated 250MB of + memory so you shouldn't run more threads than your + available memory will cope with, and not more than + 6 threads on a 32 bit machine + contaminants: + type: File? + inputBinding: + position: 8 + prefix: '--contaminants' + doc: | + Specifies a non-default file which contains the list of + contaminants to screen overrepresented sequences against. + The file must contain sets of named contaminants in the + form name[tab]sequence. Lines prefixed with a hash will + be ignored. + adapters: + type: File? + inputBinding: + position: 9 + prefix: '--adapters' + doc: | + Specifies a non-default file which contains the list of + adapter sequences which will be explicity searched against + the library. The file must contain sets of named adapters + in the form name[tab]sequence. Lines prefixed with a hash + will be ignored. + limits: + type: File? + inputBinding: + position: 10 + prefix: '--limits' + doc: | + Specifies a non-default file which contains a set of criteria + which will be used to determine the warn/error limits for the + various modules. This file can also be used to selectively + remove some modules from the output all together. 
The format + needs to mirror the default limits.txt file found in the + Configuration folder. + kmers: + type: int? + inputBinding: + position: 11 + prefix: '--kmers' + doc: | + Specifies the length of Kmer to look for in the Kmer content + module. Specified Kmer length must be between 2 and 10. Default + length is 7 if not specified. + casava: + type: boolean? + inputBinding: + position: 13 + prefix: '--casava' + doc: | + Files come from raw casava output. Files in the same sample + group (differing only by the group number) will be analysed + as a set rather than individually. Sequences with the filter + flag set in the header will be excluded from the analysis. + Files must have the same names given to them by casava + (including being gzipped and ending with .gz) otherwise they + won't be grouped together correctly. + + nofilter: + type: boolean? + inputBinding: + position: 14 + prefix: '--nofilter' + doc: | + If running with --casava then don't remove read flagged by + casava as poor quality when performing the QC analysis. + hide_group: + type: boolean? + inputBinding: + position: 15 + prefix: '--nogroup' + doc: | + Disable grouping of bases for reads >50bp. All reports will + show data for every base in the read. WARNING: Using this + option will cause fastqc to crash and burn if you use it on + really long reads, and your plots may end up a ridiculous size. + You have been warned! +outputs: + zipped_file: + type: File + outputBinding: + glob: '*.zip' + html_file: + type: File + outputBinding: + glob: '*.html' + summary_file: + type: File + outputBinding: + glob: "*/summary.txt" +baseCommand: [fastqc, --extract, --outdir, .] +$namespaces: + s: http://schema.org/ +$schemas: +- https://schema.org/version/latest/schemaorg-current-https.rdf +s:name: "fastqc_2" +s:license: http://www.apache.org/licenses/LICENSE-2.0 +s:creator: +- class: s:Organization + s:legalName: "Cincinnati Children's Hospital Medical Center" + s:location: + - class: s:PostalAddress + s:addressCountry: "USA" + s:addressLocality: "Cincinnati" + s:addressRegion: "OH" + s:postalCode: "45229" + s:streetAddress: "3333 Burnet Ave" + s:telephone: "+1(513)636-4200" + s:logo: "https://www.cincinnatichildrens.org/-/media/cincinnati%20childrens/global%20shared/childrens-logo-new.png" + s:department: + - class: s:Organization + s:legalName: "Allergy and Immunology" + s:department: + - class: s:Organization + s:legalName: "Barski Research Lab" + s:member: + - class: s:Person + s:name: Michael Kotliar + s:email: mailto:misha.kotliar@gmail.com + s:sameAs: + - id: http://orcid.org/0000-0002-6486-3898 +doc: | + Tool runs FastQC from Babraham Bioinformatics \ No newline at end of file diff --git a/janis_assistant/tests/data/cwl/gatk_haplotype_tool.cwl b/janis_assistant/tests/data/cwl/gatk_haplotype_tool.cwl new file mode 100644 index 0000000..7759397 --- /dev/null +++ b/janis_assistant/tests/data/cwl/gatk_haplotype_tool.cwl @@ -0,0 +1,1250 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: +- gatk +- HaplotypeCaller +doc: |- + Call germline SNPs and indels via local re-assembly of haplotypes + +
+  The HaplotypeCaller is capable of calling SNPs and indels simultaneously via local de-novo assembly of haplotypes in an active region. In other words, whenever the program encounters a region showing signs of variation, it discards the existing mapping information and completely reassembles the reads in that region. This allows the HaplotypeCaller to be more accurate when calling regions that are traditionally difficult to call, for example when they contain different types of variants close to each other. It also makes the HaplotypeCaller much better at calling indels than position-based callers like UnifiedGenotyper.
+
+  In the GVCF workflow used for scalable variant calling in DNA sequence data, HaplotypeCaller runs per-sample to generate an intermediate GVCF (not to be used in final analysis), which can then be used in GenotypeGVCFs for joint genotyping of multiple samples in a very efficient way. The GVCF workflow enables rapid incremental processing of samples as they roll off the sequencer, as well as scaling to very large cohort sizes (e.g. the 92K exomes of ExAC).
+
+  In addition, HaplotypeCaller is able to handle non-diploid organisms as well as pooled experiment data. Note however that the algorithms used to calculate variant likelihoods is not well suited to extreme allele frequencies (relative to ploidy) so its use is not recommended for somatic (cancer) variant discovery. For that purpose, use Mutect2 instead.
+
+  Finally, HaplotypeCaller is also able to correctly handle the splice junctions that make RNAseq a challenge for most variant callers, on the condition that the input read data has previously been processed according to our recommendations as documented here.
+
+  How HaplotypeCaller works
+
+  1. Define active regions
+  The program determines which regions of the genome it needs to operate on (active regions), based on the presence of evidence for variation.
+
+  2. Determine haplotypes by assembly of the active region
+  For each active region, the program builds a De Bruijn-like graph to reassemble the active region and identifies what are the possible haplotypes present in the data. The program then realigns each haplotype against the reference haplotype using the Smith-Waterman algorithm in order to identify potentially variant sites.
+
+  3. Determine likelihoods of the haplotypes given the read data
+  For each active region, the program performs a pairwise alignment of each read against each haplotype using the PairHMM algorithm. This produces a matrix of likelihoods of haplotypes given the read data. These likelihoods are then marginalized to obtain the likelihoods of alleles for each potentially variant site given the read data.
+
+  4. Assign sample genotypes
+  For each potentially variant site, the program applies Bayes' rule, using the likelihoods of alleles given the read data to calculate the likelihoods of each genotype per sample given the read data observed for that sample. The most likely genotype is then assigned to the sample.
+
+  Input
+  Input bam file(s) from which to make variant calls
+
+  Output
+  Either a VCF or GVCF file with raw, unfiltered SNP and indel calls. Regular VCFs must be filtered either by variant recalibration (Best Practice) or hard-filtering before use in downstream analyses. If using the GVCF workflow, the output is a GVCF file that must first be run through GenotypeGVCFs and then filtering before further analysis.
+
+  Caveats
+
+  Special note on ploidy
+  This tool is able to handle many non-diploid use cases; the desired ploidy can be specified using the -ploidy argument. Note however that very high ploidies (such as are encountered in large pooled experiments) may cause performance challenges including excessive slowness. We are working on resolving these limitations.
+
+  Additional Notes
+ +requirements: + ShellCommandRequirement: {} + InlineJavascriptRequirement: + expressionLib: + - | + /** + * File of functions to be added to cwl files + */ + function generateGATK4BooleanValue(){ + /** + * Boolean types in GATK 4 are expressed on the command line as -- "true"/"false", + * so patch here + */ + if(self === true || self === false){ + return self.toString() + } + return self; + } + function applyTagsToArgument(prefix, tags){ + /** + * Function to be used in the field valueFrom of File objects to add gatk tags. + */ + if(!self){ + return null; + } + else if(!tags){ + return generateArrayCmd(prefix); + } + else{ + function addTagToArgument(tagObject, argument){ + var allTags = Array.isArray(tagObject) ? tagObject.join(",") : tagObject; + return [prefix + ":" + allTags, argument]; + } + if(Array.isArray(self)){ + if(!Array.isArray(tags) || self.length !== tags.length){ + throw new TypeError("Argument '" + prefix + "' tag field is invalid"); + } + var value = self.map(function(element, i) { + return addTagToArgument(tags[i], element); + }).reduce(function(a, b){return a.concat(b)}) + return value; + } + else{ + return addTagToArgument(tags, self); + } + } + } + function generateArrayCmd(prefix){ + /** + * Function to be used in the field valueFrom of array objects, so that arrays are optional + * and prefixes are handled properly. + * + * The issue that this solves is documented here: + * https://www.biostars.org/p/258414/#260140 + */ + if(!self){ + return null; + } + if(!Array.isArray(self)){ + self = [self]; + } + var output = []; + self.forEach(function(element) { + output.push(prefix); + output.push(element); + }) + return output; + } + /* Polyfill String.endsWith (it was introduced in ES6, but CWL 1.0 only supports ES5) */ + String.prototype.endsWith = String.prototype.endsWith || function(suffix) { + return this.indexOf(suffix, this.length - suffix.length) >= 0; + }; + SchemaDefRequirement: + types: + - type: enum + name: annotation_type + symbols: + - AS_BaseQualityRankSumTest + - AS_FisherStrand + - AS_InbreedingCoeff + - AS_MappingQualityRankSumTest + - AS_QualByDepth + - AS_RMSMappingQuality + - AS_ReadPosRankSumTest + - AS_StrandOddsRatio + - AlleleFraction + - BaseQuality + - BaseQualityRankSumTest + - ChromosomeCounts + - ClippingRankSumTest + - CountNs + - Coverage + - DepthPerAlleleBySample + - DepthPerSampleHC + - ExcessHet + - FisherStrand + - FragmentLength + - GenotypeSummaries + - InbreedingCoeff + - LikelihoodRankSumTest + - MappingQuality + - MappingQualityRankSumTest + - MappingQualityZero + - OrientationBiasReadCounts + - OriginalAlignment + - PossibleDeNovo + - QualByDepth + - RMSMappingQuality + - ReadPosRankSumTest + - ReadPosition + - ReferenceBases + - SampleList + - StrandBiasBySample + - StrandOddsRatio + - TandemRepeat + - UniqueAltReadCount +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/gatk4:4.1.6.0--py38_0 +inputs: +- doc: Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise, + overrides threshold fraction. + id: ambig-filter-bases + type: int? + inputBinding: + prefix: --ambig-filter-bases +- doc: Threshold fraction of ambiguous bases + id: ambig-filter-frac + type: double? + inputBinding: + prefix: --ambig-filter-frac +- doc: Maximum length of fragment (insert size) + id: max-fragment-length + type: int? + inputBinding: + prefix: --max-fragment-length +- doc: Minimum length of fragment (insert size) + id: min-fragment-length + type: int? 
+ inputBinding: + prefix: --min-fragment-length +- doc: One or more genomic intervals to keep + id: keep-intervals + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--keep-intervals")) +- doc: Name of the library to keep + id: library + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--library")) +- doc: Maximum mapping quality to keep (inclusive) + id: maximum-mapping-quality + type: int? + inputBinding: + prefix: --maximum-mapping-quality +- doc: Minimum mapping quality to keep (inclusive) + id: minimum-mapping-quality + type: int? + inputBinding: + prefix: --minimum-mapping-quality +- doc: Minimum start location difference at which mapped mates are considered distant + id: mate-too-distant-length + type: int? + inputBinding: + prefix: --mate-too-distant-length +- doc: Allow a read to be filtered out based on having only 1 soft-clipped block. + By default, both ends must have a soft-clipped block, setting this flag requires + only 1 soft-clipped block + id: dont-require-soft-clips-both-ends + type: boolean? + inputBinding: + prefix: --dont-require-soft-clips-both-ends + valueFrom: $(generateGATK4BooleanValue()) +- doc: Minimum number of aligned bases + id: filter-too-short + type: int? + inputBinding: + prefix: --filter-too-short +- doc: Platform attribute (PL) to match + id: platform-filter-name + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--platform-filter-name")) +- doc: Platform unit (PU) to filter out + id: black-listed-lanes + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--black-listed-lanes")) +- doc: A read group filter expression in the form "attribute:value", where "attribute" + is a two character read group attribute such as "RG" or "PU". + id: read-group-black-list + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--read-group-black-list")) +- doc: The name of the read group to keep + id: keep-read-group + type: string? + inputBinding: + prefix: --keep-read-group +- doc: Keep only reads with length at most equal to the specified value + id: max-read-length + type: int? + inputBinding: + prefix: --max-read-length +- doc: Keep only reads with length at least equal to the specified value + id: min-read-length + type: int? + inputBinding: + prefix: --min-read-length +- doc: Keep only reads with this read name + id: read-name + type: string? + inputBinding: + prefix: --read-name +- doc: Keep only reads on the reverse strand + id: keep-reverse-strand-only + type: boolean? + inputBinding: + prefix: --keep-reverse-strand-only + valueFrom: $(generateGATK4BooleanValue()) +- doc: The name of the sample(s) to keep, filtering out all others + id: sample + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--sample")) +- doc: Inverts the results from this filter, causing all variants that would pass + to fail and visa-versa. + id: invert-soft-clip-ratio-filter + type: boolean? 
+ inputBinding: + prefix: --invert-soft-clip-ratio-filter + valueFrom: $(generateGATK4BooleanValue()) +- doc: Threshold ratio of soft clipped bases (leading / trailing the cigar string) + to total bases in read for read to be filtered. + id: soft-clipped-leading-trailing-ratio + type: double? + inputBinding: + prefix: --soft-clipped-leading-trailing-ratio +- doc: Threshold ratio of soft clipped bases (anywhere in the cigar string) to total + bases in read for read to be filtered. + id: soft-clipped-ratio-threshold + type: double? + inputBinding: + prefix: --soft-clipped-ratio-threshold +- doc: Minimum probability for a locus to be considered active. + id: active-probability-threshold + type: double? + inputBinding: + prefix: --active-probability-threshold +- doc: Use Mutect2's adaptive graph pruning algorithm + id: adaptive-pruning + type: boolean? + inputBinding: + prefix: --adaptive-pruning + valueFrom: $(generateGATK4BooleanValue()) +- doc: Initial base error rate estimate for adaptive pruning + id: adaptive-pruning-initial-error-rate + type: double? + inputBinding: + prefix: --adaptive-pruning-initial-error-rate +- doc: If true, adds a PG tag to created SAM/BAM/CRAM files. + id: add-output-sam-program-record + type: boolean? + inputBinding: + prefix: --add-output-sam-program-record + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, adds a command line header line to created VCF files. + id: add-output-vcf-command-line + type: boolean? + inputBinding: + prefix: --add-output-vcf-command-line + valueFrom: $(generateGATK4BooleanValue()) +- doc: Annotate all sites with PLs + id: all-site-pls + type: boolean? + inputBinding: + prefix: --all-site-pls + valueFrom: $(generateGATK4BooleanValue()) +- doc: Likelihood and read-based annotations will only take into consideration reads + that overlap the variant or any base no further than this distance expressed in + base pairs + id: allele-informative-reads-overlap-margin + type: int? + inputBinding: + prefix: --allele-informative-reads-overlap-margin +- doc: The set of alleles to force-call regardless of evidence + id: alleles + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--alleles", inputs['alleles_tags'])) +- doc: A argument to set the tags of 'alleles' + id: alleles_tags + type: + - 'null' + - string + - string[] +- doc: Allow graphs that have non-unique kmers in the reference + id: allow-non-unique-kmers-in-ref + type: boolean? + inputBinding: + prefix: --allow-non-unique-kmers-in-ref + valueFrom: $(generateGATK4BooleanValue()) +- doc: If provided, we will annotate records with the number of alternate alleles + that were discovered (but not necessarily genotyped) at a given site + id: annotate-with-num-discovered-alleles + type: boolean? 
+ inputBinding: + prefix: --annotate-with-num-discovered-alleles + valueFrom: $(generateGATK4BooleanValue()) +- doc: One or more specific annotations to add to variant calls [synonymous with -A] + id: annotation + type: + - 'null' + - annotation_type + - annotation_type[] + inputBinding: + valueFrom: $(generateArrayCmd("--annotation")) +- doc: One or more groups of annotations to apply to variant calls [synonymous with + -G] + id: annotation-group + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--annotation-group")) +- doc: One or more specific annotations to exclude from variant calls [synonymous + with -AX] + id: annotations-to-exclude + type: + - 'null' + - annotation_type + - annotation_type[] + inputBinding: + valueFrom: $(generateArrayCmd("--annotations-to-exclude")) +- doc: read one or more arguments files and add them to the command line + id: arguments_file + type: + - 'null' + - type: array + items: File + inputBinding: + valueFrom: $(null) + - File + inputBinding: + valueFrom: $(applyTagsToArgument("--arguments_file", inputs['arguments_file_tags'])) +- doc: A argument to set the tags of 'arguments_file' + id: arguments_file_tags + type: + - 'null' + - type: array + items: + - string + - type: array + items: string +- doc: Output the assembly region to this IGV formatted file + id: assembly-region-out-filename + type: string? + inputBinding: + prefix: --assembly-region-out +- doc: Number of additional bases of context to include around each assembly region + id: assembly-region-padding + type: int? + inputBinding: + prefix: --assembly-region-padding +- doc: File to which assembled haplotypes should be written [synonymous with -bamout] + id: bam-output-filename + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--bam-output", inputs['bam-output_tags'])) +- doc: A argument to set the tags of 'bam-output' + id: bam-output_tags + type: + - 'null' + - string + - string[] +- doc: Which haplotypes should be written to the BAM + id: bam-writer-type + type: + - 'null' + - type: enum + symbols: + - ALL_POSSIBLE_HAPLOTYPES + - CALLED_HAPLOTYPES + inputBinding: + prefix: --bam-writer-type +- doc: Base qualities below this threshold will be reduced to the minimum (6) + id: base-quality-score-threshold + type: int? + inputBinding: + prefix: --base-quality-score-threshold +- doc: Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer + if unset. [synonymous with -CIPB] + id: cloud-index-prefetch-buffer + type: int? + inputBinding: + prefix: --cloud-index-prefetch-buffer +- doc: Size of the cloud-only prefetch buffer (in MB; 0 to disable). [synonymous with + -CPB] + id: cloud-prefetch-buffer + type: int? + inputBinding: + prefix: --cloud-prefetch-buffer +- doc: Comparison VCF file(s) [synonymous with -comp] + id: comparison + type: + - 'null' + - type: array + items: File + inputBinding: + valueFrom: $(null) + - File + inputBinding: + valueFrom: $(applyTagsToArgument("--comparison", inputs['comparison_tags'])) +- doc: A argument to set the tags of 'comparison' + id: comparison_tags + type: + - 'null' + - type: array + items: + - string + - type: array + items: string +- doc: Tab-separated File containing fraction of contamination in sequencing data + (per sample) to aggressively remove. Format should be "" + (Contamination is double) per line; No header. 
[synonymous with -contamination-file] + id: contamination-fraction-per-sample-file + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--contamination-fraction-per-sample-file", inputs['contamination-fraction-per-sample-file_tags'])) +- doc: A argument to set the tags of 'contamination-fraction-per-sample-file' + id: contamination-fraction-per-sample-file_tags + type: + - 'null' + - string + - string[] +- doc: Fraction of contamination in sequencing data (for all samples) to aggressively + remove [synonymous with -contamination] + id: contamination-fraction-to-filter + type: double? + inputBinding: + prefix: --contamination-fraction-to-filter +- doc: Undocumented option + id: correct-overlapping-quality + type: boolean? + inputBinding: + prefix: --correct-overlapping-quality + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM + file. [synonymous with -OBI] + id: create-output-bam-index + type: boolean? + inputBinding: + prefix: --create-output-bam-index + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, create a MD5 digest for any BAM/SAM/CRAM file created [synonymous + with -OBM] + id: create-output-bam-md5 + type: boolean? + inputBinding: + prefix: --create-output-bam-md5 + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, create a VCF index when writing a coordinate-sorted VCF file. [synonymous + with -OVI] + id: create-output-variant-index + type: boolean? + inputBinding: + prefix: --create-output-variant-index + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, create a a MD5 digest any VCF file created. [synonymous with -OVM] + id: create-output-variant-md5 + type: boolean? + inputBinding: + prefix: --create-output-variant-md5 + valueFrom: $(generateGATK4BooleanValue()) +- doc: dbSNP file [synonymous with -D] + id: dbsnp + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--dbsnp", inputs['dbsnp_tags'])) +- doc: A argument to set the tags of 'dbsnp' + id: dbsnp_tags + type: + - 'null' + - string + - string[] +- doc: Print out verbose debug information about each assembly region [synonymous + with -debug] + id: debug-assembly + type: boolean? + inputBinding: + prefix: --debug-assembly + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, don't cache bam indexes, this will reduce memory requirements but + may harm performance if many intervals are specified. Caching is automatically + disabled if there are no intervals specified. [synonymous with -DBIC] + id: disable-bam-index-caching + type: boolean? + inputBinding: + prefix: --disable-bam-index-caching + valueFrom: $(generateGATK4BooleanValue()) +- doc: Don't skip calculations in ActiveRegions with no variants + id: disable-optimizations + type: boolean? + inputBinding: + prefix: --disable-optimizations + valueFrom: $(generateGATK4BooleanValue()) +- doc: Read filters to be disabled before analysis [synonymous with -DF] + id: disable-read-filter + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--disable-read-filter")) +- doc: If specified, do not check the sequence dictionaries from our inputs for compatibility. + Use at your own risk! + id: disable-sequence-dictionary-validation + type: boolean? 
+ inputBinding: + prefix: --disable-sequence-dictionary-validation + valueFrom: $(generateGATK4BooleanValue()) +- doc: Disable all tool default annotations + id: disable-tool-default-annotations + type: boolean? + inputBinding: + prefix: --disable-tool-default-annotations + valueFrom: $(generateGATK4BooleanValue()) +- doc: 'Disable all tool default read filters (WARNING: many tools will not function + correctly without their default read filters on)' + id: disable-tool-default-read-filters + type: boolean? + inputBinding: + prefix: --disable-tool-default-read-filters + valueFrom: $(generateGATK4BooleanValue()) +- doc: Disable physical phasing + id: do-not-run-physical-phasing + type: boolean? + inputBinding: + prefix: --do-not-run-physical-phasing + valueFrom: $(generateGATK4BooleanValue()) +- doc: Disable iterating over kmer sizes when graph cycles are detected + id: dont-increase-kmer-sizes-for-cycles + type: boolean? + inputBinding: + prefix: --dont-increase-kmer-sizes-for-cycles + valueFrom: $(generateGATK4BooleanValue()) +- doc: Do not analyze soft clipped bases in the reads + id: dont-use-soft-clipped-bases + type: boolean? + inputBinding: + prefix: --dont-use-soft-clipped-bases + valueFrom: $(generateGATK4BooleanValue()) +- doc: Mode for emitting reference confidence scores (For Mutect2, this is a BETA + feature) [synonymous with -ERC] + id: emit-ref-confidence + type: + - 'null' + - type: enum + symbols: + - NONE + - BP_RESOLUTION + - GVCF + inputBinding: + prefix: --emit-ref-confidence +- doc: Use all possible annotations (not for the faint of heart) + id: enable-all-annotations + type: boolean? + inputBinding: + prefix: --enable-all-annotations + valueFrom: $(generateGATK4BooleanValue()) +- doc: One or more genomic intervals to exclude from processing [synonymous with -XL] + id: exclude-intervals + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--exclude-intervals")) +- doc: Output the band lower bound for each GQ block regardless of the data it represents + id: floor-blocks + type: boolean? + inputBinding: + prefix: --floor-blocks + valueFrom: $(generateGATK4BooleanValue()) +- doc: If provided, all regions will be marked as active + id: force-active + type: boolean? + inputBinding: + prefix: --force-active + valueFrom: $(generateGATK4BooleanValue()) +- doc: Force-call filtered alleles included in the resource specified by --alleles + [synonymous with -genotype-filtered-alleles] + id: force-call-filtered-alleles + type: boolean? + inputBinding: + prefix: --force-call-filtered-alleles + valueFrom: $(generateGATK4BooleanValue()) +- doc: Samples representing the population "founders" + id: founder-id + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--founder-id")) +- doc: A configuration file to use with the GATK. + id: gatk-config-file + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--gatk-config-file", inputs['gatk-config-file_tags'])) +- doc: A argument to set the tags of 'gatk-config-file' + id: gatk-config-file_tags + type: + - 'null' + - string + - string[] +- doc: If the GCS bucket channel errors out, how many times it will attempt to re-initiate + the connection [synonymous with -gcs-retries] + id: gcs-max-retries + type: int? + inputBinding: + prefix: --gcs-max-retries +- doc: Project to bill when accessing "requester pays" buckets. 
If unset, these buckets + cannot be accessed. + id: gcs-project-for-requester-pays + type: string? + inputBinding: + prefix: --gcs-project-for-requester-pays +- doc: Write debug assembly graph information to this file [synonymous with -graph] + id: graph-output-filename + type: string? + inputBinding: + prefix: --graph-output +- doc: Exclusive upper bounds for reference confidence GQ bands (must be in [1, 100] + and specified in increasing order) [synonymous with -GQB] + id: gvcf-gq-bands + type: + - 'null' + - type: array + items: int + inputBinding: + valueFrom: $(null) + - int + inputBinding: + valueFrom: $(generateArrayCmd("--gvcf-gq-bands")) +- doc: Heterozygosity value used to compute prior likelihoods for any locus. See + the GATKDocs for full details on the meaning of this population genetics concept + id: heterozygosity + type: double? + inputBinding: + prefix: --heterozygosity +- doc: Standard deviation of heterozygosity for SNP and indel calling. + id: heterozygosity-stdev + type: double? + inputBinding: + prefix: --heterozygosity-stdev +- doc: Heterozygosity for indel calling. See the GATKDocs for heterozygosity for + full details on the meaning of this population genetics concept + id: indel-heterozygosity + type: double? + inputBinding: + prefix: --indel-heterozygosity +- doc: The size of an indel to check for in the reference model + id: indel-size-to-eliminate-in-ref-model + type: int? + inputBinding: + prefix: --indel-size-to-eliminate-in-ref-model +- doc: BAM/SAM/CRAM file containing reads [synonymous with -I] + id: input + type: + - type: array + items: File + inputBinding: + valueFrom: $(null) + - File + inputBinding: + valueFrom: $(applyTagsToArgument("--input", inputs['input_tags'])) + secondaryFiles: $(self.basename + self.nameext.replace('m','i')) +- doc: A argument to set the tags of 'input' + id: input_tags + type: + - 'null' + - type: array + items: + - string + - type: array + items: string +- doc: Amount of padding (in bp) to add to each interval you are excluding. [synonymous + with -ixp] + id: interval-exclusion-padding + type: int? + inputBinding: + prefix: --interval-exclusion-padding +- doc: Interval merging rule for abutting intervals [synonymous with -imr] + id: interval-merging-rule + type: + - 'null' + - type: enum + symbols: + - ALL + - OVERLAPPING_ONLY + inputBinding: + prefix: --interval-merging-rule +- doc: Amount of padding (in bp) to add to each interval you are including. [synonymous + with -ip] + id: interval-padding + type: int? 
+ inputBinding: + prefix: --interval-padding +- doc: Set merging approach to use for combining interval inputs [synonymous with + -isr] + id: interval-set-rule + type: + - 'null' + - type: enum + symbols: + - UNION + - INTERSECTION + inputBinding: + prefix: --interval-set-rule +- doc: One or more genomic intervals over which to operate [synonymous with -L] + id: intervals + type: + - 'null' + - type: array + items: + - File + - string + inputBinding: + valueFrom: $(null) + - File + - string + inputBinding: + valueFrom: $(applyTagsToArgument("--intervals", inputs['intervals_tags'])) +- doc: A argument to set the tags of 'intervals' + id: intervals_tags + type: + - 'null' + - type: array + items: + - string + - type: array + items: string +- doc: Kmer size to use in the read threading assembler + id: kmer-size + type: + - 'null' + - type: array + items: int + inputBinding: + valueFrom: $(null) + - int + inputBinding: + valueFrom: $(generateArrayCmd("--kmer-size")) +- doc: Lenient processing of VCF files [synonymous with -LE] + id: lenient + type: boolean? + inputBinding: + prefix: --lenient + valueFrom: $(generateGATK4BooleanValue()) +- doc: Maximum number of alternate alleles to genotype + id: max-alternate-alleles + type: int? + inputBinding: + prefix: --max-alternate-alleles +- doc: Maximum size of an assembly region + id: max-assembly-region-size + type: int? + inputBinding: + prefix: --max-assembly-region-size +- doc: Maximum number of genotypes to consider at any site + id: max-genotype-count + type: int? + inputBinding: + prefix: --max-genotype-count +- doc: Two or more phased substitutions separated by this distance or less are merged + into MNPs. [synonymous with -mnp-dist] + id: max-mnp-distance + type: int? + inputBinding: + prefix: --max-mnp-distance +- doc: Maximum number of haplotypes to consider for your population + id: max-num-haplotypes-in-population + type: int? + inputBinding: + prefix: --max-num-haplotypes-in-population +- doc: Upper limit on how many bases away probability mass can be moved around when + calculating the boundaries between active and inactive assembly regions + id: max-prob-propagation-distance + type: int? + inputBinding: + prefix: --max-prob-propagation-distance +- doc: Maximum number of reads to retain per alignment start position. Reads above + this threshold will be downsampled. Set to 0 to disable. + id: max-reads-per-alignment-start + type: int? + inputBinding: + prefix: --max-reads-per-alignment-start +- doc: Maximum number of variants in graph the adaptive pruner will allow + id: max-unpruned-variants + type: int? + inputBinding: + prefix: --max-unpruned-variants +- doc: Minimum size of an assembly region + id: min-assembly-region-size + type: int? + inputBinding: + prefix: --min-assembly-region-size +- doc: Minimum base quality required to consider a base for calling [synonymous with + -mbq] + id: min-base-quality-score + type: int? + inputBinding: + prefix: --min-base-quality-score +- doc: Minimum length of a dangling branch to attempt recovery + id: min-dangling-branch-length + type: int? + inputBinding: + prefix: --min-dangling-branch-length +- doc: Minimum support to not prune paths in the graph + id: min-pruning + type: int? + inputBinding: + prefix: --min-pruning +- doc: How many threads should a native pairHMM implementation use + id: native-pair-hmm-threads + type: int? + inputBinding: + prefix: --native-pair-hmm-threads +- doc: use double precision in the native pairHmm. 
This is slower but matches the + java implementation better + id: native-pair-hmm-use-double-precision + type: boolean? + inputBinding: + prefix: --native-pair-hmm-use-double-precision + valueFrom: $(generateGATK4BooleanValue()) +- doc: Number of samples that must pass the minPruning threshold + id: num-pruning-samples + type: int? + inputBinding: + prefix: --num-pruning-samples +- doc: Number of hom-ref genotypes to infer at sites not present in a panel + id: num-reference-samples-if-no-call + type: int? + inputBinding: + prefix: --num-reference-samples-if-no-call +- doc: File to which variants should be written [synonymous with -O] + id: output_filename + type: string + inputBinding: + prefix: --output +- doc: Specifies which type of calls we should output + id: output_mode + type: + - 'null' + - type: enum + symbols: + - EMIT_VARIANTS_ONLY + - EMIT_ALL_CONFIDENT_SITES + - EMIT_ALL_ACTIVE_SITES + inputBinding: + prefix: --output-mode +- doc: Flat gap continuation penalty for use in the Pair HMM + id: pair-hmm-gap-continuation-penalty + type: int? + inputBinding: + prefix: --pair-hmm-gap-continuation-penalty +- doc: The PairHMM implementation to use for genotype likelihood calculations [synonymous + with -pairHMM] + id: pair-hmm-implementation + type: + - 'null' + - type: enum + symbols: + - EXACT + - ORIGINAL + - LOGLESS_CACHING + - AVX_LOGLESS_CACHING + - AVX_LOGLESS_CACHING_OMP + - EXPERIMENTAL_FPGA_LOGLESS_CACHING + - FASTEST_AVAILABLE + inputBinding: + prefix: --pair-hmm-implementation +- doc: The PCR indel model to use + id: pcr-indel-model + type: + - 'null' + - type: enum + symbols: + - NONE + - HOSTILE + - AGGRESSIVE + - CONSERVATIVE + inputBinding: + prefix: --pcr-indel-model +- doc: Pedigree file for determining the population "founders" [synonymous with -ped] + id: pedigree + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--pedigree", inputs['pedigree_tags'])) +- doc: A argument to set the tags of 'pedigree' + id: pedigree_tags + type: + - 'null' + - string + - string[] +- doc: The global assumed mismapping rate for reads + id: phred-scaled-global-read-mismapping-rate + type: int? + inputBinding: + prefix: --phred-scaled-global-read-mismapping-rate +- doc: Callset to use in calculating genotype priors [synonymous with -population] + id: population-callset + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--population-callset", inputs['population-callset_tags'])) +- doc: A argument to set the tags of 'population-callset' + id: population-callset_tags + type: + - 'null' + - string + - string[] +- doc: Ln likelihood ratio threshold for adaptive pruning algorithm + id: pruning-lod-threshold + type: double? + inputBinding: + prefix: --pruning-lod-threshold +- doc: Whether to suppress job-summary info on System.err. + id: QUIET + type: boolean? + inputBinding: + prefix: --QUIET + valueFrom: $(generateGATK4BooleanValue()) +- doc: Read filters to be applied before analysis [synonymous with -RF] + id: read-filter + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--read-filter")) +- doc: Indices to use for the read inputs. If specified, an index must be provided + for every read input and in the same order as the read inputs. If this argument + is not specified, the path to the index for each input will be inferred automatically. 
+ id: read-index + type: + - 'null' + - type: array + items: string + inputBinding: + valueFrom: $(null) + - string + inputBinding: + valueFrom: $(generateArrayCmd("--read-index")) +- doc: Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The + default stringency value SILENT can improve performance when processing a BAM + file in which variable-length data (read, qualities, tags) do not otherwise need + to be decoded. [synonymous with -VS] + id: read-validation-stringency + type: + - 'null' + - type: enum + symbols: + - STRICT + - LENIENT + - SILENT + inputBinding: + prefix: --read-validation-stringency +- doc: Recover all dangling branches + id: recover-all-dangling-branches + type: boolean? + inputBinding: + prefix: --recover-all-dangling-branches + valueFrom: $(generateGATK4BooleanValue()) +- doc: This argument is deprecated since version 3.3 + id: recover-dangling-heads + type: boolean? + inputBinding: + prefix: --recover-dangling-heads + valueFrom: $(generateGATK4BooleanValue()) +- doc: Reference sequence file [synonymous with -R] + id: reference + type: File + inputBinding: + valueFrom: $(applyTagsToArgument("--reference", inputs['reference_tags'])) + secondaryFiles: + - .fai + - ^.dict +- doc: A argument to set the tags of 'reference' + id: reference_tags + type: + - 'null' + - string + - string[] +- doc: Name of single sample to use from a multi-sample bam [synonymous with -ALIAS] + id: sample-name + type: string? + inputBinding: + prefix: --sample-name +- doc: Ploidy (number of chromosomes) per sample. For pooled data, set to (Number + of samples in each pool * Sample Ploidy). [synonymous with -ploidy] + id: sample-ploidy + type: int? + inputBinding: + prefix: --sample-ploidy +- doc: Output traversal statistics every time this many seconds elapse + id: seconds-between-progress-updates + type: double? + inputBinding: + prefix: --seconds-between-progress-updates +- doc: Use the given sequence dictionary as the master/canonical sequence dictionary. Must + be a .dict file. + id: sequence-dictionary + type: File? + inputBinding: + valueFrom: $(applyTagsToArgument("--sequence-dictionary", inputs['sequence-dictionary_tags'])) +- doc: A argument to set the tags of 'sequence-dictionary' + id: sequence-dictionary_tags + type: + - 'null' + - string + - string[] +- doc: display hidden arguments + id: showHidden + type: boolean? + inputBinding: + prefix: --showHidden + valueFrom: $(generateGATK4BooleanValue()) +- doc: If true, don't emit genotype fields when writing vcf file output. + id: sites-only-vcf-output + type: boolean? + inputBinding: + prefix: --sites-only-vcf-output + valueFrom: $(generateGATK4BooleanValue()) +- doc: Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is + the right choice + id: smith-waterman + type: + - 'null' + - type: enum + symbols: + - FASTEST_AVAILABLE + - AVX_ENABLED + - JAVA + inputBinding: + prefix: --smith-waterman +- doc: The minimum phred-scaled confidence threshold at which variants should be called + [synonymous with -stand-call-conf] + id: standard-min-confidence-threshold-for-calling + type: double? + inputBinding: + prefix: --standard-min-confidence-threshold-for-calling +- doc: Temp directory to use. + id: tmp-dir + type: string? + inputBinding: + prefix: --tmp-dir +- doc: Use the contamination-filtered read maps for the purposes of annotating variants + id: use-filtered-reads-for-annotations + type: boolean? 
+ inputBinding: + prefix: --use-filtered-reads-for-annotations + valueFrom: $(generateGATK4BooleanValue()) +- doc: Whether to use the JdkDeflater (as opposed to IntelDeflater) [synonymous with + -jdk-deflater] + id: use-jdk-deflater + type: boolean? + inputBinding: + prefix: --use-jdk-deflater + valueFrom: $(generateGATK4BooleanValue()) +- doc: Whether to use the JdkInflater (as opposed to IntelInflater) [synonymous with + -jdk-inflater] + id: use-jdk-inflater + type: boolean? + inputBinding: + prefix: --use-jdk-inflater + valueFrom: $(generateGATK4BooleanValue()) +- doc: Control verbosity of logging. + id: verbosity + type: + - 'null' + - type: enum + symbols: + - ERROR + - WARNING + - INFO + - DEBUG + inputBinding: + prefix: --verbosity +- doc: display the version number for this tool + id: version + type: boolean? + inputBinding: + prefix: --version + valueFrom: $(generateGATK4BooleanValue()) +outputs: +- id: assembly-region-out + doc: Output file from corresponding to the input argument assembly-region-out-filename + type: File? + outputBinding: + glob: $(inputs['assembly-region-out-filename']) +- id: bam-output + doc: Output file from corresponding to the input argument bam-output-filename + type: File? + outputBinding: + glob: $(inputs['bam-output-filename']) + secondaryFiles: + - "$(inputs['create-output-bam-index']? self.basename + self.nameext.replace('m',\ + \ 'i') : [])" + - "$(inputs['create-output-bam-md5']? self.basename + '.md5' : [])" +- id: graph-output + doc: Output file from corresponding to the input argument graph-output-filename + type: File? + outputBinding: + glob: $(inputs['graph-output-filename']) +- id: output + doc: Output file from corresponding to the input argument output-filename + type: File + outputBinding: + glob: $(inputs.output_filename) + secondaryFiles: + - "$(inputs['create-output-variant-index']? self.basename + (inputs.output_filename.endsWith('.gz')?\ + \ '.tbi':'.idx') : [])" + - "$(inputs['create-output-variant-md5']? self.basename + '.md5' : [])" \ No newline at end of file diff --git a/janis_assistant/tests/data/cwl/subworkflow_test/basic.cwl b/janis_assistant/tests/data/cwl/subworkflow_test/basic.cwl new file mode 100644 index 0000000..1e22771 --- /dev/null +++ b/janis_assistant/tests/data/cwl/subworkflow_test/basic.cwl @@ -0,0 +1,55 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.2 +label: basic tool for testing + +requirements: + - class: ShellCommandRequirement + - class: InlineJavascriptRequirement + - class: DockerRequirement + dockerPull: ubuntu:latest + +baseCommand: echo + +arguments: + - position: 0 + valueFrom: test:\\t:escaped:\\n:characters + +inputs: + inFile: + type: File + inputBinding: + position: 1 + inString: + type: string + inputBinding: + position: 2 + inSecondary: + type: File + secondaryFiles: [.fai, ^.dict, .amb, .ann, .bwt, .pac, .sa] + inputBinding: + position: 3 + inFileOptional: + type: File? + inputBinding: + position: 4 + inStringOptional: + type: string? + inputBinding: + position: 5 + inIntOptional: + type: int? + inputBinding: + position: 6 + inIntOptional2: + type: int? 
+ default: 10 + inputBinding: + position: 6 +outputs: + out_stdout: + type: stdout + + + + diff --git a/janis_assistant/tests/data/cwl/subworkflow_test/main.cwl b/janis_assistant/tests/data/cwl/subworkflow_test/main.cwl new file mode 100644 index 0000000..ab67ba2 --- /dev/null +++ b/janis_assistant/tests/data/cwl/subworkflow_test/main.cwl @@ -0,0 +1,93 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "test workflow" +requirements: + - class: ScatterFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: MultipleInputFeatureRequirement + +inputs: + inFile: + type: File + inFileOptional: + type: File? + inFileArr: + type: File[] + inFileArrOptional: + type: File[]? + inSecondary: + type: File + secondaryFiles: [.fai, ^.dict, .amb, .ann, .bwt, .pac, .sa] + inString: + type: string + inStringOptional: + type: string? + inStringOptional2: + type: string? + inStringArr: + type: string[] + inStringArrOptional: + type: string[]? + inInt: + type: int + inIntOptional: + type: int? + inIntOptional2: + type: int? + default: 5 + +outputs: + outFile: + type: File + outputSource: optional1/out_stdout + +steps: + basic: + run: basic.cwl + in: + inFile: inFile + inString: inString + inSecondary: inSecondary + inFileOptional: inFileOptional + inIntOptional: inIntOptional2 + out: + [out_stdout] + mandatory: + run: mandatory_input_types.cwl + in: + inFile: inFile + inFileArr: inFileArr + inSecondary: inSecondary + inString: inString + inStringArr: inStringArr + inInt: inInt + out: + [out_stdout] + optional1: + run: optional_input_types.cwl + in: + inFile: inFileOptional + inFileArr: inFileArrOptional + out: + [out_stdout] + optional2: + run: optional_input_types.cwl + in: + inString: inStringOptional + inStringArr: inStringArrOptional + inInt: inIntOptional + out: + [out_stdout] + sub: + run: subworkflow.cwl + in: + inFile: inFile + inFileArr: inFileArr + inSecondary: inSecondary + inString: inString + inStringArr: inStringArr + inInt: inInt + out: + [out_stdout] diff --git a/janis_assistant/tests/data/cwl/subworkflow_test/mandatory_input_types.cwl b/janis_assistant/tests/data/cwl/subworkflow_test/mandatory_input_types.cwl new file mode 100644 index 0000000..b7f7cfd --- /dev/null +++ b/janis_assistant/tests/data/cwl/subworkflow_test/mandatory_input_types.cwl @@ -0,0 +1,46 @@ + + +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.2 +label: basic tool for testing mandatory inputs + +requirements: + - class: DockerRequirement + dockerPull: ubuntu:latest + +baseCommand: echo + +inputs: + inFile: + type: File + inputBinding: + position: 1 + inFileArr: + type: File[] + inputBinding: + position: 2 + inSecondary: + type: File + secondaryFiles: [.fai, ^.dict, .amb, .ann, .bwt, .pac, .sa] + inputBinding: + position: 3 + inString: + type: string + inputBinding: + position: 4 + inStringArr: + type: string[] + inputBinding: + position: 5 + inInt: + type: int + inputBinding: + position: 6 +outputs: + out_stdout: + type: stdout + + + + diff --git a/janis_assistant/tests/data/cwl/subworkflow_test/optional_input_types.cwl b/janis_assistant/tests/data/cwl/subworkflow_test/optional_input_types.cwl new file mode 100644 index 0000000..560ff9b --- /dev/null +++ b/janis_assistant/tests/data/cwl/subworkflow_test/optional_input_types.cwl @@ -0,0 +1,46 @@ + + +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.2 +label: basic tool for testing optional inputs + +requirements: + - class: DockerRequirement + dockerPull: ubuntu:latest + +baseCommand: echo + +inputs: + 
inFile: + type: File? + inputBinding: + position: 1 + inFileArr: + type: File[]? + inputBinding: + position: 2 + inSecondary: + type: File? + secondaryFiles: [.fai, ^.dict, .amb, .ann, .bwt, .pac, .sa] + inputBinding: + position: 3 + inString: + type: string? + inputBinding: + position: 4 + inStringArr: + type: string[]? + inputBinding: + position: 5 + inInt: + type: int? + inputBinding: + position: 6 +outputs: + out_stdout: + type: stdout + + + + diff --git a/janis_assistant/tests/data/cwl/subworkflow_test/subworkflow.cwl b/janis_assistant/tests/data/cwl/subworkflow_test/subworkflow.cwl new file mode 100644 index 0000000..67e7529 --- /dev/null +++ b/janis_assistant/tests/data/cwl/subworkflow_test/subworkflow.cwl @@ -0,0 +1,38 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "test subworkflow" +requirements: + - class: ScatterFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: MultipleInputFeatureRequirement + +inputs: + inFile: + type: File + inFileArr: + type: File[] + inSecondary: + type: File + secondaryFiles: [.fai, ^.dict, .amb, .ann, .bwt, .pac, .sa] + inString: + type: string + inStringArr: + type: string[] + inInt: + type: int + +outputs: + outFile: + type: File + outputSource: optional/out_stdout + +steps: + optional: + run: optional_input_types.cwl + in: + inFile: inFile + inFileArr: inFileArr + out: + [out_stdout] diff --git a/janis_assistant/tests/data/cwl/super_enhancer_wf.cwl b/janis_assistant/tests/data/cwl/super_enhancer_wf.cwl new file mode 100644 index 0000000..495a1c3 --- /dev/null +++ b/janis_assistant/tests/data/cwl/super_enhancer_wf.cwl @@ -0,0 +1,479 @@ +cwlVersion: v1.0 +class: Workflow + + +requirements: + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: MultipleInputFeatureRequirement + + +inputs: + + islands_file: + type: File + label: "Input peaks file" + format: "http://edamontology.org/format_3468" + doc: "Input XLS file generated by MACS2" + + islands_control_file: + type: File? 
+ label: "Control peaks file" + format: "http://edamontology.org/format_3468" + doc: "Control XLS file generated by MACS2" + + bambai_pair: + type: File + secondaryFiles: + - .bai + label: "Coordinate sorted BAM+BAI files" + format: "http://edamontology.org/format_2572" + doc: "Coordinate sorted BAM file and BAI index file" + + annotation_file: + type: File + label: "TSV annotation file" + format: "http://edamontology.org/format_3475" + doc: "TSV annotation file" + + chrom_length_file: + type: File + label: "Chromosome length file" + format: "http://edamontology.org/format_2330" + doc: "Chromosome length file" + + stitch_distance: + type: int + label: "Stitching distance" + doc: "Linking distance for stitching" + + tss_distance: + type: int + label: "TSS distance" + doc: "Distance from TSS to exclude, 0 = no TSS exclusion" + + promoter_bp: + type: int + label: "Promoter distance" + doc: "Promoter distance for gene names assignment" + + +outputs: + + png_file: + type: File + label: "ROSE visualization plot" + format: "http://edamontology.org/format_3603" + doc: "Generated by ROSE visualization plot" + outputSource: rename_png/target_file + + gene_names_file: + type: File + label: "Gateway Super Enhancer + gene names" + format: "http://edamontology.org/format_3475" + doc: "Gateway Super Enhancer results from ROSE with assigned gene names" + outputSource: add_island_names/output_file + + bigbed_file: + type: File + label: "Gateway Super Enhancer bigBed file" + format: "http://edamontology.org/format_3475" + doc: "Gateway Super Enhancer bigBed file" + outputSource: bed_to_bigbed/bigbed_file + + +steps: + + make_gff: + in: + islands_file: islands_file + islands_control_file: islands_control_file + out: [gff_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/rose:v0.0.2 + inputs: + islands_file: + type: File + inputBinding: + position: 5 + doc: Input XLS file generated by MACS2 + islands_control_file: + type: File? + inputBinding: + position: 7 + doc: Control XLS file generated by MACS2 + outputs: + gff_file: + type: File + outputBinding: + glob: "*" + baseCommand: ['makegff'] + arguments: + - valueFrom: + ${ + let root = inputs.islands_file.basename.split('.').slice(0,-1).join('.'); + return (root == "")?inputs.islands_file.basename+".gff":root+".gff"; + } + position: 6 + doc: Tool produces GFF file from XLS file generated by MACS2 + + run_rose: + in: + binding_sites_file: make_gff/gff_file + bam_file: bambai_pair + annotation_file: annotation_file + stitch_distance: stitch_distance + tss_distance: tss_distance + out: [plot_points_pic, gateway_super_enhancers_bed] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/rose:v0.0.2 + inputs: + binding_sites_file: + type: File + inputBinding: + position: 5 + prefix: "-i" + doc: GFF file of binding sites used to make enhancers + bam_file: + type: File + inputBinding: + position: 6 + prefix: "-r" + secondaryFiles: [".bai"] + doc: Indexed BAM+BAI file to rank enhancer by + annotation_file: + type: File + inputBinding: + position: 7 + prefix: "-g" + doc: TSV genome annotation file + stitch_distance: + type: int + inputBinding: + position: 8 + prefix: "-s" + doc: Linking distance for stitching + tss_distance: + type: int + inputBinding: + position: 9 + prefix: "-t" + doc: Distance from TSS to exclude. 
0 = no TSS exclusion + outputs: + plot_points_pic: + type: File + outputBinding: + glob: "*Plot_points.png" + gateway_super_enhancers_bed: + type: File + outputBinding: + glob: "*Gateway_SuperEnhancers.bed" + baseCommand: ['ROSE_main', '-o', './'] + doc: Tool runs ROSE to get Super Enhancers regions + + rename_png: + in: + source_file: run_rose/plot_points_pic + target_filename: + source: bambai_pair + valueFrom: $(self.location.split('/').slice(-1)[0].split('.').slice(0,-1).join('.')+"_default_s_enhcr.png") + out: [target_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/scidap:v0.0.3 + inputs: + source_file: + type: File + inputBinding: + position: 5 + doc: source file to rename + target_filename: + type: string + inputBinding: + position: 6 + doc: filename to rename to + outputs: + target_file: + type: File + outputBinding: + glob: "*" + baseCommand: ["cp"] + doc: Tool renames (copy) `source_file` to `target_filename` + + sort_bed: + in: + unsorted_file: run_rose/gateway_super_enhancers_bed + key: + default: ["1,1","2,2n","3,3n"] + out: [sorted_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: InlineJavascriptRequirement + - class: DockerRequirement + dockerPull: biowardrobe2/scidap:v0.0.3 + inputs: + key: + type: + type: array + items: string + inputBinding: + prefix: "-k" + inputBinding: + position: 5 + doc: -k, --key=POS1[,POS2], start a key at POS1, end it at POS2 (origin 1) + unsorted_file: + type: File + inputBinding: + position: 6 + doc: File to be sorted + outputs: + sorted_file: + type: File + outputBinding: + glob: "*" + stdout: $(inputs.unsorted_file.location.split('/').slice(-1)[0]) + baseCommand: ["sort"] + doc: Tool sorts data from `unsorted_file` by `key` + + reduce_bed: + in: + input_file: sort_bed/sorted_file + out: [output_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/scidap:v0.0.3 + inputs: + input_file: + type: File + inputBinding: + position: 5 + doc: Input BED6 file to be reduced to BED4 + outputs: + output_file: + type: File + outputBinding: + glob: "*" + baseCommand: [bash, '-c'] + arguments: + - cat $0 | cut -f 1-4 > `basename $0` + doc: Tool converts BED6 to BED4 by reducing column numbers + + bed_to_bigbed: + in: + input_bed: reduce_bed/output_file + chrom_length_file: chrom_length_file + bed_type: + default: "bed4" + output_filename: + source: bambai_pair + valueFrom: $(self.location.split('/').slice(-1)[0].split('.').slice(0,-1).join('.')+"_default_s_enhcr.bb") + out: [bigbed_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/ucscuserapps:v358 + inputs: + bed_type: + type: string + inputBinding: + position: 5 + prefix: -type= + separate: false + doc: Type of BED file in a form of bedN[+[P]]. 
By default bed3 to three required BED fields + input_bed: + type: File + inputBinding: + position: 6 + doc: Input BED file + chrom_length_file: + type: File + inputBinding: + position: 7 + doc: Chromosome length files + output_filename: + type: string + inputBinding: + position: 8 + doc: Output filename + outputs: + bigbed_file: + type: File + outputBinding: + glob: "*" + baseCommand: ["bedToBigBed"] + doc: Tool converts bed to bigBed + + bed_to_macs: + in: + input_file: sort_bed/sorted_file + out: [output_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/scidap:v0.0.3 + inputs: + input_file: + type: File + inputBinding: + position: 5 + doc: Input file to be converted to MACS2 output format + outputs: + output_file: + type: File + outputBinding: + glob: "*" + baseCommand: [bash, '-c'] + arguments: + - cat $0 | grep -v "#" | awk + 'BEGIN {print "chr\tstart\tend\tlength\tabs_summit\tpileup\t-log10(pvalue)\tfold_enrichment\t-log10(qvalue)\tname"} + {print $1"\t"$2"\t"$3"\t"$3-$2+1"\t0\t0\t0\t0\t0\t"$4}' > `basename $0` + doc: Tool converts `input_file` to the format compatible with the input of iaintersect from `assign_genes` step + + assign_genes: + in: + input_filename: bed_to_macs/output_file + annotation_filename: annotation_file + promoter_bp: promoter_bp + out: [result_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: InlineJavascriptRequirement + - class: DockerRequirement + dockerPull: biowardrobe2/iaintersect:v0.0.2 + inputs: + input_filename: + type: File + inputBinding: + prefix: --in= + separate: false + doc: Input filename with MACS2 peak calling results, tsv + annotation_filename: + type: File + inputBinding: + prefix: --a= + separate: false + doc: Annotation file, tsv + promoter_bp: + type: int + inputBinding: + prefix: --promoter= + separate: false + doc: Promoter region around TSS, base pairs + outputs: + result_file: + type: File + outputBinding: + glob: "*_iaintersect.tsv" + baseCommand: [iaintersect] + arguments: + - valueFrom: | + ${ + let root = inputs.input_filename.basename.split('.').slice(0,-1).join('.'); + return (root == "")?inputs.input_filename.basename+"_iaintersect.tsv":root+"_iaintersect.tsv"; + } + prefix: --out= + separate: false + doc: Tool assigns each peak obtained from MACS2 to a gene and region (upstream, promoter, exon, intron, intergenic) + + add_island_names: + in: + input_file: [assign_genes/result_file, sort_bed/sorted_file] + param: + source: bambai_pair + valueFrom: $(self.location.split('/').slice(-1)[0].split('.').slice(0,-1).join('.')+"_default_s_enhcr.tsv") + out: [output_file] + run: + cwlVersion: v1.0 + class: CommandLineTool + requirements: + - class: DockerRequirement + dockerPull: biowardrobe2/scidap:v0.0.3 + inputs: + input_file: + type: File[] + inputBinding: + position: 5 + doc: TSV file to add extra columns too + param: + type: string + inputBinding: + position: 6 + doc: Param to set output filename + outputs: + output_file: + type: File + outputBinding: + glob: "*" + baseCommand: [bash, '-c'] + arguments: + - echo -e "refseq_id\tgene_id\ttxStart\ttxEnd\tstrand\tchrom\tstart\tend\tlength\tregion\tname\tscore" > `basename $2`; + cat $0 | grep -v refseq_id | paste - $1 | cut -f 1-9,15,19,20 >> `basename $2` + +$namespaces: + s: http://schema.org/ + +$schemas: +- http://schema.org/docs/schema_org_rdfa.html + +s:name: "super-enhancer" +s:downloadUrl: 
https://raw.githubusercontent.com/michael-kotliar/biowardrobe-airflow-plugins/cwls/super-enhancer.cwl +s:codeRepository: https://github.com/michael-kotliar/biowardrobe-airflow-plugins +s:license: http://www.apache.org/licenses/LICENSE-2.0 + +s:isPartOf: + class: s:CreativeWork + s:name: Common Workflow Language + s:url: http://commonwl.org/ + +s:creator: +- class: s:Organization + s:legalName: "Cincinnati Children's Hospital Medical Center" + s:location: + - class: s:PostalAddress + s:addressCountry: "USA" + s:addressLocality: "Cincinnati" + s:addressRegion: "OH" + s:postalCode: "45229" + s:streetAddress: "3333 Burnet Ave" + s:telephone: "+1(513)636-4200" + s:logo: "https://www.cincinnatichildrens.org/-/media/cincinnati%20childrens/global%20shared/childrens-logo-new.png" + s:department: + - class: s:Organization + s:legalName: "Allergy and Immunology" + s:department: + - class: s:Organization + s:legalName: "Barski Research Lab" + s:member: + - class: s:Person + s:name: Michael Kotliar + s:email: mailto:misha.kotliar@gmail.com + s:sameAs: + - id: http://orcid.org/0000-0002-6486-3898 + +doc: | + Workflow to run Super Enhancer Analysis + +s:about: | + Workflow to run Super Enhancer Analysis \ No newline at end of file diff --git a/janis_assistant/tests/data/galaxy/abricate_wf.ga b/janis_assistant/tests/data/galaxy/abricate_wf.ga new file mode 100644 index 0000000..2901da0 --- /dev/null +++ b/janis_assistant/tests/data/galaxy/abricate_wf.ga @@ -0,0 +1,104 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "my babricate workflow", + "format-version": "0.1", + "name": "abricate", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "in_fasta" + } + ], + "label": "in_fasta", + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 330.2253723144531, + "height": 61.752044677734375, + "left": 447.98150634765625, + "right": 647.9814910888672, + "top": 268.47332763671875, + "width": 199.99998474121094, + "x": 447.98150634765625, + "y": 268.47332763671875 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "58a6d4ba-d77b-4355-99cf-84da4da30e53", + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "ca826a63-be54-4e04-8b62-21fd1d6b4c57" + } + ] + }, + "1": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/abricate/abricate/1.0.1", + "errors": null, + "id": 1, + "input_connections": { + "file_input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool ABRicate", + "name": "file_input" + } + ], + "label": null, + "name": "ABRicate", + "outputs": [ + { + "name": "report", + "type": "tabular" + } + ], + "position": { + "bottom": 423.73973083496094, + "height": 154.3032684326172, + "left": 788.954833984375, + "right": 988.9548187255859, + "top": 269.43646240234375, + "width": 199.99998474121094, + "x": 788.954833984375, + "y": 269.43646240234375 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/abricate/abricate/1.0.1", + "tool_shed_repository": { + "changeset_revision": "c2ef298da409", + "name": "abricate", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"db\": \"resfinder\", \"no_header\": \"false\", \"min_dna_id\": \"80.0\", \"min_cov\": \"80.0\"}, \"file_input\": {\"__class__\": 
\"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.1", + "type": "tool", + "uuid": "1afee518-0912-4c81-bfe2-ea282a848aa8", + "workflow_outputs": [ + { + "label": "ABRicate on input dataset(s) report file", + "output_name": "report", + "uuid": "15cb9b26-c0a9-401d-9948-f460d90187e0" + } + ] + } + }, + "tags": [], + "uuid": "933d732e-61a5-4350-8c15-e0d979c5ad16", + "version": 3 +} \ No newline at end of file diff --git a/janis_assistant/tests/data/galaxy/cutadapt_wf.ga b/janis_assistant/tests/data/galaxy/cutadapt_wf.ga new file mode 100644 index 0000000..4ebfca7 --- /dev/null +++ b/janis_assistant/tests/data/galaxy/cutadapt_wf.ga @@ -0,0 +1,141 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "cutadapt_wf", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "forward" + } + ], + "label": "forward", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0.99383544921875, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastq\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "d6925ea7-170c-4c3c-9121-8c651cabe3ae", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "reverse" + } + ], + "label": "reverse", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0, + "top": 103.97540283203125 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastq\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "c305e6bf-6d5c-4c5a-b078-ffafddb2137c", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/4.0+galaxy1", + "errors": null, + "id": 2, + "input_connections": { + "library|input_1": { + "id": 0, + "output_name": "output" + }, + "library|input_2": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Cutadapt", + "name": "library" + }, + { + "description": "runtime parameter for tool Cutadapt", + "name": "library" + } + ], + "label": null, + "name": "Cutadapt", + "outputs": [ + { + "name": "out1", + "type": "fastqsanger" + }, + { + "name": "out2", + "type": "fastqsanger" + }, + { + "name": "report", + "type": "txt" + } + ], + "position": { + "left": 243.96514892578125, + "top": 32.3052978515625 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt/cutadapt/4.0+galaxy1", + "tool_shed_repository": { + "changeset_revision": "135b80fb1ac2", + "name": "cutadapt", + "owner": "lparsons", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adapter_options\": {\"action\": \"trim\", \"internal\": \"\", \"error_rate\": \"0.1\", \"no_indels\": false, \"times\": \"1\", \"overlap\": \"3\", \"match_read_wildcards\": \" \", \"revcomp\": false}, \"filter_options\": {\"discard_trimmed\": false, \"discard_untrimmed\": false, \"minimum_length\": null, \"maximum_length\": null, \"length_R2_options\": {\"length_R2_status\": \"False\", \"__current_case__\": 1}, \"max_n\": null, \"pair_filter\": \"any\", \"max_expected_errors\": null, \"discard_cassava\": false}, \"library\": {\"type\": \"paired\", \"__current_case__\": 1, 
\"input_1\": {\"__class__\": \"RuntimeValue\"}, \"input_2\": {\"__class__\": \"RuntimeValue\"}, \"r1\": {\"adapters\": [{\"__index__\": 0, \"adapter_source\": {\"adapter_source_list\": \"user\", \"__current_case__\": 0, \"adapter_name\": \"fwd\", \"adapter\": \"AATTGGCC\"}, \"single_noindels\": false}], \"front_adapters\": [], \"anywhere_adapters\": [], \"cut\": \"0\"}, \"r2\": {\"adapters2\": [{\"__index__\": 0, \"adapter_source2\": {\"adapter_source_list2\": \"user\", \"__current_case__\": 0, \"adapter_name2\": \"rev\", \"adapter2\": \"AATTGGCC\"}, \"single_noindels\": false}], \"front_adapters2\": [], \"anywhere_adapters2\": [], \"cut2\": \"0\", \"quality_cutoff2\": \"\"}}, \"output_selector\": [\"report\"], \"read_mod_options\": {\"quality_cutoff\": \"0\", \"nextseq_trim\": \"0\", \"trim_n\": false, \"strip_suffix\": \"\", \"shorten_options\": {\"shorten_values\": \"False\", \"__current_case__\": 1}, \"length_tag\": \"\", \"rename\": \"\", \"zero_cap\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "4.0+galaxy1", + "type": "tool", + "uuid": "8b012d09-4090-4d5c-b3f5-2f7d3e658970", + "when": null, + "workflow_outputs": [ + { + "label": null, + "output_name": "report", + "uuid": "5541c5d2-9d73-442d-ad23-c02606329d6d" + }, + { + "label": "Cutadapt on input dataset(s): Read 1 Output", + "output_name": "out1", + "uuid": "4490227c-3f39-4f43-915c-367d499b091e" + }, + { + "label": null, + "output_name": "out2", + "uuid": "c83a39d3-e9a1-4163-9d47-469d603d605c" + } + ] + } + }, + "tags": [], + "uuid": "f9001d73-1606-4edc-aa3d-ac9bbc80433f", + "version": 3 +} \ No newline at end of file diff --git a/janis_assistant/tests/data/galaxy/fastqc-5ec9f6bceaee/.hg_archival.txt b/janis_assistant/tests/data/galaxy/fastqc-5ec9f6bceaee/.hg_archival.txt new file mode 100644 index 0000000..d60ed2e --- /dev/null +++ b/janis_assistant/tests/data/galaxy/fastqc-5ec9f6bceaee/.hg_archival.txt @@ -0,0 +1,6 @@ +repo: e28c965eeed4adeb19cb086d1e0f5b3ca6dc8a5d +node: 5ec9f6bceaee7c629268142c0b7bb8f7ab05b51b +branch: default +latesttag: null +latesttagdistance: 24 +changessincelatesttag: 24 diff --git a/janis_assistant/tests/data/galaxy/fastqc-5ec9f6bceaee/rgFastQC.xml b/janis_assistant/tests/data/galaxy/fastqc-5ec9f6bceaee/rgFastQC.xml new file mode 100644 index 0000000..7c2bbf1 --- /dev/null +++ b/janis_assistant/tests/data/galaxy/fastqc-5ec9f6bceaee/rgFastQC.xml @@ -0,0 +1,220 @@ + + Read Quality reports + + fastqc + + + fastqc + + + + + + + '' + --contaminants '${contaminants}' + #end if + + #if $adapters.dataset and str($adapters) > '' + --adapters '${adapters}' + #end if + + #if $limits.dataset and str($limits) > '' + --limits '${limits}' + #end if + --threads \${GALAXY_SLOTS:-2} + --quiet + --extract + #if $min_length: + --min_length $min_length + #end if + $nogroup + --kmers $kmers + -f '${format}' + '${input_file_sl}' + + && cp '${html_file.files_path}'/*/fastqc_data.txt output.txt + && cp '${html_file.files_path}'/*\.html output.html + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @unpublished{andrews_s, + author = {Andrews, S.}, + keywords = {bioinformatics, ngs, qc}, + priority = {2}, + title = {{FastQC A Quality Control tool for High Throughput Sequence Data}}, + url = {http://www.bioinformatics.babraham.ac.uk/projects/fastqc/} + } + + + diff --git a/janis_assistant/tests/data/galaxy/hisat2_wf.ga b/janis_assistant/tests/data/galaxy/hisat2_wf.ga new file mode 100644 index 
0000000..89f9231 --- /dev/null +++ b/janis_assistant/tests/data/galaxy/hisat2_wf.ga @@ -0,0 +1,123 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "hisat2_wf", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "read1" + } + ], + "label": "read1", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0.0, + "top": 0.0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "c38e69e9-c6e2-4a4d-89d5-b72bbe3b28de", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "read2" + } + ], + "label": "read2", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 2.520477294921875, + "top": 79.31350708007812 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "2c369543-cc2f-4bb0-bb06-45f9724042c5", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.2.1+galaxy1", + "errors": null, + "id": 2, + "input_connections": { + "library|input_1": { + "id": 0, + "output_name": "output" + }, + "library|input_2": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool HISAT2", + "name": "library" + }, + { + "description": "runtime parameter for tool HISAT2", + "name": "library" + } + ], + "label": null, + "name": "HISAT2", + "outputs": [ + { + "name": "output_alignments", + "type": "bam" + } + ], + "position": { + "left": 316.78271484375, + "top": 27.008209228515625 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.2.1+galaxy1", + "tool_shed_repository": { + "changeset_revision": "f4af63aaf57a", + "name": "hisat2", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"input_options\": {\"input_options_selector\": \"defaults\", \"__current_case__\": 0}, \"alignment_options\": {\"alignment_options_selector\": \"defaults\", \"__current_case__\": 0}, \"scoring_options\": {\"scoring_options_selector\": \"defaults\", \"__current_case__\": 0}, \"spliced_options\": {\"spliced_options_selector\": \"defaults\", \"__current_case__\": 0}, \"reporting_options\": {\"reporting_options_selector\": \"defaults\", \"__current_case__\": 0}, \"output_options\": {\"output_options_selector\": \"defaults\", \"__current_case__\": 0}, \"sam_options\": {\"sam_options_selector\": \"defaults\", \"__current_case__\": 0}, \"other_options\": {\"other_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"library\": {\"type\": \"paired\", \"__current_case__\": 1, \"input_1\": {\"__class__\": \"RuntimeValue\"}, \"input_2\": {\"__class__\": \"RuntimeValue\"}, \"rna_strandness\": \"\", \"paired_options\": {\"paired_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"reference_genome\": {\"source\": \"indexed\", \"__current_case__\": 0, \"index\": \"mm10\"}, \"sum\": {\"new_summary\": false, \"summary_file\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.2.1+galaxy1", + "type": "tool", + "uuid": 
"758da7a3-8bc9-415b-b221-0e850efab66d", + "when": null, + "workflow_outputs": [ + { + "label": null, + "output_name": "output_alignments", + "uuid": "e988b2f4-5a34-4539-a2a6-fdcf1ba2cae4" + } + ] + } + }, + "tags": [], + "uuid": "56329289-8ed2-4b2c-9fca-7e6fba601e0d", + "version": 1 +} \ No newline at end of file diff --git a/janis_assistant/tests/data/galaxy/unicycler_assembly.ga b/janis_assistant/tests/data/galaxy/unicycler_assembly.ga new file mode 100644 index 0000000..aa332bb --- /dev/null +++ b/janis_assistant/tests/data/galaxy/unicycler_assembly.ga @@ -0,0 +1,488 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "Unicycler Assembly", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "short_R1" + } + ], + "label": "short_R1", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 3.9754087149161705, + "top": 453.28884880181624 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "03640f53-ba88-4f80-9745-dc7d38f72c5e", + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "short_R2" + } + ], + "label": "short_R2", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 1.0348752702617503, + "top": 541.5060816266651 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "ce3f01a5-617b-402e-add8-543a7c612e00", + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "long" + } + ], + "label": "long", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0.0, + "top": 628.5142913824635 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "5fdc6024-93ed-48a0-b195-038f8291e897", + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.73+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "input_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool FastQC", + "name": "adapters" + }, + { + "description": "runtime parameter for tool FastQC", + "name": "contaminants" + }, + { + "description": "runtime parameter for tool FastQC", + "name": "input_file" + }, + { + "description": "runtime parameter for tool FastQC", + "name": "limits" + } + ], + "label": null, + "name": "FastQC", + "outputs": [ + { + "name": "html_file", + "type": "html" + }, + { + "name": "text_file", + "type": "txt" + } + ], + "position": { + "left": 299.96923213247317, + "top": 2.4487613134046455 + }, + "post_job_actions": { + "HideDatasetActiontext_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "text_file" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.73+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3d0c7bdf12f5", + "name": "fastqc", + "owner": "devteam", + "tool_shed": 
"toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adapters\": {\"__class__\": \"RuntimeValue\"}, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"RuntimeValue\"}, \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"min_length\": null, \"nogroup\": \"false\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.73+galaxy0", + "type": "tool", + "uuid": "1450d138-93ec-47db-960e-78905af995b1", + "workflow_outputs": [ + { + "label": "FastQC on input dataset(s): Webpage", + "output_name": "html_file", + "uuid": "25238fe4-0880-4b36-a7fc-74dbc46e0576" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.73+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "input_file": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool FastQC", + "name": "adapters" + }, + { + "description": "runtime parameter for tool FastQC", + "name": "contaminants" + }, + { + "description": "runtime parameter for tool FastQC", + "name": "limits" + } + ], + "label": null, + "name": "FastQC", + "outputs": [ + { + "name": "html_file", + "type": "html" + }, + { + "name": "text_file", + "type": "txt" + } + ], + "position": { + "left": 520.9118529795632, + "top": 0.0 + }, + "post_job_actions": { + "HideDatasetActiontext_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "text_file" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastqc/fastqc/0.73+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3d0c7bdf12f5", + "name": "fastqc", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adapters\": {\"__class__\": \"RuntimeValue\"}, \"contaminants\": {\"__class__\": \"RuntimeValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"kmers\": \"7\", \"limits\": {\"__class__\": \"RuntimeValue\"}, \"min_length\": null, \"nogroup\": \"false\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.73+galaxy0", + "type": "tool", + "uuid": "d6a70043-c6d3-42ed-a9da-96c84c7fc802", + "workflow_outputs": [ + { + "label": null, + "output_name": "html_file", + "uuid": "3d489e1f-ad51-481d-be74-4e9a50bc871c" + } + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0", + "errors": null, + "id": 5, + "input_connections": { + "long": { + "id": 2, + "output_name": "output" + }, + "paired_unpaired|fastq_input1": { + "id": 0, + "output_name": "output" + }, + "paired_unpaired|fastq_input2": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Create assemblies with Unicycler", + "name": "long" + }, + { + "description": "runtime parameter for tool Create assemblies with Unicycler", + "name": "lr_align" + }, + { + "description": "runtime parameter for tool Create assemblies with Unicycler", + "name": "paired_unpaired" + }, + { + "description": "runtime parameter for tool Create assemblies with Unicycler", + "name": "paired_unpaired" + }, + { + "description": "runtime parameter for tool Create assemblies with Unicycler", + "name": "rotation" + } + ], + "label": null, + "name": "Create assemblies with Unicycler", + "outputs": [ + { + "name": "assembly_graph", + "type": "gfa1" + }, + { + "name": "assembly", + "type": "fasta" + } + ], + "position": { + "left": 503.34013809490136, + "top": 381.4343956129509 + }, + 
"post_job_actions": { + "HideDatasetActionassembly_graph": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "assembly_graph" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/unicycler/unicycler/0.4.8.0", + "tool_shed_repository": { + "changeset_revision": "9e3e80cc4ad4", + "name": "unicycler", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"graph_clean\": {\"min_component_size\": \"1000\", \"min_dead_end_size\": \"1000\"}, \"linear_seqs\": \"0\", \"long\": {\"__class__\": \"RuntimeValue\"}, \"lr_align\": {\"contamination\": {\"__class__\": \"RuntimeValue\"}, \"scores\": \"3,-6,-5,-2\", \"low_score\": null}, \"min_anchor_seg_len\": null, \"min_fasta_length\": \"500\", \"mode\": \"normal\", \"paired_unpaired\": {\"fastq_input_selector\": \"paired\", \"__current_case__\": 0, \"fastq_input1\": {\"__class__\": \"RuntimeValue\"}, \"fastq_input2\": {\"__class__\": \"RuntimeValue\"}}, \"pilon\": {\"no_pilon\": \"false\", \"min_polish_size\": \"1000\"}, \"rotation\": {\"no_rotate\": \"false\", \"start_genes\": {\"__class__\": \"RuntimeValue\"}, \"start_gene_id\": \"90.0\", \"start_gene_cov\": \"95.0\"}, \"spades\": {\"no_correct\": \"false\", \"min_kmer_frac\": \"0.2\", \"max_kmer_frac\": \"0.95\", \"kmers\": \"\", \"kmer_count\": \"10\", \"depth_filter\": \"0.25\", \"largest_component\": \"false\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.4.8.0", + "type": "tool", + "uuid": "f827fd85-7920-41bb-8342-c9b2051770c2", + "workflow_outputs": [ + { + "label": "Create assemblies with Unicycler on input dataset(s): Final Assembly", + "output_name": "assembly", + "uuid": "e6632bf6-1c4a-4093-b517-1c86e160c678" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/nanoplot/nanoplot/1.28.2+galaxy1", + "errors": null, + "id": 6, + "input_connections": { + "mode|reads|files": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "NanoPlot", + "outputs": [ + { + "name": "output_html", + "type": "html" + }, + { + "name": "nanostats", + "type": "txt" + }, + { + "name": "nanostats_post_filtering", + "type": "txt" + }, + { + "name": "read_length", + "type": "png" + }, + { + "name": "log_read_length", + "type": "png" + } + ], + "position": { + "left": 739.0983192541909, + "top": 0.9835881726859839 + }, + "post_job_actions": { + "HideDatasetActionlog_read_length": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "log_read_length" + }, + "HideDatasetActionnanostats": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "nanostats" + }, + "HideDatasetActionnanostats_post_filtering": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "nanostats_post_filtering" + }, + "HideDatasetActionread_length": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "read_length" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/nanoplot/nanoplot/1.28.2+galaxy1", + "tool_shed_repository": { + "changeset_revision": "edbb6c5028f5", + "name": "nanoplot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"customization\": {\"color\": null, \"format\": \"png\", \"plots\": null, \"N50\": \"false\"}, \"filter\": {\"maxlength\": null, \"minlength\": null, \"drop_outliers\": \"false\", \"downsample\": null, \"loglength\": \"false\", \"percentqual\": \"false\", \"alength\": \"false\", \"minqual\": null, 
\"readtype\": null, \"barcoded\": \"false\"}, \"mode\": {\"choice\": \"batch\", \"__current_case__\": 0, \"reads\": {\"type\": \"fastq\", \"__current_case__\": 0, \"files\": {\"__class__\": \"ConnectedValue\"}}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.28.2+galaxy1", + "type": "tool", + "uuid": "3d1ad8c6-4fde-48b5-943f-4383e4b4fe5b", + "workflow_outputs": [ + { + "label": "NanoPlot on input dataset(s): HTML report", + "output_name": "output_html", + "uuid": "4b13efe5-0fc6-4db6-9c22-854f321833d3" + } + ] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.0.2+galaxy5", + "errors": null, + "id": 7, + "input_connections": { + "in|inputs": { + "id": 5, + "output_name": "assembly" + } + }, + "inputs": [], + "label": null, + "name": "Quast", + "outputs": [ + { + "name": "report_html", + "type": "html" + } + ], + "position": { + "left": 926.0962584174182, + "top": 475.95282798944174 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/quast/quast/5.0.2+galaxy5", + "tool_shed_repository": { + "changeset_revision": "675488238c96", + "name": "quast", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"advanced\": {\"contig_thresholds\": \"0,1000\", \"strict_NA\": \"false\", \"extensive_mis_size\": \"1000\", \"scaffold_gap_max_size\": \"1000\", \"unaligned_part_size\": \"500\", \"skip_unaligned_mis_contigs\": \"true\", \"fragmented_max_indent\": null}, \"alignments\": {\"use_all_alignments\": \"false\", \"min_alignment\": \"65\", \"min_identity\": \"95.0\", \"ambiguity_usage\": \"one\", \"ambiguity_score\": \"0.99\", \"fragmented\": \"false\", \"upper_bound_assembly\": \"false\", \"upper_bound_min_con\": null}, \"assembly\": {\"type\": \"genome\", \"__current_case__\": 0, \"ref\": {\"use_ref\": \"false\", \"__current_case__\": 1, \"est_ref_size\": null}, \"orga_type\": \"\"}, \"genes\": {\"gene_finding\": {\"tool\": \"none\", \"__current_case__\": 0}, \"rna_finding\": \"false\", \"conserved_genes_finding\": \"false\"}, \"in\": {\"custom\": \"false\", \"__current_case__\": 1, \"inputs\": {\"__class__\": \"ConnectedValue\"}}, \"large\": \"false\", \"min_contig\": \"500\", \"output_files\": [\"html\"], \"reads\": {\"reads_option\": \"disabled\", \"__current_case__\": 0}, \"split_scaffolds\": \"false\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0.2+galaxy5", + "type": "tool", + "uuid": "2a71ea4f-1311-459e-905f-c42bdbef3f33", + "workflow_outputs": [ + { + "label": "Quast on input dataset(s): HTML report", + "output_name": "report_html", + "uuid": "63c115b7-5b97-4291-b280-3a28487d0452" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.3.2+galaxy0", + "errors": null, + "id": 8, + "input_connections": { + "input": { + "id": 5, + "output_name": "assembly" + } + }, + "inputs": [], + "label": null, + "name": "Busco", + "outputs": [ + { + "name": "busco_sum", + "type": "txt" + }, + { + "name": "busco_table", + "type": "tabular" + } + ], + "position": { + "left": 925.942572765436, + "top": 599.928233191543 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.3.2+galaxy0", + "tool_shed_repository": { + "changeset_revision": "41030a6c03b7", + "name": "busco", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, 
\"use_augustus\": {\"use_augustus_selector\": \"no\", \"__current_case__\": 0}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"auto_detect\", \"__current_case__\": 0, \"auto_lineage\": \"--auto-lineage\"}, \"outputs\": [\"short_summary\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.3.2+galaxy0", + "type": "tool", + "uuid": "896f095b-e08a-47b2-9173-1ca913ffb0dc", + "workflow_outputs": [ + { + "label": "Busco on input dataset(s): short summary", + "output_name": "busco_sum", + "uuid": "38b54017-0299-4515-93c8-eff788fd1e8c" + }, + { + "label": "Busco on input dataset(s): full table", + "output_name": "busco_table", + "uuid": "2b1d658e-9a8d-4131-bb59-ac502af97431" + } + ] + } + }, + "tags": [], + "uuid": "26da9aea-1aaf-488d-adf8-b90e1d7a9e61", + "version": 6 +} \ No newline at end of file diff --git a/janis_assistant/tests/data/janis/gatk_haplotype_caller_tool.py b/janis_assistant/tests/data/janis/gatk_haplotype_caller_tool.py new file mode 100644 index 0000000..09e648d --- /dev/null +++ b/janis_assistant/tests/data/janis/gatk_haplotype_caller_tool.py @@ -0,0 +1,7 @@ + + +from janis_bioinformatics.tools.gatk4 import Gatk4HaplotypeCaller_4_1_3 + + +class Gatk4HaplotypeCaller_4_1_3_Test(Gatk4HaplotypeCaller_4_1_3): + pass \ No newline at end of file diff --git a/janis_assistant/tests/data/janis/germline_variant_caller_wf.py b/janis_assistant/tests/data/janis/germline_variant_caller_wf.py new file mode 100644 index 0000000..63d8637 --- /dev/null +++ b/janis_assistant/tests/data/janis/germline_variant_caller_wf.py @@ -0,0 +1,7 @@ + + +from janis_bioinformatics.tools.variantcallers import IlluminaGermlineVariantCaller + + +class IlluminaGermlineVariantCaller_Test(IlluminaGermlineVariantCaller): + pass \ No newline at end of file diff --git a/janis_assistant/tests/data/janis/samtools_flagstat_tool.py b/janis_assistant/tests/data/janis/samtools_flagstat_tool.py new file mode 100644 index 0000000..477a839 --- /dev/null +++ b/janis_assistant/tests/data/janis/samtools_flagstat_tool.py @@ -0,0 +1,7 @@ + + +from janis_bioinformatics.tools.samtools import SamToolsFlagstat_1_9 + + +class SamToolsFlagstat_1_9_Test(SamToolsFlagstat_1_9): + pass \ No newline at end of file diff --git a/janis_assistant/tests/data/wdl/Multisample_jointgt_GATK4.wdl b/janis_assistant/tests/data/wdl/Multisample_jointgt_GATK4.wdl new file mode 100644 index 0000000..419e1b7 --- /dev/null +++ b/janis_assistant/tests/data/wdl/Multisample_jointgt_GATK4.wdl @@ -0,0 +1,850 @@ +## Copyright Broad Institute, 2018 +## +## This WDL implements the joint discovery and VQSR filtering portion of the GATK +## Best Practices (June 2016) for germline SNP and Indel discovery in human +## whole-genome sequencing (WGS) and exome sequencing data. +## +## Requirements/expectations : +## - One or more GVCFs produced by HaplotypeCaller in GVCF mode +## - Bare minimum 1 WGS sample or 30 Exome samples. Gene panels are not supported. +## +## Outputs : +## - A VCF file and its index, filtered using variant quality score recalibration +## (VQSR) with genotypes for all samples present in the input VCF. All sites that +## are present in the input VCF are retained; filtered sites are annotated as such +## in the FILTER field. +## +## Note about VQSR wiring : +## The SNP and INDEL models are built in parallel, but then the corresponding +## recalibrations are applied in series. 
Because the INDEL model is generally ready +## first (because there are fewer indels than SNPs) we set INDEL recalibration to +## be applied first to the input VCF, while the SNP model is still being built. By +## the time the SNP model is available, the indel-recalibrated file is available to +## serve as input to apply the SNP recalibration. If we did it the other way around, +## we would have to wait until the SNP recal file was available despite the INDEL +## recal file being there already, then apply SNP recalibration, then apply INDEL +## recalibration. This would lead to a longer wall clock time for complete workflow +## execution. Wiring the INDEL recalibration to be applied first solves the problem. +## +## Cromwell version support +## - Successfully tested on v31 +## - Does not work on versions < v23 due to output syntax +## +## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. +## For program versions, see docker containers. +## +## LICENSING : +## This script is released under the WDL source code license (BSD-3) (see LICENSE in +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may +## be subject to different licenses. Users are responsible for checking that they are +## authorized to run all programs before running this script. Please see the docker +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed +## licensing information pertaining to the included programs. + +## Adapted to Yale Ruddle HPC by Sander Pajusalu (sander.pajusalu@yale.edu) + + +workflow JointGenotyping { + File unpadded_intervals_file + + String callset_name + + File ref_fasta + File ref_fasta_index + File ref_dict + + File dbsnp_vcf + File dbsnp_vcf_index + + File sample_sheet + + Array[String] snp_recalibration_tranche_values + Array[String] snp_recalibration_annotation_values + Array[String] indel_recalibration_tranche_values + Array[String] indel_recalibration_annotation_values + + File eval_interval_list + File hapmap_resource_vcf + File hapmap_resource_vcf_index + File omni_resource_vcf + File omni_resource_vcf_index + File one_thousand_genomes_resource_vcf + File one_thousand_genomes_resource_vcf_index + File mills_resource_vcf + File mills_resource_vcf_index + File axiomPoly_resource_vcf + File axiomPoly_resource_vcf_index + File dbsnp_resource_vcf = dbsnp_vcf + File dbsnp_resource_vcf_index = dbsnp_vcf_index + + # ExcessHet is a phred-scaled p-value. 
We want a cutoff of anything more extreme + # than a z-score of -4.5 which is a p-value of 3.4e-06, which phred-scaled is 54.69 + Float excess_het_threshold = 54.69 + Float snp_filter_level + Float indel_filter_level + Int SNP_VQSR_downsampleFactor + + Int num_of_original_intervals = length(read_lines(unpadded_intervals_file)) + + + # Make a 2.5:1 interval number to samples in callset ratio interval list + Int possible_merge_count = floor(num_of_original_intervals / num_gvcfs / 2.5) + Int merge_count = if possible_merge_count > 1 then possible_merge_count else 1 + + call samples { + input: + samples = sample_sheet + } + + Int num_gvcfs = length(read_lines(samples.input_gvcfs)) + + call DynamicallyCombineIntervals { + input: + intervals = unpadded_intervals_file, + merge_count = merge_count + } + + Array[String] unpadded_intervals = read_lines(DynamicallyCombineIntervals.output_intervals) + + scatter (idx in range(length(unpadded_intervals))) { + # the batch_size value was carefully chosen here as it + # is the optimal value for the amount of memory allocated + # within the task; please do not change it without consulting + # the Hellbender (GATK engine) team! + call ImportGVCFs { + input: + sample_names = read_lines(samples.sample_names), + interval = unpadded_intervals[idx], + workspace_dir_name = "genomicsdb", + input_gvcfs = read_lines(samples.input_gvcfs), + input_gvcfs_indices = read_lines(samples.input_gvcfs_indices), + batch_size = 50 + } + + call GenotypeGVCFs { + input: + workspace_tar = ImportGVCFs.output_genomicsdb, + interval = unpadded_intervals[idx], + output_vcf_filename = "output.vcf.gz", + ref_fasta = ref_fasta, + ref_fasta_index = ref_fasta_index, + ref_dict = ref_dict, + dbsnp_vcf = dbsnp_vcf, + dbsnp_vcf_index = dbsnp_vcf_index + } + + call HardFilterAndMakeSitesOnlyVcf { + input: + vcf = GenotypeGVCFs.output_vcf, + vcf_index = GenotypeGVCFs.output_vcf_index, + excess_het_threshold = excess_het_threshold, + variant_filtered_vcf_filename = callset_name + "." + idx + ".variant_filtered.vcf.gz", + sites_only_vcf_filename = callset_name + "." 
+ idx + ".sites_only.variant_filtered.vcf.gz" + } + } + + call GatherVcfs as SitesOnlyGatherVcf { + input: + input_vcfs_fofn = HardFilterAndMakeSitesOnlyVcf.sites_only_vcf, + input_vcf_indexes_fofn = HardFilterAndMakeSitesOnlyVcf.sites_only_vcf_index, + output_vcf_name = callset_name + ".sites_only.vcf.gz" + } + + call IndelsVariantRecalibrator { + input: + sites_only_variant_filtered_vcf = SitesOnlyGatherVcf.output_vcf, + sites_only_variant_filtered_vcf_index = SitesOnlyGatherVcf.output_vcf_index, + recalibration_filename = callset_name + ".indels.recal", + tranches_filename = callset_name + ".indels.tranches", + recalibration_tranche_values = indel_recalibration_tranche_values, + recalibration_annotation_values = indel_recalibration_annotation_values, + mills_resource_vcf = mills_resource_vcf, + mills_resource_vcf_index = mills_resource_vcf_index, + axiomPoly_resource_vcf = axiomPoly_resource_vcf, + axiomPoly_resource_vcf_index = axiomPoly_resource_vcf_index, + dbsnp_resource_vcf = dbsnp_resource_vcf, + dbsnp_resource_vcf_index = dbsnp_resource_vcf_index + } + + if (num_gvcfs > 10000) { + call SNPsVariantRecalibratorCreateModel { + input: + sites_only_variant_filtered_vcf = SitesOnlyGatherVcf.output_vcf, + sites_only_variant_filtered_vcf_index = SitesOnlyGatherVcf.output_vcf_index, + recalibration_filename = callset_name + ".snps.recal", + tranches_filename = callset_name + ".snps.tranches", + recalibration_tranche_values = snp_recalibration_tranche_values, + recalibration_annotation_values = snp_recalibration_annotation_values, + downsampleFactor = SNP_VQSR_downsampleFactor, + model_report_filename = callset_name + ".snps.model.report", + hapmap_resource_vcf = hapmap_resource_vcf, + hapmap_resource_vcf_index = hapmap_resource_vcf_index, + omni_resource_vcf = omni_resource_vcf, + omni_resource_vcf_index = omni_resource_vcf_index, + one_thousand_genomes_resource_vcf = one_thousand_genomes_resource_vcf, + one_thousand_genomes_resource_vcf_index = one_thousand_genomes_resource_vcf_index, + dbsnp_resource_vcf = dbsnp_resource_vcf, + dbsnp_resource_vcf_index = dbsnp_resource_vcf_index + } + + scatter (idx in range(length(HardFilterAndMakeSitesOnlyVcf.sites_only_vcf))) { + call SNPsVariantRecalibrator as SNPsVariantRecalibratorScattered { + input: + sites_only_variant_filtered_vcf = HardFilterAndMakeSitesOnlyVcf.sites_only_vcf[idx], + sites_only_variant_filtered_vcf_index = HardFilterAndMakeSitesOnlyVcf.sites_only_vcf_index[idx], + recalibration_filename = callset_name + ".snps." + idx + ".recal", + tranches_filename = callset_name + ".snps." 
+ idx + ".tranches", + recalibration_tranche_values = snp_recalibration_tranche_values, + recalibration_annotation_values = snp_recalibration_annotation_values, + model_report = SNPsVariantRecalibratorCreateModel.model_report, + hapmap_resource_vcf = hapmap_resource_vcf, + hapmap_resource_vcf_index = hapmap_resource_vcf_index, + omni_resource_vcf = omni_resource_vcf, + omni_resource_vcf_index = omni_resource_vcf_index, + one_thousand_genomes_resource_vcf = one_thousand_genomes_resource_vcf, + one_thousand_genomes_resource_vcf_index = one_thousand_genomes_resource_vcf_index, + dbsnp_resource_vcf = dbsnp_resource_vcf, + dbsnp_resource_vcf_index = dbsnp_resource_vcf_index + } + } + call GatherTranches as SNPGatherTranches { + input: + input_fofn = SNPsVariantRecalibratorScattered.tranches, + output_filename = callset_name + ".snps.gathered.tranches" + } + } + + if (num_gvcfs <= 10000){ + call SNPsVariantRecalibrator as SNPsVariantRecalibratorClassic { + input: + sites_only_variant_filtered_vcf = SitesOnlyGatherVcf.output_vcf, + sites_only_variant_filtered_vcf_index = SitesOnlyGatherVcf.output_vcf_index, + recalibration_filename = callset_name + ".snps.recal", + tranches_filename = callset_name + ".snps.tranches", + recalibration_tranche_values = snp_recalibration_tranche_values, + recalibration_annotation_values = snp_recalibration_annotation_values, + hapmap_resource_vcf = hapmap_resource_vcf, + hapmap_resource_vcf_index = hapmap_resource_vcf_index, + omni_resource_vcf = omni_resource_vcf, + omni_resource_vcf_index = omni_resource_vcf_index, + one_thousand_genomes_resource_vcf = one_thousand_genomes_resource_vcf, + one_thousand_genomes_resource_vcf_index = one_thousand_genomes_resource_vcf_index, + dbsnp_resource_vcf = dbsnp_resource_vcf, + dbsnp_resource_vcf_index = dbsnp_resource_vcf_index + } + } + + # For small callsets (fewer than 1000 samples) we can gather the VCF shards and collect metrics directly. + # For anything larger, we need to keep the VCF sharded and gather metrics collected from them. + Boolean is_small_callset = num_gvcfs <= 1000 + + scatter (idx in range(length(HardFilterAndMakeSitesOnlyVcf.variant_filtered_vcf))) { + call ApplyRecalibration { + input: + recalibrated_vcf_filename = callset_name + ".filtered." 
+ idx + ".vcf.gz", + input_vcf = HardFilterAndMakeSitesOnlyVcf.variant_filtered_vcf[idx], + input_vcf_index = HardFilterAndMakeSitesOnlyVcf.variant_filtered_vcf_index[idx], + indels_recalibration = IndelsVariantRecalibrator.recalibration, + indels_recalibration_index = IndelsVariantRecalibrator.recalibration_index, + indels_tranches = IndelsVariantRecalibrator.tranches, + snps_recalibration = if defined(SNPsVariantRecalibratorScattered.recalibration) then select_first([SNPsVariantRecalibratorScattered.recalibration])[idx] else select_first([SNPsVariantRecalibratorClassic.recalibration]), + snps_recalibration_index = if defined(SNPsVariantRecalibratorScattered.recalibration_index) then select_first([SNPsVariantRecalibratorScattered.recalibration_index])[idx] else select_first([SNPsVariantRecalibratorClassic.recalibration_index]), + snps_tranches = select_first([SNPGatherTranches.tranches, SNPsVariantRecalibratorClassic.tranches]), + indel_filter_level = indel_filter_level, + snp_filter_level = snp_filter_level + } + + # for large callsets we need to collect metrics from the shards and gather them later + if (!is_small_callset) { + call CollectVariantCallingMetrics as CollectMetricsSharded { + input: + input_vcf = ApplyRecalibration.recalibrated_vcf, + input_vcf_index = ApplyRecalibration.recalibrated_vcf_index, + metrics_filename_prefix = callset_name + "." + idx, + dbsnp_vcf = dbsnp_vcf, + dbsnp_vcf_index = dbsnp_vcf_index, + interval_list = eval_interval_list, + ref_dict = ref_dict + } + } + } + + # for small callsets we can gather the VCF shards and then collect metrics on it + if (is_small_callset) { + call GatherVcfs as FinalGatherVcf { + input: + input_vcfs_fofn = ApplyRecalibration.recalibrated_vcf, + input_vcf_indexes_fofn = ApplyRecalibration.recalibrated_vcf_index, + output_vcf_name = callset_name + ".vcf.gz" + } + + call CollectVariantCallingMetrics as CollectMetricsOnFullVcf { + input: + input_vcf = FinalGatherVcf.output_vcf, + input_vcf_index = FinalGatherVcf.output_vcf_index, + metrics_filename_prefix = callset_name, + dbsnp_vcf = dbsnp_vcf, + dbsnp_vcf_index = dbsnp_vcf_index, + interval_list = eval_interval_list, + ref_dict = ref_dict + } + } + + # for large callsets we still need to gather the sharded metrics + if (!is_small_callset) { + call GatherMetrics { + input: + input_details_fofn = select_all(CollectMetricsSharded.detail_metrics_file), + input_summaries_fofn = select_all(CollectMetricsSharded.summary_metrics_file), + output_prefix = callset_name + } + } + + output { + # outputs from the small callset path through the wdl + FinalGatherVcf.output_vcf + FinalGatherVcf.output_vcf_index + CollectMetricsOnFullVcf.detail_metrics_file + CollectMetricsOnFullVcf.summary_metrics_file + + # outputs from the large callset path through the wdl + # (note that we do not list ApplyRecalibration here because it is run in both paths) + GatherMetrics.detail_metrics_file + GatherMetrics.summary_metrics_file + + # output the interval list generated/used by this run workflow + DynamicallyCombineIntervals.output_intervals + } +} + +task samples { + File samples + + command { + cut ${samples} -f1 > sample_names.txt + cut ${samples} -f2 > gvcfs.txt + cut ${samples} -f3 > gvcf_indices.txt + + } + runtime { + cpus: 2 + requested_memory: 4000 + } + output { + File sample_names = "sample_names.txt" + File input_gvcfs = "gvcfs.txt" + File input_gvcfs_indices = "gvcf_indices.txt" + } + +} + + +task GetNumberOfSamples { + File sample_name_map + command <<< + wc -l ${sample_name_map} | awk '{print 
$1}' + >>> + + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + Int sample_count = read_int(stdout()) + } +} + +task ImportGVCFs { + Array[String] sample_names + Array[File] input_gvcfs + Array[File] input_gvcfs_indices + String interval + + String workspace_dir_name + + Int batch_size + + command <<< + set -e + set -o pipefail + + python << CODE + gvcfs = ['${sep="','" input_gvcfs}'] + sample_names = ['${sep="','" sample_names}'] + + if len(gvcfs)!= len(sample_names): + exit(1) + + with open("inputs.list", "w") as fi: + for i in range(len(gvcfs)): + fi.write(sample_names[i] + "\t" + gvcfs[i] + "\n") + + CODE + + # The memory setting here is very important and must be several GB lower + # than the total memory allocated to the VM because this tool uses + # a significant amount of non-heap memory for native libraries. + # Also, testing has shown that the multithreaded reader initialization + # does not scale well beyond 5 threads, so don't increase beyond that. + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx4g -Xms4g" \ + GenomicsDBImport \ + --genomicsdb-workspace-path ${workspace_dir_name} \ + --batch-size ${batch_size} \ + -L ${interval} \ + --sample-name-map inputs.list \ + --reader-threads 5 \ + -ip 500 + + tar -cf ${workspace_dir_name}.tar ${workspace_dir_name} + + >>> + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + File output_genomicsdb = "${workspace_dir_name}.tar" + } +} + +task GenotypeGVCFs { + File workspace_tar + String interval + + String output_vcf_filename + + File ref_fasta + File ref_fasta_index + File ref_dict + + File dbsnp_vcf + File dbsnp_vcf_index + + + command <<< + set -e + + tar -xf ${workspace_tar} + WORKSPACE=$( basename ${workspace_tar} .tar) + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx14g -Xms5g" \ + GenotypeGVCFs \ + -R ${ref_fasta} \ + -O ${output_vcf_filename} \ + -D ${dbsnp_vcf} \ + -G StandardAnnotation \ + --only-output-calls-starting-in-intervals \ + --use-new-qual-calculator \ + -V gendb://$WORKSPACE \ + -L ${interval} + >>> + runtime { + cpus: 4 + requested_memory: 16000 + } + output { + File output_vcf = "${output_vcf_filename}" + File output_vcf_index = "${output_vcf_filename}.tbi" + } +} + +task HardFilterAndMakeSitesOnlyVcf { + File vcf + File vcf_index + Float excess_het_threshold + + String variant_filtered_vcf_filename + String sites_only_vcf_filename + + + command { + set -e + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx15g -Xms3g" \ + VariantFiltration \ + --filter-expression "ExcessHet > ${excess_het_threshold}" \ + --filter-name ExcessHet \ + -O ${variant_filtered_vcf_filename} \ + -V ${vcf} + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx15g -Xms3g" \ + MakeSitesOnlyVcf \ + --INPUT ${variant_filtered_vcf_filename} \ + --OUTPUT ${sites_only_vcf_filename} + + } + runtime { + cpus: 4 + requested_memory: 16000 + } + output { + File variant_filtered_vcf = "${variant_filtered_vcf_filename}" + File variant_filtered_vcf_index = "${variant_filtered_vcf_filename}.tbi" + File sites_only_vcf = "${sites_only_vcf_filename}" + File sites_only_vcf_index = "${sites_only_vcf_filename}.tbi" + } +} + +task IndelsVariantRecalibrator { + String recalibration_filename + String tranches_filename + + Array[String] recalibration_tranche_values + Array[String] recalibration_annotation_values + + File sites_only_variant_filtered_vcf + File sites_only_variant_filtered_vcf_index + + 
File mills_resource_vcf + File axiomPoly_resource_vcf + File dbsnp_resource_vcf + File mills_resource_vcf_index + File axiomPoly_resource_vcf_index + File dbsnp_resource_vcf_index + + + command { + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx24g -Xms24g" \ + VariantRecalibrator \ + -V ${sites_only_variant_filtered_vcf} \ + -O ${recalibration_filename} \ + --tranches-file ${tranches_filename} \ + --trust-all-polymorphic \ + -tranche ${sep=' -tranche ' recalibration_tranche_values} \ + -an ${sep=' -an ' recalibration_annotation_values} \ + -mode INDEL \ + --max-gaussians 4 \ + -resource:mills,known=false,training=true,truth=true,prior=12 ${mills_resource_vcf} \ + -resource:axiomPoly,known=false,training=true,truth=false,prior=10 ${axiomPoly_resource_vcf} \ + -resource:dbsnp,known=true,training=false,truth=false,prior=2 ${dbsnp_resource_vcf} + } + runtime { + cpus: 8 + requested_memory: 32000 + } + output { + File recalibration = "${recalibration_filename}" + File recalibration_index = "${recalibration_filename}.idx" + File tranches = "${tranches_filename}" + } +} + +task SNPsVariantRecalibratorCreateModel { + String recalibration_filename + String tranches_filename + Int downsampleFactor + String model_report_filename + + Array[String] recalibration_tranche_values + Array[String] recalibration_annotation_values + + File sites_only_variant_filtered_vcf + File sites_only_variant_filtered_vcf_index + + File hapmap_resource_vcf + File omni_resource_vcf + File one_thousand_genomes_resource_vcf + File dbsnp_resource_vcf + File hapmap_resource_vcf_index + File omni_resource_vcf_index + File one_thousand_genomes_resource_vcf_index + File dbsnp_resource_vcf_index + + + command { + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx100g -Xms100g" \ + VariantRecalibrator \ + -V ${sites_only_variant_filtered_vcf} \ + -O ${recalibration_filename} \ + --tranches-file ${tranches_filename} \ + --trust-all-polymorphic \ + -tranche ${sep=' -tranche ' recalibration_tranche_values} \ + -an ${sep=' -an ' recalibration_annotation_values} \ + -mode SNP \ + --sample-every-Nth-variant ${downsampleFactor} \ + --output-model ${model_report_filename} \ + --max-gaussians 6 \ + -resource:hapmap,known=false,training=true,truth=true,prior=15 ${hapmap_resource_vcf} \ + -resource:omni,known=false,training=true,truth=true,prior=12 ${omni_resource_vcf} \ + -resource:1000G,known=false,training=true,truth=false,prior=10 ${one_thousand_genomes_resource_vcf} \ + -resource:dbsnp,known=true,training=false,truth=false,prior=7 ${dbsnp_resource_vcf} + } + runtime { + cpus: 8 + requested_memory: 104000 + } + output { + File model_report = "${model_report_filename}" + } +} + +task SNPsVariantRecalibrator { + String recalibration_filename + String tranches_filename + File? 
model_report + + Array[String] recalibration_tranche_values + Array[String] recalibration_annotation_values + + File sites_only_variant_filtered_vcf + File sites_only_variant_filtered_vcf_index + + File hapmap_resource_vcf + File omni_resource_vcf + File one_thousand_genomes_resource_vcf + File dbsnp_resource_vcf + File hapmap_resource_vcf_index + File omni_resource_vcf_index + File one_thousand_genomes_resource_vcf_index + File dbsnp_resource_vcf_index + + + command { + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx3g -Xms3g" \ + VariantRecalibrator \ + -V ${sites_only_variant_filtered_vcf} \ + -O ${recalibration_filename} \ + --tranches-file ${tranches_filename} \ + --trust-all-polymorphic \ + -tranche ${sep=' -tranche ' recalibration_tranche_values} \ + -an ${sep=' -an ' recalibration_annotation_values} \ + -mode SNP \ + ${"--input-model " + model_report + " --output-tranches-for-scatter "} \ + --max-gaussians 6 \ + -resource:hapmap,known=false,training=true,truth=true,prior=15 ${hapmap_resource_vcf} \ + -resource:omni,known=false,training=true,truth=true,prior=12 ${omni_resource_vcf} \ + -resource:1000G,known=false,training=true,truth=false,prior=10 ${one_thousand_genomes_resource_vcf} \ + -resource:dbsnp,known=true,training=false,truth=false,prior=7 ${dbsnp_resource_vcf} + } + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + File recalibration = "${recalibration_filename}" + File recalibration_index = "${recalibration_filename}.idx" + File tranches = "${tranches_filename}" + } +} + +task GatherTranches { + Array[File] input_fofn + String output_filename + + + command <<< + set -e + set -o pipefail + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx6g -Xms6g" \ + GatherTranches \ + --input ${sep=" --input " input_fofn} \ + --output ${output_filename} + >>> + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + File tranches = "${output_filename}" + } +} + +task ApplyRecalibration { + String recalibrated_vcf_filename + File input_vcf + File input_vcf_index + File indels_recalibration + File indels_recalibration_index + File indels_tranches + File snps_recalibration + File snps_recalibration_index + File snps_tranches + + Float indel_filter_level + Float snp_filter_level + + command { + set -e + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx5g -Xms5g" \ + ApplyVQSR \ + -O tmp.indel.recalibrated.vcf \ + -V ${input_vcf} \ + --recal-file ${indels_recalibration} \ + --tranches-file ${indels_tranches} \ + --truth-sensitivity-filter-level ${indel_filter_level} \ + --create-output-variant-index true \ + -mode INDEL + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx5g -Xms5g" \ + ApplyVQSR \ + -O ${recalibrated_vcf_filename} \ + -V tmp.indel.recalibrated.vcf \ + --recal-file ${snps_recalibration} \ + --tranches-file ${snps_tranches} \ + --truth-sensitivity-filter-level ${snp_filter_level} \ + --create-output-variant-index true \ + -mode SNP + } + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + File recalibrated_vcf = "${recalibrated_vcf_filename}" + File recalibrated_vcf_index = "${recalibrated_vcf_filename}.tbi" + } +} + +task GatherVcfs { + Array[File] input_vcfs_fofn + Array[File] input_vcf_indexes_fofn + String output_vcf_name + + command <<< + set -e + set -o pipefail + + # ignoreSafetyChecks make a big performance difference so we include it in our invocation + 
/scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx6g -Xms6g" \ + GatherVcfsCloud \ + --ignore-safety-checks \ + --gather-type BLOCK \ + --input ${sep=" --input " input_vcfs_fofn} \ + --output ${output_vcf_name} + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx6g -Xms6g" \ + IndexFeatureFile \ + --input ${output_vcf_name} + >>> + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + File output_vcf = "${output_vcf_name}" + File output_vcf_index = "${output_vcf_name}.tbi" + } +} + +task CollectVariantCallingMetrics { + File input_vcf + File input_vcf_index + + String metrics_filename_prefix + File dbsnp_vcf + File dbsnp_vcf_index + File interval_list + File ref_dict + + + command { + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx6g -Xms6g" \ + CollectVariantCallingMetrics \ + --INPUT ${input_vcf} \ + --DBSNP ${dbsnp_vcf} \ + --SEQUENCE_DICTIONARY ${ref_dict} \ + --OUTPUT ${metrics_filename_prefix} \ + --THREAD_COUNT 8 \ + --TARGET_INTERVALS ${interval_list} + } + output { + File detail_metrics_file = "${metrics_filename_prefix}.variant_calling_detail_metrics" + File summary_metrics_file = "${metrics_filename_prefix}.variant_calling_summary_metrics" + } + runtime { + cpus: 4 + requested_memory: 8000 + } +} + +task GatherMetrics { + Array[File] input_details_fofn + Array[File] input_summaries_fofn + + String output_prefix + + command <<< + set -e + set -o pipefail + + + /scratch/pawsey0339/dtang/gatk4_multisample/gatk-4.1.4.1/gatk --java-options "-Xmx2g -Xms2g" \ + AccumulateVariantCallingMetrics \ + --INPUT ${sep=" --INPUT " input_details_fofn} \ + --OUTPUT ${output_prefix} + >>> + runtime { + cpus: 4 + requested_memory: 8000 + } + output { + File detail_metrics_file = "${output_prefix}.variant_calling_detail_metrics" + File summary_metrics_file = "${output_prefix}.variant_calling_summary_metrics" + } +} + +task DynamicallyCombineIntervals { + File intervals + Int merge_count + + command { + python << CODE + def parse_interval(interval): + colon_split = interval.split(":") + chromosome = colon_split[0] + dash_split = colon_split[1].split("-") + start = int(dash_split[0]) + end = int(dash_split[1]) + return chromosome, start, end + + def add_interval(chr, start, end): + lines_to_write.append(chr + ":" + str(start) + "-" + str(end)) + return chr, start, end + + count = 0 + chain_count = ${merge_count} + l_chr, l_start, l_end = "", 0, 0 + lines_to_write = [] + with open("${intervals}") as f: + with open("out.intervals", "w") as f1: + for line in f.readlines(): + # initialization + if count == 0: + w_chr, w_start, w_end = parse_interval(line) + count = 1 + continue + # reached number to combine, so spit out and start over + if count == chain_count: + l_char, l_start, l_end = add_interval(w_chr, w_start, w_end) + w_chr, w_start, w_end = parse_interval(line) + count = 1 + continue + + c_chr, c_start, c_end = parse_interval(line) + # if adjacent keep the chain going + if c_chr == w_chr and c_start == w_end + 1: + w_end = c_end + count += 1 + continue + # not adjacent, end here and start a new chain + else: + l_char, l_start, l_end = add_interval(w_chr, w_start, w_end) + w_chr, w_start, w_end = parse_interval(line) + count = 1 + if l_char != w_chr or l_start != w_start or l_end != w_end: + add_interval(w_chr, w_start, w_end) + f1.writelines("\n".join(lines_to_write)) + CODE + } + + runtime { + cpus: 4 + requested_memory: 8000 + } + + output { + File output_intervals = "out.intervals" + } 
+} diff --git a/janis_assistant/tests/data/wdl/bwa.wdl b/janis_assistant/tests/data/wdl/bwa.wdl new file mode 100644 index 0000000..93cbcb0 --- /dev/null +++ b/janis_assistant/tests/data/wdl/bwa.wdl @@ -0,0 +1,113 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mem { + input { + File read1 + File? read2 + BwaIndex bwaIndex + String outputPrefix + Boolean sixtyFour = false + Boolean usePostalt = false + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + + String? readgroup + Int? sortThreads + + Int threads = 4 + Int? memoryGb + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. + String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" + } + + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bwa mem \ + -t ~{threads} \ + ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ + ~{bwaIndex.fastaFile} \ + ~{read1} \ + ~{read2} \ + 2> ~{outputPrefix}.log.bwamem | \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ + samtools sort \ + ~{"-@ " + totalSortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPrefix}.aln.bam + } + + output { + File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" + } + + runtime { + # One extra thread for bwa-postalt + samtools is not needed. + # These only use 5-10% of compute power and not always simultaneously. 
+ cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + read1: {description: "The first-end fastq file.", category: "required"} + read2: {description: "The second-end fastq file.", category: "common"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} + sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} + } +} + +struct BwaIndex { + File fastaFile + Array[File] indexFiles +} \ No newline at end of file diff --git a/janis_assistant/tests/test_translate.py b/janis_assistant/tests/test_translate.py new file mode 100644 index 0000000..f01487d --- /dev/null +++ b/janis_assistant/tests/test_translate.py @@ -0,0 +1,532 @@ + + +""" +To test translate functionality. +Tests CLI args and end-to-end translations. 
+""" + +import os +import unittest +import shutil +import regex as re +from typing import Optional + +from janis_core import settings +from janis_assistant.management.configuration import JanisConfiguration +from janis_assistant.main import ingest +from janis_assistant.main import translate +from janis_assistant.cli import process_args + +CWL_TESTDATA_DIR = os.path.join(os.getcwd(), 'tests/data/cwl') +WDL_TESTDATA_DIR = os.path.join(os.getcwd(), 'tests/data/wdl') +GALAXY_TESTDATA_DIR = os.path.join(os.getcwd(), 'tests/data/galaxy') +JANIS_TESTDATA_DIR = os.path.join(os.getcwd(), 'tests/data/janis') + + +# ------- HELPER FUNCS ------- # + +def _run_translate(filepath: str, srcfmt: str, destfmt: str, mode: Optional[str]=None) -> Optional[str]: + config = JanisConfiguration.initial_configuration(None) + internal = ingest(filepath, srcfmt) + return translate(config, internal, destfmt, mode=mode) + +def _get_file_lines(filepath: str) -> list[str]: + with open(filepath, 'r') as f: + text = f.read() + return _get_simplified_lines(text) + +def _get_process_input_lines(filepath: str) -> list[str]: + PATTERN = r'input:([\s\S]+)(?=output:)' + with open(filepath, 'r') as f: + text = f.read() + match = re.search(PATTERN, text) + assert(match) + return _get_simplified_lines(match.group(1)) + +def _get_script_lines(filepath: str) -> list[str]: + PATTERN = r'script:[\s\S]+"""([\s\S]+)(?=""")' + with open(filepath, 'r') as f: + text = f.read() + match = re.search(PATTERN, text) + assert(match) + return _get_simplified_lines(match.group(1)) + +def _get_simplified_lines(text: str) -> list[str]: + lines = text.split('\n') + lines = [ln.split('//')[0] for ln in lines] + lines = [ln.strip() for ln in lines] + lines = [ln for ln in lines if ln != ''] + return lines + +def _docker_running() -> bool: + import subprocess + try: + completed_process = subprocess.run(['docker', 'version'], shell=True, capture_output=True) + if completed_process.returncode == 0: + return True + else: + return False + except FileNotFoundError: + return False + +def _reset_global_settings() -> None: + settings.translate.MODE = 'regular' + settings.translate.SAFE_MODE = False + settings.translate.ALLOW_EMPTY_CONTAINER = True + settings.ingest.galaxy.GEN_IMAGES = False + settings.ingest.galaxy.DISABLE_IMAGE_CACHE = False + settings.ingest.cwl.INGEST_JAVASCRIPT_EXPRESSIONS = True + settings.ingest.cwl.REQUIRE_CWL_VERSION = False + settings.datatypes.ALLOW_UNPARSEABLE_DATATYPES = True + settings.graph.ALLOW_UNKNOWN_SOURCE = True + settings.graph.ALLOW_UNKNOWN_SCATTER_FIELDS = True + settings.graph.ALLOW_INCORRECT_NUMBER_OF_SOURCES = True + settings.graph.ALLOW_NON_ARRAY_SCATTER_INPUT = True + settings.graph.ALLOW_INCOMPATIBLE_TYPES = True + settings.validation.STRICT_IDENTIFIERS = False + settings.validation.VALIDATE_STRINGFORMATTERS = False + + + +### --------- CLI ---------- ### + +class TestCli(unittest.TestCase): + def setUp(self) -> None: + self.src = 'cwl' + self.dest = 'nextflow' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_basic(self) -> None: + filepath = f'{CWL_TESTDATA_DIR}/fastqc.cwl' + args = ['translate', '--from', self.src, '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # assert file exists + lines = _get_file_lines('translated/fastqc.nf') + self.assertGreater(len(lines), 0) + + @unittest.skip("need to test different assert for skeleton mode") + def test_modes_skeleton(self) -> None: + filepath = 
f'{CWL_TESTDATA_DIR}/subworkflow_test/main.cwl' + args = ['translate', '--mode', 'skeleton', '--from', self.src, '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/main.nf') + self.assertGreater(len(lines), 0) + + # check optional_input_types.nf + input_lines = _get_process_input_lines('translated/modules/optional_input_types.nf') + self.assertEqual(len(input_lines), 5) + script_lines = _get_script_lines('translated/modules/optional_input_types.nf') + self.assertEqual(len(script_lines), 2) + + @unittest.skip("Need to further test different assert value") + def test_modes_regular(self) -> None: + filepath = f'{CWL_TESTDATA_DIR}/subworkflow_test/main.cwl' + args = ['translate', '--mode', 'regular', '--from', self.src, '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/main.nf') + self.assertGreater(len(lines), 0) + + # check optional_input_types.nf + input_lines = _get_process_input_lines('translated/modules/optional_input_types.nf') + self.assertEqual(len(input_lines), 5) + script_lines = _get_script_lines('translated/modules/optional_input_types.nf') + self.assertEqual(len(script_lines), 7) + + def test_modes_extended1(self) -> None: + filepath = f'{CWL_TESTDATA_DIR}/subworkflow_test/main.cwl' + args = ['translate', '--mode', 'extended', '--from', self.src, '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/main.nf') + self.assertGreater(len(lines), 0) + + # check optional_input_types.nf + input_lines = _get_process_input_lines('translated/modules/optional_input_types.nf') + self.assertEqual(len(input_lines), 6) + script_lines = _get_script_lines('translated/modules/optional_input_types.nf') + self.assertEqual(len(script_lines), 8) + + def test_modes_extended2(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/unicycler_assembly.ga' + args = ['translate', '--mode', 'extended', '--from', 'galaxy', '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/main.nf') + self.assertGreater(len(lines), 0) + + # check optional_input_types.nf + input_lines = _get_process_input_lines('translated/modules/quast.nf') + self.assertEqual(len(input_lines), 41) + script_lines = _get_script_lines('translated/modules/quast.nf') + self.assertEqual(len(script_lines), 42) + + @unittest.skip("test require local docker installation") + #@unittest.skipUnless(_docker_running(), 'docker daemon must be running to test this') + def test_galaxy_build_images(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/hisat2_wf.ga' + args = ['translate', '--galaxy-build-images', '--from', 'galaxy', '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/main.nf') + self.assertGreater(len(lines), 0) + + # check container correct + lines = _get_file_lines('translated/modules/hisat2.nf') + self.assertGreater(len(lines), 0) + self.assertIn('container "quay.io/ppp-janis-translate/hisat2:2.2.1"', lines) + + def test_galaxy_no_image_cache(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + args = ['translate', '--galaxy-no-image-cache', '--from', 'galaxy', '--to', self.dest, filepath] + + # run translate via cli args + 
process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/fastqc.nf') + self.assertGreater(len(lines), 0) + + def test_galaxy_no_wrapper_cache(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + args = ['translate', '--galaxy-no-wrapper-cache', '--from', 'galaxy', '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/fastqc.nf') + self.assertGreater(len(lines), 0) + + def test_output_dir(self) -> None: + if os.path.exists('out_fastqc'): + shutil.rmtree('out_fastqc') + + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + args = ['translate', '-o', 'out_fastqc', '--from', 'galaxy', '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('out_fastqc/fastqc.nf') + self.assertGreater(len(lines), 0) + + if os.path.exists('out_fastqc'): + shutil.rmtree('out_fastqc') + + def test_disallow_empty_container(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + args = ['translate', '--disallow-empty-container', '--from', 'galaxy', '--to', self.dest, filepath] + + # run translate via cli args + process_args(sysargs=args) + + # check main.nf + lines = _get_file_lines('translated/fastqc.nf') + self.assertGreater(len(lines), 0) + + + +### --- FROM CWL --- ### + +class TestCwlToCwl(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'cwl' + self.dest = 'cwl' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_fastqc_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/fastqc.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_fastqc2_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/fastqc2.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_gatk_haplotype_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/gatk_haplotype_tool.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_super_enhancer_wf(self): + filepath = f'{CWL_TESTDATA_DIR}/super_enhancer_wf.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + +class TestCwlToWdl(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'cwl' + self.dest = 'wdl' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_fastqc_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/fastqc.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_fastqc2_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/fastqc2.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_gatk_haplotype_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/gatk_haplotype_tool.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_super_enhancer_wf(self): + filepath = f'{CWL_TESTDATA_DIR}/super_enhancer_wf.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + +class TestCwlToNextflow(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'cwl' + self.dest = 'nextflow' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_fastqc_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/fastqc.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_fastqc2_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/fastqc2.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def 
test_gatk_haplotype_tool(self): + filepath = f'{CWL_TESTDATA_DIR}/gatk_haplotype_tool.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_super_enhancer_wf(self): + filepath = f'{CWL_TESTDATA_DIR}/super_enhancer_wf.cwl' + maintask = _run_translate(filepath, self.src, self.dest) + + + +### --- FROM GALAXY --- ### + +class TestGalaxyToNextflow(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'galaxy' + self.dest = 'nextflow' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_fastqc_tool(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_abricate_wf(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/abricate_wf.ga' + maintask = _run_translate(filepath, self.src, self.dest) + + @unittest.skip("need to test different assert for this translation") + def test_unicycler_assembly_wf(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/unicycler_assembly.ga' + maintask = _run_translate(filepath, self.src, self.dest) + + +class TestGalaxyToCwl(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'galaxy' + self.dest = 'cwl' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_fastqc_tool(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_abricate_wf(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/abricate_wf.ga' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_unicycler_assembly_wf(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/unicycler_assembly.ga' + maintask = _run_translate(filepath, self.src, self.dest) + + +class TestGalaxyToWdl(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'galaxy' + self.dest = 'wdl' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_fastqc_tool(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/fastqc-5ec9f6bceaee/rgFastQC.xml' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_abricate_wf(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/abricate_wf.ga' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_unicycler_assembly_wf(self) -> None: + filepath = f'{GALAXY_TESTDATA_DIR}/unicycler_assembly.ga' + maintask = _run_translate(filepath, self.src, self.dest) + + + +### --- FROM JANIS --- ### + +class TestJanisToCWL(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'janis' + self.dest = 'cwl' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def test_samtools_flagstat_tool(self) -> None: + filepath = f'{JANIS_TESTDATA_DIR}/samtools_flagstat_tool.py' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_gatk_haplotype_caller_tool(self) -> None: + filepath = f'{JANIS_TESTDATA_DIR}/gatk_haplotype_caller_tool.py' + maintask = _run_translate(filepath, self.src, self.dest) + + def test_germline_variant_caller_wf(self) -> None: + filepath = f'{JANIS_TESTDATA_DIR}/germline_variant_caller_wf.py' + maintask = _run_translate(filepath, self.src, self.dest, mode='extended') + + +class TestJanisToNextflow(unittest.TestCase): + + def setUp(self) -> None: + self.src = 'janis' + self.dest = 'nextflow' + if os.path.exists('translated'): + shutil.rmtree('translated') + _reset_global_settings() + + def 
+        filepath = f'{JANIS_TESTDATA_DIR}/samtools_flagstat_tool.py'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    def test_gatk_haplotype_caller_tool(self) -> None:
+        filepath = f'{JANIS_TESTDATA_DIR}/gatk_haplotype_caller_tool.py'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    def test_germline_variant_caller_wf(self) -> None:
+        filepath = f'{JANIS_TESTDATA_DIR}/germline_variant_caller_wf.py'
+        maintask = _run_translate(filepath, self.src, self.dest, mode='extended')
+
+
+class TestJanisToWdl(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.src = 'janis'
+        self.dest = 'wdl'
+        if os.path.exists('translated'):
+            shutil.rmtree('translated')
+        _reset_global_settings()
+
+    def test_samtools_flagstat_tool(self) -> None:
+        filepath = f'{JANIS_TESTDATA_DIR}/samtools_flagstat_tool.py'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    def test_gatk_haplotype_caller_tool(self) -> None:
+        filepath = f'{JANIS_TESTDATA_DIR}/gatk_haplotype_caller_tool.py'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    def test_germline_variant_caller_wf(self) -> None:
+        filepath = f'{JANIS_TESTDATA_DIR}/germline_variant_caller_wf.py'
+        maintask = _run_translate(filepath, self.src, self.dest, mode='extended')
+
+
+
+### --- FROM WDL --- ###
+
+class TestWdlToWdl(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.src = 'wdl'
+        self.dest = 'wdl'
+        if os.path.exists('translated'):
+            shutil.rmtree('translated')
+        _reset_global_settings()
+
+    @unittest.skip('TODO WDL ingest needs work')
+    def test_bwa_mem_tool(self) -> None:
+        filepath = f'{WDL_TESTDATA_DIR}/bwa.xml'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    @unittest.skip('TODO WDL ingest needs work')
+    def test_gatk4_wf(self) -> None:
+        filepath = f'{WDL_TESTDATA_DIR}/Multisample_jointgt_GATK4.wdl'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+
+class TestWdlToCwl(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.src = 'wdl'
+        self.dest = 'cwl'
+        if os.path.exists('translated'):
+            shutil.rmtree('translated')
+        _reset_global_settings()
+
+    @unittest.skip('TODO WDL ingest needs work')
+    def test_bwa_mem_tool(self) -> None:
+        filepath = f'{WDL_TESTDATA_DIR}/bwa.xml'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    @unittest.skip('TODO WDL ingest needs work')
+    def test_gatk4_wf(self) -> None:
+        filepath = f'{WDL_TESTDATA_DIR}/Multisample_jointgt_GATK4.wdl'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+
+class TestWdlToNextflow(unittest.TestCase):
+
+    def setUp(self) -> None:
+        self.src = 'wdl'
+        self.dest = 'nextflow'
+        if os.path.exists('translated'):
+            shutil.rmtree('translated')
+        _reset_global_settings()
+
+    @unittest.skip('TODO WDL ingest needs work')
+    def test_bwa_mem_tool(self) -> None:
+        filepath = f'{WDL_TESTDATA_DIR}/bwa.xml'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
+    @unittest.skip('TODO WDL ingest needs work')
+    def test_gatk4_wf(self) -> None:
+        filepath = f'{WDL_TESTDATA_DIR}/Multisample_jointgt_GATK4.wdl'
+        maintask = _run_translate(filepath, self.src, self.dest)
+
diff --git a/notes.txt b/notes.txt
new file mode 100644
index 0000000..4ac2b96
--- /dev/null
+++ b/notes.txt
@@ -0,0 +1,20 @@
+
+
+TODO
+- IlluminaGermlineVariantCaller
+ - "--mode regular" or "--mode skeleton" fails for translation
+ - can't find input in the unwrap functions
+- move to <__UNTRANSLATED_JS__>
+
+- docker permission issues
+ - badly broken at the moment; needs proper investigation
+ - can we run docker as non-root?
+ +- --galaxy-build-images + - doesn't work if running on singularity as docker isn't running + - "you have supplied "--build-galaxy-tool-images", but docker was not found" + - can we make this work with singularity? + - can we detect whether singularity is running & tell users to swap to docker? + +- get a pypi package ready for janis + - avoids singularity / docker issues \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 85e8371..4cde4b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,19 +1,82 @@ -[tool.black] -line-length = 88 -target_version = ['py37'] -include = '\.pyi?$' -exclude = ''' -( - /( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | build - | dist - )/ -) -''' + +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools>=67.8.0", "wheel>=0.40.0"] + +[project] +name = "janis-pipelines.assistant" +version = "v0.13.0" +description = "Easier way to run workflows, configurable across environments" +readme = "README.md" +license = { file = "LICENSE" } +keywords = [ + "janis", + "workflows", + "assistant", +] + +authors = [ + { name = "Michael Franklin", email = "michael.franklin@petermac.org" }, + { name = "Grace Hall", email = "grace.hall1@unimelb.edu.au" }, + { name = "Richard Lupat", email = "Richard.Lupat@petermac.org" }, + { name = "Evan Thomas", email = "thomas.e@wehi.edu.au" }, +] +maintainers = [ + { name = "Grace Hall", email = "grace.hall1@unimelb.edu.au" }, + { name = "Richard Lupat", email = "Richard.Lupat@petermac.org" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering", +] +dependencies = [ + "janis-pipelines.core", + "janis-pipelines.bioinformatics", + "janis-pipelines.unix", + "janis-pipelines.templates >= 0.11.0", + "ruamel.yaml >= 0.12.4, <= 0.16.5", + "progressbar2", + "path", + "black", + "pre-commit", + "requests", + "python-dateutil", + "tabulate", + "cwltool", + "cwl-utils==0.15", + "blessed", + "regex", +] +requires-python = ">=3.10.5" + +[project.optional-dependencies] +gcs = ["google-cloud-storage"] +ci = [ + "codecov", + "coverage", + "requests_mock", + "nose_parameterized", + "keyring==21.4.0", + "setuptools", + "wheel", + "twine", +] + +[project.urls] +repository = "https://github.com/PMCC-BioinformaticsCore/janis-assistant" +documentation = "https://janis.readthedocs.io/en/latest/" + +[tool.setuptools.packages.find] +where = ["./"] +include = ["janis_assistant*"] +namespaces = false + +[project.scripts] +janis = "janis_assistant.cli:process_args" + +# NOTE +# The following line was not copied across from old setup.py (unknown purpose) +# entry_points={"janis.extension": ["assistant=janis_assistant"]} diff --git a/requirements.txt b/requirements.txt index 41fc7f7..fcb7607 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,16 @@ -blessed janis-pipelines.core +janis-pipelines.bioinformatics +janis-pipelines.unix janis-pipelines.templates +ruamel.yaml >= 0.12.4, <= 0.16.5 +progressbar2 +path black pre-commit requests python-dateutil -tabulate \ No newline at end of file +tabulate +cwltool +cwl-utils==0.15 +blessed +regex diff --git a/setup.py b/setup.py index 9123ffa..348be0a 100644 --- a/setup.py +++ b/setup.py @@ -1,65 +1 @@ -from setuptools import setup, find_packages - -# from setuptools.command.develop import develop -# from 
setuptools.command.install import install - -modules = ["janis_assistant." + p for p in sorted(find_packages("./janis_assistant"))] - -vsn = {} -with open("./janis_assistant/__meta__.py") as fp: - exec(fp.read(), vsn) -__version__ = vsn["__version__"] - -setup( - # legacy .runner - renamed to assistant - name="janis-pipelines.runner", - version=__version__, - description="Easier way to run workflows, configurable across environments", - long_description=open("./README.md").read(), - long_description_content_type="text/markdown", - author="Michael Franklin", - author_email="michael.franklin@petermac.org", - license="MIT", - keywords=["janis", "workflows", "assistant"], - entry_points={ - "console_scripts": ["janis=janis_assistant.cli:process_args"], - "janis.extension": ["assistant=janis_assistant"], - }, - install_requires=[ - "janis-pipelines.core>=0.11.0", - "janis-pipelines.templates>=0.11.0", - "requests", - "path", - "python-dateutil", - "progressbar2", - "tabulate", - "ruamel.yaml >= 0.12.4, <= 0.16.5", - "cwltool", - "blessed", - ], - extras_require={ - "gcs": ["google-cloud-storage"], - "ci": [ - "codecov", - "coverage", - "requests_mock", - "nose_parameterized", - "keyring==21.4.0", - "setuptools", - "wheel", - "twine", - ], - }, - packages=["janis_assistant"] + modules, - classifiers=[ - "Development Status :: 4 - Beta", - "Topic :: Scientific/Engineering", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Environment :: Console", - ], - cmdclass={ - # 'develop': PostDevelopCommand, - # 'install': PostInstallCommand - }, -) +import setuptools; setuptools.setup()
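
A note on the entry point that was left behind: the old setup.py registered both the console script and a "janis.extension" entry point, and the NOTE at the bottom of the new pyproject.toml records that only the console script was carried across (its purpose is unknown). If that hook turns out to still be needed, a PEP 621 sketch of the equivalent declaration would be:

    [project.entry-points."janis.extension"]
    assistant = "janis_assistant"

setuptools reads this table the same way it read the old entry_points dict, so it can sit alongside the existing [project.scripts] section with no further build configuration.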
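
On the notes.txt question about detecting whether singularity rather than docker is available before attempting --galaxy-build-images: a minimal sketch of such a check, assuming only that the docker and singularity CLIs are on PATH when installed (the helper name and return values are hypothetical, not part of the current codebase):

    import shutil
    import subprocess

    def available_container_engine() -> str | None:
        # Hypothetical helper: returns 'docker' when a reachable docker daemon
        # exists, 'singularity' when only the singularity CLI is present,
        # otherwise None.
        if shutil.which('docker'):
            # 'docker version' talks to the daemon, so it also catches the
            # "installed but daemon not running / no permission" case
            proc = subprocess.run(['docker', 'version'], capture_output=True)
            if proc.returncode == 0:
                return 'docker'
        if shutil.which('singularity'):
            return 'singularity'
        return None

Wired into the --galaxy-build-images path, a check like this would let the CLI suggest swapping to docker (or skipping image building) instead of failing with the current "docker was not found" message.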