diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index 32849b2e..cc2f2a19 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -23,6 +23,7 @@ jobs: parameters: | { "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", + "use_work_dir_as_temp": true, } profiles: test,sanger,singularity,cleanup - uses: actions/upload-artifact@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index bf3e3e6a..050399ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,41 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[0.6.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.6.0)] – Bellsprout – [2024-09-13] + +The pipeline has now been validated for draft (unpublished) assemblies. + +- The pipeline now queries the NCBI database instead of GoaT to establish the + taxonomic classification of the species and the relevant Busco lineages. + In case the taxon_id is not found, the pipeline falls back to GoaT, which + is aware of upcoming taxon_ids in ENA. +- New `--busco_lineages` parameter to choose specific Busco lineages instead of + automatically selecting based on the taxonomy. +- All parameters are now passed the regular Nextflow way. There is no support + for the original Yaml configuration files of the Snakemake version. +- New option `--skip_taxon_filtering` to skip the taxon filtering in blast searches. + Mostly relevant for draft assemblies. +- Introduced the `--use_work_dir_as_temp` parameter to avoid leaving files in `/tmp`. + +### Parameters + +| Old parameter | New parameter | +| ------------- | ---------------------- | +| --yaml | | +| | --busco_lineages | +| | --skip_taxon_filtering | +| | --use_work_dir_as_temp | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present.
**NB:** Parameter has been **added** if just the new parameter information is present.
**NB:** Parameter has been **removed** if new parameter information isn't present. + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| goat | 0.2.5 | | + ## [[0.5.1](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.5.1)] – Snorlax (patch 1) – [2024-08-22] ### Enhancements & fixes diff --git a/README.md b/README.md index c7b92970..b55ab353 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ It takes a samplesheet of BAM/CRAM/FASTQ/FASTA files as input, calculates genome 1. Calculate genome statistics in windows ([`fastawindows`](https://github.com/tolkit/fasta_windows)) 2. Calculate Coverage ([`blobtk/depth`](https://github.com/blobtoolkit/blobtk)) -3. Fetch associated BUSCO lineages ([`goat/taxonsearch`](https://github.com/genomehubs/goat-cli)) +3. Determine the appropriate BUSCO lineages from the taxonomy. 4. Run BUSCO ([`busco`](https://busco.ezlab.org/)) 5. Extract BUSCO genes ([`blobtoolkit/extractbuscos`](https://github.com/blobtoolkit/blobtoolkit)) 6. 
Run Diamond BLASTp against extracted BUSCO genes ([`diamond/blastp`](https://github.com/bbuchfink/diamond)) diff --git a/assets/mapping_taxids-busco_dataset_name.2019-12-16.txt b/assets/mapping_taxids-busco_dataset_name.2019-12-16.txt new file mode 100644 index 00000000..7105beeb --- /dev/null +++ b/assets/mapping_taxids-busco_dataset_name.2019-12-16.txt @@ -0,0 +1,166 @@ +422676 aconoidasida +7898 actinopterygii +5338 agaricales +155619 agaricomycetes +33630 alveolata +5794 apicomplexa +6854 arachnida +6656 arthropoda +4890 ascomycota +8782 aves +5204 basidiomycota +68889 boletales +3699 brassicales +134362 capnodiales +33554 carnivora +91561 cetartiodactyla +34395 chaetothyriales +3041 chlorophyta +5796 coccidia +28738 cyprinodontiformes +7147 diptera +147541 dothideomycetes +3193 embryophyta +33392 endopterygota +314146 euarchontoglires +33682 euglenozoa +2759 eukaryota +5042 eurotiales +147545 eurotiomycetes +9347 eutheria +72025 fabales +4751 fungi +314147 glires +1028384 glomerellales +5178 helotiales +7524 hemiptera +7399 hymenoptera +5125 hypocreales +50557 insecta +314145 laurasiatheria +147548 leotiomycetes +7088 lepidoptera +4447 liliopsida +40674 mammalia +33208 metazoa +6029 microsporidia +6447 mollusca +4827 mucorales +1913637 mucoromycota +6231 nematoda +33183 onygenales +9126 passeriformes +5820 plasmodium +92860 pleosporales +38820 poales +5303 polyporales +9443 primates +4891 saccharomycetes +8457 sauropsida +4069 solanales +147550 sordariomycetes +33634 stramenopiles +32523 tetrapoda +155616 tremellomycetes +7742 vertebrata +33090 viridiplantae +71240 eudicots +57723 acidobacteria +201174 actinobacteria_phylum +1760 actinobacteria_class +28211 alphaproteobacteria +135622 alteromonadales +200783 aquificae +1385 bacillales +91061 bacilli +2 bacteria +171549 bacteroidales +976 bacteroidetes +68336 bacteroidetes-chlorobi_group +200643 bacteroidia +28216 betaproteobacteria +80840 burkholderiales +213849 campylobacterales +1706369 cellvibrionales 
+204428 chlamydiae +1090 chlorobi +200795 chloroflexi +135613 chromatiales +1118 chroococcales +186801 clostridia +186802 clostridiales +84999 coriobacteriales +84998 coriobacteriia +85007 corynebacteriales +1117 cyanobacteria +768507 cytophagales +768503 cytophagia +68525 delta-epsilon-subdivisions +28221 deltaproteobacteria +213118 desulfobacterales +213115 desulfovibrionales +69541 desulfuromonadales +91347 enterobacterales +186328 entomoplasmatales +29547 epsilonproteobacteria +1239 firmicutes +200644 flavobacteriales +117743 flavobacteriia +32066 fusobacteria +203491 fusobacteriales +1236 gammaproteobacteria +186826 lactobacillales +118969 legionellales +85006 micrococcales +31969 mollicutes +2085 mycoplasmatales +206351 neisseriales +32003 nitrosomonadales +1161 nostocales +135619 oceanospirillales +1150 oscillatoriales +135625 pasteurellales +203682 planctomycetes +85009 propionibacteriales +1224 proteobacteria +72274 pseudomonadales +356 rhizobiales +227290 rhizobium-agrobacterium_group +204455 rhodobacterales +204441 rhodospirillales +766 rickettsiales +909929 selenomonadales +117747 sphingobacteriia +204457 sphingomonadales +136 spirochaetales +203691 spirochaetes +203692 spirochaetia +85011 streptomycetales +85012 streptosporangiales +1890424 synechococcales +508458 synergistetes +544448 tenericutes +68295 thermoanaerobacterales +200918 thermotogae +72273 thiotrichales +1737405 tissierellales +1737404 tissierellia +74201 verrucomicrobia +135623 vibrionales +135614 xanthomonadales +2157 archaea +2266 thermoproteales +2281 sulfolobales +114380 desulfurococcales +183967 thermoplasmata +651137 thaumarchaeota +2182 methanococcales +2191 methanomicrobiales +183925 methanobacteria +183924 thermoprotei +2235 halobacteriales +1644060 natrialbales +224756 methanomicrobia +1644055 haloferacales +183963 halobacteria +28890 euryarchaeota diff --git a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 b/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 new file mode 
100644 index 00000000..dc933c1f Binary files /dev/null and b/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 b/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 new file mode 100644 index 00000000..2a56a82f Binary files /dev/null and b/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 differ diff --git a/bin/generate_config.py b/bin/generate_config.py new file mode 100755 index 00000000..e3d00dfa --- /dev/null +++ b/bin/generate_config.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 + +import argparse +import dataclasses +import os +import sys +import sqlite3 +import requests +import typing +import yaml + +NCBI_TAXONOMY_API = "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/taxonomy/taxon/%s" +GOAT_LOOKUP_API = "https://goat.genomehubs.org/api/v2/lookup?searchTerm=%s&result=taxon&size=10&taxonomy=ncbi" +GOAT_RECORD_API = "https://goat.genomehubs.org/api/v2/record?recordId=%s&result=taxon&size=10&taxonomy=ncbi" +NCBI_DATASETS_API = "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/%s/dataset_report?filters.assembly_version=all_assemblies" + +RANKS = [ + "genus", + "family", + "order", + "class", + "phylum", + "kingdom", + "superkingdom", +] + +BUSCO_BASAL_LINEAGES = ["eukaryota_odb10", "bacteria_odb10", "archaea_odb10"] + + +def parse_args(args=None): + Description = "Produce the various configuration files needed within the pipeline" + + parser = argparse.ArgumentParser(description=Description) + parser.add_argument("--fasta", required=True, help="Path to the Fasta file of the assembly.") + parser.add_argument("--taxon_query", required=True, help="Query string/integer for this taxon.") + parser.add_argument("--lineage_tax_ids", required=True, help="Mapping between BUSCO lineages and taxon IDs.") + parser.add_argument("--yml_out", required=True, help="Output YML file.") + parser.add_argument("--csv_out", required=True, help="Output CSV file.") + 
parser.add_argument("--accession", help="Accession number of the assembly (optional).", default=None) + parser.add_argument("--busco", help="Requested BUSCO lineages.", default=None) + parser.add_argument("--blastn", required=True, help="Path to the NCBI Taxonomy database") + parser.add_argument("--version", action="version", version="%(prog)s 1.4") + return parser.parse_args(args) + + +def make_dir(path): + if len(path) > 0: + os.makedirs(path, exist_ok=True) + + +@dataclasses.dataclass +class TaxonInfo: + taxon_id: int + organism_name: str + rank: typing.Optional[str] + lineage: typing.List["TaxonInfo"] + + +def make_taxon_info_from_goat(body: dict[str, str]) -> TaxonInfo: + rank = body["taxon_rank"] + if rank == "no rank": + rank = None + if "lineage" in body: + lineage = [make_taxon_info_from_goat(b) for b in body["lineage"]] + else: + lineage = [] + return TaxonInfo(int(body["taxon_id"]), body["scientific_name"], rank, lineage) + + +def fetch_taxon_info_from_goat(taxon_name: typing.Union[str, int]) -> TaxonInfo: + if isinstance(taxon_name, int): + taxon_id = taxon_name + record_id = "taxon-%d" % taxon_name + else: + # Resolve the taxon_id of the species + response = requests.get(GOAT_LOOKUP_API % taxon_name).json() + taxon_id = int(response["results"][0]["result"]["taxon_id"]) + record_id = response["results"][0]["id"] + + # Using API, get the taxon_ids of the species and all parents + response = requests.get(GOAT_RECORD_API % record_id).json() + body = response["records"][0]["record"] + return make_taxon_info_from_goat(body) + + +def fetch_taxon_info_from_ncbi(taxon_name: typing.Union[str, int], with_lineage=True) -> typing.Optional[TaxonInfo]: + # Using API, get the taxon_ids of the species and all parents + response = requests.get(NCBI_TAXONOMY_API % taxon_name).json() + if "taxonomy" in response["taxonomy_nodes"][0]: + body = response["taxonomy_nodes"][0]["taxonomy"] + if with_lineage: + lineage = [ + fetch_taxon_info_from_ncbi(t, with_lineage=False) for t 
in reversed(body["lineage"][2:]) + ] # skip root and cellular_organisms + else: + lineage = [] + return TaxonInfo(body["tax_id"], body["organism_name"], body.get("rank"), lineage) + + +def fetch_taxon_info(taxon_name: typing.Union[str, int]) -> TaxonInfo: + return fetch_taxon_info_from_ncbi(taxon_name) or fetch_taxon_info_from_goat(taxon_name) + + +def get_classification(taxon_info: TaxonInfo) -> typing.Dict[str, str]: + ancestors: typing.Dict[str, str] = {} + for anc_taxon_info in taxon_info.lineage: + if anc_taxon_info.rank: + ancestors[anc_taxon_info.rank.lower()] = anc_taxon_info.organism_name + # https://ncbiinsights.ncbi.nlm.nih.gov/2024/06/04/changes-ncbi-taxonomy-classifications/ + # "superkingdom" will be called "domain" + if "superkingdom" not in ancestors: + ancestors["superkingdom"] = ancestors["domain"] + return {r: ancestors[r] for r in RANKS if r in ancestors} + + +def get_odb(taxon_info: TaxonInfo, lineage_tax_ids: str, requested_buscos: typing.Optional[str]) -> typing.List[str]: + # Read the mapping between the BUSCO lineages and their taxon_id + with open(lineage_tax_ids) as file_in: + lineage_tax_ids_dict: typing.Dict[int, str] = {} + for line in file_in: + arr = line.split() + lineage_tax_ids_dict[int(arr[0])] = arr[1] + "_odb10" + + if requested_buscos: + odb_arr = requested_buscos.split(",") + valid_odbs = set(lineage_tax_ids_dict.values()) + for odb in odb_arr: + if odb not in valid_odbs: + print(f"Invalid BUSCO lineage: {odb}", file=sys.stderr) + sys.exit(1) + else: + # Do the intersection to find the ancestors that have a BUSCO lineage + odb_arr = [ + lineage_tax_ids_dict[anc_taxon_info.taxon_id] + for anc_taxon_info in taxon_info.lineage + if anc_taxon_info.taxon_id in lineage_tax_ids_dict + ] + + return odb_arr + + +def get_assembly_info(accession: str) -> typing.Dict[str, typing.Union[str, int]]: + response = requests.get(NCBI_DATASETS_API % accession).json() + if response["total_count"] != 1: + print(f"Assembly not found: {accession}", 
file=sys.stderr) + sys.exit(1) + assembly_report = response["reports"][0] + assembly_info = assembly_report["assembly_info"] + scaffold_count = assembly_report["assembly_stats"]["number_of_component_sequences"] + scaffold_count += assembly_report["assembly_stats"].get("number_of_organelles", 0) + span = int(assembly_report["assembly_stats"]["total_sequence_length"]) + span += sum(int(oi["total_seq_length"]) for oi in assembly_report.get("organelle_info", [])) + d = { + "accession": accession, + "alias": assembly_info["assembly_name"], + "bioproject": assembly_info["bioproject_accession"], + "level": assembly_info["assembly_level"].lower(), + "scaffold-count": scaffold_count, + "span": span, + } + if "biosample" in assembly_info: + d["biosample"] = assembly_info["biosample"]["accession"] + if "wgs_info" in assembly_report: + d["prefix"] = assembly_report["wgs_info"]["wgs_project_accession"] + return d + + +def adjust_taxon_id(blastn: str, taxon_info: TaxonInfo) -> int: + con = sqlite3.connect(os.path.join(blastn, "taxonomy4blast.sqlite3")) + cur = con.cursor() + for taxon_id in [taxon_info.taxon_id] + [anc_taxon_info.taxon_id for anc_taxon_info in taxon_info.lineage]: + res = cur.execute("SELECT * FROM TaxidInfo WHERE taxid = ?", (taxon_id,)) + if res.fetchone(): + return taxon_id + + +def print_yaml( + file_out, + assembly_info: typing.Dict[str, typing.Union[str, int]], + taxon_info: TaxonInfo, + classification: typing.Dict[str, str], + odb_arr: typing.List[str], +): + data = { + "assembly": assembly_info, + "busco": { + "basal_lineages": BUSCO_BASAL_LINEAGES, + # "download_dir": + "lineages": odb_arr + [lin for lin in BUSCO_BASAL_LINEAGES if lin not in odb_arr], + }, + # "reads": {}, + "revision": 1, + "settings": { + # Only settings.stats_windows is mandatory, everything else is superfluous + "blast_chunk": 100000, + "blast_max_chunks": 10, + "blast_min_length": 1000, + "blast_overlap": 0, + "stats_chunk": 1000, + "stats_windows": [0.1, 0.01, 100000, 1000000], + 
# "taxdump": , + "tmp": "/tmp", + }, + "similarity": { + # Only the presence similarity.diamond_blastx seems mandatory, everything else is superfluous + "blastn": { + "name": "nt", + # "path": , + }, + "defaults": {"evalue": 1e-10, "import_evalue": 1e-25, "max_target_seqs": 10, "taxrule": "buscogenes"}, + "diamond_blastp": { + "import_max_target_seqs": 100000, + "name": "reference_proteomes", + # "path": , + "taxrule": "blastp=buscogenes", + }, + "diamond_blastx": { + "name": "reference_proteomes", + # "path": , + }, + }, + "taxon": { + "taxid": str(taxon_info.taxon_id), # str on purpose, to mimic blobtoolkit + "name": taxon_info.organism_name, + **classification, + }, + "version": 2, + } + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + + with open(file_out, "w") as fout: + yaml.dump(data, fout) + + +def print_csv(file_out, taxon_id: int, odb_arr: typing.List[str]): + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + + with open(file_out, "w") as fout: + print("taxon_id", taxon_id, sep=",", file=fout) + for odb_val in odb_arr: + print("busco_lineage", odb_val, sep=",", file=fout) + + +def main(args=None): + args = parse_args(args) + + assembly_info: typing.Dict[str, typing.Union[str, int]] + if args.accession: + assembly_info = get_assembly_info(args.accession) + else: + assembly_info = {"level": "scaffold"} + assembly_info["file"] = args.fasta + + taxon_info = fetch_taxon_info(args.taxon_query) + classification = get_classification(taxon_info) + odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco) + taxon_id = adjust_taxon_id(args.blastn, taxon_info) + + print_yaml(args.yml_out, assembly_info, taxon_info, classification, odb_arr) + print_csv(args.csv_out, taxon_id, odb_arr) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/update_versions.py b/bin/update_versions.py index 9e014b46..1d3491a3 100755 --- a/bin/update_versions.py +++ b/bin/update_versions.py @@ -15,15 +15,33 @@ def parse_args(args=None): 
parser.add_argument("--meta_in", help="Input JSON file.", required=True) parser.add_argument("--meta_out", help="Output JSON file.", required=True) parser.add_argument("--software", help="Input YAML file.", required=True) - parser.add_argument("--version", action="version", version="%(prog)s 1.1.0") + parser.add_argument("--read_id", action="append", help="ID of a read set") + parser.add_argument("--read_type", action="append", help="Type of a read set") + parser.add_argument("--read_path", action="append", help="Path of a read set") + parser.add_argument("--blastp", help="Path to the blastp database", required=True) + parser.add_argument("--blastx", help="Path to the blastx database", required=True) + parser.add_argument("--blastn", help="Path to the blastn database", required=True) + parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True) + parser.add_argument("--version", action="version", version="%(prog)s 1.2.0") return parser.parse_args(args) -def update_meta(meta, software): - with open(meta) as fh: +def datatype_to_platform(s): + if s == "ont": + return "OXFORD_NANOPORE" + elif s.startswith("pacbio"): + return "PACBIO_SMRT" + elif s in ["hic", "illumina"]: + return "ILLUMINA" + else: + return "OTHER" + + +def update_meta(args): + with open(args.meta_in) as fh: infile = json.load(fh) - with open(software) as fh: + with open(args.software) as fh: versions = yaml.safe_load(fh) new_dict = dict() @@ -36,13 +54,27 @@ def update_meta(meta, software): del new_dict["sanger-tol/blobtoolkit"] infile["settings"]["software_versions"] = new_dict + infile["settings"]["taxdump"] = args.taxdump + for k in ["blastn", "diamond_blastp", "diamond_blastx"]: + infile["similarity"].setdefault(k, {}) + infile["similarity"]["blastn"]["path"] = args.blastn + infile["similarity"]["diamond_blastp"]["path"] = args.blastp + infile["similarity"]["diamond_blastx"]["path"] = args.blastx + + infile["reads"] = {} + for id, datatype, path in zip(args.read_id, 
args.read_type, args.read_path): + infile["reads"][id] = { + "file": path, + "plaform": datatype_to_platform(datatype), + } + return infile def main(args=None): args = parse_args(args) - data = update_meta(args.meta_in, args.software) + data = update_meta(args) with open(args.meta_out, "w") as fh: json.dump(data, fh) diff --git a/conf/modules.config b/conf/modules.config index ac597dc4..1193cf5f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -64,10 +64,6 @@ process { ext.args = "-c" } - withName: "GOAT_TAXONSEARCH" { - ext.args = "--lineage --busco" - } - withName: "PIGZ_COMPRESS" { publishDir = [ path: { "${params.outdir}/base_content" }, @@ -85,7 +81,8 @@ process { } withName: "BUSCO" { - scratch = true + // Obey "use_work_dir_as_temp", except for large genomes + scratch = { !params.use_work_dir_as_temp || (meta.genome_size < 2000000000) } ext.args = { 'test' in workflow.profile.tokenize(',') ? // Additional configuration to speed processes up during testing. // Note: BUSCO *must* see the double-quotes around the parameters diff --git a/conf/test.config b/conf/test.config index 623cf3f9..8ad70cae 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,4 +35,7 @@ params { blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" blastn = "${projectDir}/assets/test/nt_mMelMel3.1" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/conf/test_full.config b/conf/test_full.config index 6af9eecb..ca78130e 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -30,4 +30,7 @@ params { blastp = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd" blastx = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd" blastn = "${projectDir}/assets/test_full/nt_gfLaeSulp1.1" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/conf/test_raw.config b/conf/test_raw.config index 
47cc4267..0cf1d16f 100644 --- a/conf/test_raw.config +++ b/conf/test_raw.config @@ -36,4 +36,7 @@ params { blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" blastn = "${projectDir}/assets/test/nt_mMelMel3.1/" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/docs/usage.md b/docs/usage.md index 4789ff84..4d0ad33e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -162,12 +162,6 @@ If you have [GNU parallel](https://www.gnu.org/software/parallel/) installed, yo find v5/data -name "*.tar.gz" | parallel "cd {//}; tar -xzf {/}" ``` -## YAML File and Nextflow configuration - -As in the Snakemake version [a YAML configuration file](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src#configuration) is needed to generate metadata summary. This YAML config file can be generated with a genome accession value for released assemblies (for example, GCA_XXXXXXXXX.X) or can be passed for draft assemblies (for example, [GCA_922984935.2.yaml](assets/test/GCA_922984935.2.yaml) using the `--yaml` parameter. Even for draft assemblies, a placeholder value should be passed with the `--accession` parameter. - -The data in the YAML is currently ignored in the Nextflow pipeline version. The YAML file is retained only to allow compatibility with the BlobDir dataset generated by the [Snakemake version](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/blobtoolkit-pipeline/src). The taxonomic information in the YAML file can be obtained from [NCBI Taxonomy](https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/). 
- ## Changes from Snakemake to Nextflow ### Commands @@ -189,9 +183,13 @@ Nextflow nextflow run sanger-tol/blobtoolkit --input SAMPLESHEET --fasta GENOME –-accession GCA_ACCESSION --taxon TAXON_ID --taxdump TAXDUMP_DB --blastp DMND_db --blastn BLASTN_DB --blastx BLASTX_DB # Draft Assemblies -nextflow run sanger-tol/blobtoolkit --input SAMPLESHEET --fasta GENOME –-accession TAG --taxon TAXON_ID --yaml CONFIG --taxdump TAXDUMP_DB --blastp DMND_db --blastn BLASTN_DB --blastx BLASTX_DB +nextflow run sanger-tol/blobtoolkit --input SAMPLESHEET --fasta GENOME --taxon TAXON_ID --taxdump TAXDUMP_DB --blastp DMND_db --blastn BLASTN_DB --blastx BLASTX_DB ``` +The Nextflow pipeline does not support taking input from the Yaml files of the Snakemake pipeline. +Instead, Nextflow has a uniform way of setting input parameters on the command-line or via a JSON / Yaml file, +see for some examples. + ### Subworkflows Here is a full list of snakemake subworkflows and their Nextflow couterparts: @@ -235,7 +233,6 @@ List of tools for any given dataset can be fetched from the API, for example htt | busco | 5.3.2 | 5.5.0 | | diamond | 2.0.15 | 2.1.8 | | fasta_windows | | 0.2.4 | -| goat | | 0.2.5 | | minimap2 | 2.24 | 2.24 | | ncbi-datasets-cli | 14.1.0 | | | nextflow | | 23.10.0 | diff --git a/modules.json b/modules.json index ebb45a6c..3ce2a831 100644 --- a/modules.json +++ b/modules.json @@ -45,11 +45,6 @@ "installed_by": ["modules"], "patch": "modules/nf-core/fastawindows/fastawindows.diff" }, - "goat/taxonsearch": { - "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] - }, "gunzip": { "branch": "master", "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", diff --git a/modules/local/blobtoolkit/config.nf b/modules/local/blobtoolkit/config.nf deleted file mode 100644 index 32d4eacd..00000000 --- a/modules/local/blobtoolkit/config.nf +++ /dev/null @@ -1,37 +0,0 @@ -process BLOBTOOLKIT_CONFIG { - tag "$meta.id" - label 
'process_single' - - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "GENERATE_CONFIG module does not support Conda. Please use Docker / Singularity / Podman instead." - } - container "docker.io/genomehubs/blobtoolkit:4.3.9" - - input: - tuple val(meta), val(reads) - tuple val(meta), val(fasta) - - output: - tuple val(meta), path("${meta.id}/*.yaml"), emit: yaml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input_reads = reads.collect{"--reads $it"}.join(' ') - """ - btk pipeline \\ - generate-config \\ - ${prefix} \\ - $args \\ - ${input_reads} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - blobtoolkit: \$(btk --version | cut -d' ' -f2 | sed 's/v//') - END_VERSIONS - """ -} diff --git a/modules/local/blobtoolkit/updatemeta.nf b/modules/local/blobtoolkit/updatemeta.nf index a5556348..0c3aaaaf 100644 --- a/modules/local/blobtoolkit/updatemeta.nf +++ b/modules/local/blobtoolkit/updatemeta.nf @@ -9,7 +9,13 @@ process BLOBTOOLKIT_UPDATEMETA { input: tuple val(meta), path(input) + val reads path versions + // The following are passed as "val" because we just want to know the full paths. 
No staging necessary + val blastp + val blastx + val blastn + val taxdump output: tuple val(meta), path("*.json"), emit: json @@ -21,11 +27,17 @@ process BLOBTOOLKIT_UPDATEMETA { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def input_reads = reads.collect{"--read_id ${it[0].id} --read_type ${it[0].datatype} --read_path ${it[1]}"}.join(' ') """ update_versions.py \\ ${args} \\ --meta_in ${input}/meta.json \\ --software ${versions} \\ + --blastp ${blastp} \\ + --blastx ${blastx} \\ + --blastn ${blastn} \\ + --taxdump ${taxdump} \\ + $input_reads \\ --meta_out ${prefix}.meta.json cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/generate_config.nf b/modules/local/generate_config.nf new file mode 100644 index 00000000..3aab7ee9 --- /dev/null +++ b/modules/local/generate_config.nf @@ -0,0 +1,44 @@ +process GENERATE_CONFIG { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::requests=2.28.1 conda-forge::pyyaml=6.0" + container "docker.io/genomehubs/blobtoolkit:4.3.9" + + input: + tuple val(meta), val(fasta) + val taxon_query + val busco_lin + path lineage_tax_ids + tuple val(meta2), path(blastn) + + output: + tuple val(meta), path("*.yaml"), emit: yaml + tuple val(meta), path("*.csv") , emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def busco_param = busco_lin ? "--busco '${busco_lin}'" : "" + def accession_params = params.accession ? 
"--accession ${params.accession}" : "" + """ + generate_config.py \\ + --fasta $fasta \\ + --taxon_query "$taxon_query" \\ + --lineage_tax_ids $lineage_tax_ids \\ + $busco_param \\ + $accession_params \\ + --blastn $blastn \\ + --yml_out ${prefix}.yaml \\ + --csv_out ${prefix}.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + generate_config.py: \$(generate_config.py --version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/goat/taxonsearch/environment.yml b/modules/nf-core/goat/taxonsearch/environment.yml deleted file mode 100644 index e56e71f1..00000000 --- a/modules/nf-core/goat/taxonsearch/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: goat_taxonsearch -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::goat=0.2.5 diff --git a/modules/nf-core/goat/taxonsearch/main.nf b/modules/nf-core/goat/taxonsearch/main.nf deleted file mode 100644 index 62c12baa..00000000 --- a/modules/nf-core/goat/taxonsearch/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process GOAT_TAXONSEARCH { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/goat:0.2.5--h9d3141d_2': - 'biocontainers/goat:0.2.5--h9d3141d_2' }" - - input: - tuple val(meta), val(taxon), path(taxa_file) - - output: - tuple val(meta), path("*.tsv"), emit: taxonsearch - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - input = taxa_file ? "-f ${taxa_file}" : "-t \"${taxon}\"" - if (!taxon && !taxa_file) error "No input. 
Valid input: single taxon identifier or a .txt file with identifiers" - if (taxon && taxa_file ) error "Only one input is required: a single taxon identifier or a .txt file with identifiers" - """ - goat-cli taxon search \\ - $args \\ - $input > ${prefix}.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - goat: \$(goat-cli --version | cut -d' ' -f2) - END_VERSIONS - """ -} diff --git a/modules/nf-core/goat/taxonsearch/meta.yml b/modules/nf-core/goat/taxonsearch/meta.yml deleted file mode 100644 index 1bb19e30..00000000 --- a/modules/nf-core/goat/taxonsearch/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: "goat_taxonsearch" -description: Query metadata for any taxon across the tree of life. -keywords: - - public datasets - - ncbi - - genomes on a tree -tools: - - goat: - description: | - goat-cli is a command line interface to query the - Genomes on a Tree Open API. - homepage: https://github.com/genomehubs/goat-cli - documentation: https://github.com/genomehubs/goat-cli/wiki - tool_dev_url: https://genomehubs.github.io/goat-cli/goat_cli/ - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - taxon: - type: string - description: | - The taxon to search. An NCBI taxon ID, or the name of a taxon at any rank. - - taxa_file: - type: file - description: | - A file of NCBI taxonomy ID's (tips) and/or binomial names. Each line - should contain a single entry.File size is limited to 500 entries. - pattern: "*.txt" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - taxonsearch: - type: file - description: TSV file containing search results. 
- pattern: "*.tsv" -authors: - - "@alxndrdiaz" -maintainers: - - "@alxndrdiaz" - - "@muffato" diff --git a/nextflow.config b/nextflow.config index 84bae361..18994df9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,10 +12,10 @@ params { // Specify your pipeline's command line flags // Input options input = null - yaml = null align = false mask = false fetchngs_samplesheet = false + busco_lineages = null // Reference options fasta = null @@ -28,9 +28,15 @@ params { // Databases and related options taxdump = null busco = null + // Taken from https://busco-data.ezlab.org/v5/data/placement_files/mapping_taxids-busco_dataset_name.*.2019-12-16.txt.tar.gz + lineage_tax_ids = "${projectDir}/assets/mapping_taxids-busco_dataset_name.2019-12-16.txt" blastp = null blastx = null blastn = null + skip_taxon_filtering = false + + // Execution options + use_work_dir_as_temp = false // MultiQC options multiqc_config = null @@ -243,7 +249,7 @@ manifest { description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.5.1' + version = '0.6.0' doi = '10.5281/zenodo.7949058' } diff --git a/nextflow_schema.json b/nextflow_schema.json index b392e2a5..e722369d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,16 +32,21 @@ "description": "Turn on optional genome masking if needed.", "fa_icon": "fas fa-toggle-off" }, + "skip_taxon_filtering": { + "type": "boolean", + "description": "Skip filtering out hits from the same species in blast* searches.", + "fa_icon": "fas fa-toggle-off" + }, "fetchngs_samplesheet": { "type": "boolean", "description": "Turn on the conversion from a nf-core/fetchngs samplesheet.", "fa_icon": "fas fa-toggle-off" }, - "yaml": { + "busco_lineages": { "type": "string", - "format": "file-path", - "description": "Custom config file for draft assembly", - "fa_icon": "fas fa-file-alt" + "pattern": "([^,]+_odb10,)*[^,]+_odb10", + "description": "Name of the ODB10 lineages to run 
BUSCO on. By default, the pipeline will automatically select the correct lineage based on the taxonomy.", + "fa_icon": "fas fa-bone" }, "image_format": { "type": "string", @@ -100,7 +105,7 @@ "type": "object", "fa_icon": "fas fa-database", "description": "Define the location and parameters to work with databases.", - "required": ["blastp", "blastx", "blastn", "taxdump"], + "required": ["blastp", "blastx", "blastn", "lineage_tax_ids", "taxdump"], "properties": { "busco": { "type": "string", @@ -108,6 +113,13 @@ "description": "Local directory where clade-specific BUSCO lineage datasets are stored", "fa_icon": "fas fa-folder-open" }, + "lineage_tax_ids": { + "type": "string", + "format": "file-path", + "description": "Local file that holds a mapping between BUSCO lineages and taxon IDs.", + "help_text": "Initialised from https://busco-data.ezlab.org/v5/data/placement_files/mapping_taxids-busco_dataset_name.*.2019-12-16.txt.tar.gz", + "fa_icon": "fas fa-file-code" + }, "blastp": { "type": "string", "format": "file-path", @@ -136,6 +148,21 @@ } } }, + "execution": { + "title": "Execution", + "type": "object", + "description": "Control the execution of the pipeline.", + "default": "", + "properties": { + "use_work_dir_as_temp": { + "type": "boolean", + "description": "Set to true to make tools (e.g. 
sort, FastK, MerquryFK) use the work directory for their temporary files, rather than the system default.", + "fa_icon": "fas fa-arrow-circle-down", + "hidden": true + } + }, + "fa_icon": "fas fa-running" + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -341,6 +368,9 @@ { "$ref": "#/definitions/databases" }, + { + "$ref": "#/definitions/execution" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index 2a89471f..4b07723e 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -1,8 +1,7 @@ // -// Run BUSCO for a genome from GOAT and runs diamond_blastp +// Run BUSCO for a genome and runs diamond_blastp // -include { GOAT_TAXONSEARCH } from '../../modules/nf-core/goat/taxonsearch/main' include { BUSCO } from '../../modules/nf-core/busco/main' include { BLOBTOOLKIT_EXTRACTBUSCOS } from '../../modules/local/blobtoolkit/extractbuscos' include { DIAMOND_BLASTP } from '../../modules/nf-core/diamond/blastp/main' @@ -12,47 +11,24 @@ include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebusco workflow BUSCO_DIAMOND { take: fasta // channel: [ val(meta), path(fasta) ] - taxon // channel: val(taxon) + busco_lin // channel: val([busco_lineages]) busco_db // channel: path(busco_db) blastp // channel: path(blastp_db) + taxon_id // channel: val(taxon_id) main: ch_versions = Channel.empty() - // - // Fetch BUSCO lineages for taxon - // - GOAT_TAXONSEARCH ( - fasta.combine(taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } - ) - ch_versions = ch_versions.mix ( GOAT_TAXONSEARCH.out.versions.first() ) - - - // - // Get NCBI species ID - // - GOAT_TAXONSEARCH.out.taxonsearch - | map { meta, csv -> csv.splitCsv(header:true, sep:'\t', strip:true) } - | map { row -> [ row.taxon_rank, row.taxon_id ] } - | transpose() - | filter { rank,id -> rank =~ 
/species/ } - | map { rank, id -> id} - | first - | set { ch_taxid } - - // // Prepare the BUSCO linages // // 0. Initialise sone variables basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ] def lineage_position = 0 - // 1. Parse the GOAT_TAXONSEARCH output - GOAT_TAXONSEARCH.out.taxonsearch - | map { meta, csv -> csv.splitCsv(header:true, sep:'\t', strip:true) } - | map { row -> row.odb10_lineage.findAll { it != "" } } + // 1. Start from the taxon's lineages + busco_lin // 2. Add the (missing) basal lineages | map { lineages -> (lineages + basal_lineages).unique() } | flatten () @@ -117,7 +93,7 @@ workflow BUSCO_DIAMOND { // Hardcoded to match the format expected by blobtools def outext = 'txt' def cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' - DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols, ch_taxid ) + DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols, taxon_id ) ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() ) @@ -148,7 +124,6 @@ workflow BUSCO_DIAMOND { first_table = ch_first_table // channel: [ val(meta), path(full_table) ] all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ] blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ] - taxon_id = ch_taxid // channel: taxon_id multiqc // channel: [ meta, summary ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/finalise_blobdir.nf b/subworkflows/local/finalise_blobdir.nf index ffbbd534..352cb01b 100644 --- a/subworkflows/local/finalise_blobdir.nf +++ b/subworkflows/local/finalise_blobdir.nf @@ -8,6 +8,7 @@ include { COMPRESSBLOBDIR } from '../../modules/local/compressblobdir' workflow FINALISE_BLOBDIR { take: blobdir // channel: [ val(meta), path(blobdir) ] + reads // channel: [ [meta, reads] ] software // channel: [ val(meta), path(software_yml) ] summary // channel: [ val(meta), path(summary_json) ] @@ -18,7 
+19,15 @@ workflow FINALISE_BLOBDIR { // // MODULE: Update meta json file // - BLOBTOOLKIT_UPDATEMETA ( blobdir, software ) + BLOBTOOLKIT_UPDATEMETA ( + blobdir, + reads, + software, + params.blastp, + params.blastx, + params.blastn, + params.taxdump, + ) // // MODULE: Compress all the json files diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index d498269f..7a8fd112 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -6,13 +6,16 @@ include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' include { FETCHNGSSAMPLESHEET_CHECK } from '../../modules/local/fetchngssamplesheet_check' -include { BLOBTOOLKIT_CONFIG } from '../../modules/local/blobtoolkit/config' +include { GENERATE_CONFIG } from '../../modules/local/generate_config' workflow INPUT_CHECK { take: samplesheet // file: /path/to/samplesheet.csv fasta // channel: [ meta, path(fasta) ] - yaml // channel: [ meta, path(config ] + taxon // channel: val(taxon) + busco_lin // channel: val([busco_lin]) + lineage_tax_ids // channel: /path/to/lineage_tax_ids + blastn // channel: [ val(meta), path(blastn_db) ] main: ch_versions = Channel.empty() @@ -58,24 +61,53 @@ workflow INPUT_CHECK { | set { reads } - if ( !params.yaml ) { - read_files - | map { meta, data -> meta.id.split("_")[0..-2].join("_") } - | combine ( fasta ) - | map { sample, meta, fasta -> [ meta, sample ] } - | groupTuple() - | set { grouped_reads } - - BLOBTOOLKIT_CONFIG ( grouped_reads, fasta ) - ch_versions = ch_versions.mix ( BLOBTOOLKIT_CONFIG.out.versions.first() ) - ch_config = BLOBTOOLKIT_CONFIG.out.yaml - } else { - ch_config = yaml + GENERATE_CONFIG ( + fasta, + taxon, + busco_lin, + lineage_tax_ids, + blastn, + ) + ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first()) + + + // + // Parse the CSV file 
+ // + GENERATE_CONFIG.out.csv + | map { meta, csv -> csv } + | splitCsv(header: ['key', 'value']) + | branch { + taxon_id: it.key == "taxon_id" + return it.value + busco_lineage: it.key == "busco_lineage" + return it.value } + | set { ch_parsed_csv } + + + // + // Get the taxon ID if we do taxon filtering in blast* searches + // + ch_parsed_csv.taxon_id + | map { params.skip_taxon_filtering ? '' : it } + | first + | set { ch_taxon_id } + + + // + // Get the BUSCO lineages + // + ch_parsed_csv.busco_lineage + | collect + | set { ch_busco_lineages } + emit: reads // channel: [ val(meta), path(datafile) ] - config = ch_config // channel: [ val(meta), path(yaml) ] + config = GENERATE_CONFIG.out.yaml // channel: [ val(meta), path(yaml) ] + taxon_id = ch_taxon_id // channel: val(taxon_id) + busco_lineages = ch_busco_lineages // channel: val([busco_lin]) versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/run_blastn.nf b/subworkflows/local/run_blastn.nf index 7dc92fd7..d479e832 100644 --- a/subworkflows/local/run_blastn.nf +++ b/subworkflows/local/run_blastn.nf @@ -47,23 +47,37 @@ workflow RUN_BLASTN { | set { ch_chunks } // Run blastn search - // run blastn excluding taxon_id - BLASTN_TAXON ( ch_chunks, blastn, taxon_id ) - ch_versions = ch_versions.mix ( BLASTN_TAXON.out.versions.first() ) - - // check if blastn output table is empty - BLASTN_TAXON.out.txt - | branch { meta, txt -> - empty: txt.isEmpty() - not_empty: true - } - | set { ch_blastn_taxon_out } + if (params.skip_taxon_filtering) { + + // skip BLASTN_TAXON + ch_blast_blastn_input = ch_chunks + + // fake ch_blastn_taxon_out.not_empty + ch_blastn_taxon_out = [ + not_empty: Channel.empty() + ] + + } else { - // repeat the blastn search without excluding taxon_id - ch_blastn_taxon_out.empty - | join ( ch_chunks ) - | map { meta, txt, fasta -> [meta, fasta] } - | set { ch_blast_blastn_input } + // run blastn excluding taxon_id + BLASTN_TAXON ( ch_chunks, blastn, taxon_id ) + 
ch_versions = ch_versions.mix ( BLASTN_TAXON.out.versions.first() ) + + // check if blastn output table is empty + BLASTN_TAXON.out.txt + | branch { meta, txt -> + empty: txt.isEmpty() + not_empty: true + } + | set { ch_blastn_taxon_out } + + // repeat the blastn search without excluding taxon_id + ch_blastn_taxon_out.empty + | join ( ch_chunks ) + | map { meta, txt, fasta -> [meta, fasta] } + | set { ch_blast_blastn_input } + + } BLAST_BLASTN ( ch_blast_blastn_input, blastn, [] ) ch_versions = ch_versions.mix ( BLAST_BLASTN.out.versions.first() ) diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index 3610cdde..cd97fd99 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -17,7 +17,7 @@ WorkflowBlobtoolkit.initialise(params, log) // Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.taxdump, params.busco, params.blastp, params.blastx ] +def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.taxdump, params.busco, params.blastp, params.blastx, params.lineage_tax_ids ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters @@ -30,11 +30,11 @@ if (params.blastn) { ch_blastn = Channel.value([ [ 'id': file(params.blastn).bas if (params.taxdump) { ch_taxdump = file(params.taxdump) } else { exit 1, 'NCBI Taxonomy database not specified!' } if (params.fetchngs_samplesheet && !params.align) { exit 1, '--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!' 
} +if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.lineage_tax_ids).first() } else { exit 1, 'Mapping BUSCO lineage <-> taxon_ids not specified' } + // Create channel for optional parameters +if (params.busco_lineages) { ch_busco_lin = Channel.value(params.busco_lineages) } else { ch_busco_lin = Channel.value([]) } if (params.busco) { ch_busco_db = Channel.fromPath(params.busco).first() } else { ch_busco_db = Channel.value([]) } -if (params.yaml) { ch_yaml = Channel.fromPath(params.yaml) } else { ch_yaml = Channel.empty() } -if (params.yaml && params.accession) { exit 1, '--yaml cannot be provided at the same time as --accession !' } -if (!params.yaml && !params.accession) { exit 1, '--yaml and --accession are both mising. Pick one !' } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -102,7 +102,14 @@ workflow BLOBTOOLKIT { // // SUBWORKFLOW: Check samplesheet and create channels for downstream analysis // - INPUT_CHECK ( ch_input, PREPARE_GENOME.out.genome, ch_yaml ) + INPUT_CHECK ( + ch_input, + PREPARE_GENOME.out.genome, + ch_taxon, + ch_busco_lin, + ch_lineage_tax_ids, + ch_blastn, + ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) // @@ -123,13 +130,14 @@ workflow BLOBTOOLKIT { ch_versions = ch_versions.mix ( COVERAGE_STATS.out.versions ) // - // SUBWORKFLOW: Run BUSCO using lineages fetched from GOAT, then run diamond_blastp + // SUBWORKFLOW: Run BUSCO using lineages fetched from GoaT, then run diamond_blastp // BUSCO_DIAMOND ( PREPARE_GENOME.out.genome, - ch_taxon, + INPUT_CHECK.out.busco_lineages, ch_busco_db, ch_blastp, + INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( BUSCO_DIAMOND.out.versions ) @@ -140,7 +148,7 @@ workflow BLOBTOOLKIT { PREPARE_GENOME.out.genome, BUSCO_DIAMOND.out.first_table, ch_blastx, - BUSCO_DIAMOND.out.taxon_id, + INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( RUN_BLASTX.out.versions ) @@ -152,7 +160,7 @@ workflow 
BLOBTOOLKIT { RUN_BLASTX.out.blastx_out, PREPARE_GENOME.out.genome, ch_blastn, - BUSCO_DIAMOND.out.taxon_id, + INPUT_CHECK.out.taxon_id, ) // @@ -199,6 +207,7 @@ workflow BLOBTOOLKIT { // FINALISE_BLOBDIR ( BLOBTOOLS.out.blobdir, + INPUT_CHECK.out.reads.collect(flat: false).ifEmpty([]), CUSTOM_DUMPSOFTWAREVERSIONS.out.yml, VIEW.out.summary )