diff --git a/src/datasets/loaders/vizgen_merscope/config.vsh.yaml b/src/datasets/loaders/vizgen_merscope/config.vsh.yaml
new file mode 100644
index 00000000..acdaf8cd
--- /dev/null
+++ b/src/datasets/loaders/vizgen_merscope/config.vsh.yaml
@@ -0,0 +1,77 @@
+name: vizgen_merscope
+namespace: datasets/loaders
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - type: string
+        name: --gcloud_bucket
+        default: "vz-ffpe-showcase"
+        description: "Google Cloud bucket name"
+      - type: string
+        name: --dataset_bucket_name
+        default: "HumanBreastCancerPatient1"
+        description: "Name of the dataset base directory in the Vizgen Google Cloud bucket"
+      - type: string
+        name: --segmentation_id
+        default: ["cell"]
+        description: The segmentation identifier. Currently only "cell" is supported.
+        multiple: true
+  - name: Metadata
+    arguments:
+      - type: string
+        name: --id
+        description: "A unique identifier for the dataset"
+        required: true
+      - name: --dataset_name
+        type: string
+        description: Nicely formatted name.
+        required: true
+      - type: string
+        name: --dataset_url
+        description: Link to the original source of the dataset.
+        required: false
+      - name: --dataset_reference
+        type: string
+        description: Bibtex reference of the paper in which the dataset was published.
+        required: false
+      - name: --dataset_summary
+        type: string
+        description: Short description of the dataset.
+        required: true
+      - name: --dataset_description
+        type: string
+        description: Long description of the dataset.
+        required: true
+      - name: --dataset_organism
+        type: string
+        description: The organism of the sample in the dataset.
+        required: false
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        __merge__: /src/api/file_common_ist.yaml
+        direction: output
+        required: true
+
+resources:
+  - type: python_script
+    path: script.py
+  - path: download.sh
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+    __merge__:
+      - /src/base/setup_spatialdata_partial.yaml
+    setup:
+      - type: python
+        pypi:
+          - spatialdata-io
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highmem, midcpu, midtime]
diff --git a/src/datasets/loaders/vizgen_merscope/download.sh b/src/datasets/loaders/vizgen_merscope/download.sh
new file mode 100755
index 00000000..07dfee2e
--- /dev/null
+++ b/src/datasets/loaders/vizgen_merscope/download.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+######################################################################
+# Requirements:
+# - Install google-cloud-sdk, e.g. mamba install -c conda-forge google-cloud-sdk
+# - Authenticate with your Google account: gcloud auth login --no-browser
+#
+# Usage: download.sh BUCKET_NAME DATASET OUT_DIR DRY_RUN
+# - DATASET refers to the naming in the Vizgen download page
+#
+# Vizgen cancer panel datasets download page: https://console.cloud.google.com/storage/browser/vz-ffpe-showcase
+# The script should also work for other Vizgen-style data buckets.
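+#
+# Example invocation (illustrative only; the bucket and dataset names below are the
+# defaults from config.vsh.yaml, and ./raw_data is an arbitrary output directory):
+#   ./download.sh vz-ffpe-showcase HumanBreastCancerPatient1 ./raw_data true    # dry-run: only list files
+#   ./download.sh vz-ffpe-showcase HumanBreastCancerPatient1 ./raw_data false   # download the data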
+
+BUCKET_NAME=$1
+DATASET=$2
+OUT_DIR=$3
+DRY_RUN=$4
+
+######################################################################
+
+# Suppress BrokenPipeError messages from Python
+export PYTHONWARNINGS="ignore:BrokenPipeError"
+
+LIST_LIMIT=10  # Maximum number of files to list in dry-run mode
+
+# List of files or directories to download
+FILES=(
+    "cell_by_gene.csv"
+    "cell_metadata.csv"
+    "detected_transcripts.csv"
+    "cell_boundaries/"  # This is a directory
+    "cell_boundaries.parquet"
+    "images/mosaic_DAPI_z3.tif"  # If you want all images, change this to "images/"
+)
+
+# Check if gsutil is installed
+if ! command -v gsutil &> /dev/null; then
+    echo "ERROR: gsutil is not installed. Please install the Google Cloud SDK."
+    exit 1
+fi
+
+# Check if user is authenticated
+if ! gcloud auth list 2>&1 | grep -q ACTIVE; then
+    echo "ERROR: No authenticated Google Cloud account found. Please authenticate using 'gcloud auth login'."
+    exit 1
+fi
+
+
+# Loop over each file or directory to download
+for FILE in "${FILES[@]}"; do
+    # Get the directory from the file path (remove the file name)
+    DIR=$(dirname "$FILE")
+
+    # Create the full target directory path
+    TARGET_DIR="$OUT_DIR/$DIR"
+
+    # If DIR is '.', set TARGET_DIR to OUT_DIR
+    if [ "$DIR" = "." ]; then
+        TARGET_DIR="$OUT_DIR"
+    fi
+
+    # Create the directory if it doesn't exist
+    mkdir -p "$TARGET_DIR"
+
+    # Check if the file or directory exists in the bucket
+    if gsutil ls -d "gs://$BUCKET_NAME/$DATASET/$FILE" > /dev/null 2>&1; then
+        if [ "$DRY_RUN" = true ]; then
+            # Dry-run: list files instead of downloading
+            echo -e "\nSimulating download for gs://$BUCKET_NAME/$DATASET/$FILE"
+            echo -e "Download to $TARGET_DIR"
+
+            if [[ "$FILE" == */ ]]; then
+                echo "Listing first $LIST_LIMIT files from gs://$BUCKET_NAME/$DATASET/$FILE"
+                gsutil ls "gs://$BUCKET_NAME/$DATASET/$FILE" 2>/dev/null | head -n $LIST_LIMIT
+            else
+                gsutil ls "gs://$BUCKET_NAME/$DATASET/$FILE"
+            fi
+        else
+            # If it's a directory, download recursively
+            if [[ "$FILE" == */ ]]; then
+                echo "Downloading directory gs://$BUCKET_NAME/$DATASET/$FILE to $TARGET_DIR/"
+                gsutil -m cp -r "gs://$BUCKET_NAME/$DATASET/$FILE" "$TARGET_DIR/"
+            else
+                # Download the file, maintaining directory structure under OUT_DIR
+                echo "Downloading file gs://$BUCKET_NAME/$DATASET/$FILE to $TARGET_DIR/"
+                gsutil cp "gs://$BUCKET_NAME/$DATASET/$FILE" "$TARGET_DIR/"
+            fi
+        fi
+    else
+        # Skip download if file or directory does not exist
+        echo "Skipping $FILE: does not exist in gs://$BUCKET_NAME/$DATASET/"
+    fi
+done
diff --git a/src/datasets/loaders/vizgen_merscope/script.py b/src/datasets/loaders/vizgen_merscope/script.py
new file mode 100644
index 00000000..fa5d25b2
--- /dev/null
+++ b/src/datasets/loaders/vizgen_merscope/script.py
@@ -0,0 +1,198 @@
+# Dataset source: https://vizgen.com/data-release-program/
+
+import os
+import stat
+from pathlib import Path
+import subprocess
+from datetime import datetime
+import spatialdata as sd
+import spatialdata_io as sdio
+
+## VIASH START
+par = {
+    "gcloud_bucket": "vz-ffpe-showcase",
+    "dataset_bucket_name": "HumanBreastCancerPatient1",
+    "segmentation_id": ["cell"],
+    "output": "output.zarr",
+    "id": "vizgen_merscope/2022_vizgen_human_breast_cancer_merfish/rep1",
+    # "dataset_id": "2022Vizgen_human_breast_cancer_Patient1",
+    "dataset_name": "value",
+    "dataset_url": "https://vizgen.com/data-release-program/",
+    "dataset_reference": "value",
+    "dataset_summary": "value",
+    "dataset_description": "value",
+    "dataset_organism": "human",
+}
+## VIASH END
+
+assert ("cell" in par["segmentation_id"]) and (len(par["segmentation_id"]) == 1), \
+    "Currently only the 'cell' segmentation is supported: cell labels are derived in this script, and MERSCOPE does not provide other segmentations."
+
+t0 = datetime.now()
+
+# Download raw data
+print(datetime.now() - t0, "Download raw data", flush=True)
+download_script = f"{meta['resources_dir']}/download.sh"  # https://viash.io/guide/component/variables.html#meta-variables-in-meta
+BUCKET_NAME = par["gcloud_bucket"]
+DATASET = par["dataset_bucket_name"]
+OUT_DIR = meta["temp_dir"]
+DRY_RUN = "false"
+
+# Change permissions of the download script to make it executable
+os.chmod(download_script, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)  # grant read, write, and execute permissions to the user
+
+# Run the download script
+out = subprocess.run(
+    [download_script, BUCKET_NAME, DATASET, OUT_DIR, DRY_RUN],
+    check=True, capture_output=True, text=True,
+)
+print(out.stdout, flush=True)
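+
+# After a successful (non-dry-run) download, OUT_DIR is expected to contain the files
+# requested in download.sh's FILES list (illustrative overview, not enforced here):
+#   cell_by_gene.csv, cell_metadata.csv, detected_transcripts.csv,
+#   cell_boundaries.parquet (or legacy cell_boundaries/*.hdf5), images/mosaic_DAPI_z3.tif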
+
+# If the cell polygons are in the old format (cell_boundaries/*.hdf5 instead of cell_boundaries.parquet),
+# the raw data needs to be converted for the spatialdata-io loader
+# (see: https://github.com/scverse/spatialdata-io/issues/71#issuecomment-1741995582)
+import h5py
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import Polygon
+from shapely import MultiPolygon
+
+
+def read_boundary_hdf5(folder):
+    print("Convert boundary hdf5 to parquet", flush=True)
+    all_boundaries = {}
+    boundaries = None
+    hdf5_files = os.listdir(folder + "/cell_boundaries/")
+    n_files = len(hdf5_files)
+    incr = max(1, n_files // 15)  # print progress roughly 15 times; avoid modulo by zero for small datasets
+    for idx, fname in enumerate(hdf5_files):
+        if (idx % incr) == 0:
+            print(f"\tProcessed {idx}/{n_files}", flush=True)
+        with h5py.File(folder + "/cell_boundaries/" + fname, "r") as f:
+            for key in f["featuredata"].keys():
+                # It doesn't matter which zIndex we use; wrap in a MultiPolygon to work with the read function in spatialdata-io
+                polygon = MultiPolygon([Polygon(f["featuredata"][key]["zIndex_3"]["p_0"]["coordinates"][()][0])])
+                if boundaries is not None:
+                    boundaries.loc[key] = polygon
+                else:
+                    boundaries = gpd.GeoDataFrame(geometry=gpd.GeoSeries(polygon, index=[key]))
+        all_boundaries[fname] = boundaries
+        boundaries = None
+    all_concat = pd.concat(list(all_boundaries.values()))
+    all_concat = all_concat[~all_concat.index.duplicated(keep="first")]  # hdf5 can contain duplicates with the same cell_id and position, remove those
+    all_concat.rename_geometry("geometry_renamed", inplace=True)  # renaming to make it compatible with spatialdata-io
+    all_concat["EntityID"] = all_concat.index  # adding to make it compatible with spatialdata-io
+    all_concat["ZIndex"] = 0  # adding to make it compatible with spatialdata-io
+    all_concat.to_parquet(folder + "/cell_boundaries.parquet")
+
+    count_path = f"{folder}/cell_by_gene.csv"
+    obs_path = f"{folder}/cell_metadata.csv"
+
+    data = pd.read_csv(count_path, index_col=0)
+    obs = pd.read_csv(obs_path, index_col=0)
+
+    data.index = obs.index.astype(str)  # data index in the old software is range(n_obs)
+    data.index.name = "cell"  # renaming to make it compatible with spatialdata-io
+    obs.index.name = "EntityID"  # renaming to make it compatible with spatialdata-io
+    data.to_csv(count_path)
+    obs.to_csv(obs_path)
+
+
+if not (Path(OUT_DIR) / "cell_boundaries.parquet").exists():
+    read_boundary_hdf5(str(OUT_DIR))
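+
+# Optional sanity check (illustrative sketch, not part of the loader): the converted parquet
+# should expose the columns spatialdata-io expects, e.g.
+#   gdf = gpd.read_parquet(str(Path(OUT_DIR) / "cell_boundaries.parquet"))
+#   assert {"EntityID", "ZIndex"}.issubset(gdf.columns)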
+
+# Generate spatialdata.zarr
+RAW_DATA_DIR = Path(OUT_DIR)
+
+#########################################
+# Convert raw files to spatialdata zarr #
+#########################################
+print(datetime.now() - t0, "Convert raw files to spatialdata zarr", flush=True)
+
+slide_name = "slide"
+
+sdata = sdio.merscope(
+    RAW_DATA_DIR,
+    vpt_outputs=None,
+    z_layers=3,
+    region_name=None,
+    slide_name=slide_name,
+    backend=None,
+    transcripts=True,
+    cells_boundaries=True,
+    cells_table=True,
+    mosaic_images=True,
+    # imread_kwargs=mappingproxy({}),
+    # image_models_kwargs=mappingproxy({}),
+)
+
+###############
+# Rename keys #
+###############
+print(datetime.now() - t0, "Rename keys", flush=True)
+
+name = slide_name + "_" + RAW_DATA_DIR.name
+
+elements_renaming_map = {
+    f"{name}_z3": "morphology_mip",  # TODO: this is not actually a maximum-intensity projection; either rename the label later or actually project over z. We also want 3D at some point.
+    f"{name}_transcripts": "transcripts",
+    f"{name}_polygons": "cell_polygons",
+    "table": "metadata",
+}
+
+for old_key, new_key in elements_renaming_map.items():
+    sdata[new_key] = sdata[old_key]
+    del sdata[old_key]
+
+# Rename transcript column
+sdata["transcripts"] = sdata["transcripts"].rename(columns={"global_z": "z"})
+
+######################################
+# Throw out all channels except DAPI #
+######################################
+print(datetime.now() - t0, "Throw out all channels except DAPI", flush=True)
+
+# TODO: in the future we want to keep the PolyT and Cellbound1/2/3 stains. Note, however, that saving or
+# plotting the sdata somehow fails when these channels aren't excluded; the cause is unclear.
+sdata["morphology_mip"] = sdata["morphology_mip"].sel(c=["DAPI"])
+
+#################################
+# Get cell labels from polygons #
+#################################
+print(datetime.now() - t0, "Get cell labels from polygons", flush=True)
+
+img_extent = sd.get_extent(sdata["morphology_mip"])
+# TODO: the rasterize function currently has a bug, but the resulting error is small at the given spatial resolution.
+# Check https://github.com/scverse/spatialdata/issues/165 for updates on this bug.
+sdata["cell_labels"] = sd.rasterize(
+    sdata["cell_polygons"],
+    ["x", "y"],
+    min_coordinate=[int(img_extent["x"][0]), int(img_extent["y"][0])],
+    max_coordinate=[int(img_extent["x"][1]), int(img_extent["y"][1])],
+    target_coordinate_system="global",
+    target_unit_to_pixels=1,
+    return_regions_as_labels=True,
+)
+del sdata["cell_labels"].attrs["label_index_to_category"]
+
+##############################
+# Add info to metadata table #
+##############################
+print(datetime.now() - t0, "Add info to metadata table", flush=True)
+
+sdata["metadata"].uns["dataset_id"] = par["id"]
+sdata["metadata"].uns["dataset_name"] = par["dataset_name"]
+sdata["metadata"].uns["dataset_url"] = par["dataset_url"]
+sdata["metadata"].uns["dataset_reference"] = par["dataset_reference"]
+sdata["metadata"].uns["dataset_summary"] = par["dataset_summary"]
+sdata["metadata"].uns["dataset_description"] = par["dataset_description"]
+sdata["metadata"].uns["dataset_organism"] = par["dataset_organism"]
+sdata["metadata"].uns["segmentation_id"] = par["segmentation_id"]
+
+#########
+# Write #
+#########
+print(datetime.now() - t0, f"Writing to {par['output']}", flush=True)
+
+sdata.write(par["output"])
+
+print(datetime.now() - t0, "Done", flush=True)
\ No newline at end of file