Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sdf preprocessing #80

Merged
merged 8 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions configs/results/npm1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,4 @@ data_paths:
"/data/npm1/pc.yaml",
]
classification_label: ["STR_connectivity_cc_thresh"]
regression_label:
[
"mean_surface_area",
"mean_volume",
"avg_dists",
"std_dists",
]
regression_label: ["mean_surface_area", "mean_volume", "avg_dists", "std_dists"]
8 changes: 1 addition & 7 deletions configs/results/npm1_64_res.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,4 @@ data_paths:
"/data/npm1_64_res/pc.yaml",
]
classification_label: ["STR_connectivity_cc_thresh"]
regression_label:
[
"mean_surface_area",
"mean_volume",
"avg_dists",
"std_dists",
]
regression_label: ["mean_surface_area", "mean_volume", "avg_dists", "std_dists"]
8 changes: 4 additions & 4 deletions docs/PREPROCESSING.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ src

# Polymorphic structures: Generate SDFs

Edit the data paths in the following files to match the location of your copy of the data, then run both.

Use the segmentation data for polymorphic structures as input to the SDF generation step.
```
src
└── br
└── data
   └── preprocessing
      └── sdf_preprocessing
         ├── image_sdfs.py <- Create 32**3 resolution SDF images
         └── pc_sdfs.py <- Sample point clouds from 32**3 resolution SDF images
         ├── image_sdfs.py <- Create scaled meshes, and 32**3 resolution SDF and seg images
         ├── get_max_bounding_box.py <- Get bounds of the largest scaled mesh
         └── pc_sdfs.py <- Sample point clouds from scaled meshes
```
4 changes: 2 additions & 2 deletions src/br/data/preprocessing/pc_preprocessing/pcna.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def main(args):
main(args)

"""
Example run:
Example run:

python src/br/data/preprocessing/pc_preprocessing/pcna --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_pcna/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
4 changes: 2 additions & 2 deletions src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def main(args):
main(args)

"""
Example run:
Example run:

python src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
python punctate_cyto.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
4 changes: 2 additions & 2 deletions src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def main(args):
main(args)

"""
Example run:
Example run:

python src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@

import pandas as pd
from aicsimageio import AICSImage
from monai.transforms import FillHoles
from tqdm import tqdm
from br.data.utils import (
get_mesh_from_image,
)
from multiprocessing import Pool
from pathlib import Path
import argparse


def get_bounds(r):
return_df = {'x_delta': [], 'y_delta': [], 'z_delta': [], 'cell_id': [], 'max_delta': []}
cellid = r['CellId']

hole_fill_transform = FillHoles()
seg = AICSImage(r["crop_seg_masked"]).data.squeeze()
seg = hole_fill_transform(seg).numpy()

mesh, _, _ = get_mesh_from_image(seg, sigma=0, lcc=False, denoise=False)

bounds = mesh.GetBounds()
x_delta = bounds[1] - bounds[0]
y_delta = bounds[3] - bounds[2]
z_delta = bounds[5] - bounds[4]
max_delta = max([x_delta, y_delta, z_delta])
return_df['x_delta'].append(x_delta)
return_df['y_delta'].append(y_delta)
return_df['z_delta'].append(z_delta)
return_df['cell_id'].append(cellid)
return_df['max_delta'].append(max_delta)

return return_df

def main(args):
# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_csv(args.manifest)

if args.global_path:
df["crop_seg_masked"] = df["crop_seg_masked"].apply(lambda x: args.global_path + x)

all_rows = []
for _, row in df.iterrows():
all_rows.append(row)

with Pool(1) as p:
jobs = tuple(
tqdm(
p.imap_unordered(
get_bounds,
all_rows,
),
total=len(all_rows),
desc="get_bounds",
)
)
jobs = [i for i in jobs if i is not None]
return_df = pd.DataFrame(jobs).reset_index(drop=True)

return_df.to_csv(Path(args.save_path) / Path('bounds.csv'))


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Script for getting global mesh bounds for polymorphic structures from WTC-11 hIPS single cell image dataset"
)
parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
parser.add_argument(
"--manifest",
type=str,
required=True,
help="Path to preprocessed single cell image manifest.",
)
parser.add_argument(
"--global_path",
type=str,
default=None,
required=False,
help="Path to append to relative paths in preprocessed manifest",
)


args = parser.parse_args()
main(args)

"""
Example run:
python get_max_bounding_box.py --save_path './test_img/' --manifest ""../../../../../morphology_appropriate_representation_learning/preprocessed_data/npm1/manifest.csv" --global_path "../../../../../"
"""

130 changes: 110 additions & 20 deletions src/br/data/preprocessing/sdf_preprocessing/image_sdfs.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,136 @@
import argparse
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
import pyvista as pv
from aicsimageio import AICSImage
from monai.transforms import FillHoles
from tqdm import tqdm

from br.analysis.analysis_utils import str2bool
from br.data.utils import (
get_mesh_from_image,
get_scaled_seg_from_mesh,
get_sdf_from_mesh,
get_scaled_mesh,
get_sdf_from_mesh_vtk,
voxelize_scaled_mesh,
)

path = PATH_TO_SINGLE_CELL_DATA

parent_out_dir = "..."
path = ""
out_dir_scaled_seg = "./"
out_dir_scaled_sdf = "./"
out_dir_mesh = "./"
df = pd.read_csv(path)

hole_fill_transform = FillHoles()

for i, r in df.iterrows():
def process(r):
cellid = r["CellId"]
scale_factor = r["scale_factor"]
vox_resolution = r["vox_resolution"]

out_path_sdf = f"{out_dir_scaled_sdf}/{cellid}"
out_path_seg = f"{out_dir_scaled_seg}/{cellid}"
out_path_mesh = f"{out_dir_mesh}/{cellid}.stl"

seg = AICSImage(r["crop_seg"]).data.squeeze()
hole_fill_transform = FillHoles()
seg = AICSImage(r["crop_seg_masked"]).data.squeeze()
seg = hole_fill_transform(seg).numpy()

mesh, _, _ = get_mesh_from_image(seg, sigma=0, lcc=False, denoise=False)

pv.wrap(mesh).save(out_path_mesh)

sdf, scale_factor = get_sdf_from_mesh(
path=None, vox_resolution=32, scale_factor=None, vpolydata=mesh
sdf, scale_factor = get_sdf_from_mesh_vtk(
None, vox_resolution=vox_resolution, scale_factor=scale_factor, vpolydata=mesh
)
np.save(out_path_sdf, sdf)
vox_shape = (vox_resolution, vox_resolution, vox_resolution)
scaled_mesh, _ = get_scaled_mesh(None, int(vox_resolution), scale_factor, mesh, True)

scaled_seg = voxelize_scaled_mesh(scaled_mesh)
com = scaled_seg.shape
pad = []
for i, j in zip(vox_shape, com):
pad.append((int(i - j) // 2, int(i - j) // 2))
scaled_seg = np.pad(scaled_seg, pad)

scaled_seg, _ = get_scaled_seg_from_mesh(
path=None, vox_resolution=32, scale_factor=scale_factor, vpolydata=mesh
)
np.save(out_path_seg, scaled_seg)


def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

global out_dir_scaled_seg
global out_dir_scaled_sdf
global out_dir_mesh

out_dir_scaled_seg = Path(args.save_path) / Path("outputs_seg")
out_dir_scaled_sdf = Path(args.save_path) / Path("outputs_sdf")
out_dir_mesh = Path(args.save_path) / Path("outputs_mesh")

Path(out_dir_scaled_seg).mkdir(parents=True, exist_ok=True)
Path(out_dir_scaled_sdf).mkdir(parents=True, exist_ok=True)
Path(out_dir_mesh).mkdir(parents=True, exist_ok=True)

scale_factor_path = Path(args.manifest).parent / Path("scale_factor.npz")

sc_factor_data = np.load(scale_factor_path, allow_pickle=True)
scale_factor_dict = dict(zip(sc_factor_data["keys"], sc_factor_data["values"]))

df = pd.read_csv(args.manifest)

if args.global_path:
df["crop_seg_masked"] = df["crop_seg_masked"].apply(lambda x: args.global_path + x)

if args.debug:
df = df.sample(n=5).reset_index(drop=True)

all_rows = []
for _, row in tqdm(df.iterrows(), total=len(df)):
row["scale_factor"] = scale_factor_dict[row["CellId"]]
row["vox_resolution"] = args.vox_resolution
all_rows.append(row)

with Pool(40) as p:
_ = tuple(
tqdm(
p.imap_unordered(
process,
all_rows,
),
total=len(all_rows),
desc="compute_everything",
)
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Script for computing scaled segmentations and SDFs for polymorphic structures from WTC-11 hIPS single cell image dataset"
)
parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
parser.add_argument(
"--manifest",
type=str,
required=True,
help="Path to preprocessed single cell image manifest.",
)
parser.add_argument(
"--vox_resolution",
type=int,
required=True,
help="Resolution to voxelize images to",
)
parser.add_argument("--debug", type=str2bool, default=False, help="Enable debug mode.")
parser.add_argument(
"--global_path",
type=str,
default=None,
required=False,
help="Path to append to relative paths in preprocessed manifest",
)

args = parser.parse_args()
main(args)

"""
Example run:

python image_sdfs.py --save_path "./test_img/" --manifest ""../../../../../morphology_appropriate_representation_learning/preprocessed_data/npm1/manifest.csv" --vox_resolution 32 --debug True --global_path "../../../../../"
"""
Loading
Loading