Skip to content

Commit

Permalink
Merge pull request #79 from AllenCell/cleanup_pc_preprocessing
Browse files Browse the repository at this point in the history
Cleanup pc preprocessing
  • Loading branch information
ritvikvasan authored Dec 13, 2024
2 parents 4b493c6 + e4e5018 commit 877ae43
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 30 deletions.
6 changes: 4 additions & 2 deletions docs/PREPROCESSING.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,17 @@ Preprocessing is divided into three steps that use two different virtual environ

# Punctate structures: Generate pointclouds

Edit the data paths in the following file to match the location of the outputs of the alignment, masking, and registration step, then run it.
Use the preprocessed data manifest generated by the image alignment, masking, and registration steps as input to the point cloud generation step.

```
src
└── br
└── data
   └── preprocessing
      └── pc_preprocessing
         └── punctate_cyto.py <- Point cloud sampling from raw images for punctate structures here
         └── pcna.py <- Point cloud sampling from raw images for DNA replication foci dataset here
         └── punctate_nuc.py <- Point cloud sampling from raw images of nuclear structures from the WTC-11 hIPS single cell image dataset here
         └── punctate_cyto.py <- Point cloud sampling from raw images of cytoplasmic structures from the WTC-11 hIPS single cell image dataset here
```

# Polymorphic structures: Generate SDFs
Expand Down
12 changes: 7 additions & 5 deletions src/br/analysis/visualize_pointclouds.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main(args):
for _, this_image in orig_image_df.iterrows():
cell_id = this_image["CellId"]
if not strat:
strat_val = this_image['structure_name']
strat_val = this_image["structure_name"]

if args.dataset_name == "pcna":
points_all, _, img, center = compute_labels_pcna(this_image, False)
Expand Down Expand Up @@ -108,10 +108,12 @@ def main(args):
if mem_ind is not None:
img_mem = img[mem_ind]

if (args.dataset_name == 'other_punctate') and (strat_val in ["CETN2", "RAB5A", "SLC25A17"]):
img_raw = np.where(img_mem, img_raw, 0) # mask by mem/nuc seg
if (args.dataset_name == "other_punctate") and (
strat_val in ["CETN2", "RAB5A", "SLC25A17"]
):
img_raw = np.where(img_mem, img_raw, 0) # mask by mem/nuc seg
else:
img_raw = np.where(img_nuc, img_raw, 0) # mask by mem/nuc seg
img_raw = np.where(img_nuc, img_raw, 0) # mask by mem/nuc seg

# Sample sparse point cloud and get images
probs2 = points_all["s"].values
Expand Down Expand Up @@ -168,7 +170,7 @@ def main(args):
center_slice=center_slice,
)
ax_array[2].set_title("Sampling sparse PC")
print(f'Saving {name}.png')
print(f"Saving {name}.png")
fig.savefig(Path(args.save_path) / Path(f"{name}.png"), bbox_inches="tight", dpi=300)


Expand Down
50 changes: 44 additions & 6 deletions src/br/data/preprocessing/pc_preprocessing/pcna.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import argparse
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -64,26 +66,34 @@ def compute_labels(row, save=True):

cell_id = str(row["CellId"])

save_path = path_prefix + cell_id + ".ply"
save_path = Path(path_prefix) / Path(cell_id + ".ply")

new_cents = new_cents.astype(float)

cloud = PyntCloud(new_cents)
cloud.to_file(save_path)
cloud.to_file(str(save_path))


def get_center_of_mass(img):
    """Return the rounded centroid of the positive voxels of ``img``.

    The centroid is the per-axis mean of the indices where ``img > 0``,
    rounded half-up to integer indices.

    Args:
        img: Array-like that supports ``img > 0`` (e.g. a NumPy array).

    Returns:
        1-D integer NumPy array with one index per axis of ``img``.

    Raises:
        ValueError: If ``img`` has no positive entries. Previously this case
            produced a NaN mean that was silently cast to a meaningless
            integer.
    """
    coords = np.stack(np.where(img > 0))
    if coords.shape[1] == 0:
        raise ValueError("Cannot compute center of mass: image has no positive voxels")
    center_of_mass = np.mean(coords, axis=1)
    # floor(x + 0.5) rounds halves up, unlike np.rint which rounds half to even.
    return np.floor(center_of_mass + 0.5).astype(int)


if __name__ == "__main__":
df = pd.read_csv(PCNA_SINGLE_CELL_PATH)
def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_parquet(args.preprocessed_manifest)

if args.global_path:
df["registered_path"] = df["registered_path"].apply(lambda x: args.global_path + x)

path_prefix = SAVE_LOCATION
global path_prefix
path_prefix = args.save_path

all_rows = []
for ind, row in tqdm(df.iterrows(), total=len(df)):
for _, row in tqdm(df.iterrows(), total=len(df)):
all_rows.append(row)

with Pool(40) as p:
Expand All @@ -97,3 +107,31 @@ def get_center_of_mass(img):
desc="compute_everything",
)
)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description="Script for computing point clouds for PCNA dataset"
    )
    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
    # main() concatenates this value in front of each "registered_path" entry,
    # so the help text describes it as a prefix rather than a suffix.
    parser.add_argument(
        "--global_path",
        type=str,
        default=None,
        required=False,
        help="Path prefix to prepend to relative paths in preprocessed manifest",
    )
    parser.add_argument(
        "--preprocessed_manifest",
        type=str,
        required=True,
        help="Path to processed single cell image manifest.",
    )
    args = parser.parse_args()
    main(args)

"""
Example run:
python src/br/data/preprocessing/pc_preprocessing/pcna.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_pcna/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
51 changes: 45 additions & 6 deletions src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import argparse
import warnings
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -96,25 +98,34 @@ def compute_labels(row, save=True):

cell_id = str(row["CellId"])

save_path = path_prefix + cell_id + ".ply"
save_path = Path(path_prefix) / Path(cell_id + ".ply")

new_cents = new_cents.astype(float)
cloud = PyntCloud(new_cents)
cloud.to_file(save_path)
cloud.to_file(str(save_path))


def get_center_of_mass(img):
    """Return the rounded centroid of the positive voxels of ``img``.

    The centroid is the per-axis mean of the indices where ``img > 0``,
    rounded half-up to integer indices.

    Args:
        img: Array-like that supports ``img > 0`` (e.g. a NumPy array).

    Returns:
        1-D integer NumPy array with one index per axis of ``img``.

    Raises:
        ValueError: If ``img`` has no positive entries. Previously this case
            produced a NaN mean that was silently cast to a meaningless
            integer.
    """
    coords = np.stack(np.where(img > 0))
    if coords.shape[1] == 0:
        raise ValueError("Cannot compute center of mass: image has no positive voxels")
    center_of_mass = np.mean(coords, axis=1)
    # floor(x + 0.5) rounds halves up, unlike np.rint which rounds half to even.
    return np.floor(center_of_mass + 0.5).astype(int)


if __name__ == "__main__":
df = pd.read_parquet(SINGLE_CELL_IMAGE_PATH)
def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_parquet(args.preprocessed_manifest)
df = df.loc[df["structure_name"].isin(SKEW_EXP_DICT.keys())]

path_prefix = SAVE_LOCATION
if args.global_path:
df["registered_path"] = df["registered_path"].apply(lambda x: args.global_path + x)

global path_prefix
path_prefix = args.save_path

all_rows = []
for ind, row in tqdm(df.iterrows(), total=len(df)):
for _, row in tqdm(df.iterrows(), total=len(df)):
all_rows.append(row)

with Pool(40) as p:
Expand All @@ -128,3 +139,31 @@ def get_center_of_mass(img):
desc="compute_everything",
)
)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description="Script for computing point clouds for cytoplasmic structures from WTC-11 hIPS single cell image dataset"
    )
    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
    # main() concatenates this value in front of each "registered_path" entry,
    # so the help text describes it as a prefix rather than a suffix.
    parser.add_argument(
        "--global_path",
        type=str,
        default=None,
        required=False,
        help="Path prefix to prepend to relative paths in preprocessed manifest",
    )
    parser.add_argument(
        "--preprocessed_manifest",
        type=str,
        required=True,
        help="Path to processed single cell image manifest.",
    )
    args = parser.parse_args()
    main(args)

"""
Example run:
python src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""
60 changes: 49 additions & 11 deletions src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import argparse
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd
from pyntcloud import PyntCloud
from scipy.ndimage import binary_dilation
from skimage.io import imread
from tqdm import tqdm

STRUCTS = ["HIST1H2BJ", "NUP153", "SMC1A", "SON"]


def compute_labels(row, save=True):
path = row["registered_path"]
Expand Down Expand Up @@ -64,31 +70,35 @@ def compute_labels(row, save=True):

cell_id = str(row["CellId"])

save_path = path_prefix + cell_id + ".ply"
save_path = Path(path_prefix) / Path(cell_id + ".ply")

new_cents = new_cents.astype(float)
cloud = PyntCloud(new_cents)
cloud.to_file(save_path)
cloud.to_file(str(save_path))


def get_center_of_mass(img):
    """Return the rounded centroid of the positive voxels of ``img``.

    The centroid is the per-axis mean of the indices where ``img > 0``,
    rounded half-up to integer indices.

    Args:
        img: Array-like that supports ``img > 0`` (e.g. a NumPy array).

    Returns:
        1-D integer NumPy array with one index per axis of ``img``.

    Raises:
        ValueError: If ``img`` has no positive entries. Previously this case
            produced a NaN mean that was silently cast to a meaningless
            integer.
    """
    coords = np.stack(np.where(img > 0))
    if coords.shape[1] == 0:
        raise ValueError("Cannot compute center of mass: image has no positive voxels")
    center_of_mass = np.mean(coords, axis=1)
    # floor(x + 0.5) rounds halves up, unlike np.rint which rounds half to even.
    return np.floor(center_of_mass + 0.5).astype(int)


if __name__ == "__main__":
df = pd.read_parquet(SINGLE_CELL_IMAGE_PATH)
def main(args):

# make save path directory
Path(args.save_path).mkdir(parents=True, exist_ok=True)

df = pd.read_parquet(args.preprocessed_manifest)
df = df.loc[df["structure_name"].isin(STRUCTS)]

path_prefix = SAVE_LOCATION
if args.global_path:
df["registered_path"] = df["registered_path"].apply(lambda x: args.global_path + x)

global path_prefix
path_prefix = args.save_path

all_rows = []
for ind, row in tqdm(df.iterrows(), total=len(df)):
for _, row in tqdm(df.iterrows(), total=len(df)):
all_rows.append(row)
# if str(row['CellId']) == '660844':
# print('yes')
# compute_labels(row)

from multiprocessing import Pool

with Pool(40) as p:
_ = tuple(
Expand All @@ -101,3 +111,31 @@ def get_center_of_mass(img):
desc="compute_everything",
)
)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description="Script for computing point clouds for nuclear structures from WTC-11 hIPS single cell image dataset"
    )
    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
    # main() concatenates this value in front of each "registered_path" entry,
    # so the help text describes it as a prefix rather than a suffix.
    parser.add_argument(
        "--global_path",
        type=str,
        default=None,
        required=False,
        help="Path prefix to prepend to relative paths in preprocessed manifest",
    )
    parser.add_argument(
        "--preprocessed_manifest",
        type=str,
        required=True,
        help="Path to processed single cell image manifest.",
    )
    args = parser.parse_args()
    main(args)

"""
Example run:
python src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
"""

0 comments on commit 877ae43

Please sign in to comment.