scverse · Lilly-May · Dec 8, 2023 · Dec 6, 2023 · Dec 6, 2023 · Dec 7, 2023
diff --git a/pertpy/tools/_perturbation_space/_simple.py b/pertpy/tools/_perturbation_space/_simple.py
@@ -18,6 +18,7 @@ def compute(
         target_col: str = "perturbations",
         layer_key: str = None,
         embedding_key: str = "X_umap",
+        keep_obs: bool = True,
     ) -> AnnData:  # type: ignore
         """Computes the centroids of a pre-computed embedding such as UMAP.
 
@@ -26,6 +27,8 @@ def compute(
             target_col: .obs column that stores the label of the perturbation applied to each cell.
             layer_key: If specified pseudobulk computation is done by using the specified layer. Otherwise, computation is done with .X
             embedding_key: `obsm` key of the AnnData embedding to use for computation. Defaults to the 'X' matrix otherwise.
+            keep_obs: Whether .obs columns in the input AnnData should be kept in the output pseudobulk AnnData. Only .obs columns with the
+                same value for each cell of one perturbation can be kept. Defaults to True.
 
         Examples:
             Compute the centroids of a UMAP embedding of the papalexi_2021 dataset:
@@ -84,6 +87,18 @@ def compute(
 
         ps_adata = AnnData(X=X)
         ps_adata.obs_names = index
+        ps_adata.obs[target_col] = index
+
+        if embedding_key is not None:
+            ps_adata.obsm[embedding_key] = X
+
+        if keep_obs:  # Save the values of the obs columns of interest in the ps_adata object
+            obs_df = adata.obs
+            obs_df = obs_df.groupby(target_col).agg(lambda x: np.nan if len(set(x)) != 1 else list(set(x))[0])
+            for obs_name in obs_df.columns:
+                if not obs_df[obs_name].isnull().values.any():
+                    mapping = {pert: obs_df.loc[pert][obs_name] for pert in index}
+                    ps_adata.obs[obs_name] = ps_adata.obs[target_col].map(mapping)
 
         return ps_adata