update gitignore and resources
rcannood committed Aug 26, 2024
1 parent df6ad1d commit 1f5a6a5
Showing 6 changed files with 192 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -1,5 +1,5 @@
-resources
-resources_test
+/resources
+/resources_test
 work
 .nextflow*
 target
68 changes: 68 additions & 0 deletions src/methods/simple_mlp/resources/models.py
@@ -0,0 +1,68 @@
import torch
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F


class MLP(pl.LightningModule):
    """Two-hidden-layer MLP with a skip connection from the first hidden
    layer to the output layer, plus a mean-based output mask."""

    def __init__(self, in_dim, out_dim, ymean, config):
        super().__init__()
        self.ymean = ymean.cuda()  # per-target training means; assumes a CUDA device
        H1 = config.H1
        H2 = config.H2
        p = config.dropout
        self.config = config
        self.fc1 = nn.Linear(in_dim, H1)
        self.fc2 = nn.Linear(H1, H2)
        # fc3 consumes the concatenation of both hidden activations (H1 + H2)
        self.fc3 = nn.Linear(H1 + H2, out_dim)
        self.dp2 = nn.Dropout(p=p)

    def forward(self, x):
        x1 = F.relu(self.fc1(x))
        x1 = self.dp2(x1)
        x = F.relu(self.fc2(x1))
        x = torch.cat([x, x1], dim=1)  # skip connection: concatenate both hidden layers
        x = self.fc3(x)
        x = self.apply_mask(x)
        return x

    def apply_mask(self, yp):
        # Where the per-target training mean falls below the threshold, replace
        # the prediction with that mean (such targets are mostly zero anyway).
        tmp = torch.ones_like(yp).float() * self.ymean
        mask = (tmp < self.config.threshold).float()
        return yp * (1 - mask) + tmp * mask

    def training_step(self, batch, batch_idx):
        x, y = batch
        yp = self(x)
        criterion = nn.MSELoss()
        loss = criterion(yp, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        yp = self(x)
        criterion = nn.MSELoss()
        loss = criterion(yp, y)
        self.log('valid_RMSE', loss ** 0.5, prog_bar=True)
        return loss

    def predict_step(self, batch, batch_idx):
        # Accept batches both with and without labels.
        if len(batch) == 2:
            x, _ = batch
        else:
            x = batch
        return self(x)

    def configure_optimizers(self):
        lr = self.config.lr
        wd = float(self.config.wd)
        adam = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=wd)
        if self.config.lr_schedule == 'adam':
            return adam
        elif self.config.lr_schedule == 'adam_cosin':
            slr = torch.optim.lr_scheduler.CosineAnnealingLR(adam, self.config.epochs)
            return [adam], [slr]
        else:
            raise ValueError(f"unknown lr_schedule: {self.config.lr_schedule}")
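
For context, a minimal sketch of driving this module. The dimensions and the `Config` tuple below are illustrative, not taken from the commit; in the pipeline the config comes from `utils.load_yaml`, and a CUDA device is assumed since `__init__` calls `ymean.cuda()`:

```python
import torch
from collections import namedtuple

# Hypothetical stand-in for the namedtuple produced by utils.load_yaml.
Config = namedtuple('Config', ['H1', 'H2', 'dropout', 'lr', 'wd',
                               'threshold', 'lr_schedule', 'epochs'])
cfg = Config(H1=256, H2=128, dropout=0.0, lr=1e-3, wd='1e-5',
             threshold=0.0, lr_schedule='adam', epochs=10)

ymean = torch.zeros(140)                        # illustrative per-target training means
model = MLP(in_dim=134, out_dim=140, ymean=ymean, config=cfg).cuda()
x = torch.randn(8, 134).cuda()                  # illustrative input batch
yp = model(x)                                   # shape (8, 140), masked predictions
```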
37 changes: 37 additions & 0 deletions src/methods/simple_mlp/resources/utils.py
@@ -0,0 +1,37 @@
import yaml
from collections import namedtuple


def to_site_donor(data):
    # The batch label encodes site and donor, e.g. 's1d2' -> site 's1', donor 'd2'.
    df = data.obs['batch'].copy().to_frame().reset_index()
    df.columns = ['index', 'batch']
    df['site'] = df['batch'].apply(lambda x: x[:2])
    df['donor'] = df['batch'].apply(lambda x: x[2:])
    return df


def split(tr1, tr2, fold):
    # Leave-one-site-out split: site `s{fold+1}` becomes the validation set.
    df = to_site_donor(tr1)
    mask = df['site'] == f's{fold+1}'
    maskr = ~mask

    Xt = tr1[mask].layers["normalized"].toarray()   # validation inputs
    X = tr1[maskr].layers["normalized"].toarray()   # training inputs

    yt = tr2[mask].layers["normalized"].toarray()   # validation targets
    y = tr2[maskr].layers["normalized"].toarray()   # training targets

    print(f"{X.shape}, {y.shape}, {Xt.shape}, {yt.shape}")

    return X, y, Xt, yt


def load_yaml(path):
    # Each YAML entry looks like `key: {desc: ..., value: ...}`; keep only the
    # values and expose them as attributes of an immutable namedtuple.
    with open(path) as f:
        x = yaml.safe_load(f)
    res = {}
    for i in x:
        res[i] = x[i]['value']
    config = namedtuple('Config', res.keys())(**res)
    print(config)
    return config
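
A sketch of how these helpers compose (the file paths and AnnData inputs are assumptions for illustration, not part of this commit):

```python
import anndata as ad

# Hypothetical paths; the configs live under src/methods/simple_mlp/resources/yaml/.
config = load_yaml("src/methods/simple_mlp/resources/yaml/mlp_ADT2GEX.yaml")

tr1 = ad.read_h5ad("train_mod1.h5ad")  # hypothetical source-modality AnnData
tr2 = ad.read_h5ad("train_mod2.h5ad")  # hypothetical target-modality AnnData

# Fold 0 holds out site 's1' for validation and trains on the rest.
X, y, Xt, yt = split(tr1, tr2, fold=0)
```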
29 changes: 29 additions & 0 deletions src/methods/simple_mlp/resources/yaml/mlp_ADT2GEX.yaml
@@ -0,0 +1,29 @@

# sample config defaults file
epochs:
desc: Number of epochs to train over
value: 10
batch_size:
desc: Size of each mini-batch
value: 512
H1:
desc: Number of hidden neurons in 1st layer of MLP
value: 256
H2:
desc: Number of hidden neurons in 2nd layer of MLP
value: 128
dropout:
desc: probability of zeroing values
value: 0
lr:
desc: learning rate
value: 0.001
wd:
desc: weight decay
value: 1e-5
threshold:
desc: threshold to set values to zero
value: 0
lr_schedule:
desc: learning rate scheduler
value: adam
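
Read through `load_yaml`, this file becomes a flat namedtuple of the `value` fields. Note that PyYAML's safe loader parses `1e-5` (no decimal point) as the string `'1e-5'` rather than a float, which is presumably why `configure_optimizers` wraps `self.config.wd` in `float(...)`:

```python
config = load_yaml("mlp_ADT2GEX.yaml")  # hypothetical relative path
# Config(epochs=10, batch_size=512, H1=256, H2=128, dropout=0,
#        lr=0.001, wd='1e-5', threshold=0, lr_schedule='adam')
float(config.wd)  # 1e-05
```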
28 changes: 28 additions & 0 deletions src/methods/simple_mlp/resources/yaml/mlp_ATAC2GEX.yaml
@@ -0,0 +1,28 @@
# sample config defaults file
epochs:
desc: Number of epochs to train over
value: 10
batch_size:
desc: Size of each mini-batch
value: 512
H1:
desc: Number of hidden neurons in 1st layer of MLP
value: 256
H2:
desc: Number of hidden neurons in 2nd layer of MLP
value: 128
dropout:
desc: probability of zeroing values
value: 0.5
lr:
desc: learning rate
value: 0.001
wd:
desc: weight decay
value: 1e-5
threshold:
desc: threshold to set values to zero
value: 0
lr_schedule:
desc: learning rate scheduler
value: adam
28 changes: 28 additions & 0 deletions src/methods/simple_mlp/resources/yaml/mlp_GEX2ADT.yaml
@@ -0,0 +1,28 @@
# sample config defaults file
epochs:
desc: Number of epochs to train over
value: 10
batch_size:
desc: Size of each mini-batch
value: 512
H1:
desc: Number of hidden neurons in 1st layer of MLP
value: 1024
H2:
desc: Number of hidden neurons in 2nd layer of MLP
value: 512
dropout:
desc: probability of zeroing values
value: 0
lr:
desc: learning rate
value: 0.001
wd:
desc: weight decay
value: 1e-5
threshold:
desc: threshold to set values to zero
value: 0.05
lr_schedule:
desc: learning rate scheduler
value: adam_cosin
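
Compared with the other two configs, GEX2ADT uses wider hidden layers (1024/512), cosine-annealed Adam, and a non-zero mask threshold. A small illustration of what `threshold: 0.05` does inside `MLP.apply_mask` (tensor values invented for the example):

```python
import torch

ymean = torch.tensor([0.01, 0.50, 0.03])  # illustrative per-target training means
yp = torch.tensor([[0.90, 0.20, 0.40]])   # illustrative raw predictions
threshold = 0.05                          # as in this config

tmp = torch.ones_like(yp) * ymean
mask = (tmp < threshold).float()          # targets 0 and 2 fall below the threshold
print(yp * (1 - mask) + tmp * mask)       # tensor([[0.0100, 0.2000, 0.0300]])
```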
