Commit

Merge pull request #31 from DataBytes-Organisation/Moraa
Create GANBLR_Model_Documentation.pdf
viloshini89 authored Jan 10, 2025
2 parents 743539e + 216fbd3 commit 7b3961c
Showing 126 changed files with 1,265,856 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -7,3 +7,6 @@ venv_*
katabatic/output.csv
katabatic/running.log
katabatic/output.csv
docs-2/ganblr_interface/creditcard.csv
docs-2/ganblr_interface/uploaded_datasets/credit_X_train.csv
docs-2/ganblr_interface/uploaded_datasets/creditcard.csv
Binary file added GANBLR_Model_Documentation.pdf
Binary file not shown.
76 changes: 76 additions & 0 deletions Ganblr Evaluation/ganblr.py
@@ -0,0 +1,76 @@
import torch
import torch.nn as nn
import pandas as pd


class GANBLR:
    def __init__(self, input_dim):
        self.generator = self.build_generator(output_dim=input_dim)
        self.discriminator = self.build_discriminator(input_dim=input_dim)
        self.criterion = nn.BCELoss()
        self.optimizer_G = torch.optim.Adam(self.generator.parameters(), lr=0.0002)
        self.optimizer_D = torch.optim.Adam(self.discriminator.parameters(), lr=0.0002)

    def build_generator(self, output_dim):
        # Maps a 100-dimensional noise vector to a sample with output_dim
        # features. Tanh bounds outputs to [-1, 1], so real data should be
        # scaled to that range for the discriminator to see comparable inputs.
        return nn.Sequential(
            nn.Linear(100, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
            nn.Tanh(),
        )

    def build_discriminator(self, input_dim):
        # Binary classifier: outputs the probability that a sample is real.
        return nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def fit(self, data):
        # Convert the DataFrame to a float tensor; training is full-batch.
        data_tensor = torch.tensor(data.values, dtype=torch.float32)

        for epoch in range(100):  # Example: 100 epochs
            # Sample noise for the generator
            noise = torch.randn(data_tensor.size(0), 100)

            # Train generator: push the discriminator to label fakes as real
            self.optimizer_G.zero_grad()
            generated_data = self.generator(noise)
            validity = self.discriminator(generated_data)
            g_loss = self.criterion(validity, torch.ones_like(validity))
            g_loss.backward()
            self.optimizer_G.step()

            # Train discriminator: real samples -> 1, generated samples -> 0
            self.optimizer_D.zero_grad()
            real_validity = self.discriminator(data_tensor)
            fake_validity = self.discriminator(generated_data.detach())
            real_loss = self.criterion(real_validity, torch.ones_like(real_validity))
            fake_loss = self.criterion(fake_validity, torch.zeros_like(fake_validity))
            d_loss = (real_loss + fake_loss) / 2
            d_loss.backward()
            self.optimizer_D.step()

            print(f"Epoch {epoch + 1}/100: Generator Loss: {g_loss.item():.4f}, Discriminator Loss: {d_loss.item():.4f}")

    def generate(self, n_samples=1000):
        # Generate synthetic samples from random noise (default: 1000 rows)
        noise = torch.randn(n_samples, 100)
        synthetic_data = self.generator(noise).detach().numpy()
        return pd.DataFrame(synthetic_data, columns=[f"Feature_{i}" for i in range(synthetic_data.shape[1])])

    def save(self, path):
        torch.save(self.generator.state_dict(), path)
        print(f"Model saved to {path}")

    def load(self, path):
        # Only a state_dict of tensors is saved here, so weights_only=True
        # would also work and is the safer option.
        self.generator.load_state_dict(torch.load(path, weights_only=False))
        print(f"Model loaded from {path}")
Binary file added Ganblr Evaluation/ganblr_model_checkpoint.pth
Binary file not shown.
6 changes: 6 additions & 0 deletions Ganblr Evaluation/preprocessed_real_dataset.csv
@@ -0,0 +1,6 @@
sepal_length,sepal_width,petal_length,petal_width,species_setosa
5.1,3.5,1.4,0.2,1
4.9,3.0,1.4,0.2,1
4.7,3.2,1.3,0.2,1
4.6,3.1,1.5,0.2,1
5.0,3.6,1.4,0.2,1
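For context, one plausible way such a file could be produced from a raw iris CSV — the input file name, its column names, and the choice to keep only the setosa indicator are assumptions, not taken from this commit:

import pandas as pd

raw = pd.read_csv("iris.csv")  # hypothetical raw file with a `species` column

# One-hot encode the species label and keep only the setosa indicator,
# matching the header of preprocessed_real_dataset.csv above.
encoded = pd.get_dummies(raw, columns=["species"], dtype=int)
encoded[["sepal_length", "sepal_width", "petal_length",
         "petal_width", "species_setosa"]].to_csv(
    "preprocessed_real_dataset.csv", index=False)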
48 changes: 48 additions & 0 deletions Ganblr Evaluation/run_ganblr.py
@@ -0,0 +1,48 @@
import argparse
import pandas as pd
from ganblr import GANBLR


def train_model(input_file):
    print(f"Loading dataset from {input_file}...")
    data = pd.read_csv(input_file)

    # Initialize the GANBLR model with the input dimension of the dataset
    model = GANBLR(input_dim=data.shape[1])

    # Train the model
    print("Training the GANBLR model...")
    model.fit(data)

    # Save the trained model
    model.save("ganblr_model_checkpoint.pth")
    print("Training complete. Model saved.")


def generate_data(output_file):
    print("Generating synthetic data using GANBLR...")
    # Load the preprocessed dataset to infer the input dimensions
    data = pd.read_csv("preprocessed_real_dataset.csv")
    model = GANBLR(input_dim=data.shape[1])

    # Load the trained model
    model.load("ganblr_model_checkpoint.pth")
    synthetic_data = model.generate()

    # Save the synthetic data
    synthetic_data.to_csv(output_file, index=False)
    print(f"Synthetic data saved to {output_file}.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Train or Generate Data with GANBLR")
    parser.add_argument("--train", type=str, help="Path to the training dataset (CSV)")
    parser.add_argument("--generate", action="store_true", help="Generate synthetic data")
    parser.add_argument("--output", type=str, default="synthetic_dataset.csv", help="Output file for synthetic data")

    args = parser.parse_args()

    if args.train:
        train_model(args.train)
    if args.generate:
        generate_data(args.output)
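Per the argparse flags above, the two entry points are invoked as:

python run_ganblr.py --train preprocessed_real_dataset.csv
python run_ganblr.py --generate --output synthetic_dataset.csv

Training writes ganblr_model_checkpoint.pth next to the script; generation reloads that checkpoint (and re-reads preprocessed_real_dataset.csv to infer the input dimension) before writing the synthetic CSV.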