Skip to content

Commit

Permalink
fix: preprocess output format & some spelling mistakes (#358)
Browse files Browse the repository at this point in the history
* spaceship: format type of y = pd.Series; fix a misspelling in xgb

* s3e11: format of y -- pd.Series

* spaceship: format of y & fit nn

* spaceship: wrong spelling in xgb

* ci issue
  • Loading branch information
TPLin22 authored Sep 26, 2024
1 parent b054017 commit b8b2cd6
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def preprocess_script():
y_valid = pd.read_pickle("/kaggle/input/y_valid.pkl")
X_test = pd.read_pickle("/kaggle/input/X_test.pkl")
others = pd.read_pickle("/kaggle/input/others.pkl")
y_train = pd.Series(y_train).reset_index(drop=True)
y_valid = pd.Series(y_valid).reset_index(drop=True)

return X_train, X_valid, y_train, y_valid, X_test, *others

Expand All @@ -38,6 +40,8 @@ def preprocess_script():
X_train, X_valid, y_train, y_valid = train_test_split(
train[most_important_features], train["log_cost"], test_size=0.2, random_state=2023
)
y_train = pd.Series(y_train).reset_index(drop=True)
y_valid = pd.Series(y_valid).reset_index(drop=True)

# test
test = pd.read_csv("/kaggle/input/test.csv")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,16 @@ def preprocess_script():
y_valid = pd.read_pickle("/kaggle/input/y_valid.pkl")
X_test = pd.read_pickle("/kaggle/input/X_test.pkl")
others = pd.read_pickle("/kaggle/input/others.pkl")
y_train = pd.Series(y_train).reset_index(drop=True)
y_valid = pd.Series(y_valid).reset_index(drop=True)

return X_train, X_valid, y_train, y_valid, X_test, *others
X_train, X_valid, y_train, y_valid = prepreprocess()

# Fit the preprocessor on the training data
preprocessor, numerical_cols, categorical_cols = preprocess_fit(X_train)
y_train = pd.Series(y_train).reset_index(drop=True)
y_valid = pd.Series(y_valid).reset_index(drop=True)

# Preprocess the train, validation, and test data
X_train = preprocess_transform(X_train, preprocessor, numerical_cols, categorical_cols)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_v

# TODO: for quick running....
params = {
"nthred": -1,
"nthread": -1,
}
num_round = 180

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,13 @@ def preprocess_script():
y_valid = pd.read_pickle("/kaggle/input/y_valid.pkl")
X_test = pd.read_pickle("/kaggle/input/X_test.pkl")
others = pd.read_pickle("/kaggle/input/others.pkl")
y_train = pd.Series(y_train).reset_index(drop=True)
y_valid = pd.Series(y_valid).reset_index(drop=True)

return X_train, X_valid, y_train, y_valid, X_test, *others
X_train, X_valid, y_train, y_valid = prepreprocess()
y_train = pd.Series(y_train).reset_index(drop=True)
y_valid = pd.Series(y_valid).reset_index(drop=True)

# Fit the preprocessor on the training data
preprocessor, label_encoders = preprocess_fit(X_train)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ def fit(X_train, y_train, X_valid, y_valid):

# Convert to TensorDataset and create DataLoader
train_dataset = TensorDataset(
torch.tensor(X_train.to_numpy(), dtype=torch.float32), torch.tensor(y_train.reshape(-1), dtype=torch.float32)
torch.tensor(X_train.to_numpy(), dtype=torch.float32),
torch.tensor(y_train.to_numpy().reshape(-1), dtype=torch.float32),
)
valid_dataset = TensorDataset(
torch.tensor(X_valid.to_numpy(), dtype=torch.float32), torch.tensor(y_valid.reshape(-1), dtype=torch.float32)
torch.tensor(X_valid.to_numpy(), dtype=torch.float32),
torch.tensor(y_valid.to_numpy().reshape(-1), dtype=torch.float32),
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFrame, y_v

# TODO: for quick running....
params = {
"nthred": -1,
"nthread": -1,
}
num_round = 100

Expand Down

0 comments on commit b8b2cd6

Please sign in to comment.