Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ruff formatter #102

Merged
merged 8 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# CI workflow: run the Ruff linter on every push and every pull request.
name: Ruff lint
on: [push, pull_request]
jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository so the linter can see the source tree.
      - uses: actions/checkout@v3
      # chartboost/ruff-action runs `ruff check` over the checked-out code
      # and fails the job on any lint violation.
      - uses: chartboost/ruff-action@v1
9 changes: 5 additions & 4 deletions example/dnn_examples/demo_jackknife.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pickle
import pandas as pd
import logging
import pickle
import sys
from motrainer.dnn import NNTrain

import pandas as pd

from motrainer.jackknife import JackknifeGPI

logging.basicConfig(
Expand Down Expand Up @@ -53,7 +54,7 @@
val_split_year,
input_list,
output_list,
outpath='{}/gpi{}'.format(out_path, gpi_num))
outpath=f'{out_path}/gpi{gpi_num}')

gpi.train(searching_space=searching_space,
optimize_space=optimize_space,
Expand Down
138 changes: 70 additions & 68 deletions example/dnn_examples/demo_slurm/jackknife_train_one.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,70 @@
"""Train a single grid point (GPI) with jackknife cross-validation.

Usage: python jackknife_train_one.py <gpi_id>

Reads one row of the pickled example data, trains a DNN for that grid
point with ``JackknifeGPI``, and writes the best model plus a-priori /
post-priori performance pickles under ``./results/gpi<gpi_id>``.
"""
import pickle
import sys

# BUG FIX: pandas was used below (pd.read_pickle) but never imported,
# which raised NameError at runtime.
import pandas as pd

from motrainer.jackknife import JackknifeGPI

if __name__ == "__main__":
    # Parsing input: index of the grid point (row of the example data).
    gpi_id = int(sys.argv[1])

    # Manual input: paths, split year, feature/target columns and the
    # hyper-parameter search configuration.
    out_path = './results/'
    file_data = '../example_data/example_data.pickle'
    val_split_year = 2017
    output_list = ['sig', 'slop', 'curv']
    input_list = [
        'TG1', 'TG2', 'TG3', 'WG1', 'WG2', 'WG3', 'BIOMA1', 'BIOMA2']
    searching_space = {'learning_rate': [5e-4, 1e-2], 'activation': ['relu']}
    optimize_space = {
        'best_loss': 1,
        'n_calls': 15,
        'noise': 0.01,
        'n_jobs': -1,
        'kappa': 5,
        'validation_split': 0.2,
        'x0': [1e-3, 1, 4, 13, 'relu', 64]
    }

    # Read example data and select the requested grid point.
    df_all_gpi = pd.read_pickle(file_data)

    gpi_data = df_all_gpi.iloc[gpi_id]['data']
    gpi_data = gpi_data.dropna()

    if len(gpi_data) > 0:
        gpi = JackknifeGPI(gpi_data,
                           val_split_year,
                           input_list,
                           output_list,
                           outpath=f'{out_path}/gpi{gpi_id}')

        gpi.train(searching_space=searching_space,
                  optimize_space=optimize_space,
                  normalize_method='standard',
                  training_method='dnn',
                  performance_method='rmse',
                  val_split_year=val_split_year)

        gpi.export_best()

        # Export apriori performance
        path_apriori_performance = (
            f'{gpi.outpath}/apriori_performance_{gpi.best_year}')
        with open(path_apriori_performance, 'wb') as f:
            pickle.dump(gpi.apr_perf, f)

        # Export postpriori performance
        path_postpriori_performance = (
            f'{gpi.outpath}/postpriori_performance_{gpi.best_year}')
        with open(path_postpriori_performance, 'wb') as f:
            pickle.dump(gpi.post_perf, f)

        print("=========================================")
        print(" GPI " + str(gpi_id) + " done")
        print("=========================================")
    else:
        # No usable (non-NaN) data for this grid point: skip training.
        print("=========================================")
        print(" GPI" + str(gpi_id) + " is empty")
        print("=========================================")
"""Train a single grid point (GPI) with jackknife cross-validation.

Usage: python jackknife_train_one.py <gpi_id>

Reads one row of the pickled example data, trains a DNN for that grid
point with ``JackknifeGPI``, and writes the best model plus a-priori /
post-priori performance pickles under ``./results/gpi<gpi_id>``.
"""
import pickle
import sys

import pandas as pd

from motrainer.jackknife import JackknifeGPI

if __name__ == "__main__":
    # Parsing input: index of the grid point (row of the example data).
    gpi_id = int(sys.argv[1])

    # Manual input: paths, split year, feature/target columns and the
    # hyper-parameter search configuration.
    out_path = './results/'
    file_data = '../example_data/example_data.pickle'
    val_split_year = 2017
    output_list = ['sig', 'slop', 'curv']
    input_list = [
        'TG1', 'TG2', 'TG3', 'WG1', 'WG2', 'WG3', 'BIOMA1', 'BIOMA2']
    searching_space = {'learning_rate': [5e-4, 1e-2], 'activation': ['relu']}
    optimize_space = {
        'best_loss': 1,
        'n_calls': 15,
        'noise': 0.01,
        'n_jobs': -1,
        'kappa': 5,
        'validation_split': 0.2,
        'x0': [1e-3, 1, 4, 13, 'relu', 64]
    }

    # Read example data and select the requested grid point.
    df_all_gpi = pd.read_pickle(file_data)

    gpi_data = df_all_gpi.iloc[gpi_id]['data']
    gpi_data = gpi_data.dropna()

    if len(gpi_data) > 0:
        gpi = JackknifeGPI(gpi_data,
                           val_split_year,
                           input_list,
                           output_list,
                           outpath=f'{out_path}/gpi{gpi_id}')

        gpi.train(searching_space=searching_space,
                  optimize_space=optimize_space,
                  normalize_method='standard',
                  training_method='dnn',
                  performance_method='rmse',
                  val_split_year=val_split_year)

        gpi.export_best()

        # Export apriori performance
        path_apriori_performance = (
            f'{gpi.outpath}/apriori_performance_{gpi.best_year}')
        with open(path_apriori_performance, 'wb') as f:
            pickle.dump(gpi.apr_perf, f)

        # Export postpriori performance.
        # Converted the leftover str.format() call to an f-string for
        # consistency with the apriori path above.
        path_postpriori_performance = (
            f'{gpi.outpath}/postpriori_performance_{gpi.best_year}')
        with open(path_postpriori_performance, 'wb') as f:
            pickle.dump(gpi.post_perf, f)

        print("=========================================")
        print(" GPI " + str(gpi_id) + " done")
        print("=========================================")
    else:
        # No usable (non-NaN) data for this grid point: skip training.
        print("=========================================")
        print(" GPI" + str(gpi_id) + " is empty")
        print("=========================================")
2 changes: 1 addition & 1 deletion motrainer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from motrainer.splitter import is_splitable, dataset_split, train_test_split
from motrainer.splitter import dataset_split, is_splitable, train_test_split

__all__ = ("is_splitable", "dataset_split", "train_test_split")
61 changes: 29 additions & 32 deletions motrainer/dnn.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
import logging
import tensorflow as tf
import skopt
import pickle
from pathlib import Path
from skopt.space import Real, Categorical, Integer
from motrainer.model import keras_dnn, keras_dnn_lossweight

# disable WARNING:absl:Found untraced functions such as _update_step_xla while saving
# see https://github.com/tensorflow/tensorflow/issues/47554
import absl.logging
import skopt
import tensorflow as tf
from skopt.space import Categorical, Integer, Real

from motrainer.model import keras_dnn, keras_dnn_lossweight

absl.logging.set_verbosity(absl.logging.ERROR)


logger = logging.getLogger(__name__)


class NNTrain(object):
"""
Neuron Network trainning object
class NNTrain:
"""Neuron Network trainning object.

Methods
-------
Expand All @@ -29,9 +30,9 @@ class NNTrain(object):
Optimize the neuron network within the searching space by given
optimization settings
"""

def __init__(self, train_input, train_output):
"""
Initialize NNTrain object
"""Initialize NNTrain object.

Parameters
----------
Expand Down Expand Up @@ -62,13 +63,11 @@ def __init__(self, train_input, train_output):
self.model = None

def update_space(self, **kwrags):
"""
Update searching space of optimization.
"""
"""Update searching space of optimization."""
for key, value in kwrags.items():
logger.debug('Update seaching sapce: {}={}'.format(key, value))
logger.debug(f'Update seaching sapce: {key}={value}')
# skopt.space instances
if isinstance(value, (Real, Categorical, Integer)):
if isinstance(value, Real | Categorical | Integer):
self.dimensions[key] = value
self.dimensions[key].name = key

Expand All @@ -77,11 +76,10 @@ def update_space(self, **kwrags):
assert len(value) == 2
if any([isinstance(obj, int) for obj in value]):
logger.warning(
'Mixed fload/int type found in {}:{}. '
f'Mixed fload/int type found in {key}:{value}. '
'The search space will be interpreted as float. '
'If this behavior is not desired, try to specify'
'all elements in {} with the same type.'.format(
key, value, key))
f'all elements in {key} with the same type.')
self.dimensions[key] = Real(low=value[0],
high=value[1],
prior='log-uniform',
Expand All @@ -100,8 +98,7 @@ def update_space(self, **kwrags):

else:
logger.error(
'Do not understand searching space: {}:{}.'.format(
key, value))
f'Do not understand searching space: {key}:{value}.')
raise NotImplementedError

def optimize(self,
Expand All @@ -112,13 +109,11 @@ def optimize(self,
n_jobs=-1,
kappa=5,
validation_split=0.2,
x0=[1e-3, 1, 4, 13, 'relu', 64],
x0=None,
training_method='dnn',
loss_weights=None,
verbose=0):
"""
Optimize the neuron network within the searching space by given
optimization settings
"""Optimize the neuron network within the searching space..

Parameters
----------
Expand Down Expand Up @@ -154,14 +149,13 @@ def optimize(self,
Control the verbosity.
By default 0, which means no screen feedback.
"""

self.best_loss = best_loss
self.keras_verbose = verbose
self.loss_weights = loss_weights

@skopt.utils.use_named_args(dimensions=list(self.dimensions.values()))
def func(**dimensions):
logger.info('optimizing with dimensions: {}'.format(dimensions))
logger.info(f'optimizing with dimensions: {dimensions}')

# setup model
earlystop = tf.keras.callbacks.EarlyStopping(
Expand All @@ -178,8 +172,7 @@ def func(**dimensions):
if self.loss_weights is None:
self.loss_weights = [1] * self.train_output.shape[1]
logger.warning('loss_weights is None.'
'Using default weights {}'.format(
self.loss_weights))
f'Using default weights {self.loss_weights}')
model = keras_dnn_lossweight(dimensions,
self.train_input.shape[1],
self.train_output.shape[1],
Expand Down Expand Up @@ -208,6 +201,9 @@ def func(**dimensions):
tf.keras.backend.clear_session()
return loss

if x0 is None:
x0 = [1e-3, 1, 4, 13, 'relu', 64]

self.gp_result = skopt.gp_minimize(func=func,
dimensions=list(
self.dimensions.values()),
Expand All @@ -218,10 +214,7 @@ def func(**dimensions):
x0=x0)

def export(self, path_model=None, path_hyperparameters=None):
"""
Export model and hyperparameters from tranning.
"""

"""Export model and hyperparameters from tranning."""
if path_model is not None:
Path(path_model).parent.mkdir(parents=True, exist_ok=True)
self.model.save(path_model)
Expand All @@ -232,5 +225,9 @@ def export(self, path_model=None, path_hyperparameters=None):
with open(path_hyperparameters, 'wb') as f:
pickle.dump([
sorted(
zip(self.gp_result.func_vals, self.gp_result.x_iters))
zip(
self.gp_result.func_vals,
self.gp_result.x_iters,
strict=True
))
], f)
Loading