diff --git a/frameworks/RandomForest/__init__.py b/frameworks/RandomForest/__init__.py index b024b051c..a25cbeee7 100644 --- a/frameworks/RandomForest/__init__.py +++ b/frameworks/RandomForest/__init__.py @@ -1,6 +1,6 @@ from amlb.benchmark import TaskConfig from amlb.data import Dataset -from amlb.utils import call_script_in_same_dir +from amlb.utils import call_script_in_same_dir, unsparsify def setup(*args, **kwargs): @@ -14,6 +14,7 @@ def run(dataset: Dataset, config: TaskConfig): encode = config.framework_params.get('_encode', True) X_train, X_test = impute_array(dataset.train.X_enc, dataset.test.X_enc) if encode else (dataset.train.X, dataset.test.X) y_train, y_test = (dataset.train.y_enc, dataset.test.y_enc) if encode else (dataset.train.y, dataset.test.y) + y_train, y_test = unsparsify(y_train, y_test) data = dict( train=dict( X=X_train, diff --git a/frameworks/TunedRandomForest/__init__.py b/frameworks/TunedRandomForest/__init__.py index a98dfdfac..561678497 100644 --- a/frameworks/TunedRandomForest/__init__.py +++ b/frameworks/TunedRandomForest/__init__.py @@ -1,6 +1,6 @@ from amlb.benchmark import TaskConfig from amlb.data import Dataset -from amlb.utils import call_script_in_same_dir +from amlb.utils import call_script_in_same_dir, unsparsify def setup(*args, **kwargs): @@ -12,7 +12,7 @@ def run(dataset: Dataset, config: TaskConfig): from frameworks.shared.caller import run_in_venv X_train, X_test = impute_array(dataset.train.X_enc, dataset.test.X_enc) - y_train, y_test = (dataset.train.y_enc, dataset.test.y_enc) + y_train, y_test = unsparsify(dataset.train.y_enc, dataset.test.y_enc) data = dict( train=dict( X=X_train, diff --git a/frameworks/shared/caller.py b/frameworks/shared/caller.py index ee60b42d4..b4c9b0210 100644 --- a/frameworks/shared/caller.py +++ b/frameworks/shared/caller.py @@ -93,7 +93,7 @@ def run_in_venv(caller_file, script_file: str, *args, ser_config = options['serialization'] env = options['env'] or ns() - with TemporaryDirectory() as tmpdir: + with TemporaryDirectory(prefix='amlb_', suffix='_xproc') as tmpdir: ds = _make_input_dataset(input_data, dataset, tmpdir, serialization=ser_config)