diff --git a/test.ipynb b/test.ipynb deleted file mode 100644 index 0250befd..00000000 --- a/test.ipynb +++ /dev/null @@ -1,410 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "libcbrush.so: cannot open shared object file: No such file or directory", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel_selection\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m train_test_split\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m mean_squared_error, r2_score\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m_brush\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindividual\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RegressorIndividual\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m_brush\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SearchSpace, Parameters, Dataset\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpybrush\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DeapRegressor\n", - "\u001b[0;31mImportError\u001b[0m: libcbrush.so: cannot open shared object file: No such file or directory" - ] - } - ], - "source": [ - "from sklearn import datasets\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import mean_squared_error, r2_score\n", - "\n", - "from _brush.individual import RegressorIndividual\n", - "from _brush import SearchSpace, Parameters, Dataset\n", - "\n", - "from pybrush import DeapRegressor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load the diabetes dataset\n", - "diabetes = datasets.load_diabetes()\n", - "\n", - "# Use only one feature\n", - "X = diabetes.data[:, None, 2]\n", - "y = diabetes.target\n", - "\n", - "import pandas as pd\n", - "\n", - "# df = pd.read_csv(\"https://raw.githubusercontent.com/gAldeia/hashing-symbolic-expressions/master/data/lexicase_paper/d_airfoil.txt?token=GHSAT0AAAAAACPJ5UIOJY42GOUHC4GKZOBOZPS7BHA\")\n", - "# X = df.drop('label', axis=1)\n", - "# y = df['label']\n", - "\n", - "print(X.shape, y.shape)\n", - "# Split the data into training/testing sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybrush import engine\n", - "print(\"imported\")\n", - "\n", - "# Validation is to hold some part of the data as the inner validation split\n", - "dataset = Dataset(X=X_train, y=y_train, validation_size=0.75)\n", - "print(\"dataset\")\n", - "\n", - "params = Parameters()\n", - "print(\"parameters\")\n", - "\n", - "brush_estimator = engine.RegressorEngine(params)\n", - "print(\"estimator\")\n", - "\n", - "print(brush_estimator.params.pop_size)\n", - "brush_estimator.params.pop_size = 100\n", - "brush_estimator.params.gens = 100\n", - "brush_estimator.params.num_islands = 5\n", - "brush_estimator.params.max_size = 2**6\n", - "brush_estimator.params.max_depth = 6\n", - "brush_estimator.params.n_jobs = 5\n", - "brush_estimator.params.objectives = [\"error\", \"size\"]\n", - "print(brush_estimator.params.pop_size)\n", - "\n", - "print(brush_estimator.is_fitted)\n", - "print(brush_estimator.best_ind.program.get_model())\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "reg = DeapRegressor(\n", - " gens=100, pop_size=100, max_size=2**6, max_depth=6,\n", - " num_islands=1,\n", - " n_jobs=1,\n", - " objectives=['error', 'size'], #, 'complexity'],\n", - " verbosity=1\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "print(\"starting to run\")\n", - "\n", - "brush_estimator.run(dataset)\n", - "print(\"done\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(brush_estimator.is_fitted)\n", - "print(brush_estimator.best_ind.program.get_model())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.metrics import mean_squared_error\n", - "\n", - "mean_squared_error(\n", - " brush_estimator.best_ind.program.predict(X_test), y_test, squared=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "brush_estimator.best_ind.fitness.values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "regr = RegressorIndividual()\n", - "print(dir(regr))\n", - "\n", - "# Validation is to hold some part of the data as the inner validation split\n", - "dataset = Dataset(X=X_train, y=y_train, validation_size=0.75)\n", - "ss = SearchSpace(dataset)\n", - "params = Parameters()\n", - "\n", - "regr.init(ss, params)\n", - "\n", - "# regr.fit(X_train, y_train)\n", - "regr.program.get_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "reg.fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybrush import BrushRegressor\n", - "\n", - "reg2 = BrushRegressor(\n", - " gens=100, pop_size=100, max_size=2**6, max_depth=6,\n", - " num_islands=1,\n", - " n_jobs=3,\n", - " objectives=['error', 'size'], #, 'complexity'],\n", - " verbosity=1\n", - ").fit(X_train, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "reg.best_estimator_.fitness.values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybrush import RegressorEvaluator\n", - "\n", - "# RegressorEvaluator()\n", - "print( reg.best_estimator_.program.get_model() )\n", - "print( reg.best_estimator_.fitness.values )\n", - "\n", - "RegressorEvaluator().assign_fit(\n", - " reg.best_estimator_, reg.data_, reg.parameters_, True)\n", - "print( reg.best_estimator_.fitness.values )\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybrush import RegressorEvaluator\n", - "import numpy as np\n", - "print(regr.fitness.weights)\n", - "regr.objectives = ['error', 'size']\n", - "print(regr.fitness.weights)\n", - "\n", - "regr.fitness.values = [1, 2]\n", - "regr.init(reg.search_space_, reg.parameters_)\n", - "regr.program.fit(reg.data_)\n", - "\n", - "print(regr.program.get_model())\n", - "print(regr.fitness.wvalues)\n", - "print(regr.fitness.values)\n", - "\n", - "RegressorEvaluator().assign_fit(\n", - " regr, reg.data_, reg.parameters_, False)\n", - "print( regr.fitness.values )\n", - "\n", - "def _error(ind, data):\n", - " MSE = np.mean( (data.y-ind.program.predict(data))**2 )\n", - " if not np.isfinite(MSE): # numeric erros, np.nan, +-np.inf\n", - " MSE = np.inf\n", - "\n", - " return MSE\n", - "\n", - "def _fitness_validation(ind, data):\n", - " # Fitness without fitting the expression, used with validation data\n", - "\n", - " ind_objectives = {\n", - " \"error\" : _error(ind, data),\n", - " \"size\" : ind.program.size(),\n", - " \"complexity\": ind.program.complexity()\n", - " }\n", - " return [ ind_objectives[obj] for obj in reg.objectives ]\n", - "\n", - "def _fitness_function(ind, data):\n", - " return _fitness_validation(ind, data)\n", - "\n", - "print(_fitness_function(regr, reg.data_))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import log_loss\n", - "from sklearn.datasets import load_iris, load_breast_cancer\n", - "\n", - "from pybrush import individual\n", - "from pybrush import ClassifierEvaluator\n", - "\n", - "# Load the iris dataset\n", - "iris = load_breast_cancer()\n", - "X = iris.data\n", - "y = iris.target\n", - "print(np.unique(y))\n", - "\n", - "clf = individual.ClassifierIndividual()\n", - "print(dir(clf))\n", - "\n", - "# c=True will add logistic function into the search space\n", - "# Validation is to hold some part of the data as the inner validation split\n", - "dataset = Dataset(X=X, y=y, c=True, validation_size=0.0)\n", - "ss = SearchSpace(dataset)\n", - "params = Parameters()\n", - "\n", - "clf.init(ss, params)\n", - "\n", - "# clf.fit(X_train, y_train)\n", - "clf.program.get_model()\n", - "\n", - "clf.objectives = ['error', 'size']\n", - "print(clf.fitness.weights)\n", - "\n", - "clf.fitness.values = [1, 2]\n", - "clf.program.fit(dataset)\n", - "\n", - "print(clf.program.get_model())\n", - "print(clf.fitness.wvalues)\n", - "print(clf.fitness.values)\n", - "\n", - "ClassifierEvaluator().assign_fit(clf, dataset, params, False)\n", - "print( clf.fitness.values )\n", - "def _error(ind, data):\n", - " probas = ind.program.predict_proba(data)\n", - " print(probas[:3])\n", - " probas = np.array([probas, 1-probas]).T\n", - " print(probas.shape)\n", - " print(probas[:3, :])\n", - " ERR = log_loss(data.y, probas, labels=['a', 'b'])\n", - " if not np.isfinite(ERR): # numeric erros, np.nan, +-np.inf\n", - " ERR = np.inf\n", - "\n", - " return ERR\n", - "\n", - "def _fitness_validation(ind, data):\n", - " # Fitness without fitting the expression, used with validation data\n", - "\n", - " ind_objectives = {\n", - " \"error\" : _error(ind, data),\n", - " \"size\" : ind.program.size(),\n", - " \"complexity\": ind.program.complexity()\n", - " }\n", - " return [ ind_objectives[obj] for obj in clf.objectives ]\n", - "\n", - "def _fitness_function(ind, data):\n", - " return _fitness_validation(ind, data)\n", - "\n", - "print(_fitness_function(clf, dataset))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybrush import BrushClassifier\n", - "\n", - "clf = BrushClassifier(\n", - " gens=10, pop_size=10, max_size=2**5, max_depth=5,\n", - " num_islands=1,\n", - " n_jobs=3,\n", - " objectives=['error', 'size'], #, 'complexity'],\n", - " verbosity=1,\n", - " functions={\"Add\":1.0,\"Logistic\":1.0},\n", - ").fit(X, y)\n", - "clf.best_estimator_.program.get_model()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "clf_eval = ClassifierEvaluator()\n", - "clf_eval.scorer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#from pybrush import RegressorSelector\n", - "\n", - "from _brush import RegressorSelector\n", - "\n", - "# RegressorSelector().select([reg.best_estimator_], params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "brush", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}