From 9e6b48c83519b4816fea403492295657b82c371a Mon Sep 17 00:00:00 2001 From: Younes Strittmatter Date: Wed, 9 Oct 2024 16:57:31 -0400 Subject: [PATCH 1/2] add customizable distance function --- docs/RnnSindy Synthetic.ipynb | 426 ++++++++++++++++++ .../model_disagreement/__init__.py | 197 +++++++- 2 files changed, 608 insertions(+), 15 deletions(-) create mode 100644 docs/RnnSindy Synthetic.ipynb diff --git a/docs/RnnSindy Synthetic.ipynb b/docs/RnnSindy Synthetic.ipynb new file mode 100644 index 0000000..7c814e1 --- /dev/null +++ b/docs/RnnSindy Synthetic.ipynb @@ -0,0 +1,426 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# [RnnSindy](https://github.com/AutoResearch/autora-theorist-rnn-ddm) Theorist and Synthetic Runner\n", + "\n", + "Install the packages" + ], + "id": "a1548dc246c139cb" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# !pip install autora-theorist-rnn-sindy-rl\n", + "# !pip install autora-experimentalist-bandit-random" + ], + "id": "277b08da6658c812", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "import packages", + "id": "fb90c6dd77fb5fc0" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "# Python Core\n", + "from dataclasses import dataclass, field\n", + "from typing import Optional, List\n", + "\n", + "# External Vendors\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.base import BaseEstimator\n", + "import torch\n", + "\n", + "# General AutoRA\n", + "from autora.variable import VariableCollection, Variable\n", + "from autora.state import StandardState, on_state, Delta\n", + "\n", + "# Experimentalists\n", + "from autora.experimentalist.bandit_random import bandit_random_pool\n", + "from autora.experimentalist.model_disagreement import model_disagreement_sampler_cd\n", + "\n", + "# Experiment Runner\n", + "from autora.experiment_runner.synthetic.psychology.q_learning import 
q_learning\n", + "\n", + "# Theorist\n", + "from autora.theorist.rnn_sindy_rl import RNNSindy" + ], + "id": "51e904e115df3070", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Setting constants", + "id": "27a05646b67aee90" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "TRIALS_PER_PARTICIPANTS = 100\n", + "SAMPLES_PER_CYCLE = 1\n", + "PARTICIPANTS_PER_CYCLE = 40\n", + "CYCLES = 4\n", + "INITIAL_REWARD_PROBABILITY_RANGE = [.2, .8]\n", + "SIGMA_RANGE = [.2, .2]\n", + "\n", + "EPOCHS = 10 # 100\n", + "\n", + "seed = 11" + ], + "id": "c4bcfa0bc950bd45", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Setting seeds for reproducible results", + "id": "6f4af08018b28348" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "np.random.seed(seed)\n", + "torch.manual_seed(seed)" + ], + "id": "e878e50aacf4a643", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Set up variables\n", + "\n", + "independent variable is \"reward-trajectory\": A 2 x n_trials Vector with entries between 0 and 1\n", + "dependent variable is \"choice-trajectory\": A 2 x n_trials Vector with boolean entries (one hot encoded)" + ], + "id": "f4dc01da21cd9715" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "variables = VariableCollection(\n", + " independent_variables=[Variable(name=\"reward-trajectory\")],\n", + " dependent_variables=[Variable(name=\"choice-trajectory\")]\n", + ")" + ], + "id": "c95fce71f88c8145", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## State\n", + "\n", + "We use a non-standard state by extending the standard state with an additional model " + ], + "id": "e3d2636ccf6c8e7d" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "@dataclass(frozen=True)\n", + 
"class RnnState(StandardState):\n", + " models_additional: List[BaseEstimator] = field(\n", + " default_factory=list,\n", + " metadata={\"delta\": \"extend\"},\n", + " )\n", + "\n", + "# initialize the state:\n", + "state = RnnState(variables=variables)\n" + ], + "id": "c05fefb9ce5aafa6", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Autora Components\n", + "### Experimentalists\n", + "#### Random Pool\n", + "\n", + "Create a pooler on state that creates a pool of conditions" + ], + "id": "e6bbcc2c6679331f" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "@on_state()\n", + "def pool_on_state(num_samples, n_trials=TRIALS_PER_PARTICIPANTS):\n", + " \"\"\"\n", + " This is creates `num_samples` randomized reward trajectories of length `n_trials`\n", + " \"\"\"\n", + " sigma = np.random.uniform(SIGMA_RANGE[0], SIGMA_RANGE[1])\n", + " trajectory_array = bandit_random_pool(\n", + " num_rewards=2,\n", + " sequence_length=n_trials,\n", + " initial_probabilities=[INITIAL_REWARD_PROBABILITY_RANGE, INITIAL_REWARD_PROBABILITY_RANGE],\n", + " sigmas=[sigma, sigma],\n", + " num_samples=num_samples\n", + " )\n", + " trajectory_df = pd.DataFrame({'reward-trajectory': trajectory_array})\n", + " return Delta(conditions=trajectory_df)" + ], + "id": "54d41adb350f78a2", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "state = pool_on_state(state, num_samples=3)\n", + "state.conditions" + ], + "id": "3b5c89b4aa10ebf7", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "", + "id": "b498ba974a2402c0" + }, + { + "metadata": {}, + "cell_type": "code", + "source": "", + "id": "402b150ff64ea52b", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "raw", + "source": "### Runner", + "id": "e18d8865d069d63d" + }, + { + "metadata": {}, + "cell_type": "code", 
+ "source": [ + "runner = q_learning()\n", + "\n", + "@on_state()\n", + "def runner_on_state(conditions):\n", + " choices, choice_probabilities = runner.run(conditions, return_choice_probabilities=True)\n", + " experiment_data = pd.DataFrame({\n", + " 'reward-trajectory': conditions['reward-trajectory'].tolist(),\n", + " 'choice-trajectory': choices,\n", + " 'choice-probability-trajectory': choice_probabilities\n", + " })\n", + " return Delta(experiment_data=experiment_data)" + ], + "id": "de5f4ea0bbee0e67", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "state = runner_on_state(state)\n", + "state.experiment_data" + ], + "id": "2af808ac4e6fb95a", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Theorists\n", + "Here we create two RNNSindy theorists\n" + ], + "id": "8ec89f6ab74f7e66" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "theorist = RNNSindy(2, epochs=EPOCHS, polynomial_degree=2)\n", + "theorist_additional = RNNSindy(2, epochs=EPOCHS, polynomial_degree=1)\n", + "\n", + "@on_state()\n", + "def theorist_on_state(experiment_data):\n", + " x = experiment_data['reward-trajectory']\n", + " y = experiment_data['choice-trajectory']\n", + " return Delta(models=[theorist.fit(x, y)])\n", + "\n", + "\n", + "@on_state()\n", + "def theorist_additional_on_state(experiment_data):\n", + " x = experiment_data['reward-trajectory']\n", + " y = experiment_data['choice-trajectory']\n", + " return Delta(models_additional=[theorist_additional.fit(x, y)])" + ], + "id": "2cd001307f51577", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "state = theorist_additional_on_state(state)\n", + "state = theorist_on_state(state)\n", + "\n", + "print(len(state.models_additional))\n", + "print(len(state.models))\n" + ], + "id": "afc777e5b8639a23", + "outputs": [], + "execution_count": 
null + }, + { + "metadata": {}, + "cell_type": "code", + "source": "state.models[-1].predict(state.conditions)", + "id": "3b115ca30370432e", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Here, we see the prediction for a model is a list of two-dimensional vectors:\n", + "array([[0.5, 0.5], [0.68..., 0.31...], ...]). \n", + "The standard model disagreement sampler only works on predictions that are single numbers. Therefore, we define our own distance function that works on two lists with the described format " + ], + "id": "a38335dabf6f7cd9" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "def custom_distance(prob_array_a, prob_array_b):\n", + " return np.mean([(prob_array_a[0] - prob_array_b[0])**2 + (prob_array_a[1] - prob_array_b[1])**2])\n", + "\n", + "# test \n", + "pred_1 = state.models[-1].predict(state.conditions)[0] # first prediction of model 1\n", + "pred_2 = state.models_additional[-1].predict(state.conditions)[0] # first prediction of model 2\n", + "\n", + "custom_distance(pred_1, pred_2)" + ], + "id": "5e2ca8f2ef366591", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "We can now use the `custom_distance` function in our sampler:", + "id": "14511a44b57f3934" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "@on_state()\n", + "def model_disagreement_on_state(\n", + " conditions, models, models_additional, num_samples):\n", + " conditions = model_disagreement_sampler_cd(\n", + " conditions=conditions['reward-trajectory'],\n", + " models=[models[-1], models_additional[-1]],\n", + " distance_fct=custom_distance,\n", + " num_samples=num_samples,\n", + " )\n", + " return Delta(conditions=conditions)" + ], + "id": "4a15928655b36eec", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Now, we can run a full loop with an RNN synthetic 
model", + "id": "890b6056c1d40059" + }, + { + "metadata": {}, + "cell_type": "code", + "source": "state = RnnState(variables=variables)", + "id": "6ab460285c1010c7", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "for c in range(1, CYCLES + 1):\n", + " \n", + " if len(state.models) > 0:\n", + " state = pool_on_state(state, num_samples=20)\n", + " state = model_disagreement_on_state(state, num_samples=SAMPLES_PER_CYCLE)\n", + " else:\n", + " state = pool_on_state(state, num_samples=SAMPLES_PER_CYCLE)\n", + " \n", + " state = runner_on_state(state)\n", + " \n", + " state = theorist_on_state(state)\n", + " state = theorist_additional_on_state(state)\n" + ], + "id": "79603ed1c4341d43", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": "out = state.models[-1].predict(state.conditions['reward-trajectory'])", + "id": "5df6e22f4f214281", + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/autora/experimentalist/model_disagreement/__init__.py b/src/autora/experimentalist/model_disagreement/__init__.py index 4b9ee78..2821649 100644 --- a/src/autora/experimentalist/model_disagreement/__init__.py +++ b/src/autora/experimentalist/model_disagreement/__init__.py @@ -1,6 +1,6 @@ import itertools import warnings -from typing import Iterable, List, Optional, Union +from typing import Callable, Iterable, List, Optional, Union import numpy as np import pandas as pd @@ -9,6 +9,133 @@ from autora.utils.deprecation import deprecated_alias +def 
score_sample_cd( + conditions: Union[pd.DataFrame, np.ndarray], + models: List, + distance_fct: Callable = lambda x, y: (x - y) ** 2, + aggregate_fct: Callable = lambda x: np.sum(x, axis=0), + num_samples: Optional[int] = None, +): + """ + An experimentalist that returns selected samples for independent variables + for which the models disagree the most in terms of their predictions. The disagreement + measurement is customizable. + + + Args: + conditions: pool of IV conditions to evaluate in terms of model disagreement + models: List of Scikit-learn (regression or classification) models to compare + distance_fct: distance function to use on the predictions + aggregate_fct: aggregate function to use on the pairwise distances of the models + num_samples: number of samples to select + + Returns: + Sampled pool with score + + + Examples: + We can use this without passing in a distance function (squared distance as default) ... + >>> class IdentityModel: + ... def predict(self, X): + ... return X + >>> class SquareModel: + ... def predict(self, X): + ... return X**2 + >>> id_model = IdentityModel() + >>> sq_model = SquareModel() + >>> _conditions = np.array([1, 2, 3]) + >>> id_model.predict(_conditions) + array([1, 2, 3]) + >>> sq_model.predict(_conditions) + array([1, 4, 9]) + >>> score_sample_cd(_conditions, [id_model, sq_model]) + 0 score + 2 3 36 + 1 2 4 + 0 1 0 + + ... we can use our own distance function (for example binary 1 and 0 for different or equal) + >>> score_sample_cd(_conditions, [id_model, sq_model], lambda x,y : x != y) + 0 score + 1 2 1 + 2 3 1 + 0 1 0 + + ... this is mostly useful if the predict function of the model doesn't return a + standard one-dimensional array: + >>> _conditions = np.array([[0, 1], [1, 0], [1, 1], [.5, .5]]) + >>> id_model.predict(_conditions) + array([[0. , 1. ], + [1. , 0. ], + [1. , 1. ], + [0.5, 0.5]]) + >>> sq_model.predict(_conditions) + array([[0. , 1. ], + [1. , 0. ], + [1. , 1. 
], + [0.25, 0.25]]) + + >>> def distance(x, y): + ... return np.sqrt((x[0] - y[0])**2 + (x[1] - y[1])**2) + + >>> score_sample_cd(_conditions, [id_model, sq_model], distance) + 0 1 score + 3 0.5 0.5 0.353553 + 0 0.0 1.0 0.000000 + 1 1.0 0.0 0.000000 + 2 1.0 1.0 0.000000 + """ + disagreements = [] + for model_a, model_b in itertools.combinations(models, 2): + if hasattr(model_a, "predict_proba") and hasattr(model_b, "predict_proba"): + model_a_predict = model_a.predict_proba + model_b_predict = model_b.predict_proba + else: + model_a_predict = model_a.predict + model_b_predict = model_b.predict + y_A = model_a_predict(conditions) + y_B = model_b_predict(conditions) + disagreements.append([distance_fct(y_a, y_b) for y_a, y_b in zip(y_A, y_B)]) + score = aggregate_fct(disagreements) + + conditions_new = pd.DataFrame(conditions) + conditions_new["score"] = np.array(score).tolist() + conditions_new = conditions_new.sort_values(by="score", ascending=False) + if num_samples is None: + return conditions_new + else: + return conditions_new.head(num_samples) + + +def sample_cd( + conditions: Union[pd.DataFrame, np.ndarray], + models: List, + distance_fct: Callable = lambda x, y: (x - y) ** 2, + aggregate_fct: Callable = lambda x: np.sum(x, axis=0), + num_samples: Optional[int] = 1, +): + """ + An experimentalist that returns selected samples for independent variables + for which the models disagree the most in terms of their predictions. The disagreement + measurement is customizable. 
+ + Args: + conditions: pool of IV conditions to evaluate in terms of model disagreement + models: List of Scikit-learn (regression or classification) models to compare + distance_fct: distance function to use on the predictions + aggregate_fct: aggregate function to use on the pairwise distances of the models + num_samples: number of samples to select + + Returns: Sampled pool + """ + + selected_conditions = score_sample_cd( + conditions, models, distance_fct, aggregate_fct, num_samples + ) + selected_conditions.drop(columns=["score"], inplace=True) + return selected_conditions + + def score_sample( conditions: Union[pd.DataFrame, np.ndarray], models: List, @@ -50,9 +177,34 @@ def score_sample( 2 1 -0.197345 0 -1 -0.943091 1 0 -0.943091 + + Conditions and observations might be dataframes with single values: + >>> conditions_s = pd.DataFrame({'x_1': [1, 2, 3], 'x_2': [2, 3, 4]}) + >>> class ModelSingle_a(): + ... def predict(self, conditions): + ... return conditions['x_1'] + conditions['x_2'] + + >>> class ModelSingle_b(): + ... def predict(self, conditions): + ... return 2 * conditions['x_1'] + .5 * conditions['x_2'] + + + But they might also have vectors as entries: + >>> conditions_v = pd.DataFrame({'x_1': + ... [np.array([1, 2]), np.array([3, 4])], 'x_2': [np.array([5, 6]), np.array([7, 8])]}) + >>> class ModelVector_a(): + ... def predict(self, conditions): + ... 
return conditions['x_1'] + conditions['x_2'] + + + """ - if isinstance(conditions, Iterable) and not isinstance(conditions, pd.DataFrame) and not isinstance(conditions, list): + if ( + isinstance(conditions, Iterable) + and not isinstance(conditions, pd.DataFrame) + and not isinstance(conditions, list) + ): conditions = np.array(list(conditions)) condition_pool_copy = conditions.copy() @@ -71,12 +223,11 @@ def score_sample( for model_a, model_b in itertools.combinations(models, 2): # determine the prediction method + predict_proba = False if hasattr(model_a, "predict_proba") and hasattr(model_b, "predict_proba"): - model_a_predict = model_a.predict_proba - model_b_predict = model_b.predict_proba + predict_proba = True elif hasattr(model_a, "predict") and hasattr(model_b, "predict"): - model_a_predict = model_a.predict - model_b_predict = model_b.predict + predict_proba = False else: raise AttributeError( "Models must both have `predict_proba` or `predict` method." @@ -86,13 +237,19 @@ def score_sample( disagreement_part_list = list() for element in X_predict: if not isinstance(element, np.ndarray): - raise ValueError("X_predict must be a list of numpy arrays if it is a list.") + raise ValueError( + "X_predict must be a list of numpy arrays if it is a list." 
+ ) else: - disagreement_part = compute_disagreement(model_a_predict, model_b_predict, element) + disagreement_part = compute_disagreement( + model_a, model_b, element, predict_proba + ) disagreement_part_list.append(disagreement_part) disagreement = np.sum(disagreement_part_list, axis=1) else: - disagreement = compute_disagreement(model_a_predict, model_b_predict, X_predict) + disagreement = compute_disagreement( + model_a, model_b, X_predict, predict_proba + ) model_disagreement.append(disagreement) @@ -104,7 +261,7 @@ def score_sample( if isinstance(condition_pool_copy, pd.DataFrame): conditions = pd.DataFrame(conditions, columns=condition_pool_copy.columns) elif isinstance(condition_pool_copy, list): - conditions = pd.DataFrame({'X': conditions}) + conditions = pd.DataFrame({"X": conditions}) else: conditions = pd.DataFrame(conditions) @@ -121,10 +278,15 @@ def score_sample( else: return conditions.head(num_samples) -def compute_disagreement(model_a_predict, model_b_predict, X_predict): + +def compute_disagreement(model_a, model_b, X_predict, predict_proba): # get predictions from both models - y_a = model_a_predict(X_predict) - y_b = model_b_predict(X_predict) + if predict_proba: + y_a = model_a.predict_proba(X_predict) + y_b = model_b.predict_proba(X_predict) + else: + y_a = model_a.predict(X_predict) + y_b = model_b.predict(X_predict) assert y_a.shape == y_b.shape, "Models must have same output shape." 
@@ -135,12 +297,15 @@ def compute_disagreement(model_a_predict, model_b_predict, X_predict): disagreement = np.mean((y_a - y_b) ** 2, axis=1) if np.isinf(disagreement).any() or np.isnan(disagreement).any(): - warnings.warn('Found nan or inf values in model predictions, ' - 'setting disagreement there to 0') + warnings.warn( + "Found nan or inf values in model predictions, " + "setting disagreement there to 0" + ) disagreement[np.isinf(disagreement)] = 0 disagreement = np.nan_to_num(disagreement) return disagreement + def sample( conditions: Union[pd.DataFrame, np.ndarray], models: List, num_samples: int = 1 ): @@ -167,3 +332,5 @@ def sample( model_disagreement_sampler = deprecated_alias( model_disagreement_sample, "model_disagreement_sampler" ) +model_disagreement_sampler_cd = sample_cd +model_disagreement_score_sample_cd = score_sample_cd From 214a84da804d0bce22e1ff0943b9d61c0b1eb830 Mon Sep 17 00:00:00 2001 From: Younes Strittmatter Date: Thu, 10 Oct 2024 14:58:55 -0400 Subject: [PATCH 2/2] rename cd to custom distance --- docs/RnnSindy Synthetic.ipynb | 4 ++-- .../model_disagreement/__init__.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/RnnSindy Synthetic.ipynb b/docs/RnnSindy Synthetic.ipynb index 7c814e1..6922715 100644 --- a/docs/RnnSindy Synthetic.ipynb +++ b/docs/RnnSindy Synthetic.ipynb @@ -47,7 +47,7 @@ "\n", "# Experimentalists\n", "from autora.experimentalist.bandit_random import bandit_random_pool\n", - "from autora.experimentalist.model_disagreement import model_disagreement_sampler_cd\n", + "from autora.experimentalist.model_disagreement import model_disagreement_sampler_custom_distance\n", "\n", "# Experiment Runner\n", "from autora.experiment_runner.synthetic.psychology.q_learning import q_learning\n", @@ -346,7 +346,7 @@ "@on_state()\n", "def model_disagreement_on_state(\n", " conditions, models, models_additional, num_samples):\n", - " conditions = model_disagreement_sampler_cd(\n", + " conditions = 
model_disagreement_sampler_custom_distance(\n", " conditions=conditions['reward-trajectory'],\n", " models=[models[-1], models_additional[-1]],\n", " distance_fct=custom_distance,\n", diff --git a/src/autora/experimentalist/model_disagreement/__init__.py b/src/autora/experimentalist/model_disagreement/__init__.py index 2821649..795b230 100644 --- a/src/autora/experimentalist/model_disagreement/__init__.py +++ b/src/autora/experimentalist/model_disagreement/__init__.py @@ -9,7 +9,7 @@ from autora.utils.deprecation import deprecated_alias -def score_sample_cd( +def score_sample_custom_distance( conditions: Union[pd.DataFrame, np.ndarray], models: List, distance_fct: Callable = lambda x, y: (x - y) ** 2, @@ -48,14 +48,14 @@ def score_sample_cd( array([1, 2, 3]) >>> sq_model.predict(_conditions) array([1, 4, 9]) - >>> score_sample_cd(_conditions, [id_model, sq_model]) + >>> score_sample_custom_distance(_conditions, [id_model, sq_model]) 0 score 2 3 36 1 2 4 0 1 0 ... we can use our own distance function (for example binary 1 and 0 for different or equal) - >>> score_sample_cd(_conditions, [id_model, sq_model], lambda x,y : x != y) + >>> score_sample_custom_distance(_conditions, [id_model, sq_model], lambda x,y : x != y) 0 score 1 2 1 2 3 1 @@ -78,7 +78,7 @@ def score_sample_cd( >>> def distance(x, y): ... 
return np.sqrt((x[0] - y[0])**2 + (x[1] - y[1])**2) - >>> score_sample_cd(_conditions, [id_model, sq_model], distance) + >>> score_sample_custom_distance(_conditions, [id_model, sq_model], distance) 0 1 score 3 0.5 0.5 0.353553 0 0.0 1.0 0.000000 @@ -107,7 +107,7 @@ def score_sample_cd( return conditions_new.head(num_samples) -def sample_cd( +def sample_custom_distance( conditions: Union[pd.DataFrame, np.ndarray], models: List, distance_fct: Callable = lambda x, y: (x - y) ** 2, @@ -129,7 +129,7 @@ def sample_cd( Returns: Sampled pool """ - selected_conditions = score_sample_cd( + selected_conditions = score_sample_custom_distance( conditions, models, distance_fct, aggregate_fct, num_samples ) selected_conditions.drop(columns=["score"], inplace=True) @@ -332,5 +332,5 @@ def sample( model_disagreement_sampler = deprecated_alias( model_disagreement_sample, "model_disagreement_sampler" ) -model_disagreement_sampler_cd = sample_cd -model_disagreement_score_sample_cd = score_sample_cd +model_disagreement_sampler_custom_distance = sample_custom_distance +model_disagreement_score_sample_custom_distance = score_sample_custom_distance