Skip to content

Commit

Permalink
experimentalist working with pandas dataframes
Browse files Browse the repository at this point in the history
  • Loading branch information
blinodelka committed Aug 18, 2023
1 parent 236bf8b commit 1610edc
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 60 deletions.
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions .idea/autora-experimentalist-sampler-mixture_experimentalist.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 17 additions & 7 deletions docs/basic-usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'autora'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-5feb8d753802>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mautora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimentalist\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmixture\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmixture_sample\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'autora'"
]
}
],
"source": [
"from autora.experimentalist.sampler.mixture import mixture_sample"
]
Expand Down Expand Up @@ -432,7 +442,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -446,7 +456,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.7.4"
}
},
"nbformat": 4,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"autora-core",
"typing",
"numpy",
"pandas"
]

[project.optional-dependencies]
Expand Down
98 changes: 45 additions & 53 deletions src/autora/experimentalist/sampler/mixture/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,72 +22,64 @@ def adjust_distribution(p, temperature):



def mixture_sample(conditions: Union[pd.DataFrame, np.ndarray], temperature: float,
                   samplers: list, params: dict,
                   num_samples: Optional[int] = None) -> pd.DataFrame:
    """
    Sample experimental conditions according to a weighted mixture of sampler scores.

    Each sampler scores the whole condition pool; the scores are weighted, summed
    across samplers, turned into a probability distribution by
    ``adjust_distribution`` and used to draw conditions without replacement.

    Args:
        conditions: pool of experimental conditions to evaluate (pd.DataFrame, or
            anything ``pd.DataFrame(...)`` accepts, e.g. a 2-D np.ndarray)
        temperature: how random the selection of conditions is (cannot be 0;
            values in (0, 1) make the choices more deterministic). It is passed
            unchanged to ``adjust_distribution``.
        samplers: list of ``(function, name, weight)`` tuples. ``function`` is called
            as ``function(conditions=<pool DataFrame>, **kwargs)`` and must return a
            DataFrame with a ``"score"`` column. ``weight`` is either a single number
            or, for samplers that return both positive and negative scores, a
            sequence of two weights: the first one is applied to positive scores,
            the second one to negative scores.
        params: nested dictionary. Keys correspond to the sampler names (same as
            provided in ``samplers``), values are dictionaries of keyword arguments
            (argument name: its value). Samplers without an entry are called with
            no extra arguments.
        num_samples: number of experimental conditions to select; defaults to the
            size of the pool.

    Returns:
        Sampled pool of experimental conditions with a ``"score"`` column holding
        the (unadjusted) mixture score of each sampled condition.
    """
    condition_pool = pd.DataFrame(conditions)

    mixture_scores = np.zeros(len(condition_pool))
    # Collect the weighted scores of every sampler.
    for (function, name, weight) in samplers:
        # A sampler without configured parameters is simply called without extras;
        # errors raised by the sampler itself are no longer swallowed and retried.
        sampler_params = params.get(name, {}) if params else {}
        pd_ranking = function(conditions=condition_pool, **sampler_params)
        # Samplers may return rows ordered by score; restore index order so the
        # scores of different samplers line up.
        pd_ranking = pd_ranking.sort_index()
        scores = pd_ranking["score"]

        if isinstance(weight, (int, float, np.number)):
            # A single weight is used for both negative and positive scores.
            weighted_scores = scores * weight
        elif (scores > 0).any() and (scores < 0).any():
            # Both signs present: positive scores get the first weight, negative
            # scores the second (zeros stay zero either way).
            weighted_scores = (scores * weight[0]).mask(scores < 0, scores * weight[1])
        else:
            weighted_scores = scores * weight[0]

        # After the first sampler this is a Series carrying the ranking's index.
        mixture_scores = mixture_scores + weighted_scores

    # adjust mixture scores wrt temperature
    weighted_mixture_scores_adjusted = adjust_distribution(mixture_scores, temperature)

    if num_samples is None:
        num_samples = condition_pool.shape[0]

    condition_indices = np.random.choice(np.arange(len(condition_pool)), num_samples,
                                         p=weighted_mixture_scores_adjusted, replace=False)
    # Copy so that attaching the score column never writes into the caller's data.
    sampled_conditions = condition_pool.iloc[condition_indices].copy()
    # Assignment aligns on index labels, so each sampled row gets its own score.
    sampled_conditions["score"] = mixture_scores

    return sampled_conditions

0 comments on commit 1610edc

Please sign in to comment.