Skip to content

Commit

Permalink
Add unshuffled_sample
Browse files Browse the repository at this point in the history
  • Loading branch information
evhub committed Apr 30, 2022
1 parent 3031d18 commit 06c81c1
Show file tree
Hide file tree
Showing 11 changed files with 822 additions and 740 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ Some examples of BBopt in action:
- [`choice`](#choice)
- [`randbool`](#randbool)
- [`sample`](#sample)
- [`shuffled`](#shuffled)
- [`shuffle`](#shuffle)
- [`random`](#random)
- [`uniform`](#uniform)
- [`loguniform`](#loguniform)
Expand Down Expand Up @@ -426,15 +426,17 @@ BlackBoxOptimizer.**sample**(_name_, _population_, _k_, **_kwargs_)

Create a new parameter modeled by [`random.sample(population, k)`](https://docs.python.org/3/library/random.html#random.sample), which chooses _k_ elements from _population_.

By default, the elements of the result are in random order. If you do not need a random ordering and are happy to keep the ordering of _population_, `BlackBoxOptimizer.unshuffled_sample` is recommended instead.

_Backends which support **sample**: `scikit-optimize`, `hyperopt`, `bayes-skopt`, `pySOT`, `random`._

#### `shuffled`
#### `shuffle`

BlackBoxOptimizer.**shuffled**(_name_, _population_, **_kwargs_)
BlackBoxOptimizer.**shuffle**(_name_, _population_, **_kwargs_)

Create a new parameter modeled by [`random.shuffle(population)`](https://docs.python.org/3/library/random.html#random.shuffle) except that it returns the shuffled list instead of shuffling it in place. An in-place version as `BlackBoxOptimizer.shuffle` is also supported.
Create a new parameter modeled by [`random.shuffle(population)`](https://docs.python.org/3/library/random.html#random.shuffle). A version that returns the shuffled list instead of shuffling it in place is also supported as `BlackBoxOptimizer.shuffled`.

_Backends which support **shuffled**: `scikit-optimize`, `hyperopt`, `bayes-skopt`, `pySOT`, `random`._
_Backends which support **shuffle**: `scikit-optimize`, `hyperopt`, `bayes-skopt`, `pySOT`, `random`._

#### `random`

Expand Down
64 changes: 36 additions & 28 deletions bbopt-source/backends/openai.coco
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,20 @@ The OpenAI backend. Uses large language models for black box optimization.
"""

import os
import random
from ast import literal_eval

import openai

from bbopt import constants
from bbopt.util import printerr, stdev
from bbopt.util import printerr, stdev, mean
from bbopt.params import param_processor
from bbopt.backends.util import StandardBackend
from bbopt.backends.util import StandardBackend, sorted_params


# Utilities:

def get_prompt(params, data_points, losses, hoped_for_loss) =
"""Get the OpenAI API prompt to use."""
"""Get the base OpenAI API prompt."""
'''# black box function to be minimized
def f({func_params}) -> float:
"""
Expand Down Expand Up @@ -49,7 +48,7 @@ def f({func_params}) -> float:
" {name}: in {func}({args})".format(
name=name,
func="range" if func == "randrange" else func,
args=", ".join(args |> map$(repr)),
args=", ".join((args[:2] if func == "randrange" and args[-1] == 1 else args) |> map$(repr)),
)
for name, (func, args, _) in params.items()
),
Expand All @@ -58,7 +57,7 @@ def f({func_params}) -> float:
"{name}: {func}({args})".format(
name=name,
func="range" if func == "randrange" else func,
args=", ".join(args |> map$(repr)),
args=", ".join((args[:2] if func == "randrange" and args[-1] == 1 else args) |> map$(repr)),
)
for name, (func, args, _) in params.items()
),
Expand All @@ -69,7 +68,7 @@ def f({func_params}) -> float:
)
for point, loss in zip(data_points, losses)
),
hoped_for_loss=hoped_for_loss,
hoped_for_loss=int(hoped_for_loss) if int(hoped_for_loss) == hoped_for_loss else hoped_for_loss,
)


Expand All @@ -92,9 +91,9 @@ def to_python(completion, params):
return completion


def get_loss_eps(min_loss):
"""Get a reasonably-sized expected loss improvement."""
a, b = float(abs(min_loss)).as_integer_ratio()
def get_loss_eps(typical_loss):
"""Get a reasonably-sized hoped for loss improvement."""
a, b = float(abs(typical_loss)).as_integer_ratio()
little_a = int("1" * len(str(a)))
return little_a / b

Expand All @@ -113,8 +112,8 @@ class OpenAIBackend(StandardBackend):

max_prompt_len = float("inf")

def setup_backend(self, params, engine=None, temperature=None, max_retries=None, api_key=None, debug=True):
self.params = params
def setup_backend(self, params, engine=None, temperature=None, max_retries=None, api_key=None, debug=False):
self.params = sorted_params(params)

self.engine = engine ?? constants.openai_default_engine
self.temp = temperature ?? constants.openai_default_temp
Expand All @@ -127,21 +126,29 @@ class OpenAIBackend(StandardBackend):
self.cached_values = ()

def tell_data(self, new_data, new_losses):
self.data_points += new_data
self.losses += new_losses

def get_prompt(self) = (
get_prompt(
self.params,
self.data_points,
self.losses,
hoped_for_loss=min_loss - random.uniform(0, stdev(self.losses) + get_loss_eps(min_loss)),
for point, loss in zip(new_data, new_losses):
# avoid (point, loss) duplicates since they cause GPT to repeat itself
try:
existing_index = self.data_points.index(point)
except ValueError:
existing_index = None
if existing_index is None or self.losses[existing_index] != loss:
self.data_points.append(point)
self.losses.append(loss)

def get_prompt(self) =
"""Get the OpenAI API prompt to use."""
(
get_prompt(
self.params,
self.data_points,
self.losses,
hoped_for_loss=min(self.losses) - stdev(self.losses) - get_loss_eps(mean(self.losses)),
)
+ ", ".join(self.cached_values |> map$(repr))
# only "," not ", " since the prompt shouldn't end in a space
+ ("," if self.cached_values else "")
)
+ ", ".join(self.cached_values |> map$(repr))
# only "," not ", " since the prompt shouldn't end in a space
+ ("," if self.cached_values else "")
) where:
min_loss = min(self.losses)

def get_completion_len(self) =
"""Get the maximum number of characters in a completion."""
Expand Down Expand Up @@ -218,7 +225,7 @@ class OpenAIBackend(StandardBackend):
if values in self.data_points:
if self.debug:
print(f"ERROR: got duplicate point: {legal_values!r}")
return self.retry_get_values(temp=self.temp + (constants.openai_max_temp - self.temp) / 2)
return self.retry_get_values(temp=self.temp + (constants.openai_max_temp - self.temp) / 2, cached_values=())
return values

def retry_get_values(self, temp=None, cached_values=None):
Expand All @@ -236,7 +243,7 @@ class OpenAIBackend(StandardBackend):
old_temp, self.temp = self.temp, temp
if cached_values is not None:
if self.debug:
print(f"CACHING values: {cached_values[:len(self.cached_values)]} + {cached_values[len(self.cached_values):]}")
print(f"CACHING values: {self.cached_values} -> {cached_values}")
self.cached_values = cached_values
try:
return self.get_next_values()
Expand All @@ -252,3 +259,4 @@ class OpenAIBackend(StandardBackend):

OpenAIBackend.register()
OpenAIBackend.register_alg("openai")
OpenAIBackend.register_alg("openai_debug", debug=True)
6 changes: 6 additions & 0 deletions bbopt-source/backends/util.coco
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Utilities for use in BBopt backends.

import random

from collections import OrderedDict
from collections.abc import Iterable

from bbopt import constants
Expand All @@ -24,6 +25,11 @@ from bbopt.registry import (

# Utilities:

def sorted_params(params):
    """Build an OrderedDict from params, with its entries in the order produced by sorted_items."""
    ordered_pairs = sorted_items(params)
    return OrderedDict(ordered_pairs)


@convert_match_errors
match def _init_backend(backend_cls, examples, params, *args, _attempt_to_update_backend=None, _on_new_backend=None, **options):
"""Create a backend object with the given data (backend can be backend name or class)."""
Expand Down
4 changes: 2 additions & 2 deletions bbopt-source/benchmarking.coco
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ OPT_FUNCS.append(numpy_func)


def sample_func(bb):
xs = bb.sample("xs", range(10), 5, guess=[3,4,5,6,7])
xs = bb.unshuffled_sample("xs", range(10), 5, guess=[3,4,5,6,7])
y = bb.choice("y", [1, 10, 100], guess=10)
loss = abs(sum(xs) - y)
bb.minimize(loss)
Expand Down Expand Up @@ -115,5 +115,5 @@ if __name__ == "__main__":
"tpe_or_gp",
"tree_structured_parzen_estimator",
"safe_gaussian_process",
("openai", "safe_gaussian_process"),
("openai_debug", "safe_gaussian_process"),
))
2 changes: 1 addition & 1 deletion bbopt-source/constants.coco
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Constants for use across all of BBopt.

# Installation constants:
name = "bbopt"
version = "1.4.1"
version = "1.4.2"
description = "The easiest hyperparameter optimization you'll ever do."
long_description = """
See BBopt's GitHub_ for more information.
Expand Down
34 changes: 31 additions & 3 deletions bbopt-source/optimizer.coco
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,8 @@ class BlackBoxOptimizer:
return bool(self.choice(name, [False, True], **kwargs))

def sample(self, name, population, k, **kwargs):
"""Create a new parameter with the given name modeled by random.sample(population, k)."""
"""Create a new parameter with the given name modeled by random.sample(population, k).
Ordering of elements in the result is random."""
if not isinstance(name, Str):
raise TypeError(f"name must be string, not {name}")
sampling_population = [x for x in population]
Expand All @@ -559,13 +560,40 @@ class BlackBoxOptimizer:
sample.append(sampling_population.pop(ind))
return sample

def unshuffled_sample(self, name, population, k, **kwargs):
    """Create a new parameter with the given name modeled by random.sample(population, k),
    except that the chosen elements keep the order they have in population.

    Each element that has to be decided on gets its own uniform(0, 1) parameter
    named "{name}[{index}]"; the element is kept when that draw is at least
    1 - (elements still needed) / (elements still remaining).

    Raises TypeError if name is not a string."""
    if not isinstance(name, Str):
        raise TypeError(f"name must be string, not {name}")

    items = tuple(population)
    total = len(items)
    chosen = []

    for index, element in enumerate(items):
        still_needed = k - len(chosen)
        remaining = total - index

        # already have k elements, so nothing more to decide
        if still_needed == 0:
            break

        # exactly as many elements left as are still needed, so take them all
        if remaining == still_needed:
            chosen.extend(items[index:])
            break

        # rewrite the keyword arguments so that each processed value becomes
        # a 1/0 indicator of whether this element is contained in it
        indicator_kwargs = param_processor.modify_kwargs(
            lambda val: 1 if element in val else 0,
            kwargs,
        )
        if "placeholder_when_missing" not in indicator_kwargs:
            indicator_kwargs["placeholder_when_missing"] = 0

        draw = self.uniform(f"{name}[{index}]", 0, 1, **indicator_kwargs)
        if draw >= 1 - still_needed / remaining:
            chosen.append(element)

    return chosen

def samples_with_replacement(self, name, population, **kwargs):
"""An infinite iterator of samples with replacement from population."""
if not isinstance(name, Str):
raise TypeError(f"name must be string, not {name}")
sampling_population = tuple(population)
population = tuple(population)
for i in count():
yield self.choice(f"{name}[{i}]", sampling_population, **kwargs)
yield self.choice(f"{name}[{i}]", population, **kwargs)

def shuffled(self, name, population, **kwargs):
"""Create a new parameter with the given name modeled by
Expand Down
Loading

0 comments on commit 06c81c1

Please sign in to comment.