diff --git a/docs/Basic Usage.ipynb b/docs/Basic Usage.ipynb
index 779064c..46a1947 100644
--- a/docs/Basic Usage.ipynb
+++ b/docs/Basic Usage.ipynb
@@ -7,18 +7,24 @@
"collapsed": false
},
"source": [
- "# Basic Usage\n"
+ "# Basic Usage\n",
+    "Here, we show how to randomly sample reward sequences that can be used in a bandit task.\n",
+    "A bandit task presents the participant with multiple options (arms), each of which has a reward probability.\n",
+    "Below, we create a sequence of reward probabilities and a corresponding sequence of rewards for a 2-arm bandit task."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
- "from autora.experimentalist.bandit_random import Example"
+ "import pandas as pd\n",
+ "\n",
+ "from autora.experimentalist.bandit_random import bandit_random_pool_proba, \\\n",
+ " bandit_random_pool_from_proba, bandit_random_pool"
]
},
{
@@ -26,26 +32,317 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Include inline mathematics like this: $4 < 5$\n",
+ "This package provides functions to randomly sample a list of\n",
+ "- probability sequences\n",
+ "- reward sequences\n",
"\n",
- "Include block mathematics like this (don't forget the empty lines above and below the block):\n",
+    "## Pool_proba\n",
+    "First, we can use the default values to create a sequence in which the reward probability is .5 for each arm.\n",
+    "We only need to pass in the number of arms and the length of the sequence we want to generate:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]]"
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "default_probability_sequences = bandit_random_pool_proba(num_probabilities=2, sequence_length=4)\n",
+ "default_probability_sequences"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We also can set initial values:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.1, 0.9]]]"
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "constant_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[.1, .9])\n",
+ "constant_probability_sequence"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We can do the same for drift rates:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0.1, 0.9],\n [0.2, 0.8],\n [0.30000000000000004, 0.7000000000000001],\n [0.4, 0.6000000000000001]]]"
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "changing_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[0.1, .9], drift_rates=[.1, -.1])\n",
+ "changing_probability_sequence"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Instead of having a fixed initial value and drift rate, we can also sample them from a range:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0.015097359670462374, 0.975340226214809],\n [0.08820028316160722, 0.954897827401469],\n [0.16130320665275205, 0.934455428588129],\n [0.23440613014389688, 0.914013029774789]]]"
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "random_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[[0.,.1], [.8, 1.]], drift_rates=[[0,.1],[-.1,0]])\n",
+ "random_probability_sequence"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+    "We pass in the number of sequences to generate as `num_samples`:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0.05277517161024149, 0.9157516813620797],\n [0.06601093773147637, 0.8512254219581823],\n [0.07924670385271125, 0.786699162554285],\n [0.09248246997394613, 0.7221729031503876]],\n [[0.03308990634055732, 0.8608567922155729],\n [0.05423027527564794, 0.8348824396142384],\n [0.07537064421073857, 0.8089080870129038],\n [0.09651101314582919, 0.7829337344115693]],\n [[0.05228116419768012, 0.9571430988304549],\n [0.10872837330001228, 0.922489870191641],\n [0.16517558240234442, 0.887836641552827],\n [0.22162279150467656, 0.8531834129140131]],\n [[0.017985053533171515, 0.9696895439983294],\n [0.07759069582130446, 0.9603867806583171],\n [0.13719633810943738, 0.9510840173183047],\n [0.1968019803975703, 0.9417812539782924]]]"
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "random_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[[0.,.1], [.8, 1.]], drift_rates=[[0,.1],[-.1,0]], num_samples=4)\n",
+ "random_probability_sequence"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We can use the created probability sequences to create reward sequences:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0, 1], [0, 1], [0, 1], [0, 1]],\n [[0, 1], [0, 1], [0, 1], [0, 0]],\n [[0, 1], [0, 1], [0, 1], [0, 0]],\n [[0, 1], [0, 0], [1, 1], [0, 1]]]"
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "reward_sequences = bandit_random_pool_from_proba(random_probability_sequence)\n",
+ "reward_sequences"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+    "Alternatively, we can use `bandit_random_pool` with the same arguments as `bandit_random_pool_proba` to generate reward sequences directly:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "[[[0, 0], [0, 1], [0, 1], [1, 1]],\n [[0, 1], [0, 1], [0, 1], [1, 1]],\n [[0, 1], [0, 1], [0, 1], [1, 1]],\n [[0, 1], [0, 0], [0, 1], [0, 1]]]"
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "reward_sequences = bandit_random_pool(num_rewards=2, sequence_length=4, initial_probabilities=[[0.,.1], [.8, 1.]], drift_rates=[[0,.1],[-.1,0]], num_samples=4)\n",
+ "reward_sequences"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Use in State\n",
+ "\n",
+    "!!!Warning If you want to use this in the AutoRA `StandardState`, you need to convert the return value into a `pd.DataFrame`:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "outputs": [
+ {
+ "data": {
+      "text/plain": " reward-trajectory\n0 [[0, 1], [0, 1], [1, 0], [0, 0], [1, 1], [0, 1..."
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First, we define the variables:\n",
+ "from autora.variable import VariableCollection, Variable\n",
"\n",
- "$$ \n",
- "y + 1 = 4 \n",
- "$$\n",
+ "variables = VariableCollection(\n",
+ " independent_variables=[Variable(name=\"reward-trajectory\")],\n",
+ " dependent_variables=[Variable(name=\"choice-trajectory\")]\n",
+ ")\n",
"\n",
- "... or this:\n",
+ "# With these variables, we initialize a StandardState\n",
+ "from autora.state import StandardState\n",
"\n",
- "\\begin{align}\n",
- " p(v_i=1|\\mathbf{h}) & = \\sigma\\left(\\sum_j w_{ij}h_j + b_i\\right) \\\\\n",
- " p(h_j=1|\\mathbf{v}) & = \\sigma\\left(\\sum_i w_{ij}v_i + c_j\\right)\n",
- "\\end{align}"
- ]
+ "state = StandardState()\n",
+ "\n",
+    "# Here, we want to create random reward sequences directly as an on-state function\n",
+ "from autora.state import Delta, on_state\n",
+ "\n",
+ "\n",
+ "@on_state()\n",
+ "def pool_on_state(num_rewards=2, sequence_length=10, num_samples=1, initial_probabilities=None,\n",
+ " drift_rates=None):\n",
+ " sequence_as_list = bandit_random_pool(\n",
+ " num_rewards=num_rewards, sequence_length=sequence_length, num_samples=num_samples,\n",
+ " initial_probabilities=initial_probabilities, drift_rates=drift_rates)\n",
+    "    # the state's conditions field expects a pandas DataFrame\n",
+ " sequence_as_df = pd.DataFrame({\"reward-trajectory\": sequence_as_list})\n",
+ " return Delta(conditions=sequence_as_df)\n",
+ "\n",
+ "\n",
+    "# Now we can use pool_on_state on the state to create conditions:\n",
+ "state = pool_on_state(state)\n",
+ "state.conditions"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
},
{
"cell_type": "markdown",
- "metadata": {},
- "source": []
+ "source": [
+    "We can pass keyword arguments to `pool_on_state` as well. Here, we create 3 sequences with initial values between 0 and .3 for the first arm and between .7 and 1 for the second arm. The drift rates are sampled between 0 and .1 and between -.1 and 0, respectively:"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "outputs": [
+ {
+ "data": {
+      "text/plain": " reward-trajectory\n0 [[1, 1], [1, 1], [0, 0], [1, 0], [1, 0], [0, 0...\n1 [[1, 0], [0, 1], [1, 1], [1, 1], [0, 0], [0, 0...\n2 [[0, 1], [0, 0], [0, 0], [1, 0], [0, 1], [0, 1..."
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "state = pool_on_state(state, num_samples=3, initial_probabilities=[[0, .3], [.7, 1.]], drift_rates=[[0, .1], [-.1,0]])\n",
+ "state.conditions"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+  }
],
"metadata": {
diff --git a/docs/index.md b/docs/index.md
index d662cc7..cbe76f9 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,24 +1,50 @@
# bandit-random
-Explain what your contribution is doing here
+This package provides functions to randomly sample a list of
-You can include inline mathematics by wrapping LaTeX formatted math in `$` symbols: `$(3 < 4)$` will render as: $(3 < 4)$
+- probability sequences
+- reward sequences
-To include block mathematics wrap the expression with double `$$`:
+## Probability sequence
+
+A probability sequence is a sequence of vectors whose elements lie between 0 and 1.
+
+Example of a probability sequence that can be used in a 3-arm bandit task:
+
+```
+[[0, 1., .3], [.6, .2, .8], ...]
+```
+
+## Reward sequence
+
+A reward sequence uses the probabilities to generate a sequence whose elements are either 0 or 1.
+
+Example of a reward sequence that can be used in a 3-arm bandit task:
```
-$$
-E(\mathbf{v}, \mathbf{h}) = -\sum_{i,j}w_{ij}v_i h_j - \sum_i b_i v_i - \sum_j c_j h_j
-$$
+[[0, 1, 0], [1, 0, 1], ...]
```
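+
+Each element of a reward sequence is a Bernoulli draw from the corresponding probability. A
+minimal sketch of that sampling step (plain `numpy`, mirroring the sampling helper in this
+package; the variable names here are illustrative):
+
+```
+import numpy as np
+
+rng = np.random.default_rng(42)
+probability_vector = [0.1, 0.9, 0.5]
+# one reward per arm: 1 with probability p, 0 with probability 1 - p
+rewards = [int(rng.choice([0, 1], p=[1 - p, p])) for p in probability_vector]
+```
+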
-Will render as:
+The probability sequence can be created by specifying an initial probability and a drift rate for
+each element.
-$$
-E(\mathbf{v}, \mathbf{h}) = -\sum_{i,j}w_{ij}v_i h_j - \sum_i b_i v_i - \sum_j c_j h_j
-$$
+For example:
+
+```
+initial_proba = [0, .5, 1.]
+drift = [.1, 0., -.1]
+...
+sequence = [[0, .5, 1.], [.1, .5, .9], [.2, .5, .8], [.3, .5, .7]...]
+```
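+
+A sketch of generating such a sequence with this package (the call mirrors the notebook example in
+`docs/Basic Usage.ipynb`; the variable name is illustrative):
+
+```
+from autora.experimentalist.bandit_random import bandit_random_pool_proba
+
+# one probability sequence of length 4 for a 3-arm bandit, starting at
+# [0, .5, 1.] and drifting by [.1, 0., -.1] on each step
+sequence = bandit_random_pool_proba(
+    num_probabilities=3,
+    sequence_length=4,
+    initial_probabilities=[0, .5, 1.],
+    drift_rates=[.1, 0., -.1],
+)
+```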
+
+Instead of fixed values for the initial probability and the drift rate, we can also use ranges. In
+that case, the values are randomly sampled from the given ranges.
+
+```
+initial_proba = [[0, .3], [.4, .7], [.8, 1.]]
+drift = [[0, .1], [.1, .2], [.2, .3]]
+```
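+
+The package functions accept these ranges directly, and `bandit_random_pool` takes the same
+arguments but returns reward sequences (a sketch based on the notebook examples; `num_samples`
+controls how many sequences are drawn):
+
+```
+from autora.experimentalist.bandit_random import bandit_random_pool
+
+# three reward sequences for a 3-arm bandit; initial probabilities and drift
+# rates are sampled from the ranges above for every generated sequence
+reward_sequences = bandit_random_pool(
+    num_rewards=3,
+    sequence_length=4,
+    initial_probabilities=[[0, .3], [.4, .7], [.8, 1.]],
+    drift_rates=[[0, .1], [.1, .2], [.2, .3]],
+    num_samples=3,
+)
+```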
-See more information on writing mathematical expressions [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/writing-mathematical-expressions).
diff --git a/src/autora/experimentalist/bandit_random/__init__.py b/src/autora/experimentalist/bandit_random/__init__.py
index 6e29a5d..36e0f82 100644
--- a/src/autora/experimentalist/bandit_random/__init__.py
+++ b/src/autora/experimentalist/bandit_random/__init__.py
@@ -1,23 +1,24 @@
"""
-Example Experimentalist
+Experimentalist that returns either
+probability sequences (sequences of vectors with elements between 0 and 1)
+or
+reward sequences (sequences of vectors with binary elements).
"""
-import random
import numpy as np
-import pandas as pd
-from typing import Union, List, Iterable, Optional, Tuple
+from typing import Union, List, Optional
from collections.abc import Iterable
def pool_proba(
num_probabilities: int,
sequence_length: int,
- initial_probabilities: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None,
- drift_rates: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None,
+ initial_probabilities: Optional[Iterable[Union[float, Iterable]]] = None,
+ drift_rates: Optional[Iterable[Union[float, Iterable]]] = None,
num_samples: int = 1,
random_state: Optional[int] = None,
-) -> List[List[float]]:
+) -> List[List[List[float]]]:
"""
Returns a list of probability sequences.
A probability sequence is a sequence of vectors of dimension `num_probabilities`. Each entry
@@ -110,9 +111,9 @@ def pool_proba(
def pool_from_proba(
- probability_sequence: List[List[float]],
+ probability_sequence: Iterable,
random_state: Optional[int] = None,
-):
+) -> List[List[List[int]]]:
"""
From a given probability sequence sample rewards (0 or 1)
@@ -132,11 +133,11 @@ def pool_from_proba(
def pool(
num_rewards: int,
sequence_length: int,
- initial_probabilities: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None,
- drift_rates: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None,
+ initial_probabilities: Optional[Iterable[Union[float, Iterable]]] = None,
+ drift_rates: Optional[Iterable[Union[float, Iterable]]] = None,
num_samples: int = 1,
random_state: Optional[int] = None,
-) -> pd.DataFrame:
+) -> List[List[List[int]]]:
"""
Returns a list of rewards.
A reward sequence is a sequence of vectors of dimension `num_probabilities`. Each entry
@@ -205,7 +206,18 @@ def pool(
return pool_from_proba(_sequence, random_state)
+bandit_random_pool_proba = pool_proba
+bandit_random_pool_from_proba = pool_from_proba
+bandit_random_pool = pool
+
+
+# Helper functions
+
def _sample_from_probabilities(prob_list, rng):
+ """
+ Helper function to sample values from a probability sequence
+ """
+
def sample_element(prob):
return int(rng.choice([0, 1], p=[1 - prob, prob]))
@@ -219,8 +231,14 @@ def recursive_sample(nested_list):
def _is_iterable(obj):
+ """
+    Helper function that returns True if an object is iterable.
+ """
return isinstance(obj, Iterable)
def _transpose_matrix(matrix):
+ """
+ Helper function to transpose a list of lists.
+ """
return [list(row) for row in zip(*matrix)]