diff --git a/docs/Basic Usage.ipynb b/docs/Basic Usage.ipynb index 779064c..46a1947 100644 --- a/docs/Basic Usage.ipynb +++ b/docs/Basic Usage.ipynb @@ -7,18 +7,24 @@ "collapsed": false }, "source": [ - "# Basic Usage\n" + "# Basic Usage\n", + "Here, we show how to randomly sample a sequence of rewards that can be used in a bandit task.\n", + "A bandit task provides the participant with multiple options (arms), each of which has a reward probability.\n", + "Below, we create a sequence of reward probabilities and rewards for a 2-arm bandit task." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [], "source": [ - "from autora.experimentalist.bandit_random import Example" + "import pandas as pd\n", + "\n", + "from autora.experimentalist.bandit_random import bandit_random_pool_proba, \\\n", + " bandit_random_pool_from_proba, bandit_random_pool" ] }, { @@ -26,26 +32,317 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Include inline mathematics like this: $4 < 5$\n", + "This package provides functions to randomly sample a list of\n", + "- probability sequences\n", + "- reward sequences\n", "\n", - "Include block mathematics like this (don't forget the empty lines above and below the block):\n", + "## bandit_random_pool_proba\n", + "First, we can use the default values to create a sequence where the reward probability is .5 for each arm.\n", + "We need to pass in the number of arms and the length of the sequence that we want to generate:" ] }, { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "data": { + "text/plain": "[[[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]]" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "default_probability_sequences = bandit_random_pool_proba(num_probabilities=2, sequence_length=4)\n", + "default_probability_sequences" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "We can also set initial values:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [ + { + "data": { + "text/plain": "[[[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.1, 0.9]]]" + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "constant_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[.1, .9])\n", + "constant_probability_sequence" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "We can do the same for drift rates:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 30, + "outputs": [ + { + "data": { + "text/plain": "[[[0.1, 0.9],\n [0.2, 0.8],\n [0.30000000000000004, 0.7000000000000001],\n [0.4, 0.6000000000000001]]]" + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "changing_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[0.1, .9], drift_rates=[.1, -.1])\n", + "changing_probability_sequence" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Instead of having fixed initial values and drift rates, we can also sample them from a range:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 31, +
"outputs": [ + { + "data": { + "text/plain": "[[[0.015097359670462374, 0.975340226214809],\n [0.08820028316160722, 0.954897827401469],\n [0.16130320665275205, 0.934455428588129],\n [0.23440613014389688, 0.914013029774789]]]" + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[[0.,.1], [.8, 1.]], drift_rates=[[0,.1],[-.1,0]])\n", + "random_probability_sequence" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "We pass in the number of sequence to generate as `num_samples`" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [ + { + "data": { + "text/plain": "[[[0.05277517161024149, 0.9157516813620797],\n [0.06601093773147637, 0.8512254219581823],\n [0.07924670385271125, 0.786699162554285],\n [0.09248246997394613, 0.7221729031503876]],\n [[0.03308990634055732, 0.8608567922155729],\n [0.05423027527564794, 0.8348824396142384],\n [0.07537064421073857, 0.8089080870129038],\n [0.09651101314582919, 0.7829337344115693]],\n [[0.05228116419768012, 0.9571430988304549],\n [0.10872837330001228, 0.922489870191641],\n [0.16517558240234442, 0.887836641552827],\n [0.22162279150467656, 0.8531834129140131]],\n [[0.017985053533171515, 0.9696895439983294],\n [0.07759069582130446, 0.9603867806583171],\n [0.13719633810943738, 0.9510840173183047],\n [0.1968019803975703, 0.9417812539782924]]]" + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random_probability_sequence = bandit_random_pool_proba(num_probabilities=2, sequence_length=4, initial_probabilities=[[0.,.1], [.8, 1.]], drift_rates=[[0,.1],[-.1,0]], num_samples=4)\n", + "random_probability_sequence" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "We can use the created probability sequences to create reward sequences:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 33, + "outputs": [ + { + "data": { + "text/plain": "[[[0, 1], [0, 1], [0, 1], [0, 1]],\n [[0, 1], [0, 1], [0, 1], [0, 0]],\n [[0, 1], [0, 1], [0, 1], [0, 0]],\n [[0, 1], [0, 0], [1, 1], [0, 1]]]" + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reward_sequences = bandit_random_pool_from_proba(random_probability_sequence)\n", + "reward_sequences" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "Or, we can use `bandit_random_pool` with the same arguments as in the `bandit_random_pool_proba` to generate reward sequences directly:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [ + { + "data": { + "text/plain": "[[[0, 0], [0, 1], [0, 1], [1, 1]],\n [[0, 1], [0, 1], [0, 1], [1, 1]],\n [[0, 1], [0, 1], [0, 1], [1, 1]],\n [[0, 1], [0, 0], [0, 1], [0, 1]]]" + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reward_sequences = bandit_random_pool(num_rewards=2, sequence_length=4, initial_probabilities=[[0.,.1], [.8, 1.]], drift_rates=[[0,.1],[-.1,0]], num_samples=4)\n", + "reward_sequences" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "## Use in State\n", + "\n", + "!!!Warning If you 
want to use this in the AutoRA `StandardState`, you need to convert the return value into a `pd.DataFrame`:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 41, + "outputs": [ + { + "data": { + "text/plain": " reward-trajectory\n0 [[0, 1], [0, 1], [1, 0], [0, 0], [1, 1], [0, 1...", + "text/html": "
" + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# First, we define the variables:\n", + "from autora.variable import VariableCollection, Variable\n", "\n", - "$$ \n", - "y + 1 = 4 \n", - "$$\n", + "variables = VariableCollection(\n", + " independent_variables=[Variable(name=\"reward-trajectory\")],\n", + " dependent_variables=[Variable(name=\"choice-trajectory\")]\n", + ")\n", "\n", - "... or this:\n", + "# With these variables, we initialize a StandardState\n", + "from autora.state import StandardState\n", "\n", - "\\begin{align}\n", - " p(v_i=1|\\mathbf{h}) & = \\sigma\\left(\\sum_j w_{ij}h_j + b_i\\right) \\\\\n", - " p(h_j=1|\\mathbf{v}) & = \\sigma\\left(\\sum_i w_{ij}v_i + c_j\\right)\n", - "\\end{align}" - ] + "state = StandardState()\n", + "\n", + "# Here, we want to create a random reward-sequences directly as on state function\n", + "from autora.state import Delta, on_state\n", + "\n", + "\n", + "@on_state()\n", + "def pool_on_state(num_rewards=2, sequence_length=10, num_samples=1, initial_probabilities=None,\n", + " drift_rates=None):\n", + " sequence_as_list = bandit_random_pool(\n", + " num_rewards=num_rewards, sequence_length=sequence_length, num_samples=num_samples,\n", + " initial_probabilities=initial_probabilities, drift_rates=drift_rates)\n", + " # the condition of the state expect a pandas DataFrame,\n", + " sequence_as_df = pd.DataFrame({\"reward-trajectory\": sequence_as_list})\n", + " return Delta(conditions=sequence_as_df)\n", + "\n", + "\n", + "# now we can use the pool_on_state on the state to create conditions:\n", + "state = pool_on_state(state)\n", + "state.conditions" + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "markdown", - "metadata": {}, - "source": [] + "source": [ + "We can pass in keyword arguments into the on_state function as well. Here, we create 3 sequences with initial values for the first arm between 0 and .3 and for the second arm between .7 and 1. And drift rates are sampled between 0 and .1, or -.1 and 0, respectively:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 42, + "outputs": [ + { + "data": { + "text/plain": " reward-trajectory\n0 [[1, 1], [1, 1], [0, 0], [1, 0], [1, 0], [0, 0...\n1 [[1, 0], [0, 1], [1, 1], [1, 1], [0, 0], [0, 0...\n2 [[0, 1], [0, 0], [0, 0], [1, 0], [0, 1], [0, 1...", + "text/html": "
" + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "state = pool_on_state(state, num_samples=3, initial_probabilities=[[0, .3], [.7, 1.]], drift_rates=[[0, .1], [-.1,0]])\n", + "state.conditions" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + } } ], "metadata": { diff --git a/docs/index.md b/docs/index.md index d662cc7..cbe76f9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,24 +1,50 @@ # bandit-random -Explain what your contribution is doing here +This package provides functions to randomly sample a list of -You can include inline mathematics by wrapping LaTeX formatted math in `$` symbols: `$(3 < 4)$` will render as: $(3 < 4)$ +- probability sequences +- reward sequences -To include block mathematics wrap the expression with double `$$`: +## Probability sequence + +A probability sequence is a sequence of vectors with elements in the range between 0 and 1: + +Example for a probability function that can be used in a 3-arm bandit task: + +``` +[[0, 1., .3], [.6, .2, .8], ...] +``` + +## Reward sequence + +A reward sequences uses the probabilities to generate a sequence with elements of either 0 or 1: + +Example for a probability function that can be used in a 3-arm bandit task: ``` -$$ -E(\mathbf{v}, \mathbf{h}) = -\sum_{i,j}w_{ij}v_i h_j - \sum_i b_i v_i - \sum_j c_j h_j -$$ +[[0, 1, 0], [1, 0, 1], ...] ``` -Will render as: +The probability sequence can be created by specifying an initial probability for each element and a +drift: -$$ -E(\mathbf{v}, \mathbf{h}) = -\sum_{i,j}w_{ij}v_i h_j - \sum_i b_i v_i - \sum_j c_j h_j -$$ +For example: + +``` +initial_proba = [0, .5, 1.] +drift = [.1, 0., -.1] +... +sequence = [[0, .5, 1.], [.1, .5, .9], [.2, .5, .8], [.3, .5, .7]...] +``` + +Instead of fixed values for the initial probability and the drift, we can also use ranges. In that +case the values are randomly sampled from the range. + +``` +initial_proa = [[0, .3], [.4, .7], [.8, 1.]] +drift = [[0, .1], [.1, .2], [.2, .3]] +``` -See more information on writing mathematical expressions [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/writing-mathematical-expressions). diff --git a/src/autora/experimentalist/bandit_random/__init__.py b/src/autora/experimentalist/bandit_random/__init__.py index 6e29a5d..36e0f82 100644 --- a/src/autora/experimentalist/bandit_random/__init__.py +++ b/src/autora/experimentalist/bandit_random/__init__.py @@ -1,23 +1,24 @@ """ -Example Experimentalist +Experimentalist that returns +probability sequences: Sequences of vectors with elements between 0 and 1 +or +reward sequences: Sequences of vectors with binary elements """ -import random import numpy as np -import pandas as pd -from typing import Union, List, Iterable, Optional, Tuple +from typing import Union, List, Optional from collections.abc import Iterable def pool_proba( num_probabilities: int, sequence_length: int, - initial_probabilities: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None, - drift_rates: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None, + initial_probabilities: Optional[Iterable[Union[float, Iterable]]] = None, + drift_rates: Optional[Iterable[Union[float, Iterable]]] = None, num_samples: int = 1, random_state: Optional[int] = None, -) -> List[List[float]]: +) -> List[List[List[float]]]: """ Returns a list of probability sequences. 
A probability sequence is a sequence of vectors of dimension `num_probabilities`. Each entry @@ -110,9 +111,9 @@ def pool_proba( def pool_from_proba( - probability_sequence: List[List[float]], + probability_sequence: Iterable, random_state: Optional[int] = None, -): +) -> List[List[List[float]]]: """ From a given probability sequence sample rewards (0 or 1) @@ -132,11 +133,11 @@ def pool_from_proba( def pool( num_rewards: int, sequence_length: int, - initial_probabilities: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None, - drift_rates: Optional[Iterable[Tuple[Union[float, Tuple]]]] = None, + initial_probabilities: Optional[Iterable[Union[float, Iterable]]] = None, + drift_rates: Optional[Iterable[Union[float, Iterable]]] = None, num_samples: int = 1, random_state: Optional[int] = None, -) -> pd.DataFrame: +) -> List[List[List[float]]]: """ Returns a list of rewards. A reward sequence is a sequence of vectors of dimension `num_probabilities`. Each entry @@ -205,7 +206,18 @@ def pool( return pool_from_proba(_sequence, random_state) +bandit_random_pool_proba = pool_proba +bandit_random_pool_from_proba = pool_from_proba +bandit_random_pool = pool + + +# Helper functions + def _sample_from_probabilities(prob_list, rng): + """ + Helper function to sample values from a probability sequence + """ + def sample_element(prob): return int(rng.choice([0, 1], p=[1 - prob, prob])) @@ -219,8 +231,14 @@ def recursive_sample(nested_list): def _is_iterable(obj): + """ + Helper function that returns true if an object is iterable + """ return isinstance(obj, Iterable) def _transpose_matrix(matrix): + """ + Helper function to transpose a list of lists. + """ return [list(row) for row in zip(*matrix)]
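For quick reference, here is a minimal sketch combining the three public functions introduced in this diff. The import path and call signatures are taken from the notebook's import cell and the function definitions above; the parameter values are illustrative, copied from the notebook cells rather than being the only valid choices.

```
# Minimal usage sketch (values taken from the notebook examples above)
from autora.experimentalist.bandit_random import (
    bandit_random_pool,
    bandit_random_pool_from_proba,
    bandit_random_pool_proba,
)

# Drifting reward probabilities for a 2-arm bandit over 4 trials.
probability_sequences = bandit_random_pool_proba(
    num_probabilities=2,
    sequence_length=4,
    initial_probabilities=[[0.0, 0.1], [0.8, 1.0]],
    drift_rates=[[0.0, 0.1], [-0.1, 0.0]],
    num_samples=4,
)

# Sample binary rewards from those probabilities ...
reward_sequences = bandit_random_pool_from_proba(probability_sequences)

# ... or generate reward sequences directly in a single step.
reward_sequences_direct = bandit_random_pool(
    num_rewards=2,
    sequence_length=4,
    initial_probabilities=[[0.0, 0.1], [0.8, 1.0]],
    drift_rates=[[0.0, 0.1], [-0.1, 0.0]],
    num_samples=4,
)
```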