diff --git a/notebooks/save_dataframe.ipynb b/notebooks/save_dataframe.ipynb new file mode 100644 index 0000000..4e2020e --- /dev/null +++ b/notebooks/save_dataframe.ipynb @@ -0,0 +1,318 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e0b15caf-2e2b-4b65-a832-01892a353bed", + "metadata": {}, + "source": [ + "## Static dataset creation\n", + "This notebook walks through how to use the modules to create and save a static dataset for use in all of the statistical and ML methods. The method-focused notebooks show how to import and utilize this static dataset in inference." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "47611152-0598-4d26-ac4d-d1f243dd0736", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import jax.numpy as jnp\n", + "from deepbench.physics_object import Pendulum\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "4ca50a6f-8f0e-469f-9993-e1e082133a7f", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def save_thetas_and_xs_multiple(params_in):\n", + " # except length will have 4 elements\n", + " lengths, thetas, μ_a_g, σ_a_g = params_in\n", + "\n", + " ag0 = rs.normal(loc=μ_a_g, scale=σ_a_g)\n", + " ag1 = rs.normal(loc=μ_a_g, scale=σ_a_g)\n", + " ags = np.array([np.repeat(ag0,int(len(lengths)/2)), np.repeat(ag1,int(len(lengths)/2))]).flatten()\n", + " #ags = np.array([rs.normal(loc=μ_a_g, scale=σ_a_g, size = int(len(lengths)/2)),\n", + " # rs.normal(loc=μ_a_g, scale=σ_a_g, size = int(len(lengths)/2))]).flatten()\n", + " \n", + " \n", + " xs = []\n", + " for i in range(len(lengths)):\n", + " #print(lengths[i], thetas[i], ags[i])\n", + " pendulum = Pendulum(\n", + " pendulum_arm_length=float(lengths[i]),\n", + " starting_angle_radians=float(thetas[i]),\n", + " acceleration_due_to_gravity=float(ags[i]),\n", + " noise_std_percent={\n", + " \"pendulum_arm_length\": 0.0,\n", + " \"starting_angle_radians\": 0.1,\n", + " \"acceleration_due_to_gravity\": 0.0,\n", + " },\n", + " )\n", + " x = pendulum.create_object(0.75, noiseless=False)\n", + " xs.append(x)\n", + " del pendulum\n", + " return ags, xs" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "9fdc1f49-e453-4526-b18b-814b68f92aca", + "metadata": {}, + "outputs": [], + "source": [ + "#length0, length1, length2, length3, theta0, theta1, theta2, theta3, μ_a_g, σ_a_g = thetas_in\n", + "length_percent_error_all = 0.0\n", + "theta_percent_error_all = 0.1\n", + "a_g_percent_error_all = 0.0\n", + "pos_err = 0.0\n", + "\n", + "time = 0.75\n", + "\n", + "total_length = 1000\n", + "length_df = int(total_length/4) # divide by four because we want the same total size as above\n", + "\n", + "pendulums_per_planet = 100\n", + "\n", + "# and we get four pendulums per iteration of the below\n", + "thetas = np.zeros((total_length, 5))\n", + "# this needs to have the extra 1 so that SBI is happy\n", + "xs = np.zeros((total_length,1))\n", + "#labels = np.zeros((2*length_df, 2))\n", + "#error = []\n", + "#y_noisy = []\n", + "\n", + " \n", + "rs = np.random.RandomState()#2147483648)# \n", + "\n", + "\n", + "lengths_draw = abs(rs.normal(loc=5, scale=2, size = pendulums_per_planet))\n", + "thetas_draw = abs(rs.normal(loc=jnp.pi/100, scale=jnp.pi/500, size = pendulums_per_planet))\n", + "\n", + "μ_a_g = abs(rs.normal(loc=10, scale=2))\n", + "σ_a_g = abs(rs.normal(loc=1, scale=0.5))\n", + "\n", + "\n", + "params_in = [lengths_draw,\n", + " thetas_draw,\n", + " μ_a_g, σ_a_g]\n", + "\n", + "a_gs, xs_out = 
save_thetas_and_xs_multiple(params_in)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "9714134e-53a2-42df-b254-e942a2a41314", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lengththetaa_gtimepos
05.9395400.04032310.3485000.750.126648
13.6459480.04158710.3485000.750.046904
24.4945730.03268110.3485000.750.066548
33.6917170.03244910.3485000.750.036753
48.4632760.02602310.3485000.750.124928
..................
953.2873640.0276069.8211570.750.027284
964.9013770.0315599.8211570.750.080431
976.3741460.0283119.8211570.750.102336
981.0756610.0306359.8211570.75-0.023095
995.7313870.0237059.8211570.750.071557
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " length theta a_g time pos\n", + "0 5.939540 0.040323 10.348500 0.75 0.126648\n", + "1 3.645948 0.041587 10.348500 0.75 0.046904\n", + "2 4.494573 0.032681 10.348500 0.75 0.066548\n", + "3 3.691717 0.032449 10.348500 0.75 0.036753\n", + "4 8.463276 0.026023 10.348500 0.75 0.124928\n", + ".. ... ... ... ... ...\n", + "95 3.287364 0.027606 9.821157 0.75 0.027284\n", + "96 4.901377 0.031559 9.821157 0.75 0.080431\n", + "97 6.374146 0.028311 9.821157 0.75 0.102336\n", + "98 1.075661 0.030635 9.821157 0.75 -0.023095\n", + "99 5.731387 0.023705 9.821157 0.75 0.071557\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# now make it into a dataframe\n", + "data_params = {\n", + " 'length': lengths_draw,\n", + " 'theta': thetas_draw,\n", + " 'a_g': a_gs,\n", + " 'time': np.repeat(time, len(lengths_draw)),\n", + " 'pos': xs_out,\n", + " \n", + "}\n", + "\n", + "## create the DataFrame\n", + "df = pd.DataFrame(data_params)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "44c6292d-fea9-4693-9173-913fd396bbd5", + "metadata": {}, + "outputs": [], + "source": [ + "# save the dataframe\n", + "filepath = '../data/'\n", + "df.to_csv(filepath+'static_hierarchical_df.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4efa4d17-45d1-4505-a111-13bcc479452a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}