diff --git a/README.md b/README.md
index d859d5da..ac8e3f41 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
[![Test installation from source](https://github.com/digitraceslab/niimpy/actions/workflows/install.yml/badge.svg)](https://github.com/digitraceslab/niimpy/actions/workflows/install.yml)
[![codecov](https://codecov.io/gh/digitraceslab/niimpy/branch/master/graph/badge.svg?token=SEEOOF7A70)](https://codecov.io/gh/digitraceslab/niimpy)
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/digitraceslab/niimpy/HEAD?labpath=docs)
What
----
@@ -89,12 +90,12 @@ location = location.reset_index(0).dropna()
# Feature extraction
features = nilo.extract_features(
- lats=location['double_latitude'],
- lons=location['double_longitude'],
+ lats=location['latitude'],
+ lons=location['longitude'],
users=location['user'],
groups=location['group'],
times=location.index,
- speeds=location['double_speed']
+ speeds=location['speed']
)
```
diff --git a/dist/niimpy-1.2.1-py3-none-any.whl b/dist/niimpy-1.2.1-py3-none-any.whl
new file mode 100644
index 00000000..8828df58
Binary files /dev/null and b/dist/niimpy-1.2.1-py3-none-any.whl differ
diff --git a/dist/niimpy-1.2.1.tar.gz b/dist/niimpy-1.2.1.tar.gz
new file mode 100644
index 00000000..aed4df83
Binary files /dev/null and b/dist/niimpy-1.2.1.tar.gz differ
diff --git a/dist/niimpy-1.2.2-py3-none-any.whl b/dist/niimpy-1.2.2-py3-none-any.whl
new file mode 100644
index 00000000..68b0723c
Binary files /dev/null and b/dist/niimpy-1.2.2-py3-none-any.whl differ
diff --git a/dist/niimpy-1.2.2.tar.gz b/dist/niimpy-1.2.2.tar.gz
new file mode 100644
index 00000000..ac53f316
Binary files /dev/null and b/dist/niimpy-1.2.2.tar.gz differ
diff --git a/docs/user_guide/preprocessing/audio.ipynb b/docs/user_guide/preprocessing/audio.ipynb
index bffd3c69..8c8a8771 100644
--- a/docs/user_guide/preprocessing/audio.ipynb
+++ b/docs/user_guide/preprocessing/audio.ipynb
@@ -23,8 +23,8 @@
"- `user`: Subject ID\n",
"- `device`: Device ID\n",
"- `is_silent`: Boolean value, indicates when audio is too quiet to record\n",
- "- `double_frequency`: Audio frequency in Hz\n",
- "- `double_decibels`: Audio volume in decibels\n",
+ "- `frequency`: Audio frequency in Hz\n",
+ "- `decibels`: Audio volume in decibels\n",
"\n",
"Niimpy extracts the following audio features:\n",
"- `audio_count_silent`: number of times when there has been some sound in the environment\n",
@@ -381,7 +381,7 @@
"- rows are observations, indexed by timestamps, i.e. each row represents a snippet that has been recorded at a given time and date\n",
"- columns are characteristics for each observation, for example, the user whose data we are analyzing\n",
"- there are at least two different users in the dataframe\n",
- "- there are two main columns: `double_decibels` and `double_frequency`.\n",
+ "- there are two main columns: `decibels` and `frequency`.\n",
"\n",
"In fact, we can check the first three elements for each user"
]
@@ -684,6 +684,145 @@
"data.drop_duplicates(['user','time']).groupby('user').head(3)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "b4988507",
+ "metadata": {},
+ "source": [
+ "The main column names in our dataframe do not match the Niimpy schema. We could provide these column names as parameters but it is easier to rename them here."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5f17abe7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " device | \n",
+ " time | \n",
+ " is_silent | \n",
+ " double_decibels | \n",
+ " double_frequency | \n",
+ " datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2019-08-13 07:28:27.657999992+03:00 | \n",
+ " iGyXetHE3S8u | \n",
+ " Cq9vueHh3zVs | \n",
+ " 1.565671e+09 | \n",
+ " 0 | \n",
+ " 51 | \n",
+ " 7735 | \n",
+ " 2019-08-13 07:28:27.657999992+03:00 | \n",
+ "
\n",
+ " \n",
+ " 2019-08-13 07:58:29.657999992+03:00 | \n",
+ " iGyXetHE3S8u | \n",
+ " Cq9vueHh3zVs | \n",
+ " 1.565672e+09 | \n",
+ " 0 | \n",
+ " 90 | \n",
+ " 13609 | \n",
+ " 2019-08-13 07:58:29.657999992+03:00 | \n",
+ "
\n",
+ " \n",
+ " 2019-08-13 08:28:31.657999992+03:00 | \n",
+ " iGyXetHE3S8u | \n",
+ " Cq9vueHh3zVs | \n",
+ " 1.565674e+09 | \n",
+ " 0 | \n",
+ " 81 | \n",
+ " 7690 | \n",
+ " 2019-08-13 08:28:31.657999992+03:00 | \n",
+ "
\n",
+ " \n",
+ " 2019-08-13 08:58:33.657999992+03:00 | \n",
+ " iGyXetHE3S8u | \n",
+ " Cq9vueHh3zVs | \n",
+ " 1.565676e+09 | \n",
+ " 0 | \n",
+ " 58 | \n",
+ " 8347 | \n",
+ " 2019-08-13 08:58:33.657999992+03:00 | \n",
+ "
\n",
+ " \n",
+ " 2019-08-13 09:28:35.657999992+03:00 | \n",
+ " iGyXetHE3S8u | \n",
+ " Cq9vueHh3zVs | \n",
+ " 1.565678e+09 | \n",
+ " 1 | \n",
+ " 36 | \n",
+ " 13592 | \n",
+ " 2019-08-13 09:28:35.657999992+03:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user device time \\\n",
+ "2019-08-13 07:28:27.657999992+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.565671e+09 \n",
+ "2019-08-13 07:58:29.657999992+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.565672e+09 \n",
+ "2019-08-13 08:28:31.657999992+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.565674e+09 \n",
+ "2019-08-13 08:58:33.657999992+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.565676e+09 \n",
+ "2019-08-13 09:28:35.657999992+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.565678e+09 \n",
+ "\n",
+ " is_silent double_decibels \\\n",
+ "2019-08-13 07:28:27.657999992+03:00 0 51 \n",
+ "2019-08-13 07:58:29.657999992+03:00 0 90 \n",
+ "2019-08-13 08:28:31.657999992+03:00 0 81 \n",
+ "2019-08-13 08:58:33.657999992+03:00 0 58 \n",
+ "2019-08-13 09:28:35.657999992+03:00 1 36 \n",
+ "\n",
+ " double_frequency \\\n",
+ "2019-08-13 07:28:27.657999992+03:00 7735 \n",
+ "2019-08-13 07:58:29.657999992+03:00 13609 \n",
+ "2019-08-13 08:28:31.657999992+03:00 7690 \n",
+ "2019-08-13 08:58:33.657999992+03:00 8347 \n",
+ "2019-08-13 09:28:35.657999992+03:00 13592 \n",
+ "\n",
+ " datetime \n",
+ "2019-08-13 07:28:27.657999992+03:00 2019-08-13 07:28:27.657999992+03:00 \n",
+ "2019-08-13 07:58:29.657999992+03:00 2019-08-13 07:58:29.657999992+03:00 \n",
+ "2019-08-13 08:28:31.657999992+03:00 2019-08-13 08:28:31.657999992+03:00 \n",
+ "2019-08-13 08:58:33.657999992+03:00 2019-08-13 08:58:33.657999992+03:00 \n",
+ "2019-08-13 09:28:35.657999992+03:00 2019-08-13 09:28:35.657999992+03:00 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = data.rename(columns={'decibels': 'decibels', 'frequency': 'frequency'})\n",
+ "data.head()"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -701,18 +840,43 @@
" - index: date and time when the event happened (timestamp)\n",
" - user: stores the user name whose data is analyzed. Each user should have a unique name or hash (i.e. one hash for each unique user)\n",
" - is_silent: stores whether the decibel level is above a set threshold (usually 50dB)\n",
- " - double_decibels: stores the decibels of the recorded snippet\n",
- " - double_frequency: the frequency of the recorded snippet in Hz\n",
+ " - decibels: stores the decibels of the recorded snippet\n",
+ " - frequency: the frequency of the recorded snippet in Hz\n",
" - NOTE: most of our audio examples come from data recorded with the Aware Framework, if you want to know more about the frequency and decibels, please read https://github.com/denzilferreira/com.aware.plugin.ambient_noise\n",
"4. Additional columns are allowed.\n",
- "5. The names of the columns do not need to be exactly \"user\", \"is_silent\", \"double_decibels\" or \"double_frequency\" as we can pass our own names in an argument (to be explained later).\n",
- "\n",
+ "5. The names of the columns do not need to be exactly \"user\", \"is_silent\", \"decibels\" or \"frequency\" as we can pass our own names in an argument.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b8a7a20d",
+ "metadata": {},
+ "source": [
+ "Column names in our data do not match the Niimpy schema. We could provide these column names as parameters to niimpy functions, but it is simpler to rename them here."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "9436998e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = data.rename(columns={'double_decibels': 'decibels', 'double_frequency': 'frequency'})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f2e6e2d6",
+ "metadata": {},
+ "source": [
"Below is an example of a dataframe that complies with these minimum requirements"
]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 9,
"id": "8c66c6b3",
"metadata": {},
"outputs": [
@@ -739,8 +903,8 @@
" | \n",
" user | \n",
" is_silent | \n",
- " double_decibels | \n",
- " double_frequency | \n",
+ " decibels | \n",
+ " frequency | \n",
" \n",
" \n",
" \n",
@@ -770,24 +934,24 @@
""
],
"text/plain": [
- " user is_silent double_decibels \\\n",
- "2019-08-13 07:28:27.657999992+03:00 iGyXetHE3S8u 0 51 \n",
- "2019-08-13 07:58:29.657999992+03:00 iGyXetHE3S8u 0 90 \n",
- "2019-08-13 08:28:31.657999992+03:00 iGyXetHE3S8u 0 81 \n",
+ " user is_silent decibels \\\n",
+ "2019-08-13 07:28:27.657999992+03:00 iGyXetHE3S8u 0 51 \n",
+ "2019-08-13 07:58:29.657999992+03:00 iGyXetHE3S8u 0 90 \n",
+ "2019-08-13 08:28:31.657999992+03:00 iGyXetHE3S8u 0 81 \n",
"\n",
- " double_frequency \n",
- "2019-08-13 07:28:27.657999992+03:00 7735 \n",
- "2019-08-13 07:58:29.657999992+03:00 13609 \n",
- "2019-08-13 08:28:31.657999992+03:00 7690 "
+ " frequency \n",
+ "2019-08-13 07:28:27.657999992+03:00 7735 \n",
+ "2019-08-13 07:58:29.657999992+03:00 13609 \n",
+ "2019-08-13 08:28:31.657999992+03:00 7690 "
]
},
- "execution_count": 7,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "example_dataschema = data[['user','is_silent','double_decibels','double_frequency']]\n",
+ "example_dataschema = data[['user','is_silent','decibels','frequency']]\n",
"example_dataschema.head(3)"
]
},
@@ -817,12 +981,12 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 10,
"id": "a8bb0c68",
"metadata": {},
"outputs": [],
"source": [
- "feature_dict1:{\"audio_column_name\":\"double_frequency\",\"resample_args\":{\"rule\":\"1D\"}}\n",
+ "feature_dict1:{\"audio_column_name\":\"frequency\",\"resample_args\":{\"rule\":\"1D\"}}\n",
"feature_dict2:{\"audio_column_name\":\"random_name\",\"resample_args\":{\"rule\":\"30T\"}}\n",
"feature_dict3:{\"audio_column_name\":\"other_name\",\"resample_args\":{\"rule\":\"45T\",\"origin\":\"end\"}}"
]
@@ -835,16 +999,26 @@
"source": [
"Here, we have three basic feature dictionaries. \n",
"\n",
- "- `feature_dict1` will be used to analyze the data stored in the column `double_frequency` in our dataframe. The data will be binned in one day periods\n",
+ "- `feature_dict1` will be used to analyze the data stored in the column `frequency` in our dataframe. The data will be binned in one day periods\n",
"- `feature_dict2` will be used to analyze the data stored in the column `random_name` in our dataframe. The data will be aggregated in 30-minutes bins\n",
"- `feature_dict3` will be used to analyze the data stored in the column `other_name` in our dataframe. The data will be binned in 45-minutes bins, but the binning will start from the last timestamp in the dataframe. \n",
"\n",
- "**Default values:** if no arguments are passed, `niimpy`'s will aggregate the data in 30-min bins, and will select the audio_column_name according to the most suitable column. For example, if we are computing the minimum frequency, `niimpy` will select *double_frquency* as the column name. \n",
+ "**Default values:** if no arguments are passed, `niimpy` will aggregate the data in 30-min bins and will select the audio_column_name according to the most suitable column. For example, if we are computing the minimum frequency, `niimpy` will select *frequency* as the column name. \n",
"\n",
"#### 4.1.2 Using the functions\n",
"Now that we understand how the functions are customized, it is time we compute our first audio feature. Suppose that we are interested in extracting the total number of times our recordings were loud every 50 minutes. We will need `niimpy`'s `audio_count_loud` function, the data, and we will also need to create a dictionary to customize our function. Let's create the dictionary first"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "d681b542",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "function_features={\"audio_column_name\":\"decibels\",\"resample_args\":{\"rule\":\"50T\"}}"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -856,12 +1030,12 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 12,
"id": "98a0af37",
"metadata": {},
"outputs": [],
"source": [
- "my_loud_times = au.audio_count_loud(data, audio_column_name = \"double_decibels\", resample_args = {\"rule\":\"50T\"})"
+ "my_loud_times = au.audio_count_loud(data, audio_column_name = \"decibels\", resample_args = {\"rule\":\"50T\"})"
]
},
{
@@ -875,7 +1049,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 13,
"id": "ae8260cb",
"metadata": {},
"outputs": [
@@ -900,76 +1074,76 @@
" \n",
" \n",
" | \n",
- " device | \n",
" audio_count_loud | \n",
+ " device | \n",
" user | \n",
"
\n",
" \n",
" \n",
" \n",
" 2020-01-09 01:40:00+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 1 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 02:30:00+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 2 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 03:20:00+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 2 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 04:10:00+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 0 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 05:00:00+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 1 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 05:50:00+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 1 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 06:40:00+02:00 | \n",
- " OWd1Uau8POix | \n",
" 1 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 07:30:00+02:00 | \n",
- " OWd1Uau8POix | \n",
" 0 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 08:20:00+02:00 | \n",
- " OWd1Uau8POix | \n",
" 1 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 09:10:00+02:00 | \n",
- " OWd1Uau8POix | \n",
" 1 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 10:00:00+02:00 | \n",
- " OWd1Uau8POix | \n",
" 2 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
@@ -977,21 +1151,21 @@
""
],
"text/plain": [
- " device audio_count_loud user\n",
- "2020-01-09 01:40:00+02:00 3p83yASkOb_B 1 jd9INuQ5BBlW\n",
- "2020-01-09 02:30:00+02:00 3p83yASkOb_B 2 jd9INuQ5BBlW\n",
- "2020-01-09 03:20:00+02:00 3p83yASkOb_B 2 jd9INuQ5BBlW\n",
- "2020-01-09 04:10:00+02:00 3p83yASkOb_B 0 jd9INuQ5BBlW\n",
- "2020-01-09 05:00:00+02:00 3p83yASkOb_B 1 jd9INuQ5BBlW\n",
- "2020-01-09 05:50:00+02:00 3p83yASkOb_B 1 jd9INuQ5BBlW\n",
- "2020-01-09 06:40:00+02:00 OWd1Uau8POix 1 jd9INuQ5BBlW\n",
- "2020-01-09 07:30:00+02:00 OWd1Uau8POix 0 jd9INuQ5BBlW\n",
- "2020-01-09 08:20:00+02:00 OWd1Uau8POix 1 jd9INuQ5BBlW\n",
- "2020-01-09 09:10:00+02:00 OWd1Uau8POix 1 jd9INuQ5BBlW\n",
- "2020-01-09 10:00:00+02:00 OWd1Uau8POix 2 jd9INuQ5BBlW"
+ " audio_count_loud device user\n",
+ "2020-01-09 01:40:00+02:00 1 3p83yASkOb_B jd9INuQ5BBlW\n",
+ "2020-01-09 02:30:00+02:00 2 3p83yASkOb_B jd9INuQ5BBlW\n",
+ "2020-01-09 03:20:00+02:00 2 3p83yASkOb_B jd9INuQ5BBlW\n",
+ "2020-01-09 04:10:00+02:00 0 3p83yASkOb_B jd9INuQ5BBlW\n",
+ "2020-01-09 05:00:00+02:00 1 3p83yASkOb_B jd9INuQ5BBlW\n",
+ "2020-01-09 05:50:00+02:00 1 3p83yASkOb_B jd9INuQ5BBlW\n",
+ "2020-01-09 06:40:00+02:00 1 OWd1Uau8POix jd9INuQ5BBlW\n",
+ "2020-01-09 07:30:00+02:00 0 OWd1Uau8POix jd9INuQ5BBlW\n",
+ "2020-01-09 08:20:00+02:00 1 OWd1Uau8POix jd9INuQ5BBlW\n",
+ "2020-01-09 09:10:00+02:00 1 OWd1Uau8POix jd9INuQ5BBlW\n",
+ "2020-01-09 10:00:00+02:00 2 OWd1Uau8POix jd9INuQ5BBlW"
]
},
- "execution_count": 10,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -1011,7 +1185,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 14,
"id": "e085424f",
"metadata": {},
"outputs": [
@@ -1040,8 +1214,8 @@
" device | \n",
" time | \n",
" is_silent | \n",
- " double_decibels | \n",
- " double_frequency | \n",
+ " decibels | \n",
+ " frequency | \n",
" datetime | \n",
" \n",
" \n",
@@ -1130,23 +1304,14 @@
"2020-01-09 04:38:03.895999908+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578537e+09 \n",
"2020-01-09 05:08:03.895999908+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578539e+09 \n",
"\n",
- " is_silent double_decibels \\\n",
- "2020-01-09 02:08:03.895999908+02:00 0 84 \n",
- "2020-01-09 02:38:03.895999908+02:00 0 89 \n",
- "2020-01-09 03:08:03.895999908+02:00 0 99 \n",
- "2020-01-09 03:38:03.895999908+02:00 0 77 \n",
- "2020-01-09 04:08:03.895999908+02:00 0 80 \n",
- "2020-01-09 04:38:03.895999908+02:00 0 52 \n",
- "2020-01-09 05:08:03.895999908+02:00 0 63 \n",
- "\n",
- " double_frequency \\\n",
- "2020-01-09 02:08:03.895999908+02:00 4935 \n",
- "2020-01-09 02:38:03.895999908+02:00 8734 \n",
- "2020-01-09 03:08:03.895999908+02:00 1710 \n",
- "2020-01-09 03:38:03.895999908+02:00 9054 \n",
- "2020-01-09 04:08:03.895999908+02:00 12265 \n",
- "2020-01-09 04:38:03.895999908+02:00 7281 \n",
- "2020-01-09 05:08:03.895999908+02:00 14408 \n",
+ " is_silent decibels frequency \\\n",
+ "2020-01-09 02:08:03.895999908+02:00 0 84 4935 \n",
+ "2020-01-09 02:38:03.895999908+02:00 0 89 8734 \n",
+ "2020-01-09 03:08:03.895999908+02:00 0 99 1710 \n",
+ "2020-01-09 03:38:03.895999908+02:00 0 77 9054 \n",
+ "2020-01-09 04:08:03.895999908+02:00 0 80 12265 \n",
+ "2020-01-09 04:38:03.895999908+02:00 0 52 7281 \n",
+ "2020-01-09 05:08:03.895999908+02:00 0 63 14408 \n",
"\n",
" datetime \n",
"2020-01-09 02:08:03.895999908+02:00 2020-01-09 02:08:03.895999908+02:00 \n",
@@ -1158,7 +1323,7 @@
"2020-01-09 05:08:03.895999908+02:00 2020-01-09 05:08:03.895999908+02:00 "
]
},
- "execution_count": 11,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -1180,7 +1345,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 15,
"id": "d7ff80f4",
"metadata": {},
"outputs": [],
@@ -1189,14 +1354,14 @@
"results = []\n",
"for user in users:\n",
" start_time = data[data[\"user\"]==user].index.min()\n",
- " function_features={\"audio_column_name\":\"double_decibels\",\"resample_args\":{\"rule\":\"50T\",\"origin\":start_time}}\n",
- " results.append(au.audio_count_loud(data[data[\"user\"]==user], audio_column_name = \"double_decibels\", resample_args = {\"rule\":\"50T\",\"origin\":start_time}))\n",
+ " function_features={\"audio_column_name\":\"decibels\",\"resample_args\":{\"rule\":\"50T\",\"origin\":start_time}}\n",
+ " results.append(au.audio_count_loud(data[data[\"user\"]==user], **function_features))\n",
"my_loud_times = pd.concat(results)"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 16,
"id": "427ab240",
"metadata": {},
"outputs": [
@@ -1221,142 +1386,142 @@
" \n",
" \n",
" | \n",
- " device | \n",
" audio_count_loud | \n",
+ " device | \n",
" user | \n",
"
\n",
" \n",
" \n",
" \n",
" 2019-08-13 07:28:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 1 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 08:18:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 1 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 09:08:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 0 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 09:58:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 2 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 10:48:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 2 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 11:38:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 1 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 12:28:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 0 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 13:18:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 0 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 14:08:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 1 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 14:58:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 0 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 15:48:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 1 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2019-08-13 16:38:27.657999992+03:00 | \n",
- " Cq9vueHh3zVs | \n",
" 1 | \n",
+ " Cq9vueHh3zVs | \n",
" iGyXetHE3S8u | \n",
"
\n",
" \n",
" 2020-01-09 02:08:03.895999908+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 2 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 02:58:03.895999908+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 2 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 03:48:03.895999908+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 1 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 04:38:03.895999908+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 0 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 05:28:03.895999908+02:00 | \n",
- " 3p83yASkOb_B | \n",
" 2 | \n",
+ " 3p83yASkOb_B | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 07:08:03.895999908+02:00 | \n",
- " OWd1Uau8POix | \n",
" 1 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 07:58:03.895999908+02:00 | \n",
- " OWd1Uau8POix | \n",
" 0 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 08:48:03.895999908+02:00 | \n",
- " OWd1Uau8POix | \n",
" 1 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 09:38:03.895999908+02:00 | \n",
- " OWd1Uau8POix | \n",
" 2 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
" 2020-01-09 10:28:03.895999908+02:00 | \n",
- " OWd1Uau8POix | \n",
" 1 | \n",
+ " OWd1Uau8POix | \n",
" jd9INuQ5BBlW | \n",
"
\n",
" \n",
@@ -1364,29 +1529,29 @@
""
],
"text/plain": [
- " device audio_count_loud \\\n",
- "2019-08-13 07:28:27.657999992+03:00 Cq9vueHh3zVs 1 \n",
- "2019-08-13 08:18:27.657999992+03:00 Cq9vueHh3zVs 1 \n",
- "2019-08-13 09:08:27.657999992+03:00 Cq9vueHh3zVs 0 \n",
- "2019-08-13 09:58:27.657999992+03:00 Cq9vueHh3zVs 2 \n",
- "2019-08-13 10:48:27.657999992+03:00 Cq9vueHh3zVs 2 \n",
- "2019-08-13 11:38:27.657999992+03:00 Cq9vueHh3zVs 1 \n",
- "2019-08-13 12:28:27.657999992+03:00 Cq9vueHh3zVs 0 \n",
- "2019-08-13 13:18:27.657999992+03:00 Cq9vueHh3zVs 0 \n",
- "2019-08-13 14:08:27.657999992+03:00 Cq9vueHh3zVs 1 \n",
- "2019-08-13 14:58:27.657999992+03:00 Cq9vueHh3zVs 0 \n",
- "2019-08-13 15:48:27.657999992+03:00 Cq9vueHh3zVs 1 \n",
- "2019-08-13 16:38:27.657999992+03:00 Cq9vueHh3zVs 1 \n",
- "2020-01-09 02:08:03.895999908+02:00 3p83yASkOb_B 2 \n",
- "2020-01-09 02:58:03.895999908+02:00 3p83yASkOb_B 2 \n",
- "2020-01-09 03:48:03.895999908+02:00 3p83yASkOb_B 1 \n",
- "2020-01-09 04:38:03.895999908+02:00 3p83yASkOb_B 0 \n",
- "2020-01-09 05:28:03.895999908+02:00 3p83yASkOb_B 2 \n",
- "2020-01-09 07:08:03.895999908+02:00 OWd1Uau8POix 1 \n",
- "2020-01-09 07:58:03.895999908+02:00 OWd1Uau8POix 0 \n",
- "2020-01-09 08:48:03.895999908+02:00 OWd1Uau8POix 1 \n",
- "2020-01-09 09:38:03.895999908+02:00 OWd1Uau8POix 2 \n",
- "2020-01-09 10:28:03.895999908+02:00 OWd1Uau8POix 1 \n",
+ " audio_count_loud device \\\n",
+ "2019-08-13 07:28:27.657999992+03:00 1 Cq9vueHh3zVs \n",
+ "2019-08-13 08:18:27.657999992+03:00 1 Cq9vueHh3zVs \n",
+ "2019-08-13 09:08:27.657999992+03:00 0 Cq9vueHh3zVs \n",
+ "2019-08-13 09:58:27.657999992+03:00 2 Cq9vueHh3zVs \n",
+ "2019-08-13 10:48:27.657999992+03:00 2 Cq9vueHh3zVs \n",
+ "2019-08-13 11:38:27.657999992+03:00 1 Cq9vueHh3zVs \n",
+ "2019-08-13 12:28:27.657999992+03:00 0 Cq9vueHh3zVs \n",
+ "2019-08-13 13:18:27.657999992+03:00 0 Cq9vueHh3zVs \n",
+ "2019-08-13 14:08:27.657999992+03:00 1 Cq9vueHh3zVs \n",
+ "2019-08-13 14:58:27.657999992+03:00 0 Cq9vueHh3zVs \n",
+ "2019-08-13 15:48:27.657999992+03:00 1 Cq9vueHh3zVs \n",
+ "2019-08-13 16:38:27.657999992+03:00 1 Cq9vueHh3zVs \n",
+ "2020-01-09 02:08:03.895999908+02:00 2 3p83yASkOb_B \n",
+ "2020-01-09 02:58:03.895999908+02:00 2 3p83yASkOb_B \n",
+ "2020-01-09 03:48:03.895999908+02:00 1 3p83yASkOb_B \n",
+ "2020-01-09 04:38:03.895999908+02:00 0 3p83yASkOb_B \n",
+ "2020-01-09 05:28:03.895999908+02:00 2 3p83yASkOb_B \n",
+ "2020-01-09 07:08:03.895999908+02:00 1 OWd1Uau8POix \n",
+ "2020-01-09 07:58:03.895999908+02:00 0 OWd1Uau8POix \n",
+ "2020-01-09 08:48:03.895999908+02:00 1 OWd1Uau8POix \n",
+ "2020-01-09 09:38:03.895999908+02:00 2 OWd1Uau8POix \n",
+ "2020-01-09 10:28:03.895999908+02:00 1 OWd1Uau8POix \n",
"\n",
" user \n",
"2019-08-13 07:28:27.657999992+03:00 iGyXetHE3S8u \n",
@@ -1413,7 +1578,7 @@
"2020-01-09 10:28:03.895999908+02:00 jd9INuQ5BBlW "
]
},
- "execution_count": 13,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1442,13 +1607,13 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 17,
"id": "87d9d44d",
"metadata": {},
"outputs": [],
"source": [
- "wrapper_features1 = {au.audio_count_loud:{\"audio_column_name\":\"double_decibels\",\"resample_args\":{\"rule\":\"1D\"}},\n",
- " au.audio_max_freq:{\"audio_column_name\":\"double_frequency\",\"resample_args\":{\"rule\":\"1D\"}}}"
+ "wrapper_features1 = {au.audio_count_loud:{\"audio_column_name\":\"decibels\",\"resample_args\":{\"rule\":\"1D\"}},\n",
+ " au.audio_max_freq:{\"audio_column_name\":\"frequency\",\"resample_args\":{\"rule\":\"1D\"}}}"
]
},
{
@@ -1457,18 +1622,18 @@
"id": "7a67b446",
"metadata": {},
"source": [
- "- `wrapper_features1` will be used to analyze two features, `audio_count_loud` and `audio_max_freq`. For the feature audio_count_loud, we will use the data stored in the column `double_decibels` in our dataframe and the data will be binned in one day periods. For the feature audio_max_freq, we will use the data stored in the column `double_frequency` in our dataframe and the data will be binned in one day periods. "
+ "- `wrapper_features1` will be used to analyze two features, `audio_count_loud` and `audio_max_freq`. For the feature audio_count_loud, we will use the data stored in the column `decibels` in our dataframe and the data will be binned in one day periods. For the feature audio_max_freq, we will use the data stored in the column `frequency` in our dataframe and the data will be binned in one day periods. "
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 18,
"id": "d3332573",
"metadata": {},
"outputs": [],
"source": [
"wrapper_features2 = {au.audio_mean_db:{\"audio_column_name\":\"random_name\",\"resample_args\":{\"rule\":\"1D\"}},\n",
- " au.audio_count_speech:{\"audio_column_name\":\"double_decibels\", \"audio_freq_name\":\"double_frequency\", \"resample_args\":{\"rule\":\"5H\",\"offset\":\"5min\"}}}"
+ " au.audio_count_speech:{\"audio_column_name\":\"decibels\", \"audio_freq_name\":\"frequency\", \"resample_args\":{\"rule\":\"5H\",\"offset\":\"5min\"}}}"
]
},
{
@@ -1477,12 +1642,12 @@
"id": "205c28ba",
"metadata": {},
"source": [
- "- `wrapper_features2` will be used to analyze two features, `audio_mean_db` and `audio_count_speech`. For the feature audio_mean_db, we will use the data stored in the column `random_name` in our dataframe and the data will be binned in one day periods. For the feature audio_count_speech, we will use the data stored in the column `double_decibels` in our dataframe and the data will be binned in 5-hour periods with a 5-minute offset. Note that for this feature we will also need another column named \"audio_freq_column\", this is because the speech is not only defined by the amplitude of the recording, but the frequency range. "
+ "- `wrapper_features2` will be used to analyze two features, `audio_mean_db` and `audio_count_speech`. For the feature audio_mean_db, we will use the data stored in the column `random_name` in our dataframe and the data will be binned in one day periods. For the feature audio_count_speech, we will use the data stored in the column `decibels` in our dataframe and the data will be binned in 5-hour periods with a 5-minute offset. Note that this feature also needs a second argument, \"audio_freq_name\" (here the column `frequency`), because speech is defined not only by the amplitude of the recording but also by its frequency range. "
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 19,
"id": "a2570c5b",
"metadata": {},
"outputs": [],
@@ -1500,7 +1665,7 @@
"source": [
"- `wrapper_features3` will be used to analyze three features, `audio_mean_db`, `audio_min_freq`, and `audio_count_silent`. For the feature audio_mean_db, we will use the data stored in the column `one_name` and the data will be binned in one day periods with a 5-min offset. For the feature audio_min_freq, we will use the data stored in the column `one_name` in our dataframe and the data will be binned in 5-hour periods. Finally, for the feature audio_count_silent, we will use the data stored in the column `another_name` in our dataframe and the data will be binned in 30-minute periods and the origin of the bins will be the ceiling midnight of the last day.\n",
"\n",
- "**Default values:** if no arguments are passed, `niimpy`'s default values are either \"double_decibels\", \"double_frequency\", or \"is_silent\" for the communication_column_name, and 30-min aggregation bins. The column name depends on the function to be called. Moreover, the wrapper will compute all the available functions in absence of the argument dictionary. \n",
+ "**Default values:** if no arguments are passed, `niimpy`'s default values are either \"decibels\", \"frequency\", or \"is_silent\" for the audio_column_name, and 30-min aggregation bins. The column name depends on the function to be called. Moreover, the wrapper will compute all the available functions in the absence of the argument dictionary. \n",
"\n",
"#### 4.2.2 Using the wrapper\n",
"Now that we understand how the wrapper is customized, it is time we compute our first communication feature using the wrapper. Suppose that we are interested in extracting the audio_count_loud duration every 50 minutes. We will need `niimpy`'s `extract_features_audio` function, the data, and we will also need to create a dictionary to customize our function. Let's create the dictionary first"
@@ -1508,12 +1673,12 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 20,
"id": "1a16011f",
"metadata": {},
"outputs": [],
"source": [
- "wrapper_features1 = {au.audio_count_loud:{\"audio_column_name\":\"double_decibels\",\"resample_args\":{\"rule\":\"50T\"}}}"
+ "wrapper_features1 = {au.audio_count_loud:{\"audio_column_name\":\"decibels\",\"resample_args\":{\"rule\":\"50T\"}}}"
]
},
{
@@ -1527,7 +1692,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 21,
"id": "24f453c0",
"metadata": {},
"outputs": [
@@ -1601,7 +1766,7 @@
"2020-01-09 05:00:00+02:00 3p83yASkOb_B jd9INuQ5BBlW 1"
]
},
- "execution_count": 18,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1622,7 +1787,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 22,
"id": "0906693e",
"metadata": {},
"outputs": [
@@ -1709,14 +1874,14 @@
"2020-01-09 06:00:00+02:00 4138.0 "
]
},
- "execution_count": 19,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "wrapper_features2 = {au.audio_count_loud:{\"audio_column_name\":\"double_decibels\",\"resample_args\":{\"rule\":\"1H\"}},\n",
- " au.audio_min_freq:{\"audio_column_name\":\"double_frequency\", \"resample_args\":{\"rule\":\"1H\"}}}\n",
+ "wrapper_features2 = {au.audio_count_loud:{\"audio_column_name\":\"decibels\",\"resample_args\":{\"rule\":\"1H\"}},\n",
+ " au.audio_min_freq:{\"audio_column_name\":\"frequency\", \"resample_args\":{\"rule\":\"1H\"}}}\n",
"results_wrapper = au.extract_features_audio(data, features=wrapper_features2)\n",
"results_wrapper.head(5)"
]
@@ -1732,7 +1897,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 23,
"id": "4e80bfd0",
"metadata": {},
"outputs": [
@@ -1819,14 +1984,14 @@
"2020-01-09 05:05:00+02:00 4138.0 "
]
},
- "execution_count": 20,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "wrapper_features3 = {au.audio_count_loud:{\"audio_column_name\":\"double_decibels\",\"resample_args\":{\"rule\":\"1D\"}},\n",
- " au.audio_min_freq:{\"audio_column_name\":\"double_frequency\", \"resample_args\":{\"rule\":\"5H\", \"offset\":\"5min\"}}}\n",
+ "wrapper_features3 = {au.audio_count_loud:{\"audio_column_name\":\"decibels\",\"resample_args\":{\"rule\":\"1D\"}},\n",
+ " au.audio_min_freq:{\"audio_column_name\":\"frequency\", \"resample_args\":{\"rule\":\"5H\", \"offset\":\"5min\"}}}\n",
"results_wrapper = au.extract_features_audio(data, features=wrapper_features3)\n",
"results_wrapper.head(5)"
]
@@ -1852,7 +2017,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 24,
"id": "daf215ac",
"metadata": {},
"outputs": [],
@@ -1862,7 +2027,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 25,
"id": "68a22b4e",
"metadata": {},
"outputs": [
@@ -2043,7 +2208,7 @@
"2020-01-09 04:00:00+02:00 NaN "
]
},
- "execution_count": 22,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -2073,12 +2238,12 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 26,
"id": "839a0dee",
"metadata": {},
"outputs": [],
"source": [
- "def audio_sum_freq(df, audio_column_name = \"double_frequency\", resample_args = {\"rule\":\"30T\"}):\n",
+ "def audio_sum_freq(df, audio_column_name = \"frequency\", resample_args= {\"rule\":\"30T\"}):\n",
" if len(df)>0:\n",
" result = df.groupby('user')[audio_column_name].resample(**resample_args).sum()\n",
" result = result.to_frame(name='audio_sum_freq')\n",
@@ -2099,7 +2264,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 27,
"id": "150945da",
"metadata": {},
"outputs": [],
@@ -2109,7 +2274,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 28,
"id": "4d4bd7e4",
"metadata": {},
"outputs": [
@@ -2183,7 +2348,7 @@
"2019-08-13 09:00:00+03:00 iGyXetHE3S8u 13592"
]
},
- "execution_count": 25,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
diff --git a/docs/user_guide/preprocessing/location.ipynb b/docs/user_guide/preprocessing/location.ipynb
index 8316fca3..dc9d6042 100644
--- a/docs/user_guide/preprocessing/location.ipynb
+++ b/docs/user_guide/preprocessing/location.ipynb
@@ -22,11 +22,11 @@
"Location data is expected to have the following columns (column names can be different, but in that case they must be provided as parameters):\n",
"- `user`: Subject ID\n",
"- `device`: Device ID\n",
- "- `double_latitude`: Latitude as a floating point number\n",
- "- `double_longitude`: Longitude as a floating point number\n",
+ "- `latitude`: Latitude as a floating point number\n",
+ "- `longitude`: Longitude as a floating point number\n",
"\n",
"Optional columns include:\n",
- "- `double_speed`: Speed measured at the location\n",
+ "- `speed`: Speed measured at the location\n",
"\n",
"\n",
"`Niimpy` provides these main functions to clean, downsample, and extract features from GPS location data:\n",
@@ -208,7 +208,120 @@
"id": "86e7396c",
"metadata": {},
"source": [
- "The necessary columns for further analysis are `double_latitude`, `double_longitude`, `double_speed`, and `user`. `user` refers to a unique identifier for a subject."
+ "For further analysis we need a `latitude`, `longitude`, `speed`, and `user` column. `user` refers to a unique identifier for a subject.\n",
+ "\n",
+ "These columns exist in the data, but some column names are different. We could provide these column names as arguments, but it is easier to rename them here:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "df3a2a0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " time | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " speed | \n",
+ " user | \n",
+ " datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-03-27 06:03:29+02:00 | \n",
+ " 1364357009 | \n",
+ " 43.706667 | \n",
+ " -72.289097 | \n",
+ " 0.00 | \n",
+ " gps_u01 | \n",
+ " 2013-03-27 06:03:29+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2013-03-27 06:23:29+02:00 | \n",
+ " 1364358209 | \n",
+ " 43.706637 | \n",
+ " -72.289066 | \n",
+ " 0.00 | \n",
+ " gps_u01 | \n",
+ " 2013-03-27 06:23:29+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2013-03-27 06:43:25+02:00 | \n",
+ " 1364359405 | \n",
+ " 43.706678 | \n",
+ " -72.289018 | \n",
+ " 0.25 | \n",
+ " gps_u01 | \n",
+ " 2013-03-27 06:43:25+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2013-03-27 07:03:29+02:00 | \n",
+ " 1364360609 | \n",
+ " 43.706665 | \n",
+ " -72.289087 | \n",
+ " 0.00 | \n",
+ " gps_u01 | \n",
+ " 2013-03-27 07:03:29+02:00 | \n",
+ "
\n",
+ " \n",
+ " 2013-03-27 07:23:25+02:00 | \n",
+ " 1364361805 | \n",
+ " 43.706808 | \n",
+ " -72.289370 | \n",
+ " 0.00 | \n",
+ " gps_u01 | \n",
+ " 2013-03-27 07:23:25+02:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " time latitude longitude speed user \\\n",
+ "2013-03-27 06:03:29+02:00 1364357009 43.706667 -72.289097 0.00 gps_u01 \n",
+ "2013-03-27 06:23:29+02:00 1364358209 43.706637 -72.289066 0.00 gps_u01 \n",
+ "2013-03-27 06:43:25+02:00 1364359405 43.706678 -72.289018 0.25 gps_u01 \n",
+ "2013-03-27 07:03:29+02:00 1364360609 43.706665 -72.289087 0.00 gps_u01 \n",
+ "2013-03-27 07:23:25+02:00 1364361805 43.706808 -72.289370 0.00 gps_u01 \n",
+ "\n",
+ " datetime \n",
+ "2013-03-27 06:03:29+02:00 2013-03-27 06:03:29+02:00 \n",
+ "2013-03-27 06:23:29+02:00 2013-03-27 06:23:29+02:00 \n",
+ "2013-03-27 06:43:25+02:00 2013-03-27 06:43:25+02:00 \n",
+ "2013-03-27 07:03:29+02:00 2013-03-27 07:03:29+02:00 \n",
+ "2013-03-27 07:23:25+02:00 2013-03-27 07:23:25+02:00 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = data.rename(columns={\"double_latitude\": \"latitude\", \"double_longitude\": \"longitude\", \"double_speed\": \"speed\"})\n",
+ "data.head()"
]
},
{
@@ -235,7 +348,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "a96bdaa6",
"metadata": {},
"outputs": [
@@ -245,7 +358,7 @@
"(9857, 6)"
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -290,7 +403,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"id": "01aefd90",
"metadata": {},
"outputs": [
@@ -300,7 +413,7 @@
"(9755, 5)"
]
},
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -313,7 +426,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"id": "d7027bec",
"metadata": {},
"outputs": [
@@ -340,9 +453,9 @@
" | \n",
" user | \n",
" time | \n",
- " double_latitude | \n",
- " double_longitude | \n",
- " double_speed | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " speed | \n",
" \n",
" \n",
" \n",
@@ -386,96 +499,26 @@
" -72.329240 | \n",
" 0.0 | \n",
" \n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 2013-05-29 16:10:00+03:00 | \n",
- " gps_u01 | \n",
- " 1.369833e+09 | \n",
- " 43.706711 | \n",
- " -72.289205 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2013-05-29 16:20:00+03:00 | \n",
- " gps_u01 | \n",
- " 1.369834e+09 | \n",
- " 43.706708 | \n",
- " -72.289162 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2013-05-29 16:30:00+03:00 | \n",
- " gps_u01 | \n",
- " 1.369834e+09 | \n",
- " 43.706725 | \n",
- " -72.289149 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2013-05-29 16:40:00+03:00 | \n",
- " gps_u01 | \n",
- " 1.369835e+09 | \n",
- " 43.706697 | \n",
- " -72.289165 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2013-05-29 16:50:00+03:00 | \n",
- " gps_u01 | \n",
- " 1.369836e+09 | \n",
- " 43.706713 | \n",
- " -72.289191 | \n",
- " 0.0 | \n",
- "
\n",
" \n",
"\n",
- "9755 rows × 5 columns
\n",
""
],
"text/plain": [
- " user time double_latitude \\\n",
- "2013-03-27 06:00:00+02:00 gps_u00 1.364357e+09 43.759135 \n",
- "2013-03-27 06:20:00+02:00 gps_u00 1.364358e+09 43.759503 \n",
- "2013-03-27 06:40:00+02:00 gps_u00 1.364359e+09 43.759134 \n",
- "2013-03-27 07:00:00+02:00 gps_u00 1.364361e+09 43.759135 \n",
- "2013-03-27 07:20:00+02:00 gps_u00 1.364362e+09 43.759135 \n",
- "... ... ... ... \n",
- "2013-05-29 16:10:00+03:00 gps_u01 1.369833e+09 43.706711 \n",
- "2013-05-29 16:20:00+03:00 gps_u01 1.369834e+09 43.706708 \n",
- "2013-05-29 16:30:00+03:00 gps_u01 1.369834e+09 43.706725 \n",
- "2013-05-29 16:40:00+03:00 gps_u01 1.369835e+09 43.706697 \n",
- "2013-05-29 16:50:00+03:00 gps_u01 1.369836e+09 43.706713 \n",
- "\n",
- " double_longitude double_speed \n",
- "2013-03-27 06:00:00+02:00 -72.329240 0.0 \n",
- "2013-03-27 06:20:00+02:00 -72.329018 0.0 \n",
- "2013-03-27 06:40:00+02:00 -72.329238 0.0 \n",
- "2013-03-27 07:00:00+02:00 -72.329240 0.0 \n",
- "2013-03-27 07:20:00+02:00 -72.329240 0.0 \n",
- "... ... ... \n",
- "2013-05-29 16:10:00+03:00 -72.289205 0.0 \n",
- "2013-05-29 16:20:00+03:00 -72.289162 0.0 \n",
- "2013-05-29 16:30:00+03:00 -72.289149 0.0 \n",
- "2013-05-29 16:40:00+03:00 -72.289165 0.0 \n",
- "2013-05-29 16:50:00+03:00 -72.289191 0.0 \n",
- "\n",
- "[9755 rows x 5 columns]"
+ " user time latitude longitude speed\n",
+ "2013-03-27 06:00:00+02:00 gps_u00 1.364357e+09 43.759135 -72.329240 0.0\n",
+ "2013-03-27 06:20:00+02:00 gps_u00 1.364358e+09 43.759503 -72.329018 0.0\n",
+ "2013-03-27 06:40:00+02:00 gps_u00 1.364359e+09 43.759134 -72.329238 0.0\n",
+ "2013-03-27 07:00:00+02:00 gps_u00 1.364361e+09 43.759135 -72.329240 0.0\n",
+ "2013-03-27 07:20:00+02:00 gps_u00 1.364362e+09 43.759135 -72.329240 0.0"
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "binned_data"
+ "binned_data.head()"
]
},
{
@@ -532,7 +575,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"id": "5bf0185c",
"metadata": {},
"outputs": [
@@ -559,25 +602,25 @@
" | \n",
" user | \n",
" n_significant_places | \n",
- " n_transitions | \n",
+ " n_home | \n",
+ " n_sps | \n",
" normalized_entropy | \n",
- " n_top1 | \n",
- " n_top4 | \n",
- " n_rare | \n",
- " n_top2 | \n",
" n_moving | \n",
+ " n_top4 | \n",
" max_dist_home | \n",
+ " n_top5 | \n",
+ " n_rare | \n",
" ... | \n",
- " n_top3 | \n",
+ " n_top1 | \n",
" entropy | \n",
- " n_sps | \n",
- " log_variance | \n",
+ " n_transitions | \n",
" variance | \n",
+ " log_variance | \n",
" dist_total | \n",
- " speed_variance | \n",
" speed_average | \n",
- " n_bins | \n",
" speed_max | \n",
+ " n_bins | \n",
+ " speed_variance | \n",
" \n",
" \n",
" \n",
@@ -585,169 +628,169 @@
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u00 | \n",
" 6 | \n",
- " 48.0 | \n",
- " 3.163631 | \n",
- " 106.0 | \n",
- " 20.0 | \n",
- " 3.0 | \n",
- " 99.0 | \n",
- " 8.0 | \n",
+ " 97.0 | \n",
+ " 5.0 | \n",
+ " 3.006920 | \n",
+ " 54.0 | \n",
+ " 18.0 | \n",
" 2.074186e+04 | \n",
+ " 8.0 | \n",
+ " 0.0 | \n",
" ... | \n",
- " 34.0 | \n",
- " 5.091668 | \n",
- " 5.0 | \n",
- " -5.761688 | \n",
+ " 98.0 | \n",
+ " 4.839451 | \n",
+ " 25.0 | \n",
" 0.003146 | \n",
+ " -5.761688 | \n",
" 4.132581e+05 | \n",
- " 0.044885 | \n",
- " 0.033496 | \n",
+ " 1.116127 | \n",
+ " 17.284037 | \n",
" 288.0 | \n",
- " 1.750000 | \n",
+ " 9.876405 | \n",
" \n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
" 10 | \n",
- " 194.0 | \n",
- " 3.163793 | \n",
- " 1016.0 | \n",
- " 45.0 | \n",
- " 45.0 | \n",
- " 668.0 | \n",
- " 66.0 | \n",
+ " 969.0 | \n",
+ " 8.0 | \n",
+ " 3.045317 | \n",
+ " 318.0 | \n",
+ " 37.0 | \n",
" 2.914790e+05 | \n",
+ " 18.0 | \n",
+ " 18.0 | \n",
" ... | \n",
- " 135.0 | \n",
- " 7.284903 | \n",
- " 10.0 | \n",
- " -1.439133 | \n",
+ " 976.0 | \n",
+ " 6.332559 | \n",
+ " 97.0 | \n",
" 0.237133 | \n",
+ " -1.439133 | \n",
" 2.179693e+06 | \n",
- " 6.129277 | \n",
- " 0.269932 | \n",
+ " 0.821680 | \n",
+ " 33.831053 | \n",
" 2032.0 | \n",
- " 33.250000 | \n",
+ " 9.970465 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u00 | \n",
" 15 | \n",
- " 107.0 | \n",
- " 2.696752 | \n",
- " 1030.0 | \n",
- " 65.0 | \n",
- " 86.0 | \n",
- " 501.0 | \n",
- " 76.0 | \n",
+ " 1007.0 | \n",
+ " 9.0 | \n",
+ " 3.110317 | \n",
+ " 255.0 | \n",
+ " 43.0 | \n",
" 1.041741e+06 | \n",
+ " 38.0 | \n",
+ " 31.0 | \n",
" ... | \n",
- " 86.0 | \n",
- " 6.701177 | \n",
- " 12.0 | \n",
- " 2.114892 | \n",
+ " 1009.0 | \n",
+ " 6.834065 | \n",
+ " 69.0 | \n",
" 8.288687 | \n",
+ " 2.114892 | \n",
" 6.986551e+06 | \n",
- " 7.590639 | \n",
- " 0.351280 | \n",
+ " 0.847341 | \n",
+ " 42.507751 | \n",
" 1903.0 | \n",
- " 34.000000 | \n",
+ " 15.081070 | \n",
"
\n",
" \n",
" 2013-06-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
" 1 | \n",
- " 10.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
" 0.000000 | \n",
- " 15.0 | \n",
+ " 18.0 | \n",
" 0.0 | \n",
- " 15.0 | \n",
- " 7.0 | \n",
- " 2.0 | \n",
- " 2.035837e+04 | \n",
- " ... | \n",
+ " 1.989381e+04 | \n",
" 0.0 | \n",
+ " 6.0 | \n",
+ " ... | \n",
+ " 6.0 | \n",
" 0.000000 | \n",
- " 1.0 | \n",
- " -4.200287 | \n",
+ " 0.0 | \n",
" 0.014991 | \n",
+ " -4.200287 | \n",
" 2.252893e+05 | \n",
- " 0.021490 | \n",
- " 0.044126 | \n",
+ " 14.601880 | \n",
+ " 43.321397 | \n",
" 24.0 | \n",
- " 0.559017 | \n",
+ " 242.791725 | \n",
"
\n",
" \n",
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u01 | \n",
" 4 | \n",
- " 8.0 | \n",
- " 4.392317 | \n",
- " 286.0 | \n",
- " 0.0 | \n",
+ " 273.0 | \n",
+ " 2.0 | \n",
+ " 2.584963 | \n",
+ " 12.0 | \n",
" 0.0 | \n",
- " 21.0 | \n",
- " 18.0 | \n",
" 6.975303e+02 | \n",
- " ... | \n",
" 0.0 | \n",
- " 3.044522 | \n",
- " 2.0 | \n",
- " -12.520989 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 307.0 | \n",
+ " 1.791759 | \n",
+ " 4.0 | \n",
" 0.000004 | \n",
+ " -12.520989 | \n",
" 1.328713e+04 | \n",
- " 0.073370 | \n",
- " 0.056290 | \n",
+ " 0.029994 | \n",
+ " 0.744393 | \n",
" 325.0 | \n",
- " 2.692582 | \n",
+ " 0.008880 | \n",
"
\n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u01 | \n",
" 4 | \n",
+ " 1492.0 | \n",
" 2.0 | \n",
- " 0.000000 | \n",
- " 1998.0 | \n",
+ " 5.977280 | \n",
+ " 78.0 | \n",
" 0.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 71.0 | \n",
" 1.156568e+04 | \n",
- " ... | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 1.0 | \n",
- " -10.510017 | \n",
+ " ... | \n",
+ " 1928.0 | \n",
+ " 4.143135 | \n",
+ " 26.0 | \n",
" 0.000027 | \n",
+ " -10.510017 | \n",
" 1.238429e+05 | \n",
- " 0.629393 | \n",
- " 0.066961 | \n",
+ " 0.050416 | \n",
+ " 16.992157 | \n",
" 2070.0 | \n",
- " 32.750000 | \n",
+ " 0.237219 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u01 | \n",
" 2 | \n",
- " 2.0 | \n",
+ " 42.0 | \n",
+ " 1.0 | \n",
" 0.000000 | \n",
- " 3078.0 | \n",
+ " 110.0 | \n",
" 0.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 34.0 | \n",
- " 3.957650e+03 | \n",
- " ... | \n",
+ " 6.771047e+02 | \n",
" 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 3003.0 | \n",
" 0.000000 | \n",
- " 1.0 | \n",
- " -11.364454 | \n",
+ " 0.0 | \n",
" 0.000012 | \n",
+ " -11.364454 | \n",
" 1.228235e+05 | \n",
- " 0.261978 | \n",
- " 0.026392 | \n",
+ " 0.044657 | \n",
+ " 9.967899 | \n",
" 3113.0 | \n",
- " 20.250000 | \n",
+ " 0.085366 | \n",
"
\n",
" \n",
"\n",
@@ -755,55 +798,64 @@
""
],
"text/plain": [
- " user n_significant_places n_transitions \\\n",
- "2013-03-31 00:00:00+02:00 gps_u00 6 48.0 \n",
- "2013-04-30 00:00:00+03:00 gps_u00 10 194.0 \n",
- "2013-05-31 00:00:00+03:00 gps_u00 15 107.0 \n",
- "2013-06-30 00:00:00+03:00 gps_u00 1 10.0 \n",
- "2013-03-31 00:00:00+02:00 gps_u01 4 8.0 \n",
- "2013-04-30 00:00:00+03:00 gps_u01 4 2.0 \n",
- "2013-05-31 00:00:00+03:00 gps_u01 2 2.0 \n",
+ " user n_significant_places n_home n_sps \\\n",
+ "2013-03-31 00:00:00+02:00 gps_u00 6 97.0 5.0 \n",
+ "2013-04-30 00:00:00+03:00 gps_u00 10 969.0 8.0 \n",
+ "2013-05-31 00:00:00+03:00 gps_u00 15 1007.0 9.0 \n",
+ "2013-06-30 00:00:00+03:00 gps_u00 1 0.0 0.0 \n",
+ "2013-03-31 00:00:00+02:00 gps_u01 4 273.0 2.0 \n",
+ "2013-04-30 00:00:00+03:00 gps_u01 4 1492.0 2.0 \n",
+ "2013-05-31 00:00:00+03:00 gps_u01 2 42.0 1.0 \n",
"\n",
- " normalized_entropy n_top1 n_top4 n_rare n_top2 \\\n",
- "2013-03-31 00:00:00+02:00 3.163631 106.0 20.0 3.0 99.0 \n",
- "2013-04-30 00:00:00+03:00 3.163793 1016.0 45.0 45.0 668.0 \n",
- "2013-05-31 00:00:00+03:00 2.696752 1030.0 65.0 86.0 501.0 \n",
- "2013-06-30 00:00:00+03:00 0.000000 15.0 0.0 15.0 7.0 \n",
- "2013-03-31 00:00:00+02:00 4.392317 286.0 0.0 0.0 21.0 \n",
- "2013-04-30 00:00:00+03:00 0.000000 1998.0 0.0 1.0 1.0 \n",
- "2013-05-31 00:00:00+03:00 0.000000 3078.0 0.0 1.0 1.0 \n",
+ " normalized_entropy n_moving n_top4 \\\n",
+ "2013-03-31 00:00:00+02:00 3.006920 54.0 18.0 \n",
+ "2013-04-30 00:00:00+03:00 3.045317 318.0 37.0 \n",
+ "2013-05-31 00:00:00+03:00 3.110317 255.0 43.0 \n",
+ "2013-06-30 00:00:00+03:00 0.000000 18.0 0.0 \n",
+ "2013-03-31 00:00:00+02:00 2.584963 12.0 0.0 \n",
+ "2013-04-30 00:00:00+03:00 5.977280 78.0 0.0 \n",
+ "2013-05-31 00:00:00+03:00 0.000000 110.0 0.0 \n",
"\n",
- " n_moving max_dist_home ... n_top3 entropy \\\n",
- "2013-03-31 00:00:00+02:00 8.0 2.074186e+04 ... 34.0 5.091668 \n",
- "2013-04-30 00:00:00+03:00 66.0 2.914790e+05 ... 135.0 7.284903 \n",
- "2013-05-31 00:00:00+03:00 76.0 1.041741e+06 ... 86.0 6.701177 \n",
- "2013-06-30 00:00:00+03:00 2.0 2.035837e+04 ... 0.0 0.000000 \n",
- "2013-03-31 00:00:00+02:00 18.0 6.975303e+02 ... 0.0 3.044522 \n",
- "2013-04-30 00:00:00+03:00 71.0 1.156568e+04 ... 0.0 0.000000 \n",
- "2013-05-31 00:00:00+03:00 34.0 3.957650e+03 ... 0.0 0.000000 \n",
+ " max_dist_home n_top5 n_rare ... n_top1 \\\n",
+ "2013-03-31 00:00:00+02:00 2.074186e+04 8.0 0.0 ... 98.0 \n",
+ "2013-04-30 00:00:00+03:00 2.914790e+05 18.0 18.0 ... 976.0 \n",
+ "2013-05-31 00:00:00+03:00 1.041741e+06 38.0 31.0 ... 1009.0 \n",
+ "2013-06-30 00:00:00+03:00 1.989381e+04 0.0 6.0 ... 6.0 \n",
+ "2013-03-31 00:00:00+02:00 6.975303e+02 0.0 0.0 ... 307.0 \n",
+ "2013-04-30 00:00:00+03:00 1.156568e+04 0.0 1.0 ... 1928.0 \n",
+ "2013-05-31 00:00:00+03:00 6.771047e+02 0.0 0.0 ... 3003.0 \n",
"\n",
- " n_sps log_variance variance dist_total \\\n",
- "2013-03-31 00:00:00+02:00 5.0 -5.761688 0.003146 4.132581e+05 \n",
- "2013-04-30 00:00:00+03:00 10.0 -1.439133 0.237133 2.179693e+06 \n",
- "2013-05-31 00:00:00+03:00 12.0 2.114892 8.288687 6.986551e+06 \n",
- "2013-06-30 00:00:00+03:00 1.0 -4.200287 0.014991 2.252893e+05 \n",
- "2013-03-31 00:00:00+02:00 2.0 -12.520989 0.000004 1.328713e+04 \n",
- "2013-04-30 00:00:00+03:00 1.0 -10.510017 0.000027 1.238429e+05 \n",
- "2013-05-31 00:00:00+03:00 1.0 -11.364454 0.000012 1.228235e+05 \n",
+ " entropy n_transitions variance log_variance \\\n",
+ "2013-03-31 00:00:00+02:00 4.839451 25.0 0.003146 -5.761688 \n",
+ "2013-04-30 00:00:00+03:00 6.332559 97.0 0.237133 -1.439133 \n",
+ "2013-05-31 00:00:00+03:00 6.834065 69.0 8.288687 2.114892 \n",
+ "2013-06-30 00:00:00+03:00 0.000000 0.0 0.014991 -4.200287 \n",
+ "2013-03-31 00:00:00+02:00 1.791759 4.0 0.000004 -12.520989 \n",
+ "2013-04-30 00:00:00+03:00 4.143135 26.0 0.000027 -10.510017 \n",
+ "2013-05-31 00:00:00+03:00 0.000000 0.0 0.000012 -11.364454 \n",
"\n",
- " speed_variance speed_average n_bins speed_max \n",
- "2013-03-31 00:00:00+02:00 0.044885 0.033496 288.0 1.750000 \n",
- "2013-04-30 00:00:00+03:00 6.129277 0.269932 2032.0 33.250000 \n",
- "2013-05-31 00:00:00+03:00 7.590639 0.351280 1903.0 34.000000 \n",
- "2013-06-30 00:00:00+03:00 0.021490 0.044126 24.0 0.559017 \n",
- "2013-03-31 00:00:00+02:00 0.073370 0.056290 325.0 2.692582 \n",
- "2013-04-30 00:00:00+03:00 0.629393 0.066961 2070.0 32.750000 \n",
- "2013-05-31 00:00:00+03:00 0.261978 0.026392 3113.0 20.250000 \n",
+ " dist_total speed_average speed_max n_bins \\\n",
+ "2013-03-31 00:00:00+02:00 4.132581e+05 1.116127 17.284037 288.0 \n",
+ "2013-04-30 00:00:00+03:00 2.179693e+06 0.821680 33.831053 2032.0 \n",
+ "2013-05-31 00:00:00+03:00 6.986551e+06 0.847341 42.507751 1903.0 \n",
+ "2013-06-30 00:00:00+03:00 2.252893e+05 14.601880 43.321397 24.0 \n",
+ "2013-03-31 00:00:00+02:00 1.328713e+04 0.029994 0.744393 325.0 \n",
+ "2013-04-30 00:00:00+03:00 1.238429e+05 0.050416 16.992157 2070.0 \n",
+ "2013-05-31 00:00:00+03:00 1.228235e+05 0.044657 9.967899 3113.0 \n",
+ "\n",
+ " speed_variance \n",
+ "2013-03-31 00:00:00+02:00 9.876405 \n",
+ "2013-04-30 00:00:00+03:00 9.970465 \n",
+ "2013-05-31 00:00:00+03:00 15.081070 \n",
+ "2013-06-30 00:00:00+03:00 242.791725 \n",
+ "2013-03-31 00:00:00+02:00 0.008880 \n",
+ "2013-04-30 00:00:00+03:00 0.237219 \n",
+ "2013-05-31 00:00:00+03:00 0.085366 \n",
"\n",
"[7 rows x 23 columns]"
]
},
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -820,7 +872,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"id": "d2b2e06b",
"metadata": {},
"outputs": [
@@ -846,118 +898,118 @@
" \n",
" | \n",
" user | \n",
- " log_variance | \n",
" variance | \n",
+ " log_variance | \n",
" dist_total | \n",
- " speed_variance | \n",
" speed_average | \n",
- " n_bins | \n",
" speed_max | \n",
+ " n_bins | \n",
+ " speed_variance | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u00 | \n",
- " -5.761688 | \n",
" 0.003146 | \n",
+ " -5.761688 | \n",
" 4.132581e+05 | \n",
- " 0.044885 | \n",
- " 0.033496 | \n",
+ " 1.116127 | \n",
+ " 17.284037 | \n",
" 288.0 | \n",
- " 1.750000 | \n",
+ " 9.876405 | \n",
"
\n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
- " -1.439133 | \n",
" 0.237133 | \n",
+ " -1.439133 | \n",
" 2.179693e+06 | \n",
- " 6.129277 | \n",
- " 0.269932 | \n",
+ " 0.821680 | \n",
+ " 33.831053 | \n",
" 2032.0 | \n",
- " 33.250000 | \n",
+ " 9.970465 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u00 | \n",
- " 2.114892 | \n",
" 8.288687 | \n",
+ " 2.114892 | \n",
" 6.986551e+06 | \n",
- " 7.590639 | \n",
- " 0.351280 | \n",
+ " 0.847341 | \n",
+ " 42.507751 | \n",
" 1903.0 | \n",
- " 34.000000 | \n",
+ " 15.081070 | \n",
"
\n",
" \n",
" 2013-06-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
- " -4.200287 | \n",
" 0.014991 | \n",
+ " -4.200287 | \n",
" 2.252893e+05 | \n",
- " 0.021490 | \n",
- " 0.044126 | \n",
+ " 14.601880 | \n",
+ " 43.321397 | \n",
" 24.0 | \n",
- " 0.559017 | \n",
+ " 242.791725 | \n",
"
\n",
" \n",
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u01 | \n",
- " -12.520989 | \n",
" 0.000004 | \n",
+ " -12.520989 | \n",
" 1.328713e+04 | \n",
- " 0.073370 | \n",
- " 0.056290 | \n",
+ " 0.029994 | \n",
+ " 0.744393 | \n",
" 325.0 | \n",
- " 2.692582 | \n",
+ " 0.008880 | \n",
"
\n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u01 | \n",
- " -10.510017 | \n",
" 0.000027 | \n",
+ " -10.510017 | \n",
" 1.238429e+05 | \n",
- " 0.629393 | \n",
- " 0.066961 | \n",
+ " 0.050416 | \n",
+ " 16.992157 | \n",
" 2070.0 | \n",
- " 32.750000 | \n",
+ " 0.237219 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u01 | \n",
- " -11.364454 | \n",
" 0.000012 | \n",
+ " -11.364454 | \n",
" 1.228235e+05 | \n",
- " 0.261978 | \n",
- " 0.026392 | \n",
+ " 0.044657 | \n",
+ " 9.967899 | \n",
" 3113.0 | \n",
- " 20.250000 | \n",
+ " 0.085366 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " user log_variance variance dist_total \\\n",
- "2013-03-31 00:00:00+02:00 gps_u00 -5.761688 0.003146 4.132581e+05 \n",
- "2013-04-30 00:00:00+03:00 gps_u00 -1.439133 0.237133 2.179693e+06 \n",
- "2013-05-31 00:00:00+03:00 gps_u00 2.114892 8.288687 6.986551e+06 \n",
- "2013-06-30 00:00:00+03:00 gps_u00 -4.200287 0.014991 2.252893e+05 \n",
- "2013-03-31 00:00:00+02:00 gps_u01 -12.520989 0.000004 1.328713e+04 \n",
- "2013-04-30 00:00:00+03:00 gps_u01 -10.510017 0.000027 1.238429e+05 \n",
- "2013-05-31 00:00:00+03:00 gps_u01 -11.364454 0.000012 1.228235e+05 \n",
+ " user variance log_variance dist_total \\\n",
+ "2013-03-31 00:00:00+02:00 gps_u00 0.003146 -5.761688 4.132581e+05 \n",
+ "2013-04-30 00:00:00+03:00 gps_u00 0.237133 -1.439133 2.179693e+06 \n",
+ "2013-05-31 00:00:00+03:00 gps_u00 8.288687 2.114892 6.986551e+06 \n",
+ "2013-06-30 00:00:00+03:00 gps_u00 0.014991 -4.200287 2.252893e+05 \n",
+ "2013-03-31 00:00:00+02:00 gps_u01 0.000004 -12.520989 1.328713e+04 \n",
+ "2013-04-30 00:00:00+03:00 gps_u01 0.000027 -10.510017 1.238429e+05 \n",
+ "2013-05-31 00:00:00+03:00 gps_u01 0.000012 -11.364454 1.228235e+05 \n",
"\n",
- " speed_variance speed_average n_bins speed_max \n",
- "2013-03-31 00:00:00+02:00 0.044885 0.033496 288.0 1.750000 \n",
- "2013-04-30 00:00:00+03:00 6.129277 0.269932 2032.0 33.250000 \n",
- "2013-05-31 00:00:00+03:00 7.590639 0.351280 1903.0 34.000000 \n",
- "2013-06-30 00:00:00+03:00 0.021490 0.044126 24.0 0.559017 \n",
- "2013-03-31 00:00:00+02:00 0.073370 0.056290 325.0 2.692582 \n",
- "2013-04-30 00:00:00+03:00 0.629393 0.066961 2070.0 32.750000 \n",
- "2013-05-31 00:00:00+03:00 0.261978 0.026392 3113.0 20.250000 "
+ " speed_average speed_max n_bins speed_variance \n",
+ "2013-03-31 00:00:00+02:00 1.116127 17.284037 288.0 9.876405 \n",
+ "2013-04-30 00:00:00+03:00 0.821680 33.831053 2032.0 9.970465 \n",
+ "2013-05-31 00:00:00+03:00 0.847341 42.507751 1903.0 15.081070 \n",
+ "2013-06-30 00:00:00+03:00 14.601880 43.321397 24.0 242.791725 \n",
+ "2013-03-31 00:00:00+02:00 0.029994 0.744393 325.0 0.008880 \n",
+ "2013-04-30 00:00:00+03:00 0.050416 16.992157 2070.0 0.237219 \n",
+ "2013-05-31 00:00:00+03:00 0.044657 9.967899 3113.0 0.085366 "
]
},
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -1002,19 +1054,19 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"id": "e602497e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{: {},\n",
- " : {},\n",
- " : {}}"
+ "{: {'resample_args': {'rule': '1ME'}},\n",
+ " : {'resample_args': {'rule': '1ME'}},\n",
+ " : {'resample_args': {'rule': '1ME'}}}"
]
},
- "execution_count": 9,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -1025,7 +1077,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"id": "6a042466",
"metadata": {},
"outputs": [
@@ -1052,9 +1104,9 @@
" | \n",
" user | \n",
" time | \n",
- " double_latitude | \n",
- " double_longitude | \n",
- " double_speed | \n",
+ " latitude | \n",
+ " longitude | \n",
+ " speed | \n",
" \n",
" \n",
" \n",
@@ -1103,22 +1155,15 @@
""
],
"text/plain": [
- " user time double_latitude \\\n",
- "2013-03-27 06:00:00+02:00 gps_u00 1.364357e+09 43.759135 \n",
- "2013-03-27 06:20:00+02:00 gps_u00 1.364358e+09 43.759503 \n",
- "2013-03-27 06:40:00+02:00 gps_u00 1.364359e+09 43.759134 \n",
- "2013-03-27 07:00:00+02:00 gps_u00 1.364361e+09 43.759135 \n",
- "2013-03-27 07:20:00+02:00 gps_u00 1.364362e+09 43.759135 \n",
- "\n",
- " double_longitude double_speed \n",
- "2013-03-27 06:00:00+02:00 -72.329240 0.0 \n",
- "2013-03-27 06:20:00+02:00 -72.329018 0.0 \n",
- "2013-03-27 06:40:00+02:00 -72.329238 0.0 \n",
- "2013-03-27 07:00:00+02:00 -72.329240 0.0 \n",
- "2013-03-27 07:20:00+02:00 -72.329240 0.0 "
+ " user time latitude longitude speed\n",
+ "2013-03-27 06:00:00+02:00 gps_u00 1.364357e+09 43.759135 -72.329240 0.0\n",
+ "2013-03-27 06:20:00+02:00 gps_u00 1.364358e+09 43.759503 -72.329018 0.0\n",
+ "2013-03-27 06:40:00+02:00 gps_u00 1.364359e+09 43.759134 -72.329238 0.0\n",
+ "2013-03-27 07:00:00+02:00 gps_u00 1.364361e+09 43.759135 -72.329240 0.0\n",
+ "2013-03-27 07:20:00+02:00 gps_u00 1.364362e+09 43.759135 -72.329240 0.0"
]
},
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -1138,7 +1183,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"id": "81d9d0d1",
"metadata": {},
"outputs": [
@@ -1164,7 +1209,7 @@
" \n",
" | \n",
" user | \n",
- " double_speed | \n",
+ " speed | \n",
"
\n",
" \n",
" \n",
@@ -1183,12 +1228,12 @@
""
],
"text/plain": [
- " user double_speed\n",
- "0 gps_u00 34.00\n",
- "1 gps_u01 32.75"
+ " user speed\n",
+ "0 gps_u00 34.00\n",
+ "1 gps_u01 32.75"
]
},
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1197,7 +1242,7 @@
"# customized function\n",
"def max_speed(df):\n",
" grouped = df.groupby('user')\n",
- " df = grouped['double_speed'].max().reset_index('user')\n",
+ " df = grouped['speed'].max().reset_index('user')\n",
" return df\n",
"\n",
"customized_features = nilo.extract_features_location(\n",
diff --git a/niimpy/_version.py b/niimpy/_version.py
index 3f262a63..923b9879 100644
--- a/niimpy/_version.py
+++ b/niimpy/_version.py
@@ -1 +1 @@
-__version__ = '1.2.1'
+__version__ = '1.2.2'
diff --git a/niimpy/exploration/missingness.py b/niimpy/exploration/missingness.py
index ff84c1f6..f28c5d77 100644
--- a/niimpy/exploration/missingness.py
+++ b/niimpy/exploration/missingness.py
@@ -126,7 +126,7 @@ def missing_noise(database,subject,start=None,end=None):
else:
end = noise.iloc[len(noise)-1]['datetime']
- noise = noise.drop(['device','user','time','double_silence_threshold','double_rms','blob_raw','is_silent','double_frequency'],axis=1)
+ noise = noise.drop(['device','user','time','silence_threshold','rms','blob_raw','is_silent','frequency'],axis=1)
noise = noise.loc[start:end]
noise['duration'] = noise['datetime'].diff()
noise['duration'] = get_seconds(noise['duration'])
@@ -139,7 +139,7 @@ def missing_noise(database,subject,start=None,end=None):
dates=noise.datetime_x.combine_first(noise.datetime_y)
noise['datetime']=dates
noise = noise.drop(['datetime_x','datetime_y'],axis=1)
- noise=noise.drop(['double_decibels', 'duration_y'],axis=1)
+ noise=noise.drop(['decibels', 'duration_y'],axis=1)
noise['missing'] = np.where(noise['duration']>=1860, 1, 0) #detect the missing points
noise['dummy'] = noise.missing.shift(-2) #assumes that everytime the cellphone shuts down, two timestamps are generated with -1 in the battery_health
noise['dummy'] = noise.dummy*noise.duration
diff --git a/niimpy/preprocessing/audio.py b/niimpy/preprocessing/audio.py
index d6537be6..aa830c6b 100755
--- a/niimpy/preprocessing/audio.py
+++ b/niimpy/preprocessing/audio.py
@@ -39,7 +39,7 @@ def audio_count_silent(df_u, audio_column_name = "is_silent", resample_args = {"
return result
return None
-def audio_count_speech(df_u, audio_column_name = "is_silent", audio_freq_name = "double_frequency", resample_args = {"rule":"30min"}, **kwargs):
+def audio_count_speech(df_u, audio_column_name = "is_silent", audio_freq_name = "frequency", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the number of times, within the specified timeframe,
when there has been some sound between 65Hz and 255Hz in the environment that could
be specified as speech. If there is no specified timeframe, the function sets a
@@ -77,7 +77,7 @@ def audio_count_speech(df_u, audio_column_name = "is_silent", audio_freq_name =
return result
return None
-def audio_count_loud(df_u, audio_column_name = "double_decibels", resample_args = {"rule":"30min"}, **kwargs):
+def audio_count_loud(df_u, audio_column_name = "decibels", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the number of times, within the specified timeframe,
when there has been some sound louder than 70dB in the environment. If there
is no specified timeframe, the function sets a 30 min default time window.
@@ -113,7 +113,7 @@ def audio_count_loud(df_u, audio_column_name = "double_decibels", resample_args
return result
return None
-def audio_min_freq(df_u, audio_column_name = "double_frequency", resample_args = {"rule":"30min"}, **kwargs):
+def audio_min_freq(df_u, audio_column_name = "frequency", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the minimum frequency of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -145,7 +145,7 @@ def audio_min_freq(df_u, audio_column_name = "double_frequency", resample_args =
return result
return None
-def audio_max_freq(df_u, audio_column_name = "double_frequency", resample_args = {"rule":"30min"}, **kwargs):
+def audio_max_freq(df_u, audio_column_name = "frequency", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the maximum frequency of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -177,7 +177,7 @@ def audio_max_freq(df_u, audio_column_name = "double_frequency", resample_args =
return result
return None
-def audio_mean_freq(df_u, audio_column_name = "double_frequency", resample_args = {"rule":"30min"}, **kwargs):
+def audio_mean_freq(df_u, audio_column_name = "frequency", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the mean frequency of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -209,7 +209,7 @@ def audio_mean_freq(df_u, audio_column_name = "double_frequency", resample_args
return result
return None
-def audio_median_freq(df_u, audio_column_name = "double_frequency", resample_args = {"rule":"30min"}, **kwargs):
+def audio_median_freq(df_u, audio_column_name = "frequency", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the median frequency of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -241,7 +241,7 @@ def audio_median_freq(df_u, audio_column_name = "double_frequency", resample_arg
return result
return None
-def audio_std_freq(df_u, audio_column_name = "double_frequency", resample_args = {"rule":"30min"}, **kwargs):
+def audio_std_freq(df_u, audio_column_name = "frequency", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the standard deviation of the frequency of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -273,7 +273,7 @@ def audio_std_freq(df_u, audio_column_name = "double_frequency", resample_args =
return result
return None
-def audio_min_db(df_u, audio_column_name = "double_decibels", resample_args = {"rule":"30min"}, **kwargs):
+def audio_min_db(df_u, audio_column_name = "decibels", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the minimum decibels of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -305,7 +305,7 @@ def audio_min_db(df_u, audio_column_name = "double_decibels", resample_args = {"
return result
return None
-def audio_max_db(df_u, audio_column_name = "double_decibels", resample_args = {"rule":"30min"}, **kwargs):
+def audio_max_db(df_u, audio_column_name = "decibels", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the maximum decibels of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -337,7 +337,7 @@ def audio_max_db(df_u, audio_column_name = "double_decibels", resample_args = {"
return result
return None
-def audio_mean_db(df_u, audio_column_name = "double_decibels", resample_args = {"rule":"30min"}, **kwargs):
+def audio_mean_db(df_u, audio_column_name = "decibels", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the mean decibels of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -369,7 +369,7 @@ def audio_mean_db(df_u, audio_column_name = "double_decibels", resample_args = {
return result
return None
-def audio_median_db(df_u, audio_column_name = "double_decibels", resample_args = {"rule":"30min"}, **kwargs):
+def audio_median_db(df_u, audio_column_name = "decibels", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the median decibels of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
@@ -401,7 +401,7 @@ def audio_median_db(df_u, audio_column_name = "double_decibels", resample_args =
return result
return None
-def audio_std_db(df_u, audio_column_name = "double_decibels", resample_args = {"rule":"30min"}, **kwargs):
+def audio_std_db(df_u, audio_column_name = "decibels", resample_args = {"rule":"30min"}, **kwargs):
""" This function returns the standard deviation of the decibels of the recorded audio snippets,
within the specified timeframe. If there is no specified timeframe, the function sets a
30 min default time window. The function aggregates this number by user, by timewindow.
diff --git a/niimpy/preprocessing/location.py b/niimpy/preprocessing/location.py
index 7101a3f1..d93851dc 100644
--- a/niimpy/preprocessing/location.py
+++ b/niimpy/preprocessing/location.py
@@ -65,8 +65,8 @@ def filter_location(location,
remove_disabled=True,
remove_zeros=True,
remove_network=False,
- latitude_column = "double_latitude",
- longitude_column = "double_longitude",
+ latitude_column = "latitude",
+ longitude_column = "longitude",
label_column = "label",
provider_column = "provider",
):
@@ -263,8 +263,8 @@ def number_of_significant_places(lats, lons, times):
def location_number_of_significant_places(
df,
- latitude_column="double_latitude",
- longitude_column="double_longitude",
+ latitude_column="latitude",
+ longitude_column="longitude",
resample_args={"rule": default_freq},
**kwargs
):
@@ -327,9 +327,9 @@ def compute_nbin_maxdist_home(lats, lons, latlon_home, home_radius=50):
def location_significant_place_features(
df,
- latitude_column="double_latitude",
- longitude_column="double_latitude",
- speed_column="double_speed",
+ latitude_column="latitude",
+ longitude_column="longitude",
+ speed_column="speed",
speed_threshold=0.277,
resample_args={"rule": default_freq},
**kwargs
@@ -342,9 +342,9 @@ def location_significant_place_features(
config: A dictionary of optional arguments
Optional arguments in config:
- longitude_column: The name of the column with longitude data in a floating point format. Defaults to 'double_longitude'.
- latitude_column: The name of the column with latitude data in a floating point format. Defaults to 'double_latitude'.
- speed_column: The name of the column with speed data in a floating point format. Defaults to 'double_speed'.
+ longitude_column: The name of the column with longitude data in a floating point format. Defaults to 'longitude'.
+ latitude_column: The name of the column with latitude data in a floating point format. Defaults to 'latitude'.
+ speed_column: The name of the column with speed data in a floating point format. Defaults to 'speed'.
resample_args: a dictionary of arguments for the Pandas resample function. For example to resample by hour, you would pass {"rule": "1h"}.
"""
assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
@@ -427,9 +427,9 @@ def compute_features(df):
def location_distance_features(
df,
- latitude_column="double_latitude",
- longitude_column="double_latitude",
- speed_column="double_speed",
+ latitude_column="latitude",
+ longitude_column="longitude",
+ speed_column="speed",
resample_args={"rule": default_freq},
**kwargs
):
@@ -441,9 +441,9 @@ def location_distance_features(
config: A dictionary of optional arguments
Optional arguments in config:
- longitude_column: The name of the column with longitude data in a floating point format. Defaults to 'double_longitude'.
- latitude_column: The name of the column with latitude data in a floating point format. Defaults to 'double_latitude'.
- speed_column: The name of the column with speed data in a floating point format. Defaults to 'double_speed'.
+ longitude_column: The name of the column with longitude data in a floating point format. Defaults to 'longitude'.
+ latitude_column: The name of the column with latitude data in a floating point format. Defaults to 'latitude'.
+ speed_column: The name of the column with speed data in a floating point format. Defaults to 'speed'.
resample_args: a dictionary of arguments for the Pandas resample function. For example to resample by hour, you would pass {"rule": "1h"}.
"""
assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
@@ -502,8 +502,8 @@ def extract_features_location(df, features=None):
----------
df : pd.DataFrame
dataframe of location data. It must contain these columns:
- `double_latitude`, `double_longitude`, `user`, `group`.
- `double_speed` is optional. If not provided, it will be
+ `latitude`, `longitude`, `user`, `group`.
+ `speed` is optional. If not provided, it will be
computed manually.
speed_threshold : float
Bins whose speed is lower than `speed_threshold` are considred
diff --git a/pyproject.toml b/pyproject.toml
index f26accc3..7d73e2ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "niimpy"
[project]
name = "niimpy"
-version = "1.2.1"
+version = "1.2.2"
readme = "README.md"
description = "Python module for analysis of behavioral data"
authors = [
@@ -56,7 +56,7 @@ Repository = "https://github.com/digitraceslab/niimpy"
[bumpver]
-current_version = "1.2.1"
+current_version = "1.2.2"
version_pattern = "MAJOR.MINOR.PATCH"
commit_message = "bump version {old_version} -> {new_version}"
commit = true
diff --git a/tests/preprocessing/test_audio.py b/tests/preprocessing/test_audio.py
index 7245c40c..a31fc9e3 100755
--- a/tests/preprocessing/test_audio.py
+++ b/tests/preprocessing/test_audio.py
@@ -6,6 +6,7 @@
# read sample data
data = niimpy.read_csv(config.MULTIUSER_AWARE_AUDIO_PATH, tz='Europe/Helsinki')
+data = data.rename(columns={"double_frequency": "frequency", "double_decibels": "decibels"})
def test_audio_features():
data["group"] = "group1"
@@ -46,8 +47,8 @@ def test_audio_features():
assert test_user2.loc[pd.Timestamp("2019-08-13 15:00:00", tz='Europe/Helsinki')]["audio_std_db"] < 3.54
features ={audio.audio_count_silent:{"audio_column_name":"is_silent","resample_args":{"rule":"1D"}},
- audio.audio_count_speech:{"audio_column_name":"is_silent","audio_freq_name":"double_frequency","resample_args":{"rule":"1D"}},
- audio.audio_count_loud:{"audio_column_name":"double_decibels","resample_args":{"rule":"1D"}}}
+ audio.audio_count_speech:{"audio_column_name":"is_silent","audio_freq_name":"frequency","resample_args":{"rule":"1D"}},
+ audio.audio_count_loud:{"audio_column_name":"decibels","resample_args":{"rule":"1D"}}}
test = audio.extract_features_audio(data, features=features)
test_user1 = test[test["user"] == "jd9INuQ5BBlW"]
@@ -61,11 +62,11 @@ def test_audio_features():
assert test_user1_dev2.loc[pd.Timestamp("2020-01-09", tz='Europe/Helsinki')]["audio_count_loud"] == 5
assert test_user2.loc[pd.Timestamp("2019-08-13", tz='Europe/Helsinki')]["audio_count_loud"] == 10
- features ={audio.audio_min_freq:{"audio_column_name":"double_frequency","resample_args":{"rule":"2h"}},
- audio.audio_max_freq:{"audio_column_name":"double_frequency","resample_args":{"rule":"2h"}},
- audio.audio_mean_freq:{"audio_column_name":"double_frequency","resample_args":{"rule":"2h"}},
- audio.audio_median_freq:{"audio_column_name":"double_frequency","resample_args":{"rule":"3h"}},
- audio.audio_std_freq:{"audio_column_name":"double_frequency","resample_args":{"rule":"3h"}}}
+ features ={audio.audio_min_freq:{"audio_column_name":"frequency","resample_args":{"rule":"2h"}},
+ audio.audio_max_freq:{"audio_column_name":"frequency","resample_args":{"rule":"2h"}},
+ audio.audio_mean_freq:{"audio_column_name":"frequency","resample_args":{"rule":"2h"}},
+ audio.audio_median_freq:{"audio_column_name":"frequency","resample_args":{"rule":"3h"}},
+ audio.audio_std_freq:{"audio_column_name":"frequency","resample_args":{"rule":"3h"}}}
test = audio.extract_features_audio(data, features=features)
test_user1 = test[test["user"] == "jd9INuQ5BBlW"]
@@ -82,11 +83,11 @@ def test_audio_features():
assert test_user2.loc[pd.Timestamp("2019-08-13 15:00:00", tz='Europe/Helsinki')]["audio_median_freq"] == 3853
assert test_user2.loc[pd.Timestamp("2019-08-13 15:00:00", tz='Europe/Helsinki')]["audio_std_freq"] < 3081
- features ={audio.audio_min_db:{"audio_column_name":"double_decibels","resample_args":{"rule":"1D"}},
- audio.audio_max_db:{"audio_column_name":"double_decibels","resample_args":{"rule":"1D"}},
- audio.audio_mean_db:{"audio_column_name":"double_decibels","resample_args":{"rule":"1D"}},
- audio.audio_median_db:{"audio_column_name":"double_decibels","resample_args":{"rule":"1D"}},
- audio.audio_std_db:{"audio_column_name":"double_decibels","resample_args":{"rule":"1D"}}}
+ features ={audio.audio_min_db:{"audio_column_name":"decibels","resample_args":{"rule":"1D"}},
+ audio.audio_max_db:{"audio_column_name":"decibels","resample_args":{"rule":"1D"}},
+ audio.audio_mean_db:{"audio_column_name":"decibels","resample_args":{"rule":"1D"}},
+ audio.audio_median_db:{"audio_column_name":"decibels","resample_args":{"rule":"1D"}},
+ audio.audio_std_db:{"audio_column_name":"decibels","resample_args":{"rule":"1D"}}}
test = audio.extract_features_audio(data, features=features)
test_user1 = test[test["user"] == "jd9INuQ5BBlW"]
diff --git a/tests/preprocessing/test_location.py b/tests/preprocessing/test_location.py
index 13a4040c..e13079ad 100644
--- a/tests/preprocessing/test_location.py
+++ b/tests/preprocessing/test_location.py
@@ -8,6 +8,7 @@
# read sample data
data = niimpy.read_csv(config.GPS_PATH, tz='et')
+data = data.rename(columns={"double_latitude": "latitude", "double_longitude": "longitude", "double_speed": "speed"})
data["group"] = "group1"
def test_distance_matrix():
@@ -37,6 +38,7 @@ def test_location_features():
assert "extra_column" not in features.columns
sps = features['n_sps'].dropna()
+ print(sps)
assert ((sps > 0) & (sps < 100)).all(), "Number of SPs not reasonable"
features_u1 = features[features["user"] == 'gps_u00']