diff --git a/docs/user_guide/preprocessing/location.ipynb b/docs/user_guide/preprocessing/location.ipynb
index 46ff53ac..8316fca3 100644
--- a/docs/user_guide/preprocessing/location.ipynb
+++ b/docs/user_guide/preprocessing/location.ipynb
@@ -307,7 +307,7 @@
],
"source": [
"binned_data = niimpy.util.aggregate(data, freq='5min', method_numerical='median')\n",
- "binned_data = binned_data.reset_index(0).dropna()\n",
+ "binned_data = binned_data.dropna()\n",
"binned_data.shape"
]
},
@@ -559,25 +559,25 @@
"
| \n",
" user | \n",
" n_significant_places | \n",
- " n_sps | \n",
- " n_static | \n",
- " n_moving | \n",
- " n_rare | \n",
- " n_home | \n",
- " max_dist_home | \n",
" n_transitions | \n",
+ " normalized_entropy | \n",
" n_top1 | \n",
+ " n_top4 | \n",
+ " n_rare | \n",
+ " n_top2 | \n",
+ " n_moving | \n",
+ " max_dist_home | \n",
" ... | \n",
- " n_top5 | \n",
+ " n_top3 | \n",
" entropy | \n",
- " normalized_entropy | \n",
+ " n_sps | \n",
+ " log_variance | \n",
+ " variance | \n",
" dist_total | \n",
- " n_bins | \n",
- " speed_average | \n",
" speed_variance | \n",
+ " speed_average | \n",
+ " n_bins | \n",
" speed_max | \n",
- " variance | \n",
- " log_variance | \n",
" \n",
" \n",
" \n",
@@ -585,169 +585,169 @@
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u00 | \n",
" 6 | \n",
- " 5.0 | \n",
- " 280.0 | \n",
- " 8.0 | \n",
- " 3.0 | \n",
- " 106.0 | \n",
- " 2.074186e+04 | \n",
" 48.0 | \n",
+ " 3.163631 | \n",
" 106.0 | \n",
+ " 20.0 | \n",
+ " 3.0 | \n",
+ " 99.0 | \n",
+ " 8.0 | \n",
+ " 2.074186e+04 | \n",
" ... | \n",
- " 18.0 | \n",
+ " 34.0 | \n",
" 5.091668 | \n",
- " 3.163631 | \n",
+ " 5.0 | \n",
+ " -5.761688 | \n",
+ " 0.003146 | \n",
" 4.132581e+05 | \n",
- " 288.0 | \n",
- " 0.033496 | \n",
" 0.044885 | \n",
+ " 0.033496 | \n",
+ " 288.0 | \n",
" 1.750000 | \n",
- " 0.003146 | \n",
- " -5.761688 | \n",
" \n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
" 10 | \n",
- " 10.0 | \n",
- " 1966.0 | \n",
- " 66.0 | \n",
- " 45.0 | \n",
- " 1010.0 | \n",
- " 2.914790e+05 | \n",
" 194.0 | \n",
+ " 3.163793 | \n",
" 1016.0 | \n",
+ " 45.0 | \n",
+ " 45.0 | \n",
+ " 668.0 | \n",
+ " 66.0 | \n",
+ " 2.914790e+05 | \n",
" ... | \n",
- " 38.0 | \n",
+ " 135.0 | \n",
" 7.284903 | \n",
- " 3.163793 | \n",
+ " 10.0 | \n",
+ " -1.439133 | \n",
+ " 0.237133 | \n",
" 2.179693e+06 | \n",
- " 2032.0 | \n",
- " 0.269932 | \n",
" 6.129277 | \n",
+ " 0.269932 | \n",
+ " 2032.0 | \n",
" 33.250000 | \n",
- " 0.237133 | \n",
- " -1.439133 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u00 | \n",
" 15 | \n",
- " 12.0 | \n",
- " 1827.0 | \n",
- " 76.0 | \n",
- " 86.0 | \n",
- " 1028.0 | \n",
- " 1.041741e+06 | \n",
" 107.0 | \n",
+ " 2.696752 | \n",
" 1030.0 | \n",
+ " 65.0 | \n",
+ " 86.0 | \n",
+ " 501.0 | \n",
+ " 76.0 | \n",
+ " 1.041741e+06 | \n",
" ... | \n",
- " 46.0 | \n",
+ " 86.0 | \n",
" 6.701177 | \n",
- " 2.696752 | \n",
+ " 12.0 | \n",
+ " 2.114892 | \n",
+ " 8.288687 | \n",
" 6.986551e+06 | \n",
- " 1903.0 | \n",
- " 0.351280 | \n",
" 7.590639 | \n",
+ " 0.351280 | \n",
+ " 1903.0 | \n",
" 34.000000 | \n",
- " 8.288687 | \n",
- " 2.114892 | \n",
"
\n",
" \n",
" 2013-06-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
" 1 | \n",
- " 1.0 | \n",
- " 22.0 | \n",
- " 2.0 | \n",
+ " 10.0 | \n",
+ " 0.000000 | \n",
" 15.0 | \n",
" 0.0 | \n",
- " 2.035837e+04 | \n",
- " 10.0 | \n",
" 15.0 | \n",
+ " 7.0 | \n",
+ " 2.0 | \n",
+ " 2.035837e+04 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 0.000000 | \n",
+ " 1.0 | \n",
+ " -4.200287 | \n",
+ " 0.014991 | \n",
" 2.252893e+05 | \n",
- " 24.0 | \n",
- " 0.044126 | \n",
" 0.021490 | \n",
+ " 0.044126 | \n",
+ " 24.0 | \n",
" 0.559017 | \n",
- " 0.014991 | \n",
- " -4.200287 | \n",
"
\n",
" \n",
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u01 | \n",
" 4 | \n",
- " 2.0 | \n",
- " 307.0 | \n",
- " 18.0 | \n",
- " 0.0 | \n",
- " 260.0 | \n",
- " 6.975303e+02 | \n",
" 8.0 | \n",
+ " 4.392317 | \n",
" 286.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 21.0 | \n",
+ " 18.0 | \n",
+ " 6.975303e+02 | \n",
" ... | \n",
" 0.0 | \n",
" 3.044522 | \n",
- " 4.392317 | \n",
+ " 2.0 | \n",
+ " -12.520989 | \n",
+ " 0.000004 | \n",
" 1.328713e+04 | \n",
- " 325.0 | \n",
- " 0.056290 | \n",
" 0.073370 | \n",
+ " 0.056290 | \n",
+ " 325.0 | \n",
" 2.692582 | \n",
- " 0.000004 | \n",
- " -12.520989 | \n",
"
\n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u01 | \n",
" 4 | \n",
+ " 2.0 | \n",
+ " 0.000000 | \n",
+ " 1998.0 | \n",
+ " 0.0 | \n",
" 1.0 | \n",
- " 1999.0 | \n",
- " 71.0 | \n",
" 1.0 | \n",
- " 1500.0 | \n",
+ " 71.0 | \n",
" 1.156568e+04 | \n",
- " 2.0 | \n",
- " 1998.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 0.000000 | \n",
+ " 1.0 | \n",
+ " -10.510017 | \n",
+ " 0.000027 | \n",
" 1.238429e+05 | \n",
- " 2070.0 | \n",
- " 0.066961 | \n",
" 0.629393 | \n",
+ " 0.066961 | \n",
+ " 2070.0 | \n",
" 32.750000 | \n",
- " 0.000027 | \n",
- " -10.510017 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u01 | \n",
" 2 | \n",
+ " 2.0 | \n",
+ " 0.000000 | \n",
+ " 3078.0 | \n",
+ " 0.0 | \n",
" 1.0 | \n",
- " 3079.0 | \n",
- " 34.0 | \n",
" 1.0 | \n",
- " 45.0 | \n",
+ " 34.0 | \n",
" 3.957650e+03 | \n",
- " 2.0 | \n",
- " 3078.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 0.000000 | \n",
+ " 1.0 | \n",
+ " -11.364454 | \n",
+ " 0.000012 | \n",
" 1.228235e+05 | \n",
- " 3113.0 | \n",
- " 0.026392 | \n",
" 0.261978 | \n",
+ " 0.026392 | \n",
+ " 3113.0 | \n",
" 20.250000 | \n",
- " 0.000012 | \n",
- " -11.364454 | \n",
"
\n",
" \n",
"\n",
@@ -755,59 +755,50 @@
""
],
"text/plain": [
- " user n_significant_places n_sps n_static \\\n",
- "2013-03-31 00:00:00+02:00 gps_u00 6 5.0 280.0 \n",
- "2013-04-30 00:00:00+03:00 gps_u00 10 10.0 1966.0 \n",
- "2013-05-31 00:00:00+03:00 gps_u00 15 12.0 1827.0 \n",
- "2013-06-30 00:00:00+03:00 gps_u00 1 1.0 22.0 \n",
- "2013-03-31 00:00:00+02:00 gps_u01 4 2.0 307.0 \n",
- "2013-04-30 00:00:00+03:00 gps_u01 4 1.0 1999.0 \n",
- "2013-05-31 00:00:00+03:00 gps_u01 2 1.0 3079.0 \n",
+ " user n_significant_places n_transitions \\\n",
+ "2013-03-31 00:00:00+02:00 gps_u00 6 48.0 \n",
+ "2013-04-30 00:00:00+03:00 gps_u00 10 194.0 \n",
+ "2013-05-31 00:00:00+03:00 gps_u00 15 107.0 \n",
+ "2013-06-30 00:00:00+03:00 gps_u00 1 10.0 \n",
+ "2013-03-31 00:00:00+02:00 gps_u01 4 8.0 \n",
+ "2013-04-30 00:00:00+03:00 gps_u01 4 2.0 \n",
+ "2013-05-31 00:00:00+03:00 gps_u01 2 2.0 \n",
"\n",
- " n_moving n_rare n_home max_dist_home \\\n",
- "2013-03-31 00:00:00+02:00 8.0 3.0 106.0 2.074186e+04 \n",
- "2013-04-30 00:00:00+03:00 66.0 45.0 1010.0 2.914790e+05 \n",
- "2013-05-31 00:00:00+03:00 76.0 86.0 1028.0 1.041741e+06 \n",
- "2013-06-30 00:00:00+03:00 2.0 15.0 0.0 2.035837e+04 \n",
- "2013-03-31 00:00:00+02:00 18.0 0.0 260.0 6.975303e+02 \n",
- "2013-04-30 00:00:00+03:00 71.0 1.0 1500.0 1.156568e+04 \n",
- "2013-05-31 00:00:00+03:00 34.0 1.0 45.0 3.957650e+03 \n",
+ " normalized_entropy n_top1 n_top4 n_rare n_top2 \\\n",
+ "2013-03-31 00:00:00+02:00 3.163631 106.0 20.0 3.0 99.0 \n",
+ "2013-04-30 00:00:00+03:00 3.163793 1016.0 45.0 45.0 668.0 \n",
+ "2013-05-31 00:00:00+03:00 2.696752 1030.0 65.0 86.0 501.0 \n",
+ "2013-06-30 00:00:00+03:00 0.000000 15.0 0.0 15.0 7.0 \n",
+ "2013-03-31 00:00:00+02:00 4.392317 286.0 0.0 0.0 21.0 \n",
+ "2013-04-30 00:00:00+03:00 0.000000 1998.0 0.0 1.0 1.0 \n",
+ "2013-05-31 00:00:00+03:00 0.000000 3078.0 0.0 1.0 1.0 \n",
"\n",
- " n_transitions n_top1 ... n_top5 entropy \\\n",
- "2013-03-31 00:00:00+02:00 48.0 106.0 ... 18.0 5.091668 \n",
- "2013-04-30 00:00:00+03:00 194.0 1016.0 ... 38.0 7.284903 \n",
- "2013-05-31 00:00:00+03:00 107.0 1030.0 ... 46.0 6.701177 \n",
- "2013-06-30 00:00:00+03:00 10.0 15.0 ... 0.0 0.000000 \n",
- "2013-03-31 00:00:00+02:00 8.0 286.0 ... 0.0 3.044522 \n",
- "2013-04-30 00:00:00+03:00 2.0 1998.0 ... 0.0 0.000000 \n",
- "2013-05-31 00:00:00+03:00 2.0 3078.0 ... 0.0 0.000000 \n",
+ " n_moving max_dist_home ... n_top3 entropy \\\n",
+ "2013-03-31 00:00:00+02:00 8.0 2.074186e+04 ... 34.0 5.091668 \n",
+ "2013-04-30 00:00:00+03:00 66.0 2.914790e+05 ... 135.0 7.284903 \n",
+ "2013-05-31 00:00:00+03:00 76.0 1.041741e+06 ... 86.0 6.701177 \n",
+ "2013-06-30 00:00:00+03:00 2.0 2.035837e+04 ... 0.0 0.000000 \n",
+ "2013-03-31 00:00:00+02:00 18.0 6.975303e+02 ... 0.0 3.044522 \n",
+ "2013-04-30 00:00:00+03:00 71.0 1.156568e+04 ... 0.0 0.000000 \n",
+ "2013-05-31 00:00:00+03:00 34.0 3.957650e+03 ... 0.0 0.000000 \n",
"\n",
- " normalized_entropy dist_total n_bins \\\n",
- "2013-03-31 00:00:00+02:00 3.163631 4.132581e+05 288.0 \n",
- "2013-04-30 00:00:00+03:00 3.163793 2.179693e+06 2032.0 \n",
- "2013-05-31 00:00:00+03:00 2.696752 6.986551e+06 1903.0 \n",
- "2013-06-30 00:00:00+03:00 0.000000 2.252893e+05 24.0 \n",
- "2013-03-31 00:00:00+02:00 4.392317 1.328713e+04 325.0 \n",
- "2013-04-30 00:00:00+03:00 0.000000 1.238429e+05 2070.0 \n",
- "2013-05-31 00:00:00+03:00 0.000000 1.228235e+05 3113.0 \n",
+ " n_sps log_variance variance dist_total \\\n",
+ "2013-03-31 00:00:00+02:00 5.0 -5.761688 0.003146 4.132581e+05 \n",
+ "2013-04-30 00:00:00+03:00 10.0 -1.439133 0.237133 2.179693e+06 \n",
+ "2013-05-31 00:00:00+03:00 12.0 2.114892 8.288687 6.986551e+06 \n",
+ "2013-06-30 00:00:00+03:00 1.0 -4.200287 0.014991 2.252893e+05 \n",
+ "2013-03-31 00:00:00+02:00 2.0 -12.520989 0.000004 1.328713e+04 \n",
+ "2013-04-30 00:00:00+03:00 1.0 -10.510017 0.000027 1.238429e+05 \n",
+ "2013-05-31 00:00:00+03:00 1.0 -11.364454 0.000012 1.228235e+05 \n",
"\n",
- " speed_average speed_variance speed_max variance \\\n",
- "2013-03-31 00:00:00+02:00 0.033496 0.044885 1.750000 0.003146 \n",
- "2013-04-30 00:00:00+03:00 0.269932 6.129277 33.250000 0.237133 \n",
- "2013-05-31 00:00:00+03:00 0.351280 7.590639 34.000000 8.288687 \n",
- "2013-06-30 00:00:00+03:00 0.044126 0.021490 0.559017 0.014991 \n",
- "2013-03-31 00:00:00+02:00 0.056290 0.073370 2.692582 0.000004 \n",
- "2013-04-30 00:00:00+03:00 0.066961 0.629393 32.750000 0.000027 \n",
- "2013-05-31 00:00:00+03:00 0.026392 0.261978 20.250000 0.000012 \n",
- "\n",
- " log_variance \n",
- "2013-03-31 00:00:00+02:00 -5.761688 \n",
- "2013-04-30 00:00:00+03:00 -1.439133 \n",
- "2013-05-31 00:00:00+03:00 2.114892 \n",
- "2013-06-30 00:00:00+03:00 -4.200287 \n",
- "2013-03-31 00:00:00+02:00 -12.520989 \n",
- "2013-04-30 00:00:00+03:00 -10.510017 \n",
- "2013-05-31 00:00:00+03:00 -11.364454 \n",
+ " speed_variance speed_average n_bins speed_max \n",
+ "2013-03-31 00:00:00+02:00 0.044885 0.033496 288.0 1.750000 \n",
+ "2013-04-30 00:00:00+03:00 6.129277 0.269932 2032.0 33.250000 \n",
+ "2013-05-31 00:00:00+03:00 7.590639 0.351280 1903.0 34.000000 \n",
+ "2013-06-30 00:00:00+03:00 0.021490 0.044126 24.0 0.559017 \n",
+ "2013-03-31 00:00:00+02:00 0.073370 0.056290 325.0 2.692582 \n",
+ "2013-04-30 00:00:00+03:00 0.629393 0.066961 2070.0 32.750000 \n",
+ "2013-05-31 00:00:00+03:00 0.261978 0.026392 3113.0 20.250000 \n",
"\n",
"[7 rows x 23 columns]"
]
@@ -821,7 +812,7 @@
"import warnings\n",
"warnings.filterwarnings('ignore', category=RuntimeWarning)\n",
"\n",
- "# extract all the available features\n",
+ "# extract all the available features\n",
"all_features = nilo.extract_features_location(binned_data)\n",
"all_features"
]
@@ -854,115 +846,115 @@
" \n",
" | \n",
" user | \n",
+ " log_variance | \n",
+ " variance | \n",
" dist_total | \n",
- " n_bins | \n",
- " speed_average | \n",
" speed_variance | \n",
+ " speed_average | \n",
+ " n_bins | \n",
" speed_max | \n",
- " variance | \n",
- " log_variance | \n",
"
\n",
" \n",
" \n",
" \n",
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u00 | \n",
+ " -5.761688 | \n",
+ " 0.003146 | \n",
" 4.132581e+05 | \n",
- " 288.0 | \n",
- " 0.033496 | \n",
" 0.044885 | \n",
+ " 0.033496 | \n",
+ " 288.0 | \n",
" 1.750000 | \n",
- " 0.003146 | \n",
- " -5.761688 | \n",
"
\n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
+ " -1.439133 | \n",
+ " 0.237133 | \n",
" 2.179693e+06 | \n",
- " 2032.0 | \n",
- " 0.269932 | \n",
" 6.129277 | \n",
+ " 0.269932 | \n",
+ " 2032.0 | \n",
" 33.250000 | \n",
- " 0.237133 | \n",
- " -1.439133 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u00 | \n",
+ " 2.114892 | \n",
+ " 8.288687 | \n",
" 6.986551e+06 | \n",
- " 1903.0 | \n",
- " 0.351280 | \n",
" 7.590639 | \n",
+ " 0.351280 | \n",
+ " 1903.0 | \n",
" 34.000000 | \n",
- " 8.288687 | \n",
- " 2.114892 | \n",
"
\n",
" \n",
" 2013-06-30 00:00:00+03:00 | \n",
" gps_u00 | \n",
+ " -4.200287 | \n",
+ " 0.014991 | \n",
" 2.252893e+05 | \n",
- " 24.0 | \n",
- " 0.044126 | \n",
" 0.021490 | \n",
+ " 0.044126 | \n",
+ " 24.0 | \n",
" 0.559017 | \n",
- " 0.014991 | \n",
- " -4.200287 | \n",
"
\n",
" \n",
" 2013-03-31 00:00:00+02:00 | \n",
" gps_u01 | \n",
+ " -12.520989 | \n",
+ " 0.000004 | \n",
" 1.328713e+04 | \n",
- " 325.0 | \n",
- " 0.056290 | \n",
" 0.073370 | \n",
+ " 0.056290 | \n",
+ " 325.0 | \n",
" 2.692582 | \n",
- " 0.000004 | \n",
- " -12.520989 | \n",
"
\n",
" \n",
" 2013-04-30 00:00:00+03:00 | \n",
" gps_u01 | \n",
+ " -10.510017 | \n",
+ " 0.000027 | \n",
" 1.238429e+05 | \n",
- " 2070.0 | \n",
- " 0.066961 | \n",
" 0.629393 | \n",
+ " 0.066961 | \n",
+ " 2070.0 | \n",
" 32.750000 | \n",
- " 0.000027 | \n",
- " -10.510017 | \n",
"
\n",
" \n",
" 2013-05-31 00:00:00+03:00 | \n",
" gps_u01 | \n",
+ " -11.364454 | \n",
+ " 0.000012 | \n",
" 1.228235e+05 | \n",
- " 3113.0 | \n",
- " 0.026392 | \n",
" 0.261978 | \n",
+ " 0.026392 | \n",
+ " 3113.0 | \n",
" 20.250000 | \n",
- " 0.000012 | \n",
- " -11.364454 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " user dist_total n_bins speed_average \\\n",
- "2013-03-31 00:00:00+02:00 gps_u00 4.132581e+05 288.0 0.033496 \n",
- "2013-04-30 00:00:00+03:00 gps_u00 2.179693e+06 2032.0 0.269932 \n",
- "2013-05-31 00:00:00+03:00 gps_u00 6.986551e+06 1903.0 0.351280 \n",
- "2013-06-30 00:00:00+03:00 gps_u00 2.252893e+05 24.0 0.044126 \n",
- "2013-03-31 00:00:00+02:00 gps_u01 1.328713e+04 325.0 0.056290 \n",
- "2013-04-30 00:00:00+03:00 gps_u01 1.238429e+05 2070.0 0.066961 \n",
- "2013-05-31 00:00:00+03:00 gps_u01 1.228235e+05 3113.0 0.026392 \n",
+ " user log_variance variance dist_total \\\n",
+ "2013-03-31 00:00:00+02:00 gps_u00 -5.761688 0.003146 4.132581e+05 \n",
+ "2013-04-30 00:00:00+03:00 gps_u00 -1.439133 0.237133 2.179693e+06 \n",
+ "2013-05-31 00:00:00+03:00 gps_u00 2.114892 8.288687 6.986551e+06 \n",
+ "2013-06-30 00:00:00+03:00 gps_u00 -4.200287 0.014991 2.252893e+05 \n",
+ "2013-03-31 00:00:00+02:00 gps_u01 -12.520989 0.000004 1.328713e+04 \n",
+ "2013-04-30 00:00:00+03:00 gps_u01 -10.510017 0.000027 1.238429e+05 \n",
+ "2013-05-31 00:00:00+03:00 gps_u01 -11.364454 0.000012 1.228235e+05 \n",
"\n",
- " speed_variance speed_max variance log_variance \n",
- "2013-03-31 00:00:00+02:00 0.044885 1.750000 0.003146 -5.761688 \n",
- "2013-04-30 00:00:00+03:00 6.129277 33.250000 0.237133 -1.439133 \n",
- "2013-05-31 00:00:00+03:00 7.590639 34.000000 8.288687 2.114892 \n",
- "2013-06-30 00:00:00+03:00 0.021490 0.559017 0.014991 -4.200287 \n",
- "2013-03-31 00:00:00+02:00 0.073370 2.692582 0.000004 -12.520989 \n",
- "2013-04-30 00:00:00+03:00 0.629393 32.750000 0.000027 -10.510017 \n",
- "2013-05-31 00:00:00+03:00 0.261978 20.250000 0.000012 -11.364454 "
+ " speed_variance speed_average n_bins speed_max \n",
+ "2013-03-31 00:00:00+02:00 0.044885 0.033496 288.0 1.750000 \n",
+ "2013-04-30 00:00:00+03:00 6.129277 0.269932 2032.0 33.250000 \n",
+ "2013-05-31 00:00:00+03:00 7.590639 0.351280 1903.0 34.000000 \n",
+ "2013-06-30 00:00:00+03:00 0.021490 0.044126 24.0 0.559017 \n",
+ "2013-03-31 00:00:00+02:00 0.073370 0.056290 325.0 2.692582 \n",
+ "2013-04-30 00:00:00+03:00 0.629393 0.066961 2070.0 32.750000 \n",
+ "2013-05-31 00:00:00+03:00 0.261978 0.026392 3113.0 20.250000 "
]
},
"execution_count": 8,
@@ -1017,9 +1009,9 @@
{
"data": {
"text/plain": [
- "{: {'resample_args': {'rule': '1M'}},\n",
- " : {'resample_args': {'rule': '1M'}},\n",
- " : {'resample_args': {'rule': '1M'}}}"
+ "{: {},\n",
+ " : {},\n",
+ " : {}}"
]
},
"execution_count": 9,
@@ -1203,7 +1195,7 @@
],
"source": [
"# customized function\n",
- "def max_speed(df, feature_arg):\n",
+ "def max_speed(df):\n",
" grouped = df.groupby('user')\n",
" df = grouped['double_speed'].max().reset_index('user')\n",
" return df\n",
@@ -1232,7 +1224,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.4"
+ "version": "3.12.6"
}
},
"nbformat": 4,
diff --git a/niimpy/preprocessing/location.py b/niimpy/preprocessing/location.py
index 2ec7249f..7101a3f1 100644
--- a/niimpy/preprocessing/location.py
+++ b/niimpy/preprocessing/location.py
@@ -261,16 +261,15 @@ def number_of_significant_places(lats, lons, times):
return np.nanmedian(sps)
-def location_number_of_significant_places(df, config=None):
+def location_number_of_significant_places(
+ df,
+ latitude_column="double_latitude",
+ longitude_column="double_longitude",
+ resample_args={"rule": default_freq},
+ **kwargs
+ ):
"""Computes number of significant places """
assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
- if config is None:
- config = {}
- assert isinstance(config, dict), "config is not a dictionary"
-
- latitude_column = config.get("latitude_column", "double_latitude")
- longitude_column = config.get("longitude_column", "double_longitude")
- config["resample_args"] = config.get("resample_args", {"rule": default_freq})
def compute_features(df):
df = df.sort_index() # sort based on time
@@ -288,7 +287,7 @@ def compute_features(df):
})
return row
- result = util.group_data(df).resample(**config["resample_args"], include_groups=False).apply(compute_features)
+ result = util.group_data(df).resample(**resample_args, include_groups=False).apply(compute_features)
result = util.reset_groups(result)
result = util.select_columns(result, ["n_significant_places"])
return result
@@ -326,7 +325,15 @@ def compute_nbin_maxdist_home(lats, lons, latlon_home, home_radius=50):
return time_home, max_dist_home
-def location_significant_place_features(df, config=None):
+def location_significant_place_features(
+    df,
+    latitude_column="double_latitude",
+    longitude_column="double_longitude",
+    speed_column="double_speed",
+    speed_threshold=0.277,
+    resample_args={"rule": default_freq},
+    **kwargs
+    ):
"""Calculates features related to Significant Places.
Parameters
@@ -341,15 +348,6 @@ def location_significant_place_features(df, config=None):
resample_args: a dictionary of arguments for the Pandas resample function. For example to resample by hour, you would pass {"rule": "1h"}.
"""
assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
- if config is None:
- config = {}
- assert isinstance(config, dict), "config is not a dictionary"
-
- latitude_column = config.get("latitude_column", "double_latitude")
- longitude_column = config.get("longitude_column", "double_latitude")
- speed_column = config.get("speed_column", "double_speed")
- speed_threshold = config.get("speed_threshold", 0.277)
- config["resample_args"] = config.get("resample_args", {"rule": default_freq})
def compute_features(df):
"""Compute features for a single user"""
@@ -421,13 +419,20 @@ def compute_features(df):
})
return row
- result = util.group_data(df).resample(**config["resample_args"], include_groups=False).apply(compute_features)
+ result = util.group_data(df).resample(**resample_args, include_groups=False).apply(compute_features)
result = util.reset_groups(result)
result = util.select_columns(result, ["n_sps", "n_static", "n_moving", "n_rare", "n_home", "max_dist_home", "n_transitions", "n_top1", "n_top2", "n_top3", "n_top4", "n_top5", "entropy", "normalized_entropy"])
return result
-def location_distance_features(df, config=None):
+def location_distance_features(
+    df,
+    latitude_column="double_latitude",
+    longitude_column="double_longitude",
+    speed_column="double_speed",
+    resample_args={"rule": default_freq},
+    **kwargs
+    ):
"""Calculates features related to distance and speed.
Parameters
@@ -442,14 +447,6 @@ def location_distance_features(df, config=None):
resample_args: a dictionary of arguments for the Pandas resample function. For example to resample by hour, you would pass {"rule": "1h"}.
"""
assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
- if config is None:
- config = {}
- assert isinstance(config, dict), "config is not a dictionary"
-
- latitude_column = config.get("latitude_column", "double_latitude")
- longitude_column = config.get("longitude_column", "double_latitude")
- speed_column = config.get("speed_column", "double_speed")
- config["resample_args"] = config.get("resample_args", {"rule": default_freq})
def compute_features(df):
"""Compute features for a single user and given time interval"""
@@ -488,7 +485,7 @@ def compute_features(df):
})
return row
- result = util.group_data(df).resample(**config["resample_args"], include_groups=False).apply(compute_features)
+ result = util.group_data(df).resample(**resample_args, include_groups=False).apply(compute_features)
result = util.reset_groups(result)
result = util.select_columns(result, ["dist_total", "n_bins", "speed_average", "speed_variance", "speed_max", "variance", "log_variance"])
return result
@@ -533,7 +530,7 @@ def extract_features_location(df, features=None):
computed_features = []
for features, feature_arg in features.items():
- computed_feature = features(df, feature_arg)
+ computed_feature = features(df, **feature_arg)
computed_feature = util.set_conserved_index(computed_feature)
computed_features.append(computed_feature)