diff --git a/docs/user_guide/preprocessing/location.ipynb b/docs/user_guide/preprocessing/location.ipynb index 2052fb89..deda2a17 100644 --- a/docs/user_guide/preprocessing/location.ipynb +++ b/docs/user_guide/preprocessing/location.ipynb @@ -602,25 +602,25 @@ " \n", " user\n", " n_significant_places\n", - " n_home\n", - " n_sps\n", - " normalized_entropy\n", + " n_transitions\n", " n_moving\n", " n_top4\n", + " n_rare\n", " max_dist_home\n", + " n_static\n", + " n_top2\n", " n_top5\n", - " n_rare\n", " ...\n", - " n_top1\n", - " entropy\n", - " n_transitions\n", " variance\n", - " log_variance\n", " dist_total\n", - " speed_average\n", - " speed_max\n", " n_bins\n", - " speed_variance\n", + " speed_max\n", + " time\n", + " latitude\n", + " longitude\n", + " speed\n", + " timezone\n", + " local_time\n", " \n", " \n", " \n", @@ -628,231 +628,231 @@ " 2013-03-31 00:00:00+02:00\n", " gps_u00\n", " 6\n", - " 97.0\n", - " 5.0\n", - " 3.006920\n", - " 54.0\n", - " 18.0\n", - " 2.074186e+04\n", + " 48.0\n", " 8.0\n", - " 0.0\n", + " 20.0\n", + " 3.0\n", + " 2.074186e+04\n", + " 280.0\n", + " 99.0\n", + " 18.0\n", " ...\n", - " 98.0\n", - " 4.839451\n", - " 25.0\n", " 0.003146\n", - " -5.761688\n", " 4.132581e+05\n", - " 1.116127\n", - " 17.284037\n", " 288.0\n", - " 9.876405\n", + " 1.750000\n", + " 2013-03-27 06:00:00+02:00\n", + " 43.759135\n", + " -72.329240\n", + " 0.00\n", + " America/New_York\n", + " 2013-03-27 00:00:00-04:00\n", " \n", " \n", " 2013-04-30 00:00:00+03:00\n", " gps_u00\n", " 10\n", - " 969.0\n", - " 8.0\n", - " 3.045317\n", - " 318.0\n", - " 37.0\n", + " 194.0\n", + " 66.0\n", + " 45.0\n", + " 45.0\n", " 2.914790e+05\n", - " 18.0\n", - " 18.0\n", + " 1966.0\n", + " 668.0\n", + " 38.0\n", " ...\n", - " 976.0\n", - " 6.332559\n", - " 97.0\n", " 0.237133\n", - " -1.439133\n", " 2.179693e+06\n", - " 0.821680\n", - " 33.831053\n", " 2032.0\n", - " 9.970465\n", + " 33.250000\n", + " 2013-04-01 00:00:00+03:00\n", + " 43.768644\n", + " -72.323482\n", + " 0.75\n", + " America/New_York\n", + " 2013-03-31 17:00:00-04:00\n", " \n", " \n", " 2013-05-31 00:00:00+03:00\n", " gps_u00\n", " 15\n", - " 1007.0\n", - " 9.0\n", - " 3.110317\n", - " 255.0\n", - " 43.0\n", + " 107.0\n", + " 76.0\n", + " 65.0\n", + " 86.0\n", " 1.041741e+06\n", - " 38.0\n", - " 31.0\n", + " 1827.0\n", + " 501.0\n", + " 46.0\n", " ...\n", - " 1009.0\n", - " 6.834065\n", - " 69.0\n", " 8.288687\n", - " 2.114892\n", " 6.986551e+06\n", - " 0.847341\n", - " 42.507751\n", " 1903.0\n", - " 15.081070\n", + " 34.000000\n", + " 2013-05-01 00:10:00+03:00\n", + " 43.706655\n", + " -72.287370\n", + " 0.00\n", + " America/New_York\n", + " 2013-04-30 17:10:00-04:00\n", " \n", " \n", " 2013-06-30 00:00:00+03:00\n", " gps_u00\n", " 1\n", + " 10.0\n", + " 2.0\n", " 0.0\n", + " 15.0\n", + " 2.035837e+04\n", + " 22.0\n", + " 7.0\n", " 0.0\n", - " 0.000000\n", - " 18.0\n", - " 0.0\n", - " 1.989381e+04\n", - " 0.0\n", - " 6.0\n", " ...\n", - " 6.0\n", - " 0.000000\n", - " 0.0\n", " 0.014991\n", - " -4.200287\n", " 2.252893e+05\n", - " 14.601880\n", - " 43.321397\n", " 24.0\n", - " 242.791725\n", + " 0.559017\n", + " 2013-06-01 00:00:00+03:00\n", + " 43.701397\n", + " -72.295015\n", + " 0.00\n", + " America/New_York\n", + " 2013-05-31 17:00:00-04:00\n", " \n", " \n", " 2013-03-31 00:00:00+02:00\n", " gps_u01\n", " 4\n", - " 273.0\n", - " 2.0\n", - " 2.584963\n", - " 12.0\n", + " 8.0\n", + " 18.0\n", " 0.0\n", - " 6.975303e+02\n", " 0.0\n", + " 6.975303e+02\n", + " 307.0\n", + " 21.0\n", " 0.0\n", " ...\n", - " 307.0\n", - " 1.791759\n", - " 4.0\n", " 0.000004\n", - " -12.520989\n", " 1.328713e+04\n", - " 0.029994\n", - " 0.744393\n", " 325.0\n", - " 0.008880\n", + " 2.692582\n", + " 2013-03-27 06:00:00+02:00\n", + " 43.706667\n", + " -72.289097\n", + " 0.00\n", + " America/New_York\n", + " 2013-03-27 00:00:00-04:00\n", " \n", " \n", " 2013-04-30 00:00:00+03:00\n", " gps_u01\n", " 4\n", - " 1492.0\n", " 2.0\n", - " 5.977280\n", - " 78.0\n", + " 71.0\n", " 0.0\n", + " 1.0\n", " 1.156568e+04\n", - " 0.0\n", + " 1999.0\n", " 1.0\n", + " 0.0\n", " ...\n", - " 1928.0\n", - " 4.143135\n", - " 26.0\n", " 0.000027\n", - " -10.510017\n", " 1.238429e+05\n", - " 0.050416\n", - " 16.992157\n", " 2070.0\n", - " 0.237219\n", + " 32.750000\n", + " 2013-04-01 00:00:00+03:00\n", + " 43.706653\n", + " -72.289024\n", + " 0.00\n", + " America/New_York\n", + " 2013-03-31 17:00:00-04:00\n", " \n", " \n", " 2013-05-31 00:00:00+03:00\n", " gps_u01\n", " 2\n", - " 42.0\n", - " 1.0\n", - " 0.000000\n", - " 110.0\n", - " 0.0\n", - " 6.771047e+02\n", + " 2.0\n", + " 34.0\n", " 0.0\n", + " 1.0\n", + " 3.957650e+03\n", + " 3079.0\n", + " 1.0\n", " 0.0\n", " ...\n", - " 3003.0\n", - " 0.000000\n", - " 0.0\n", " 0.000012\n", - " -11.364454\n", " 1.228235e+05\n", - " 0.044657\n", - " 9.967899\n", " 3113.0\n", - " 0.085366\n", + " 20.250000\n", + " 2013-05-01 00:15:00+03:00\n", + " 43.706731\n", + " -72.289062\n", + " 0.00\n", + " America/New_York\n", + " 2013-04-30 17:15:00-04:00\n", " \n", " \n", "\n", - "

7 rows × 23 columns

\n", + "

7 rows × 29 columns

\n", "" ], "text/plain": [ - " user n_significant_places n_home n_sps \\\n", - "2013-03-31 00:00:00+02:00 gps_u00 6 97.0 5.0 \n", - "2013-04-30 00:00:00+03:00 gps_u00 10 969.0 8.0 \n", - "2013-05-31 00:00:00+03:00 gps_u00 15 1007.0 9.0 \n", - "2013-06-30 00:00:00+03:00 gps_u00 1 0.0 0.0 \n", - "2013-03-31 00:00:00+02:00 gps_u01 4 273.0 2.0 \n", - "2013-04-30 00:00:00+03:00 gps_u01 4 1492.0 2.0 \n", - "2013-05-31 00:00:00+03:00 gps_u01 2 42.0 1.0 \n", + " user n_significant_places n_transitions \\\n", + "2013-03-31 00:00:00+02:00 gps_u00 6 48.0 \n", + "2013-04-30 00:00:00+03:00 gps_u00 10 194.0 \n", + "2013-05-31 00:00:00+03:00 gps_u00 15 107.0 \n", + "2013-06-30 00:00:00+03:00 gps_u00 1 10.0 \n", + "2013-03-31 00:00:00+02:00 gps_u01 4 8.0 \n", + "2013-04-30 00:00:00+03:00 gps_u01 4 2.0 \n", + "2013-05-31 00:00:00+03:00 gps_u01 2 2.0 \n", "\n", - " normalized_entropy n_moving n_top4 \\\n", - "2013-03-31 00:00:00+02:00 3.006920 54.0 18.0 \n", - "2013-04-30 00:00:00+03:00 3.045317 318.0 37.0 \n", - "2013-05-31 00:00:00+03:00 3.110317 255.0 43.0 \n", - "2013-06-30 00:00:00+03:00 0.000000 18.0 0.0 \n", - "2013-03-31 00:00:00+02:00 2.584963 12.0 0.0 \n", - "2013-04-30 00:00:00+03:00 5.977280 78.0 0.0 \n", - "2013-05-31 00:00:00+03:00 0.000000 110.0 0.0 \n", + " n_moving n_top4 n_rare max_dist_home n_static \\\n", + "2013-03-31 00:00:00+02:00 8.0 20.0 3.0 2.074186e+04 280.0 \n", + "2013-04-30 00:00:00+03:00 66.0 45.0 45.0 2.914790e+05 1966.0 \n", + "2013-05-31 00:00:00+03:00 76.0 65.0 86.0 1.041741e+06 1827.0 \n", + "2013-06-30 00:00:00+03:00 2.0 0.0 15.0 2.035837e+04 22.0 \n", + "2013-03-31 00:00:00+02:00 18.0 0.0 0.0 6.975303e+02 307.0 \n", + "2013-04-30 00:00:00+03:00 71.0 0.0 1.0 1.156568e+04 1999.0 \n", + "2013-05-31 00:00:00+03:00 34.0 0.0 1.0 3.957650e+03 3079.0 \n", "\n", - " max_dist_home n_top5 n_rare ... n_top1 \\\n", - "2013-03-31 00:00:00+02:00 2.074186e+04 8.0 0.0 ... 98.0 \n", - "2013-04-30 00:00:00+03:00 2.914790e+05 18.0 18.0 ... 976.0 \n", - "2013-05-31 00:00:00+03:00 1.041741e+06 38.0 31.0 ... 1009.0 \n", - "2013-06-30 00:00:00+03:00 1.989381e+04 0.0 6.0 ... 6.0 \n", - "2013-03-31 00:00:00+02:00 6.975303e+02 0.0 0.0 ... 307.0 \n", - "2013-04-30 00:00:00+03:00 1.156568e+04 0.0 1.0 ... 1928.0 \n", - "2013-05-31 00:00:00+03:00 6.771047e+02 0.0 0.0 ... 3003.0 \n", + " n_top2 n_top5 ... variance dist_total \\\n", + "2013-03-31 00:00:00+02:00 99.0 18.0 ... 0.003146 4.132581e+05 \n", + "2013-04-30 00:00:00+03:00 668.0 38.0 ... 0.237133 2.179693e+06 \n", + "2013-05-31 00:00:00+03:00 501.0 46.0 ... 8.288687 6.986551e+06 \n", + "2013-06-30 00:00:00+03:00 7.0 0.0 ... 0.014991 2.252893e+05 \n", + "2013-03-31 00:00:00+02:00 21.0 0.0 ... 0.000004 1.328713e+04 \n", + "2013-04-30 00:00:00+03:00 1.0 0.0 ... 0.000027 1.238429e+05 \n", + "2013-05-31 00:00:00+03:00 1.0 0.0 ... 0.000012 1.228235e+05 \n", "\n", - " entropy n_transitions variance log_variance \\\n", - "2013-03-31 00:00:00+02:00 4.839451 25.0 0.003146 -5.761688 \n", - "2013-04-30 00:00:00+03:00 6.332559 97.0 0.237133 -1.439133 \n", - "2013-05-31 00:00:00+03:00 6.834065 69.0 8.288687 2.114892 \n", - "2013-06-30 00:00:00+03:00 0.000000 0.0 0.014991 -4.200287 \n", - "2013-03-31 00:00:00+02:00 1.791759 4.0 0.000004 -12.520989 \n", - "2013-04-30 00:00:00+03:00 4.143135 26.0 0.000027 -10.510017 \n", - "2013-05-31 00:00:00+03:00 0.000000 0.0 0.000012 -11.364454 \n", + " n_bins speed_max time \\\n", + "2013-03-31 00:00:00+02:00 288.0 1.750000 2013-03-27 06:00:00+02:00 \n", + "2013-04-30 00:00:00+03:00 2032.0 33.250000 2013-04-01 00:00:00+03:00 \n", + "2013-05-31 00:00:00+03:00 1903.0 34.000000 2013-05-01 00:10:00+03:00 \n", + "2013-06-30 00:00:00+03:00 24.0 0.559017 2013-06-01 00:00:00+03:00 \n", + "2013-03-31 00:00:00+02:00 325.0 2.692582 2013-03-27 06:00:00+02:00 \n", + "2013-04-30 00:00:00+03:00 2070.0 32.750000 2013-04-01 00:00:00+03:00 \n", + "2013-05-31 00:00:00+03:00 3113.0 20.250000 2013-05-01 00:15:00+03:00 \n", "\n", - " dist_total speed_average speed_max n_bins \\\n", - "2013-03-31 00:00:00+02:00 4.132581e+05 1.116127 17.284037 288.0 \n", - "2013-04-30 00:00:00+03:00 2.179693e+06 0.821680 33.831053 2032.0 \n", - "2013-05-31 00:00:00+03:00 6.986551e+06 0.847341 42.507751 1903.0 \n", - "2013-06-30 00:00:00+03:00 2.252893e+05 14.601880 43.321397 24.0 \n", - "2013-03-31 00:00:00+02:00 1.328713e+04 0.029994 0.744393 325.0 \n", - "2013-04-30 00:00:00+03:00 1.238429e+05 0.050416 16.992157 2070.0 \n", - "2013-05-31 00:00:00+03:00 1.228235e+05 0.044657 9.967899 3113.0 \n", + " latitude longitude speed timezone \\\n", + "2013-03-31 00:00:00+02:00 43.759135 -72.329240 0.00 America/New_York \n", + "2013-04-30 00:00:00+03:00 43.768644 -72.323482 0.75 America/New_York \n", + "2013-05-31 00:00:00+03:00 43.706655 -72.287370 0.00 America/New_York \n", + "2013-06-30 00:00:00+03:00 43.701397 -72.295015 0.00 America/New_York \n", + "2013-03-31 00:00:00+02:00 43.706667 -72.289097 0.00 America/New_York \n", + "2013-04-30 00:00:00+03:00 43.706653 -72.289024 0.00 America/New_York \n", + "2013-05-31 00:00:00+03:00 43.706731 -72.289062 0.00 America/New_York \n", "\n", - " speed_variance \n", - "2013-03-31 00:00:00+02:00 9.876405 \n", - "2013-04-30 00:00:00+03:00 9.970465 \n", - "2013-05-31 00:00:00+03:00 15.081070 \n", - "2013-06-30 00:00:00+03:00 242.791725 \n", - "2013-03-31 00:00:00+02:00 0.008880 \n", - "2013-04-30 00:00:00+03:00 0.237219 \n", - "2013-05-31 00:00:00+03:00 0.085366 \n", + " local_time \n", + "2013-03-31 00:00:00+02:00 2013-03-27 00:00:00-04:00 \n", + "2013-04-30 00:00:00+03:00 2013-03-31 17:00:00-04:00 \n", + "2013-05-31 00:00:00+03:00 2013-04-30 17:10:00-04:00 \n", + "2013-06-30 00:00:00+03:00 2013-05-31 17:00:00-04:00 \n", + "2013-03-31 00:00:00+02:00 2013-03-27 00:00:00-04:00 \n", + "2013-04-30 00:00:00+03:00 2013-03-31 17:00:00-04:00 \n", + "2013-05-31 00:00:00+03:00 2013-04-30 17:15:00-04:00 \n", "\n", - "[7 rows x 23 columns]" + "[7 rows x 29 columns]" ] }, "execution_count": 8, @@ -897,115 +897,124 @@ " \n", " \n", " user\n", - " variance\n", + " speed_average\n", " log_variance\n", + " speed_variance\n", + " variance\n", " dist_total\n", - " speed_average\n", - " speed_max\n", " n_bins\n", - " speed_variance\n", + " speed_max\n", " \n", " \n", " \n", " \n", " 2013-03-31 00:00:00+02:00\n", " gps_u00\n", - " 0.003146\n", + " 0.033496\n", " -5.761688\n", + " 0.044885\n", + " 0.003146\n", " 4.132581e+05\n", - " 1.116127\n", - " 17.284037\n", " 288.0\n", - " 9.876405\n", + " 1.750000\n", " \n", " \n", " 2013-04-30 00:00:00+03:00\n", " gps_u00\n", - " 0.237133\n", + " 0.269932\n", " -1.439133\n", + " 6.129277\n", + " 0.237133\n", " 2.179693e+06\n", - " 0.821680\n", - " 33.831053\n", " 2032.0\n", - " 9.970465\n", + " 33.250000\n", " \n", " \n", " 2013-05-31 00:00:00+03:00\n", " gps_u00\n", - " 8.288687\n", + " 0.351280\n", " 2.114892\n", + " 7.590639\n", + " 8.288687\n", " 6.986551e+06\n", - " 0.847341\n", - " 42.507751\n", " 1903.0\n", - " 15.081070\n", + " 34.000000\n", " \n", " \n", " 2013-06-30 00:00:00+03:00\n", " gps_u00\n", - " 0.014991\n", + " 0.044126\n", " -4.200287\n", + " 0.021490\n", + " 0.014991\n", " 2.252893e+05\n", - " 14.601880\n", - " 43.321397\n", " 24.0\n", - " 242.791725\n", + " 0.559017\n", " \n", " \n", " 2013-03-31 00:00:00+02:00\n", " gps_u01\n", - " 0.000004\n", + " 0.056290\n", " -12.520989\n", + " 0.073370\n", + " 0.000004\n", " 1.328713e+04\n", - " 0.029994\n", - " 0.744393\n", " 325.0\n", - " 0.008880\n", + " 2.692582\n", " \n", " \n", " 2013-04-30 00:00:00+03:00\n", " gps_u01\n", - " 0.000027\n", + " 0.066961\n", " -10.510017\n", + " 0.629393\n", + " 0.000027\n", " 1.238429e+05\n", - " 0.050416\n", - " 16.992157\n", " 2070.0\n", - " 0.237219\n", + " 32.750000\n", " \n", " \n", " 2013-05-31 00:00:00+03:00\n", " gps_u01\n", - " 0.000012\n", + " 0.026392\n", " -11.364454\n", + " 0.261978\n", + " 0.000012\n", " 1.228235e+05\n", - " 0.044657\n", - " 9.967899\n", " 3113.0\n", - " 0.085366\n", + " 20.250000\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user variance log_variance dist_total \\\n", - "2013-03-31 00:00:00+02:00 gps_u00 0.003146 -5.761688 4.132581e+05 \n", - "2013-04-30 00:00:00+03:00 gps_u00 0.237133 -1.439133 2.179693e+06 \n", - "2013-05-31 00:00:00+03:00 gps_u00 8.288687 2.114892 6.986551e+06 \n", - "2013-06-30 00:00:00+03:00 gps_u00 0.014991 -4.200287 2.252893e+05 \n", - "2013-03-31 00:00:00+02:00 gps_u01 0.000004 -12.520989 1.328713e+04 \n", - "2013-04-30 00:00:00+03:00 gps_u01 0.000027 -10.510017 1.238429e+05 \n", - "2013-05-31 00:00:00+03:00 gps_u01 0.000012 -11.364454 1.228235e+05 \n", + " user speed_average log_variance \\\n", + "2013-03-31 00:00:00+02:00 gps_u00 0.033496 -5.761688 \n", + "2013-04-30 00:00:00+03:00 gps_u00 0.269932 -1.439133 \n", + "2013-05-31 00:00:00+03:00 gps_u00 0.351280 2.114892 \n", + "2013-06-30 00:00:00+03:00 gps_u00 0.044126 -4.200287 \n", + "2013-03-31 00:00:00+02:00 gps_u01 0.056290 -12.520989 \n", + "2013-04-30 00:00:00+03:00 gps_u01 0.066961 -10.510017 \n", + "2013-05-31 00:00:00+03:00 gps_u01 0.026392 -11.364454 \n", + "\n", + " speed_variance variance dist_total n_bins \\\n", + "2013-03-31 00:00:00+02:00 0.044885 0.003146 4.132581e+05 288.0 \n", + "2013-04-30 00:00:00+03:00 6.129277 0.237133 2.179693e+06 2032.0 \n", + "2013-05-31 00:00:00+03:00 7.590639 8.288687 6.986551e+06 1903.0 \n", + "2013-06-30 00:00:00+03:00 0.021490 0.014991 2.252893e+05 24.0 \n", + "2013-03-31 00:00:00+02:00 0.073370 0.000004 1.328713e+04 325.0 \n", + "2013-04-30 00:00:00+03:00 0.629393 0.000027 1.238429e+05 2070.0 \n", + "2013-05-31 00:00:00+03:00 0.261978 0.000012 1.228235e+05 3113.0 \n", "\n", - " speed_average speed_max n_bins speed_variance \n", - "2013-03-31 00:00:00+02:00 1.116127 17.284037 288.0 9.876405 \n", - "2013-04-30 00:00:00+03:00 0.821680 33.831053 2032.0 9.970465 \n", - "2013-05-31 00:00:00+03:00 0.847341 42.507751 1903.0 15.081070 \n", - "2013-06-30 00:00:00+03:00 14.601880 43.321397 24.0 242.791725 \n", - "2013-03-31 00:00:00+02:00 0.029994 0.744393 325.0 0.008880 \n", - "2013-04-30 00:00:00+03:00 0.050416 16.992157 2070.0 0.237219 \n", - "2013-05-31 00:00:00+03:00 0.044657 9.967899 3113.0 0.085366 " + " speed_max \n", + "2013-03-31 00:00:00+02:00 1.750000 \n", + "2013-04-30 00:00:00+03:00 33.250000 \n", + "2013-05-31 00:00:00+03:00 34.000000 \n", + "2013-06-30 00:00:00+03:00 0.559017 \n", + "2013-03-31 00:00:00+02:00 2.692582 \n", + "2013-04-30 00:00:00+03:00 32.750000 \n", + "2013-05-31 00:00:00+03:00 20.250000 " ] }, "execution_count": 9, @@ -1062,7 +1071,8 @@ "text/plain": [ "{: {'resample_args': {'rule': '1ME'}},\n", " : {'resample_args': {'rule': '1ME'}},\n", - " : {'resample_args': {'rule': '1ME'}}}" + " : {'resample_args': {'rule': '1ME'}},\n", + " : {'resample_args': {'rule': '1ME'}}}" ] }, "execution_count": 10, @@ -1112,7 +1122,7 @@ " \n", " 2013-03-27 06:00:00+02:00\n", " gps_u00\n", - " 1.364357e+09\n", + " 2013-03-27 06:00:00+02:00\n", " 43.759135\n", " -72.329240\n", " 0.0\n", @@ -1120,7 +1130,7 @@ " \n", " 2013-03-27 06:20:00+02:00\n", " gps_u00\n", - " 1.364358e+09\n", + " 2013-03-27 06:20:00+02:00\n", " 43.759503\n", " -72.329018\n", " 0.0\n", @@ -1128,7 +1138,7 @@ " \n", " 2013-03-27 06:40:00+02:00\n", " gps_u00\n", - " 1.364359e+09\n", + " 2013-03-27 06:40:00+02:00\n", " 43.759134\n", " -72.329238\n", " 0.0\n", @@ -1136,7 +1146,7 @@ " \n", " 2013-03-27 07:00:00+02:00\n", " gps_u00\n", - " 1.364361e+09\n", + " 2013-03-27 07:00:00+02:00\n", " 43.759135\n", " -72.329240\n", " 0.0\n", @@ -1144,7 +1154,7 @@ " \n", " 2013-03-27 07:20:00+02:00\n", " gps_u00\n", - " 1.364362e+09\n", + " 2013-03-27 07:20:00+02:00\n", " 43.759135\n", " -72.329240\n", " 0.0\n", @@ -1154,12 +1164,19 @@ "" ], "text/plain": [ - " user time latitude longitude speed\n", - "2013-03-27 06:00:00+02:00 gps_u00 1.364357e+09 43.759135 -72.329240 0.0\n", - "2013-03-27 06:20:00+02:00 gps_u00 1.364358e+09 43.759503 -72.329018 0.0\n", - "2013-03-27 06:40:00+02:00 gps_u00 1.364359e+09 43.759134 -72.329238 0.0\n", - "2013-03-27 07:00:00+02:00 gps_u00 1.364361e+09 43.759135 -72.329240 0.0\n", - "2013-03-27 07:20:00+02:00 gps_u00 1.364362e+09 43.759135 -72.329240 0.0" + " user time latitude \\\n", + "2013-03-27 06:00:00+02:00 gps_u00 2013-03-27 06:00:00+02:00 43.759135 \n", + "2013-03-27 06:20:00+02:00 gps_u00 2013-03-27 06:20:00+02:00 43.759503 \n", + "2013-03-27 06:40:00+02:00 gps_u00 2013-03-27 06:40:00+02:00 43.759134 \n", + "2013-03-27 07:00:00+02:00 gps_u00 2013-03-27 07:00:00+02:00 43.759135 \n", + "2013-03-27 07:20:00+02:00 gps_u00 2013-03-27 07:20:00+02:00 43.759135 \n", + "\n", + " longitude speed \n", + "2013-03-27 06:00:00+02:00 -72.329240 0.0 \n", + "2013-03-27 06:20:00+02:00 -72.329018 0.0 \n", + "2013-03-27 06:40:00+02:00 -72.329238 0.0 \n", + "2013-03-27 07:00:00+02:00 -72.329240 0.0 \n", + "2013-03-27 07:20:00+02:00 -72.329240 0.0 " ] }, "execution_count": 11, @@ -1254,7 +1271,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "niimpy", "language": "python", "name": "python3" }, @@ -1268,7 +1285,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/niimpy/preprocessing/location.py b/niimpy/preprocessing/location.py index bf33c01f..d94973df 100644 --- a/niimpy/preprocessing/location.py +++ b/niimpy/preprocessing/location.py @@ -5,6 +5,7 @@ import scipy.stats from sklearn.cluster import DBSCAN from geopy.distance import geodesic +from tzfpy import get_tz from niimpy.preprocessing import util @@ -493,6 +494,37 @@ def compute_features(df): result = util.select_columns(result, ["dist_total", "n_bins", "speed_average", "speed_variance", "speed_max", "variance", "log_variance"]) return result + +def location_local_time(df, config=None): + """ Calculates the local time of the user based on the longitude. + + Parameters + ---------- + df: dataframe with date index + config: A dictionary of optional arguments + """ + + longitude_column = config.get("longitude_column", "longitude") + latitude_column = config.get("latitude_column", "latitude") + config["resample_args"] = config.get("resample_args", {"rule": default_freq}) + + def get_timezone(row): + return get_tz(row[longitude_column], row[latitude_column]) + + def set_timezone(row): + row["local_time"] = row["time"].tz_convert(row["timezone"]) + return row + + df["time"] = df.index + df = util.group_data(df).resample(**config["resample_args"], include_groups=False).first() + df = util.reset_groups(df) + df["timezone"] = df.apply(get_timezone, axis=1) + df = df.apply(set_timezone, axis=1) + return df + + + + ALL_FEATURES = [globals()[name] for name in globals() if name.startswith('location_')] ALL_FEATURES = {x: {} for x in ALL_FEATURES} diff --git a/pyproject.toml b/pyproject.toml index 7d73e2ce..bfbcaa8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ "BeautifulSoup4", "pyarrow", "google_takeout_email", - "tqdm" + "tqdm", + "tzfpy" ] [project.optional-dependencies]