diff --git a/docs/user_guide/preprocessing/screen.ipynb b/docs/user_guide/preprocessing/screen.ipynb index 11bbb377..d5ab6d9c 100644 --- a/docs/user_guide/preprocessing/screen.ipynb +++ b/docs/user_guide/preprocessing/screen.ipynb @@ -91,7 +91,7 @@ { "data": { "text/plain": [ - "(277, 5)" + "(343, 5)" ] }, "execution_count": 2, @@ -149,44 +149,44 @@ " \n", " \n", " \n", - " 2020-01-09 02:06:41.573999872+02:00\n", + " 2020-01-09 02:06:41.573999882+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578528e+09\n", " 0\n", - " 2020-01-09 02:06:41.573999872+02:00\n", + " 2020-01-09 02:06:41.573999882+02:00\n", " \n", " \n", - " 2020-01-09 02:09:29.152000+02:00\n", + " 2020-01-09 02:09:29.151999950+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 1\n", - " 2020-01-09 02:09:29.152000+02:00\n", + " 2020-01-09 02:09:29.151999950+02:00\n", " \n", " \n", - " 2020-01-09 02:09:32.790999808+02:00\n", + " 2020-01-09 02:09:32.790999889+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 3\n", - " 2020-01-09 02:09:32.790999808+02:00\n", + " 2020-01-09 02:09:32.790999889+02:00\n", " \n", " \n", - " 2020-01-09 02:11:41.996000+02:00\n", + " 2020-01-09 02:11:41.996000051+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 0\n", - " 2020-01-09 02:11:41.996000+02:00\n", + " 2020-01-09 02:11:41.996000051+02:00\n", " \n", " \n", - " 2020-01-09 02:16:19.010999808+02:00\n", + " 2020-01-09 02:16:19.010999918+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 1\n", - " 2020-01-09 02:16:19.010999808+02:00\n", + " 2020-01-09 02:16:19.010999918+02:00\n", " \n", " \n", "\n", @@ -194,25 +194,25 @@ ], "text/plain": [ " user device time \\\n", - "2020-01-09 02:06:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578528e+09 \n", - "2020-01-09 02:09:29.152000+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:09:32.790999808+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:11:41.996000+02:00 
jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:16:19.010999808+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:06:41.573999882+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578528e+09 \n", + "2020-01-09 02:09:29.151999950+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:09:32.790999889+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:11:41.996000051+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:16:19.010999918+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", "\n", " screen_status \\\n", - "2020-01-09 02:06:41.573999872+02:00 0 \n", - "2020-01-09 02:09:29.152000+02:00 1 \n", - "2020-01-09 02:09:32.790999808+02:00 3 \n", - "2020-01-09 02:11:41.996000+02:00 0 \n", - "2020-01-09 02:16:19.010999808+02:00 1 \n", + "2020-01-09 02:06:41.573999882+02:00 0 \n", + "2020-01-09 02:09:29.151999950+02:00 1 \n", + "2020-01-09 02:09:32.790999889+02:00 3 \n", + "2020-01-09 02:11:41.996000051+02:00 0 \n", + "2020-01-09 02:16:19.010999918+02:00 1 \n", "\n", " datetime \n", - "2020-01-09 02:06:41.573999872+02:00 2020-01-09 02:06:41.573999872+02:00 \n", - "2020-01-09 02:09:29.152000+02:00 2020-01-09 02:09:29.152000+02:00 \n", - "2020-01-09 02:09:32.790999808+02:00 2020-01-09 02:09:32.790999808+02:00 \n", - "2020-01-09 02:11:41.996000+02:00 2020-01-09 02:11:41.996000+02:00 \n", - "2020-01-09 02:16:19.010999808+02:00 2020-01-09 02:16:19.010999808+02:00 " + "2020-01-09 02:06:41.573999882+02:00 2020-01-09 02:06:41.573999882+02:00 \n", + "2020-01-09 02:09:29.151999950+02:00 2020-01-09 02:09:29.151999950+02:00 \n", + "2020-01-09 02:09:32.790999889+02:00 2020-01-09 02:09:32.790999889+02:00 \n", + "2020-01-09 02:11:41.996000051+02:00 2020-01-09 02:11:41.996000051+02:00 \n", + "2020-01-09 02:16:19.010999918+02:00 2020-01-09 02:16:19.010999918+02:00 " ] }, "execution_count": 3, @@ -260,70 +260,70 @@ " \n", " \n", " \n", - " 2019-09-08 17:17:14.216000+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 
1.567952e+09\n", - " 1\n", - " 2019-09-08 17:17:14.216000+03:00\n", + " 2019-08-07 17:42:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 1.565189e+09\n", + " 2\n", + " 2019-08-07 17:42:41.009999990+03:00\n", " \n", " \n", - " 2019-09-08 17:17:31.966000128+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 1.567952e+09\n", - " 0\n", - " 2019-09-08 17:17:31.966000128+03:00\n", + " 2019-08-07 18:32:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 1.565192e+09\n", + " 1\n", + " 2019-08-07 18:32:41.009999990+03:00\n", " \n", " \n", - " 2019-09-08 20:50:07.360000+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 1.567965e+09\n", - " 3\n", - " 2019-09-08 20:50:07.360000+03:00\n", + " 2019-08-07 19:22:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 1.565195e+09\n", + " 0\n", + " 2019-08-07 19:22:41.009999990+03:00\n", " \n", " \n", - " 2019-09-08 20:50:08.139000064+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 1.567965e+09\n", + " 2019-08-07 20:12:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 1.565198e+09\n", " 1\n", - " 2019-09-08 20:50:08.139000064+03:00\n", + " 2019-08-07 20:12:41.009999990+03:00\n", " \n", " \n", - " 2019-09-08 20:53:12.960000+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 1.567965e+09\n", - " 0\n", - " 2019-09-08 20:53:12.960000+03:00\n", + " 2019-08-07 21:02:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 1.565201e+09\n", + " 2\n", + " 2019-08-07 21:02:41.009999990+03:00\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user device time \\\n", - "2019-09-08 17:17:14.216000+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.567952e+09 \n", - "2019-09-08 17:17:31.966000128+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.567952e+09 \n", - "2019-09-08 20:50:07.360000+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.567965e+09 \n", - "2019-09-08 20:50:08.139000064+03:00 iGyXetHE3S8u Cq9vueHh3zVs 1.567965e+09 \n", - "2019-09-08 20:53:12.960000+03:00 iGyXetHE3S8u 
Cq9vueHh3zVs 1.567965e+09 \n", - "\n", - " screen_status \\\n", - "2019-09-08 17:17:14.216000+03:00 1 \n", - "2019-09-08 17:17:31.966000128+03:00 0 \n", - "2019-09-08 20:50:07.360000+03:00 3 \n", - "2019-09-08 20:50:08.139000064+03:00 1 \n", - "2019-09-08 20:53:12.960000+03:00 0 \n", + " user device \\\n", + "2019-08-07 17:42:41.009999990+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-07 18:32:41.009999990+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-07 19:22:41.009999990+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-07 20:12:41.009999990+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-07 21:02:41.009999990+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "\n", + " time screen_status \\\n", + "2019-08-07 17:42:41.009999990+03:00 1.565189e+09 2 \n", + "2019-08-07 18:32:41.009999990+03:00 1.565192e+09 1 \n", + "2019-08-07 19:22:41.009999990+03:00 1.565195e+09 0 \n", + "2019-08-07 20:12:41.009999990+03:00 1.565198e+09 1 \n", + "2019-08-07 21:02:41.009999990+03:00 1.565201e+09 2 \n", "\n", " datetime \n", - "2019-09-08 17:17:14.216000+03:00 2019-09-08 17:17:14.216000+03:00 \n", - "2019-09-08 17:17:31.966000128+03:00 2019-09-08 17:17:31.966000128+03:00 \n", - "2019-09-08 20:50:07.360000+03:00 2019-09-08 20:50:07.360000+03:00 \n", - "2019-09-08 20:50:08.139000064+03:00 2019-09-08 20:50:08.139000064+03:00 \n", - "2019-09-08 20:53:12.960000+03:00 2019-09-08 20:53:12.960000+03:00 " + "2019-08-07 17:42:41.009999990+03:00 2019-08-07 17:42:41.009999990+03:00 \n", + "2019-08-07 18:32:41.009999990+03:00 2019-08-07 18:32:41.009999990+03:00 \n", + "2019-08-07 19:22:41.009999990+03:00 2019-08-07 19:22:41.009999990+03:00 \n", + "2019-08-07 20:12:41.009999990+03:00 2019-08-07 20:12:41.009999990+03:00 \n", + "2019-08-07 21:02:41.009999990+03:00 2019-08-07 21:02:41.009999990+03:00 " ] }, "execution_count": 4, @@ -396,17 +396,17 @@ " \n", " \n", " \n", - " 2020-01-09 02:06:41.573999872+02:00\n", + " 2020-01-09 02:06:41.573999882+02:00\n", " jd9INuQ5BBlW\n", " 0\n", " \n", " \n", - " 
2020-01-09 02:09:29.152000+02:00\n", + " 2020-01-09 02:09:29.151999950+02:00\n", " jd9INuQ5BBlW\n", " 1\n", " \n", " \n", - " 2020-01-09 02:09:32.790999808+02:00\n", + " 2020-01-09 02:09:32.790999889+02:00\n", " jd9INuQ5BBlW\n", " 3\n", " \n", @@ -416,9 +416,9 @@ ], "text/plain": [ " user screen_status\n", - "2020-01-09 02:06:41.573999872+02:00 jd9INuQ5BBlW 0\n", - "2020-01-09 02:09:29.152000+02:00 jd9INuQ5BBlW 1\n", - "2020-01-09 02:09:32.790999808+02:00 jd9INuQ5BBlW 3" + "2020-01-09 02:06:41.573999882+02:00 jd9INuQ5BBlW 0\n", + "2020-01-09 02:09:29.151999950+02:00 jd9INuQ5BBlW 1\n", + "2020-01-09 02:09:32.790999889+02:00 jd9INuQ5BBlW 3" ] }, "execution_count": 5, @@ -492,7 +492,7 @@ " \n", " \n", " \n", - " 2020-01-09 02:20:02.924999936+02:00\n", + " 2020-01-09 02:20:02.924999952+02:00\n", " jd9INuQ5BBlW\n", " 3p83yASkOb_B\n", " 1.578529e+09\n", @@ -500,10 +500,10 @@ " 3\n", " 2\n", " 0\n", - " 2020-01-09 02:20:02.924999936+02:00\n", + " 2020-01-09 02:20:02.924999952+02:00\n", " \n", " \n", - " 2020-01-09 02:21:30.405999872+02:00\n", + " 2020-01-09 02:21:30.405999899+02:00\n", " jd9INuQ5BBlW\n", " 3p83yASkOb_B\n", " 1.578529e+09\n", @@ -511,10 +511,10 @@ " 3\n", " 2\n", " 0\n", - " 2020-01-09 02:21:30.405999872+02:00\n", + " 2020-01-09 02:21:30.405999899+02:00\n", " \n", " \n", - " 2020-01-09 02:24:12.805999872+02:00\n", + " 2020-01-09 02:24:12.805999994+02:00\n", " jd9INuQ5BBlW\n", " 3p83yASkOb_B\n", " 1.578529e+09\n", @@ -522,10 +522,10 @@ " 3\n", " 2\n", " 0\n", - " 2020-01-09 02:24:12.805999872+02:00\n", + " 2020-01-09 02:24:12.805999994+02:00\n", " \n", " \n", - " 2020-01-09 02:35:38.561000192+02:00\n", + " 2020-01-09 02:35:38.561000109+02:00\n", " jd9INuQ5BBlW\n", " 3p83yASkOb_B\n", " 1.578530e+09\n", @@ -533,10 +533,10 @@ " 2\n", " 2\n", " 0\n", - " 2020-01-09 02:35:38.561000192+02:00\n", + " 2020-01-09 02:35:38.561000109+02:00\n", " \n", " \n", - " 2020-01-09 02:35:38.953000192+02:00\n", + " 2020-01-09 02:35:38.953000069+02:00\n", " jd9INuQ5BBlW\n", " 
3p83yASkOb_B\n", " 1.578530e+09\n", @@ -544,7 +544,7 @@ " 2\n", " 2\n", " 2\n", - " 2020-01-09 02:35:38.953000192+02:00\n", + " 2020-01-09 02:35:38.953000069+02:00\n", " \n", " \n", "\n", @@ -552,32 +552,32 @@ ], "text/plain": [ " user device time \\\n", - "2020-01-09 02:20:02.924999936+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578529e+09 \n", - "2020-01-09 02:21:30.405999872+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578529e+09 \n", - "2020-01-09 02:24:12.805999872+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578529e+09 \n", - "2020-01-09 02:35:38.561000192+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578530e+09 \n", - "2020-01-09 02:35:38.953000192+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578530e+09 \n", + "2020-01-09 02:20:02.924999952+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578529e+09 \n", + "2020-01-09 02:21:30.405999899+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578529e+09 \n", + "2020-01-09 02:24:12.805999994+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578529e+09 \n", + "2020-01-09 02:35:38.561000109+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578530e+09 \n", + "2020-01-09 02:35:38.953000069+02:00 jd9INuQ5BBlW 3p83yASkOb_B 1.578530e+09 \n", "\n", " battery_level battery_status \\\n", - "2020-01-09 02:20:02.924999936+02:00 74 3 \n", - "2020-01-09 02:21:30.405999872+02:00 73 3 \n", - "2020-01-09 02:24:12.805999872+02:00 72 3 \n", - "2020-01-09 02:35:38.561000192+02:00 72 2 \n", - "2020-01-09 02:35:38.953000192+02:00 72 2 \n", + "2020-01-09 02:20:02.924999952+02:00 74 3 \n", + "2020-01-09 02:21:30.405999899+02:00 73 3 \n", + "2020-01-09 02:24:12.805999994+02:00 72 3 \n", + "2020-01-09 02:35:38.561000109+02:00 72 2 \n", + "2020-01-09 02:35:38.953000069+02:00 72 2 \n", "\n", " battery_health battery_adaptor \\\n", - "2020-01-09 02:20:02.924999936+02:00 2 0 \n", - "2020-01-09 02:21:30.405999872+02:00 2 0 \n", - "2020-01-09 02:24:12.805999872+02:00 2 0 \n", - "2020-01-09 02:35:38.561000192+02:00 2 0 \n", - "2020-01-09 02:35:38.953000192+02:00 2 2 \n", + "2020-01-09 02:20:02.924999952+02:00 2 0 \n", + "2020-01-09 02:21:30.405999899+02:00 2 0 
\n", + "2020-01-09 02:24:12.805999994+02:00 2 0 \n", + "2020-01-09 02:35:38.561000109+02:00 2 0 \n", + "2020-01-09 02:35:38.953000069+02:00 2 2 \n", "\n", " datetime \n", - "2020-01-09 02:20:02.924999936+02:00 2020-01-09 02:20:02.924999936+02:00 \n", - "2020-01-09 02:21:30.405999872+02:00 2020-01-09 02:21:30.405999872+02:00 \n", - "2020-01-09 02:24:12.805999872+02:00 2020-01-09 02:24:12.805999872+02:00 \n", - "2020-01-09 02:35:38.561000192+02:00 2020-01-09 02:35:38.561000192+02:00 \n", - "2020-01-09 02:35:38.953000192+02:00 2020-01-09 02:35:38.953000192+02:00 " + "2020-01-09 02:20:02.924999952+02:00 2020-01-09 02:20:02.924999952+02:00 \n", + "2020-01-09 02:21:30.405999899+02:00 2020-01-09 02:21:30.405999899+02:00 \n", + "2020-01-09 02:24:12.805999994+02:00 2020-01-09 02:24:12.805999994+02:00 \n", + "2020-01-09 02:35:38.561000109+02:00 2020-01-09 02:35:38.561000109+02:00 \n", + "2020-01-09 02:35:38.953000069+02:00 2020-01-09 02:35:38.953000069+02:00 " ] }, "execution_count": 6, @@ -650,17 +650,7 @@ "**Default values:** if no arguments are passed, `niimpy`'s default values are \"screen_status\" for the screen_column_name, and 30-min aggregation bins. \n", "\n", "#### 4.1.2 Using the functions\n", - "Now that we understand how the functions are customized, it is time we compute our first communication feature. Suppose that we are interested in extracting the total duration of outgoing calls every 20 minutes. We will need `niimpy`'s `screen_count` function, the data, and we will also need to create a dictionary to customize our function. Let's create the dictionary first" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "d0927c98", - "metadata": {}, - "outputs": [], - "source": [ - "function_features={\"screen_column_name\":\"screen_status\",\"resample_args\":{\"rule\":\"20T\"}}" + "Now that we understand how the functions are customized, it is time we compute our first screen feature.
Suppose that we are interested in extracting the number of screen on, off, and use events every 20 minutes. We will need `niimpy`'s `screen_count` function, the data, and the parameters described above, passed as keyword arguments." ] }, { @@ -674,12 +664,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "f1ccfcc9", "metadata": {}, "outputs": [], "source": [ - "my_screen_count = s.screen_count(data, bat_data, function_features)" + "my_screen_count = s.screen_count(data, bat_data, screen_column_name = \"screen_status\", resample_args = {\"rule\":\"20T\"})" ] }, { @@ -693,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "353d5acc", "metadata": {}, "outputs": [ @@ -719,52 +709,52 @@ " \n", " \n", " user\n", - " device\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", + " device\n", " \n", " \n", " \n", " \n", " 2020-01-09 02:00:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 2\n", " 2\n", " 2\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 02:20:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 3\n", - " 4\n", " 2\n", + " 4\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 02:40:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", - " 2\n", " 2\n", " 1\n", + " 2\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 03:00:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 0\n", " 0\n", " 0\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 03:20:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 0\n", " 0\n", " 0\n", + " OWd1Uau8POix\n", " \n", " \n", " ...\n", @@ -777,42 +767,42 @@ " \n", " 2020-01-09 21:40:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", - " 1\n", " 1\n", " 0\n", + " 1\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 22:00:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", - " 1\n", " 1\n", " 0\n", + " 1\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 22:20:00+02:00\n", " jd9INuQ5BBlW\n", - "
OWd1Uau8POix\n", " 0\n", " 0\n", " 0\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 22:40:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 0\n", " 0\n", " 0\n", + " OWd1Uau8POix\n", " \n", " \n", " 2020-01-09 23:00:00+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 4\n", - " 3\n", " 0\n", + " 3\n", + " OWd1Uau8POix\n", " \n", " \n", "\n", @@ -820,36 +810,36 @@ "" ], "text/plain": [ - " user device screen_on_count \\\n", - "2020-01-09 02:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 2 \n", - "2020-01-09 02:20:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 3 \n", - "2020-01-09 02:40:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 2 \n", - "2020-01-09 03:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 0 \n", - "2020-01-09 03:20:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 0 \n", - "... ... ... ... \n", - "2020-01-09 21:40:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 1 \n", - "2020-01-09 22:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 1 \n", - "2020-01-09 22:20:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 0 \n", - "2020-01-09 22:40:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 0 \n", - "2020-01-09 23:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 4 \n", - "\n", - " screen_off_count screen_use_count \n", - "2020-01-09 02:00:00+02:00 2 2 \n", - "2020-01-09 02:20:00+02:00 4 2 \n", - "2020-01-09 02:40:00+02:00 2 1 \n", - "2020-01-09 03:00:00+02:00 0 0 \n", - "2020-01-09 03:20:00+02:00 0 0 \n", - "... ... ... \n", - "2020-01-09 21:40:00+02:00 1 0 \n", - "2020-01-09 22:00:00+02:00 1 0 \n", - "2020-01-09 22:20:00+02:00 0 0 \n", - "2020-01-09 22:40:00+02:00 0 0 \n", - "2020-01-09 23:00:00+02:00 3 0 \n", + " user screen_on_count screen_use_count \\\n", + "2020-01-09 02:00:00+02:00 jd9INuQ5BBlW 2 2 \n", + "2020-01-09 02:20:00+02:00 jd9INuQ5BBlW 3 2 \n", + "2020-01-09 02:40:00+02:00 jd9INuQ5BBlW 2 1 \n", + "2020-01-09 03:00:00+02:00 jd9INuQ5BBlW 0 0 \n", + "2020-01-09 03:20:00+02:00 jd9INuQ5BBlW 0 0 \n", + "... ... ... ... 
\n", + "2020-01-09 21:40:00+02:00 jd9INuQ5BBlW 1 0 \n", + "2020-01-09 22:00:00+02:00 jd9INuQ5BBlW 1 0 \n", + "2020-01-09 22:20:00+02:00 jd9INuQ5BBlW 0 0 \n", + "2020-01-09 22:40:00+02:00 jd9INuQ5BBlW 0 0 \n", + "2020-01-09 23:00:00+02:00 jd9INuQ5BBlW 4 0 \n", + "\n", + " screen_off_count device \n", + "2020-01-09 02:00:00+02:00 2 OWd1Uau8POix \n", + "2020-01-09 02:20:00+02:00 4 OWd1Uau8POix \n", + "2020-01-09 02:40:00+02:00 2 OWd1Uau8POix \n", + "2020-01-09 03:00:00+02:00 0 OWd1Uau8POix \n", + "2020-01-09 03:20:00+02:00 0 OWd1Uau8POix \n", + "... ... ... \n", + "2020-01-09 21:40:00+02:00 1 OWd1Uau8POix \n", + "2020-01-09 22:00:00+02:00 1 OWd1Uau8POix \n", + "2020-01-09 22:20:00+02:00 0 OWd1Uau8POix \n", + "2020-01-09 22:40:00+02:00 0 OWd1Uau8POix \n", + "2020-01-09 23:00:00+02:00 3 OWd1Uau8POix \n", "\n", "[64 rows x 5 columns]" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -869,7 +859,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "a906348d", "metadata": {}, "outputs": [ @@ -903,60 +893,60 @@ " \n", " \n", " \n", - " 2020-01-09 02:06:41.573999872+02:00\n", + " 2020-01-09 02:06:41.573999882+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578528e+09\n", " 0\n", - " 2020-01-09 02:06:41.573999872+02:00\n", + " 2020-01-09 02:06:41.573999882+02:00\n", " \n", " \n", - " 2020-01-09 02:09:29.152000+02:00\n", + " 2020-01-09 02:09:29.151999950+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 1\n", - " 2020-01-09 02:09:29.152000+02:00\n", + " 2020-01-09 02:09:29.151999950+02:00\n", " \n", " \n", - " 2020-01-09 02:09:32.790999808+02:00\n", + " 2020-01-09 02:09:32.790999889+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 3\n", - " 2020-01-09 02:09:32.790999808+02:00\n", + " 2020-01-09 02:09:32.790999889+02:00\n", " \n", " \n", - " 2020-01-09 02:11:41.996000+02:00\n", + " 2020-01-09 02:11:41.996000051+02:00\n", " jd9INuQ5BBlW\n", " 
OWd1Uau8POix\n", " 1.578529e+09\n", " 0\n", - " 2020-01-09 02:11:41.996000+02:00\n", + " 2020-01-09 02:11:41.996000051+02:00\n", " \n", " \n", - " 2020-01-09 02:16:19.010999808+02:00\n", + " 2020-01-09 02:16:19.010999918+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 1\n", - " 2020-01-09 02:16:19.010999808+02:00\n", + " 2020-01-09 02:16:19.010999918+02:00\n", " \n", " \n", - " 2020-01-09 02:16:29.648999936+02:00\n", + " 2020-01-09 02:16:29.648999929+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 0\n", - " 2020-01-09 02:16:29.648999936+02:00\n", + " 2020-01-09 02:16:29.648999929+02:00\n", " \n", " \n", - " 2020-01-09 02:16:29.657999872+02:00\n", + " 2020-01-09 02:16:29.657999992+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", " 1.578529e+09\n", " 2\n", - " 2020-01-09 02:16:29.657999872+02:00\n", + " 2020-01-09 02:16:29.657999992+02:00\n", " \n", " \n", "\n", @@ -964,34 +954,34 @@ ], "text/plain": [ " user device time \\\n", - "2020-01-09 02:06:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578528e+09 \n", - "2020-01-09 02:09:29.152000+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:09:32.790999808+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:11:41.996000+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:16:19.010999808+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:16:29.648999936+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", - "2020-01-09 02:16:29.657999872+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:06:41.573999882+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578528e+09 \n", + "2020-01-09 02:09:29.151999950+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:09:32.790999889+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:11:41.996000051+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:16:19.010999918+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 
02:16:29.648999929+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", + "2020-01-09 02:16:29.657999992+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.578529e+09 \n", "\n", " screen_status \\\n", - "2020-01-09 02:06:41.573999872+02:00 0 \n", - "2020-01-09 02:09:29.152000+02:00 1 \n", - "2020-01-09 02:09:32.790999808+02:00 3 \n", - "2020-01-09 02:11:41.996000+02:00 0 \n", - "2020-01-09 02:16:19.010999808+02:00 1 \n", - "2020-01-09 02:16:29.648999936+02:00 0 \n", - "2020-01-09 02:16:29.657999872+02:00 2 \n", + "2020-01-09 02:06:41.573999882+02:00 0 \n", + "2020-01-09 02:09:29.151999950+02:00 1 \n", + "2020-01-09 02:09:32.790999889+02:00 3 \n", + "2020-01-09 02:11:41.996000051+02:00 0 \n", + "2020-01-09 02:16:19.010999918+02:00 1 \n", + "2020-01-09 02:16:29.648999929+02:00 0 \n", + "2020-01-09 02:16:29.657999992+02:00 2 \n", "\n", " datetime \n", - "2020-01-09 02:06:41.573999872+02:00 2020-01-09 02:06:41.573999872+02:00 \n", - "2020-01-09 02:09:29.152000+02:00 2020-01-09 02:09:29.152000+02:00 \n", - "2020-01-09 02:09:32.790999808+02:00 2020-01-09 02:09:32.790999808+02:00 \n", - "2020-01-09 02:11:41.996000+02:00 2020-01-09 02:11:41.996000+02:00 \n", - "2020-01-09 02:16:19.010999808+02:00 2020-01-09 02:16:19.010999808+02:00 \n", - "2020-01-09 02:16:29.648999936+02:00 2020-01-09 02:16:29.648999936+02:00 \n", - "2020-01-09 02:16:29.657999872+02:00 2020-01-09 02:16:29.657999872+02:00 " + "2020-01-09 02:06:41.573999882+02:00 2020-01-09 02:06:41.573999882+02:00 \n", + "2020-01-09 02:09:29.151999950+02:00 2020-01-09 02:09:29.151999950+02:00 \n", + "2020-01-09 02:09:32.790999889+02:00 2020-01-09 02:09:32.790999889+02:00 \n", + "2020-01-09 02:11:41.996000051+02:00 2020-01-09 02:11:41.996000051+02:00 \n", + "2020-01-09 02:16:19.010999918+02:00 2020-01-09 02:16:19.010999918+02:00 \n", + "2020-01-09 02:16:29.648999929+02:00 2020-01-09 02:16:29.648999929+02:00 \n", + "2020-01-09 02:16:29.657999992+02:00 2020-01-09 02:16:29.657999992+02:00 " ] }, - "execution_count": 11, + "execution_count": 10, 
"metadata": {}, "output_type": "execute_result" } @@ -1013,7 +1003,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "670d20f8", "metadata": {}, "outputs": [], @@ -1022,14 +1012,18 @@ "results = []\n", "for user in users:\n", " start_time = data[data[\"user\"]==user].index.min()\n", - " function_features={\"screen_column_name\":\"screen_status\",\"resample_args\":{\"rule\":\"20T\",\"origin\":start_time}}\n", - " results.append(s.screen_count(data[data[\"user\"]==user],bat_data[bat_data[\"user\"]==user], function_features))\n", + " results.append(s.screen_count(\n", + " data[data[\"user\"]==user],\n", + " bat_data[bat_data[\"user\"]==user],\n", + " screen_column_name = \"screen_status\",\n", + " resample_args = {\"rule\":\"20T\",\"origin\":start_time}\n", + " ))\n", "my_screen_count = pd.concat(results)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "0b9030fa", "metadata": {}, "outputs": [ @@ -1055,52 +1049,52 @@ " \n", " \n", " user\n", - " device\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", + " device\n", " \n", " \n", " \n", " \n", - " 2020-01-09 02:06:41.573999872+02:00\n", + " 2020-01-09 02:06:41.573999882+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 4\n", " 3\n", " 3\n", + " OWd1Uau8POix\n", " \n", " \n", - " 2020-01-09 02:26:41.573999872+02:00\n", + " 2020-01-09 02:26:41.573999882+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 2\n", - " 3\n", " 1\n", + " 3\n", + " OWd1Uau8POix\n", " \n", " \n", - " 2020-01-09 02:46:41.573999872+02:00\n", + " 2020-01-09 02:46:41.573999882+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", - " 2\n", " 2\n", " 1\n", + " 2\n", + " OWd1Uau8POix\n", " \n", " \n", - " 2020-01-09 03:06:41.573999872+02:00\n", + " 2020-01-09 03:06:41.573999882+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 0\n", " 0\n", " 0\n", + " OWd1Uau8POix\n", " \n", " \n", - " 2020-01-09 03:26:41.573999872+02:00\n", + " 
2020-01-09 03:26:41.573999882+02:00\n", " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", " 0\n", " 0\n", " 0\n", + " OWd1Uau8POix\n", " \n", " \n", " ...\n", @@ -1111,94 +1105,94 @@ " ...\n", " \n", " \n", - " 2019-09-08 19:22:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-07 17:52:41.009999990+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-09-08 19:42:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-07 18:12:41.009999990+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-09-08 20:02:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0\n", + " 2019-08-07 18:32:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " 1\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-09-08 20:22:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-07 18:52:41.009999990+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-09-08 20:42:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-07 19:12:41.009999990+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 1\n", + " i8jmoIuoe12Mo\n", " \n", " \n", "\n", - "

2533 rows × 5 columns

\n", + "

2689 rows × 5 columns

\n", "" ], "text/plain": [ - " user device \\\n", - "2020-01-09 02:06:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", - "2020-01-09 02:26:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", - "2020-01-09 02:46:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", - "2020-01-09 03:06:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", - "2020-01-09 03:26:41.573999872+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", - "... ... ... \n", - "2019-09-08 19:22:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-09-08 19:42:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-09-08 20:02:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-09-08 20:22:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-09-08 20:42:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "\n", - " screen_on_count screen_off_count \\\n", - "2020-01-09 02:06:41.573999872+02:00 4 3 \n", - "2020-01-09 02:26:41.573999872+02:00 2 3 \n", - "2020-01-09 02:46:41.573999872+02:00 2 2 \n", - "2020-01-09 03:06:41.573999872+02:00 0 0 \n", - "2020-01-09 03:26:41.573999872+02:00 0 0 \n", - "... ... ... \n", - "2019-09-08 19:22:41.009999872+03:00 0 0 \n", - "2019-09-08 19:42:41.009999872+03:00 0 0 \n", - "2019-09-08 20:02:41.009999872+03:00 0 0 \n", - "2019-09-08 20:22:41.009999872+03:00 0 0 \n", - "2019-09-08 20:42:41.009999872+03:00 0 0 \n", - "\n", - " screen_use_count \n", - "2020-01-09 02:06:41.573999872+02:00 3 \n", - "2020-01-09 02:26:41.573999872+02:00 1 \n", - "2020-01-09 02:46:41.573999872+02:00 1 \n", - "2020-01-09 03:06:41.573999872+02:00 0 \n", - "2020-01-09 03:26:41.573999872+02:00 0 \n", - "... ... 
\n", - "2019-09-08 19:22:41.009999872+03:00 0 \n", - "2019-09-08 19:42:41.009999872+03:00 0 \n", - "2019-09-08 20:02:41.009999872+03:00 0 \n", - "2019-09-08 20:22:41.009999872+03:00 0 \n", - "2019-09-08 20:42:41.009999872+03:00 1 \n", - "\n", - "[2533 rows x 5 columns]" + " user screen_on_count \\\n", + "2020-01-09 02:06:41.573999882+02:00 jd9INuQ5BBlW 4 \n", + "2020-01-09 02:26:41.573999882+02:00 jd9INuQ5BBlW 2 \n", + "2020-01-09 02:46:41.573999882+02:00 jd9INuQ5BBlW 2 \n", + "2020-01-09 03:06:41.573999882+02:00 jd9INuQ5BBlW 0 \n", + "2020-01-09 03:26:41.573999882+02:00 jd9INuQ5BBlW 0 \n", + "... ... ... \n", + "2019-08-07 17:52:41.009999990+03:00 dvWdLQesv21a 0 \n", + "2019-08-07 18:12:41.009999990+03:00 dvWdLQesv21a 0 \n", + "2019-08-07 18:32:41.009999990+03:00 dvWdLQesv21a 1 \n", + "2019-08-07 18:52:41.009999990+03:00 dvWdLQesv21a 0 \n", + "2019-08-07 19:12:41.009999990+03:00 dvWdLQesv21a 0 \n", + "\n", + " screen_use_count screen_off_count \\\n", + "2020-01-09 02:06:41.573999882+02:00 3 3 \n", + "2020-01-09 02:26:41.573999882+02:00 1 3 \n", + "2020-01-09 02:46:41.573999882+02:00 1 2 \n", + "2020-01-09 03:06:41.573999882+02:00 0 0 \n", + "2020-01-09 03:26:41.573999882+02:00 0 0 \n", + "... ... ... \n", + "2019-08-07 17:52:41.009999990+03:00 0 0 \n", + "2019-08-07 18:12:41.009999990+03:00 0 0 \n", + "2019-08-07 18:32:41.009999990+03:00 0 0 \n", + "2019-08-07 18:52:41.009999990+03:00 0 0 \n", + "2019-08-07 19:12:41.009999990+03:00 0 1 \n", + "\n", + " device \n", + "2020-01-09 02:06:41.573999882+02:00 OWd1Uau8POix \n", + "2020-01-09 02:26:41.573999882+02:00 OWd1Uau8POix \n", + "2020-01-09 02:46:41.573999882+02:00 OWd1Uau8POix \n", + "2020-01-09 03:06:41.573999882+02:00 OWd1Uau8POix \n", + "2020-01-09 03:26:41.573999882+02:00 OWd1Uau8POix \n", + "... ... 
\n", + "2019-08-07 17:52:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-07 18:12:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-07 18:32:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-07 18:52:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-07 19:12:41.009999990+03:00 i8jmoIuoe12Mo \n", + "\n", + "[2689 rows x 5 columns]" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1218,7 +1212,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "39979c25", "metadata": {}, "outputs": [ @@ -1244,80 +1238,80 @@ " \n", " \n", " user\n", - " device\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", + " device\n", " \n", " \n", " \n", " \n", - " 2019-08-05 14:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 4\n", - " 4\n", - " 4\n", - " \n", - " \n", - " 2019-08-05 14:30:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 2\n", - " 2\n", - " 2\n", + " 2019-08-05 15:30:00+03:00\n", + " dvWdLQesv21a\n", + " 0\n", + " 0\n", + " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 15:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-05 16:00:00+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 15:30:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-05 16:30:00+03:00\n", + " dvWdLQesv21a\n", + " 1\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", + " \n", + " \n", + " 2019-08-05 17:00:00+03:00\n", + " dvWdLQesv21a\n", " 0\n", + " 0\n", + " 1\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 16:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-05 17:30:00+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user device screen_on_count \\\n", - "2019-08-05 14:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs 4 \n", - "2019-08-05 14:30:00+03:00 
iGyXetHE3S8u Cq9vueHh3zVs 2 \n", - "2019-08-05 15:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs 0 \n", - "2019-08-05 15:30:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs 0 \n", - "2019-08-05 16:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs 0 \n", - "\n", - " screen_off_count screen_use_count \n", - "2019-08-05 14:00:00+03:00 4 4 \n", - "2019-08-05 14:30:00+03:00 2 2 \n", - "2019-08-05 15:00:00+03:00 0 0 \n", - "2019-08-05 15:30:00+03:00 0 0 \n", - "2019-08-05 16:00:00+03:00 0 0 " + " user screen_on_count screen_use_count \\\n", + "2019-08-05 15:30:00+03:00 dvWdLQesv21a 0 0 \n", + "2019-08-05 16:00:00+03:00 dvWdLQesv21a 0 0 \n", + "2019-08-05 16:30:00+03:00 dvWdLQesv21a 1 0 \n", + "2019-08-05 17:00:00+03:00 dvWdLQesv21a 0 0 \n", + "2019-08-05 17:30:00+03:00 dvWdLQesv21a 0 0 \n", + "\n", + " screen_off_count device \n", + "2019-08-05 15:30:00+03:00 0 i8jmoIuoe12Mo \n", + "2019-08-05 16:00:00+03:00 0 i8jmoIuoe12Mo \n", + "2019-08-05 16:30:00+03:00 0 i8jmoIuoe12Mo \n", + "2019-08-05 17:00:00+03:00 1 i8jmoIuoe12Mo \n", + "2019-08-05 17:30:00+03:00 0 i8jmoIuoe12Mo " ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "my_screen_count = s.screen_count(data, bat_data, {})\n", + "my_screen_count = s.screen_count(data, bat_data)\n", "my_screen_count.head()" ] }, @@ -1332,7 +1326,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "3235f028", "metadata": {}, "outputs": [ @@ -1358,88 +1352,92 @@ " \n", " \n", " user\n", - " device\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", + " device\n", " \n", " \n", " \n", " \n", - " 2019-08-05 14:02:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 3\n", - " 3\n", - " 3\n", + " 2019-08-05 15:32:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " 0\n", + " 0\n", + " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 14:22:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 
2\n", - " 2\n", - " 2\n", + " 2019-08-05 15:52:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " 0\n", + " 0\n", + " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 14:42:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 1\n", - " 1\n", - " 1\n", + " 2019-08-05 16:12:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " 0\n", + " 0\n", + " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 15:02:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0\n", + " 2019-08-05 16:32:41.009999990+03:00\n", + " dvWdLQesv21a\n", + " 1\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", - " 2019-08-05 15:22:41.009999872+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", + " 2019-08-05 16:52:41.009999990+03:00\n", + " dvWdLQesv21a\n", " 0\n", " 0\n", " 0\n", + " i8jmoIuoe12Mo\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user device \\\n", - "2019-08-05 14:02:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 14:22:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 14:42:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 15:02:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 15:22:41.009999872+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "\n", - " screen_on_count screen_off_count \\\n", - "2019-08-05 14:02:41.009999872+03:00 3 3 \n", - "2019-08-05 14:22:41.009999872+03:00 2 2 \n", - "2019-08-05 14:42:41.009999872+03:00 1 1 \n", - "2019-08-05 15:02:41.009999872+03:00 0 0 \n", - "2019-08-05 15:22:41.009999872+03:00 0 0 \n", - "\n", - " screen_use_count \n", - "2019-08-05 14:02:41.009999872+03:00 3 \n", - "2019-08-05 14:22:41.009999872+03:00 2 \n", - "2019-08-05 14:42:41.009999872+03:00 1 \n", - "2019-08-05 15:02:41.009999872+03:00 0 \n", - "2019-08-05 15:22:41.009999872+03:00 0 " + " user screen_on_count \\\n", + "2019-08-05 15:32:41.009999990+03:00 dvWdLQesv21a 0 \n", + "2019-08-05 15:52:41.009999990+03:00 dvWdLQesv21a 0 \n", + "2019-08-05 16:12:41.009999990+03:00 dvWdLQesv21a 0 
\n", + "2019-08-05 16:32:41.009999990+03:00 dvWdLQesv21a 1 \n", + "2019-08-05 16:52:41.009999990+03:00 dvWdLQesv21a 0 \n", + "\n", + " screen_use_count screen_off_count \\\n", + "2019-08-05 15:32:41.009999990+03:00 0 0 \n", + "2019-08-05 15:52:41.009999990+03:00 0 0 \n", + "2019-08-05 16:12:41.009999990+03:00 0 0 \n", + "2019-08-05 16:32:41.009999990+03:00 0 0 \n", + "2019-08-05 16:52:41.009999990+03:00 0 0 \n", + "\n", + " device \n", + "2019-08-05 15:32:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-05 15:52:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-05 16:12:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-05 16:32:41.009999990+03:00 i8jmoIuoe12Mo \n", + "2019-08-05 16:52:41.009999990+03:00 i8jmoIuoe12Mo " ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "empty_bat = pd.DataFrame()\n", - "no_bat = s.screen_count(data, empty_bat, function_features) #no battery information\n", + "no_bat = s.screen_count(\n", + " data, empty_bat,\n", + " screen_column_name = \"screen_status\",\n", + " resample_args = {\"rule\":\"20T\",\"origin\":start_time}\n", + ") #no battery information\n", "no_bat.head()" ] }, @@ -1463,7 +1461,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "eb9e4e80", "metadata": {}, "outputs": [], @@ -1483,7 +1481,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "0762b7ff", "metadata": {}, "outputs": [], @@ -1503,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "e4539ae0", "metadata": {}, "outputs": [], @@ -1529,7 +1527,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "46ad73f5", "metadata": {}, "outputs": [], @@ -1548,7 +1546,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "0663affa", "metadata": {}, "outputs": [ @@ -1575,80 +1573,80 @@ " \n", " user\n", " device\n", + " 
screen_use_durationtotal\n", " screen_on_durationtotal\n", " screen_off_durationtotal\n", - " screen_use_durationtotal\n", " \n", " \n", " \n", " \n", - " 2019-08-05 13:20:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 78.193\n", - " 546.422\n", - " 0.139\n", + " 2019-08-05 15:50:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " NaN\n", + " 3000.0\n", + " NaN\n", " \n", " \n", - " 2019-08-05 14:10:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 198.189\n", - " 286720.506\n", - " 1.050\n", + " 2019-08-05 16:40:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " NaN\n", + " 0.0\n", + " 3000.0\n", " \n", " \n", - " 2019-08-05 15:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000\n", - " 0.000\n", + " 2019-08-05 17:30:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 3000.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", - " 2019-08-05 15:50:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000\n", - " 0.000\n", + " 2019-08-05 18:20:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", - " 2019-08-05 16:40:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000\n", - " 0.000\n", + " 2019-08-05 19:10:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 0.0\n", + " 3000.0\n", + " 0.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user device \\\n", - "2019-08-05 13:20:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 14:10:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 15:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 15:50:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 16:40:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "\n", - " screen_on_durationtotal screen_off_durationtotal \\\n", - "2019-08-05 13:20:00+03:00 78.193 546.422 \n", - "2019-08-05 14:10:00+03:00 198.189 286720.506 \n", - "2019-08-05 15:00:00+03:00 0.000 0.000 \n", - "2019-08-05 15:50:00+03:00 0.000 
0.000 \n", - "2019-08-05 16:40:00+03:00 0.000 0.000 \n", - "\n", - " screen_use_durationtotal \n", - "2019-08-05 13:20:00+03:00 0.139 \n", - "2019-08-05 14:10:00+03:00 1.050 \n", - "2019-08-05 15:00:00+03:00 0.000 \n", - "2019-08-05 15:50:00+03:00 0.000 \n", - "2019-08-05 16:40:00+03:00 0.000 " + " user device \\\n", + "2019-08-05 15:50:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 16:40:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 17:30:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 18:20:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 19:10:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "\n", + " screen_use_durationtotal screen_on_durationtotal \\\n", + "2019-08-05 15:50:00+03:00 NaN 3000.0 \n", + "2019-08-05 16:40:00+03:00 NaN 0.0 \n", + "2019-08-05 17:30:00+03:00 3000.0 0.0 \n", + "2019-08-05 18:20:00+03:00 0.0 0.0 \n", + "2019-08-05 19:10:00+03:00 0.0 3000.0 \n", + "\n", + " screen_off_durationtotal \n", + "2019-08-05 15:50:00+03:00 NaN \n", + "2019-08-05 16:40:00+03:00 3000.0 \n", + "2019-08-05 17:30:00+03:00 0.0 \n", + "2019-08-05 18:20:00+03:00 0.0 \n", + "2019-08-05 19:10:00+03:00 0.0 " ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1669,7 +1667,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "360ac232", "metadata": {}, "outputs": [ @@ -1696,67 +1694,67 @@ " \n", " user\n", " device\n", + " screen_use_durationtotal\n", " screen_on_durationtotal\n", " screen_off_durationtotal\n", - " screen_use_durationtotal\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", " \n", " \n", " \n", " \n", - " 2019-08-05 13:20:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 78.193\n", - " 546.422\n", - " 0.139\n", - " 1\n", - " 1\n", + " 2019-08-05 15:50:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " NaN\n", + " 3000.0\n", + " NaN\n", " 1\n", + " 0\n", + " 0\n", " \n", " \n", - 
" 2019-08-05 14:10:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 198.189\n", - " 286720.506\n", - " 1.050\n", - " 5\n", - " 5\n", - " 5\n", + " 2019-08-05 16:40:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " NaN\n", + " 0.0\n", + " 3000.0\n", + " 0\n", + " 0\n", + " 1\n", " \n", " \n", - " 2019-08-05 15:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000\n", - " 0.000\n", - " 0\n", + " 2019-08-05 17:30:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 3000.0\n", + " 0.0\n", + " 0.0\n", " 0\n", + " 1\n", " 0\n", " \n", " \n", - " 2019-08-05 15:50:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000\n", - " 0.000\n", + " 2019-08-05 18:20:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " 0\n", " 0\n", " 0\n", " \n", " \n", - " 2019-08-05 16:40:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000\n", - " 0.000\n", - " 0\n", + " 2019-08-05 19:10:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 0.0\n", + " 3000.0\n", + " 0.0\n", + " 1\n", " 0\n", " 0\n", " \n", @@ -1765,36 +1763,36 @@ "" ], "text/plain": [ - " user device \\\n", - "2019-08-05 13:20:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 14:10:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 15:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 15:50:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-05 16:40:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "\n", - " screen_on_durationtotal screen_off_durationtotal \\\n", - "2019-08-05 13:20:00+03:00 78.193 546.422 \n", - "2019-08-05 14:10:00+03:00 198.189 286720.506 \n", - "2019-08-05 15:00:00+03:00 0.000 0.000 \n", - "2019-08-05 15:50:00+03:00 0.000 0.000 \n", - "2019-08-05 16:40:00+03:00 0.000 0.000 \n", - "\n", - " screen_use_durationtotal screen_on_count \\\n", - "2019-08-05 13:20:00+03:00 0.139 1 \n", - "2019-08-05 14:10:00+03:00 1.050 5 \n", - "2019-08-05 15:00:00+03:00 0.000 
0 \n", - "2019-08-05 15:50:00+03:00 0.000 0 \n", - "2019-08-05 16:40:00+03:00 0.000 0 \n", - "\n", - " screen_off_count screen_use_count \n", - "2019-08-05 13:20:00+03:00 1 1 \n", - "2019-08-05 14:10:00+03:00 5 5 \n", - "2019-08-05 15:00:00+03:00 0 0 \n", + " user device \\\n", + "2019-08-05 15:50:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 16:40:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 17:30:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 18:20:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 19:10:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "\n", + " screen_use_durationtotal screen_on_durationtotal \\\n", + "2019-08-05 15:50:00+03:00 NaN 3000.0 \n", + "2019-08-05 16:40:00+03:00 NaN 0.0 \n", + "2019-08-05 17:30:00+03:00 3000.0 0.0 \n", + "2019-08-05 18:20:00+03:00 0.0 0.0 \n", + "2019-08-05 19:10:00+03:00 0.0 3000.0 \n", + "\n", + " screen_off_durationtotal screen_on_count \\\n", + "2019-08-05 15:50:00+03:00 NaN 1 \n", + "2019-08-05 16:40:00+03:00 3000.0 0 \n", + "2019-08-05 17:30:00+03:00 0.0 0 \n", + "2019-08-05 18:20:00+03:00 0.0 0 \n", + "2019-08-05 19:10:00+03:00 0.0 1 \n", + "\n", + " screen_use_count screen_off_count \n", "2019-08-05 15:50:00+03:00 0 0 \n", - "2019-08-05 16:40:00+03:00 0 0 " + "2019-08-05 16:40:00+03:00 0 1 \n", + "2019-08-05 17:30:00+03:00 1 0 \n", + "2019-08-05 18:20:00+03:00 0 0 \n", + "2019-08-05 19:10:00+03:00 0 0 " ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1817,7 +1815,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "91107404", "metadata": {}, "outputs": [ @@ -1844,66 +1842,66 @@ " \n", " user\n", " device\n", + " screen_use_durationtotal\n", " screen_on_durationtotal\n", " screen_off_durationtotal\n", - " screen_use_durationtotal\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", " \n", " \n", " \n", " \n", " 2019-08-05 00:00:00+03:00\n", - " 
iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 276.382\n", - " 287266.927999\n", - " 1.189\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 6000.000\n", + " 9000.000\n", + " 6000.000000\n", " NaN\n", " NaN\n", " NaN\n", " \n", " \n", " 2019-08-06 00:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000000\n", - " 0.000\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 15000.000\n", + " 27000.000\n", + " 24000.000000\n", " NaN\n", " NaN\n", " NaN\n", " \n", " \n", " 2019-08-07 00:00:00+03:00\n", - " iGyXetHE3S8u\n", - " Cq9vueHh3zVs\n", - " 0.000\n", - " 0.000000\n", - " 0.000\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " 15000.000\n", + " 21000.000\n", + " 15000.000000\n", " NaN\n", " NaN\n", " NaN\n", " \n", " \n", - " 2019-08-08 00:00:00+03:00\n", + " 2019-08-05 00:00:00+03:00\n", " iGyXetHE3S8u\n", " Cq9vueHh3zVs\n", - " 98.228\n", - " 34238.356000\n", - " 2.866\n", + " 1.189\n", + " 276.382\n", + " 287266.927999\n", " NaN\n", " NaN\n", " NaN\n", " \n", " \n", - " 2019-08-09 00:00:00+03:00\n", + " 2019-08-06 00:00:00+03:00\n", " iGyXetHE3S8u\n", " Cq9vueHh3zVs\n", - " 8.136\n", - " 114869.103000\n", - " 0.516\n", + " 0.000\n", + " 0.000\n", + " 0.000000\n", " NaN\n", " NaN\n", " NaN\n", @@ -1913,36 +1911,36 @@ "" ], "text/plain": [ - " user device \\\n", - "2019-08-05 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-06 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-07 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-08 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "2019-08-09 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", - "\n", - " screen_on_durationtotal screen_off_durationtotal \\\n", - "2019-08-05 00:00:00+03:00 276.382 287266.927999 \n", - "2019-08-06 00:00:00+03:00 0.000 0.000000 \n", - "2019-08-07 00:00:00+03:00 0.000 0.000000 \n", - "2019-08-08 00:00:00+03:00 98.228 34238.356000 \n", - "2019-08-09 00:00:00+03:00 8.136 114869.103000 \n", - "\n", - " screen_use_durationtotal screen_on_count \\\n", 
- "2019-08-05 00:00:00+03:00 1.189 NaN \n", - "2019-08-06 00:00:00+03:00 0.000 NaN \n", - "2019-08-07 00:00:00+03:00 0.000 NaN \n", - "2019-08-08 00:00:00+03:00 2.866 NaN \n", - "2019-08-09 00:00:00+03:00 0.516 NaN \n", - "\n", - " screen_off_count screen_use_count \n", + " user device \\\n", + "2019-08-05 00:00:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-06 00:00:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-07 00:00:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo \n", + "2019-08-05 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", + "2019-08-06 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs \n", + "\n", + " screen_use_durationtotal screen_on_durationtotal \\\n", + "2019-08-05 00:00:00+03:00 6000.000 9000.000 \n", + "2019-08-06 00:00:00+03:00 15000.000 27000.000 \n", + "2019-08-07 00:00:00+03:00 15000.000 21000.000 \n", + "2019-08-05 00:00:00+03:00 1.189 276.382 \n", + "2019-08-06 00:00:00+03:00 0.000 0.000 \n", + "\n", + " screen_off_durationtotal screen_on_count \\\n", + "2019-08-05 00:00:00+03:00 6000.000000 NaN \n", + "2019-08-06 00:00:00+03:00 24000.000000 NaN \n", + "2019-08-07 00:00:00+03:00 15000.000000 NaN \n", + "2019-08-05 00:00:00+03:00 287266.927999 NaN \n", + "2019-08-06 00:00:00+03:00 0.000000 NaN \n", + "\n", + " screen_use_count screen_off_count \n", "2019-08-05 00:00:00+03:00 NaN NaN \n", "2019-08-06 00:00:00+03:00 NaN NaN \n", "2019-08-07 00:00:00+03:00 NaN NaN \n", - "2019-08-08 00:00:00+03:00 NaN NaN \n", - "2019-08-09 00:00:00+03:00 NaN NaN " + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2019-08-06 00:00:00+03:00 NaN NaN " ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1956,7 +1954,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "85e5de98", "metadata": {}, "outputs": [ @@ -1983,12 +1981,12 @@ " \n", " user\n", " device\n", + " screen_use_durationtotal\n", " screen_on_durationtotal\n", " screen_off_durationtotal\n", - " screen_use_durationtotal\n", " 
screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", " \n", " \n", " \n", @@ -2000,8 +1998,8 @@ " NaN\n", " NaN\n", " 7.0\n", - " 8.0\n", " 5.0\n", + " 8.0\n", " \n", " \n", " 2020-01-09 05:05:00+02:00\n", @@ -2022,8 +2020,8 @@ " NaN\n", " NaN\n", " 9.0\n", - " 9.0\n", " 3.0\n", + " 9.0\n", " \n", " \n", " 2020-01-09 15:05:00+02:00\n", @@ -2033,8 +2031,8 @@ " NaN\n", " NaN\n", " 17.0\n", - " 17.0\n", " 7.0\n", + " 17.0\n", " \n", " \n", " 2020-01-09 20:05:00+02:00\n", @@ -2044,8 +2042,8 @@ " NaN\n", " NaN\n", " 12.0\n", - " 11.0\n", " 3.0\n", + " 11.0\n", " \n", " \n", "\n", @@ -2059,29 +2057,29 @@ "2020-01-09 15:05:00+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", "2020-01-09 20:05:00+02:00 jd9INuQ5BBlW OWd1Uau8POix \n", "\n", - " screen_on_durationtotal screen_off_durationtotal \\\n", - "2020-01-09 00:05:00+02:00 NaN NaN \n", - "2020-01-09 05:05:00+02:00 NaN NaN \n", - "2020-01-09 10:05:00+02:00 NaN NaN \n", - "2020-01-09 15:05:00+02:00 NaN NaN \n", - "2020-01-09 20:05:00+02:00 NaN NaN \n", + " screen_use_durationtotal screen_on_durationtotal \\\n", + "2020-01-09 00:05:00+02:00 NaN NaN \n", + "2020-01-09 05:05:00+02:00 NaN NaN \n", + "2020-01-09 10:05:00+02:00 NaN NaN \n", + "2020-01-09 15:05:00+02:00 NaN NaN \n", + "2020-01-09 20:05:00+02:00 NaN NaN \n", "\n", - " screen_use_durationtotal screen_on_count \\\n", + " screen_off_durationtotal screen_on_count \\\n", "2020-01-09 00:05:00+02:00 NaN 7.0 \n", "2020-01-09 05:05:00+02:00 NaN 0.0 \n", "2020-01-09 10:05:00+02:00 NaN 9.0 \n", "2020-01-09 15:05:00+02:00 NaN 17.0 \n", "2020-01-09 20:05:00+02:00 NaN 12.0 \n", "\n", - " screen_off_count screen_use_count \n", - "2020-01-09 00:05:00+02:00 8.0 5.0 \n", + " screen_use_count screen_off_count \n", + "2020-01-09 00:05:00+02:00 5.0 8.0 \n", "2020-01-09 05:05:00+02:00 0.0 0.0 \n", - "2020-01-09 10:05:00+02:00 9.0 3.0 \n", - "2020-01-09 15:05:00+02:00 17.0 7.0 \n", - "2020-01-09 20:05:00+02:00 11.0 3.0 " + "2020-01-09 10:05:00+02:00 3.0 9.0 
\n", + "2020-01-09 15:05:00+02:00 7.0 17.0 \n", + "2020-01-09 20:05:00+02:00 3.0 11.0 " ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -2111,7 +2109,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "fcb09628", "metadata": {}, "outputs": [], @@ -2130,7 +2128,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "c0fa2718", "metadata": {}, "outputs": [ @@ -2157,63 +2155,39 @@ " \n", " user\n", " device\n", - " screen_off\n", " screen_on_count\n", - " screen_off_count\n", " screen_use_count\n", + " screen_off_count\n", + " screen_use_durationtotal\n", " screen_on_durationtotal\n", " screen_off_durationtotal\n", - " screen_use_durationtotal\n", + " screen_off_durationminimum\n", " screen_on_durationminimum\n", " ...\n", + " screen_use_durationmean\n", " screen_on_durationmean\n", " screen_off_durationmean\n", - " screen_use_durationmean\n", - " screen_on_durationmedian\n", - " screen_off_durationmedian\n", " screen_use_durationmedian\n", - " screen_on_durationstd\n", - " screen_off_durationstd\n", + " screen_off_durationmedian\n", + " screen_on_durationmedian\n", " screen_use_durationstd\n", + " screen_off_durationstd\n", + " screen_on_durationstd\n", " first_unlock\n", " \n", " \n", " \n", " \n", - " 2020-01-09 19:30:00+02:00\n", - " jd9INuQ5BBlW\n", - " OWd1Uau8POix\n", - " NaN\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.000\n", - " 0.000000\n", - " 0.000\n", - " NaN\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaT\n", - " \n", - " \n", " 2020-01-09 20:00:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.000\n", - " 0.000000\n", " 0.000\n", + " 0.000\n", + " NaN\n", " NaN\n", " ...\n", " NaN\n", @@ -2231,21 +2205,21 @@ " 2020-01-09 20:30:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", " 
1.0\n", " 1.0\n", " 1.0\n", - " 8.253\n", - " 0.005000\n", " 28.930\n", " 8.253\n", + " 0.005\n", + " 0.005\n", + " 8.253\n", " ...\n", + " 28.930\n", " 8.253000\n", " 0.005000\n", " 28.930\n", - " 8.2530\n", " 0.005\n", - " 28.930\n", + " 8.2530\n", " NaN\n", " NaN\n", " NaN\n", @@ -2255,69 +2229,69 @@ " 2020-01-09 21:00:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", " 2.0\n", " 1.0\n", " 1.0\n", - " 11.158\n", - " 0.010000\n", " 39.087\n", + " 11.158\n", + " 0.010\n", + " 0.010\n", " 5.234\n", " ...\n", + " 39.087\n", " 5.579000\n", " 0.010000\n", " 39.087\n", - " 5.5790\n", " 0.010\n", - " 39.087\n", - " 0.487904\n", + " 5.5790\n", " NaN\n", " NaN\n", + " 0.487904\n", " NaT\n", " \n", " \n", " 2020-01-09 21:30:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", " 4.0\n", - " 5.0\n", " 1.0\n", - " 376.930\n", - " 46.027999\n", + " 5.0\n", " 101.062\n", + " 376.930\n", + " 46.028\n", + " 0.006\n", " 33.834\n", " ...\n", + " 101.062\n", " 94.232500\n", " 9.205600\n", " 101.062\n", - " 73.2835\n", " 0.012\n", - " 101.062\n", - " 71.990324\n", - " 20.561987\n", + " 73.2835\n", " NaN\n", + " 20.561987\n", + " 71.990324\n", " NaT\n", " \n", " \n", " 2020-01-09 22:00:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", - " 1.0\n", " 1.0\n", " 0.0\n", - " 154.643\n", - " 0.011000\n", + " 1.0\n", " NaN\n", " 154.643\n", + " 0.011\n", + " 0.011\n", + " 154.643\n", " ...\n", + " NaN\n", " 154.643000\n", " 0.011000\n", " NaN\n", - " 154.6430\n", " 0.011\n", - " NaN\n", + " 154.6430\n", " NaN\n", " NaN\n", " NaN\n", @@ -2327,12 +2301,12 @@ " 2020-01-09 22:30:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", " 0.0\n", " 0.0\n", " 0.0\n", + " NaN\n", + " 0.000\n", " 0.000\n", - " 0.000000\n", " NaN\n", " NaN\n", " ...\n", @@ -2351,28 +2325,52 @@ " 2020-01-09 23:00:00+02:00\n", " jd9INuQ5BBlW\n", " OWd1Uau8POix\n", - " NaN\n", " 4.0\n", - " 3.0\n", " 0.0\n", - " 6.931\n", - " 0.025000\n", + " 3.0\n", " NaN\n", + " 6.931\n", + " 
0.025\n", + " 0.008\n", " 2.079\n", " ...\n", + " NaN\n", " 2.310333\n", " 0.008333\n", " NaN\n", - " 2.2620\n", " 0.008\n", + " 2.2620\n", " NaN\n", - " 0.258906\n", " 0.000577\n", - " NaN\n", + " 0.258906\n", " NaT\n", " \n", " \n", " 2019-08-05 00:00:00+03:00\n", + " dvWdLQesv21a\n", + " i8jmoIuoe12Mo\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " ...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 2019-08-05 16:32:41.009999990+03:00\n", + " \n", + " \n", + " 2019-08-05 00:00:00+03:00\n", " iGyXetHE3S8u\n", " Cq9vueHh3zVs\n", " NaN\n", @@ -2393,7 +2391,7 @@ " NaN\n", " NaN\n", " NaN\n", - " 2019-08-05 14:03:42.322000128+03:00\n", + " 2019-08-05 14:03:42.322000027+03:00\n", " \n", " \n", " 2020-01-09 00:00:00+02:00\n", @@ -2417,64 +2415,75 @@ " NaN\n", " NaN\n", " NaN\n", - " 2020-01-09 02:16:19.010999808+02:00\n", + " 2020-01-09 02:16:19.010999918+02:00\n", " \n", " \n", "\n", - "

10 rows × 25 columns

\n", + "

10 rows × 24 columns

\n", "" ], "text/plain": [ - " user device screen_off \\\n", - "2020-01-09 19:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 20:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 20:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 21:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 21:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 22:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 22:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2020-01-09 23:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "2019-08-05 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs NaN \n", - "2020-01-09 00:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", - "\n", - " screen_on_count screen_off_count \\\n", - "2020-01-09 19:30:00+02:00 0.0 0.0 \n", - "2020-01-09 20:00:00+02:00 0.0 0.0 \n", - "2020-01-09 20:30:00+02:00 1.0 1.0 \n", - "2020-01-09 21:00:00+02:00 2.0 1.0 \n", - "2020-01-09 21:30:00+02:00 4.0 5.0 \n", - "2020-01-09 22:00:00+02:00 1.0 1.0 \n", - "2020-01-09 22:30:00+02:00 0.0 0.0 \n", - "2020-01-09 23:00:00+02:00 4.0 3.0 \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", - "\n", - " screen_use_count screen_on_durationtotal \\\n", - "2020-01-09 19:30:00+02:00 0.0 0.000 \n", - "2020-01-09 20:00:00+02:00 0.0 0.000 \n", - "2020-01-09 20:30:00+02:00 1.0 8.253 \n", - "2020-01-09 21:00:00+02:00 1.0 11.158 \n", - "2020-01-09 21:30:00+02:00 1.0 376.930 \n", - "2020-01-09 22:00:00+02:00 0.0 154.643 \n", - "2020-01-09 22:30:00+02:00 0.0 0.000 \n", - "2020-01-09 23:00:00+02:00 0.0 6.931 \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", - "\n", - " screen_off_durationtotal screen_use_durationtotal \\\n", - "2020-01-09 19:30:00+02:00 0.000000 0.000 \n", - "2020-01-09 20:00:00+02:00 0.000000 0.000 \n", - "2020-01-09 20:30:00+02:00 0.005000 28.930 \n", - "2020-01-09 21:00:00+02:00 0.010000 39.087 \n", - "2020-01-09 21:30:00+02:00 46.027999 101.062 \n", - 
"2020-01-09 22:00:00+02:00 0.011000 NaN \n", - "2020-01-09 22:30:00+02:00 0.000000 NaN \n", - "2020-01-09 23:00:00+02:00 0.025000 NaN \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", + " user device screen_on_count \\\n", + "2020-01-09 20:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 0.0 \n", + "2020-01-09 20:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.0 \n", + "2020-01-09 21:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 2.0 \n", + "2020-01-09 21:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 4.0 \n", + "2020-01-09 22:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 1.0 \n", + "2020-01-09 22:30:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 0.0 \n", + "2020-01-09 23:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix 4.0 \n", + "2019-08-05 00:00:00+03:00 dvWdLQesv21a i8jmoIuoe12Mo NaN \n", + "2019-08-05 00:00:00+03:00 iGyXetHE3S8u Cq9vueHh3zVs NaN \n", + "2020-01-09 00:00:00+02:00 jd9INuQ5BBlW OWd1Uau8POix NaN \n", + "\n", + " screen_use_count screen_off_count \\\n", + "2020-01-09 20:00:00+02:00 0.0 0.0 \n", + "2020-01-09 20:30:00+02:00 1.0 1.0 \n", + "2020-01-09 21:00:00+02:00 1.0 1.0 \n", + "2020-01-09 21:30:00+02:00 1.0 5.0 \n", + "2020-01-09 22:00:00+02:00 0.0 1.0 \n", + "2020-01-09 22:30:00+02:00 0.0 0.0 \n", + "2020-01-09 23:00:00+02:00 0.0 3.0 \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2020-01-09 00:00:00+02:00 NaN NaN \n", + "\n", + " screen_use_durationtotal screen_on_durationtotal \\\n", + "2020-01-09 20:00:00+02:00 0.000 0.000 \n", + "2020-01-09 20:30:00+02:00 28.930 8.253 \n", + "2020-01-09 21:00:00+02:00 39.087 11.158 \n", + "2020-01-09 21:30:00+02:00 101.062 376.930 \n", + "2020-01-09 22:00:00+02:00 NaN 154.643 \n", + "2020-01-09 22:30:00+02:00 NaN 0.000 \n", + "2020-01-09 23:00:00+02:00 NaN 6.931 \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2020-01-09 00:00:00+02:00 NaN NaN \n", + "\n", + " screen_off_durationtotal \\\n", + "2020-01-09 20:00:00+02:00 0.000 \n", + "2020-01-09 
20:30:00+02:00 0.005 \n", + "2020-01-09 21:00:00+02:00 0.010 \n", + "2020-01-09 21:30:00+02:00 46.028 \n", + "2020-01-09 22:00:00+02:00 0.011 \n", + "2020-01-09 22:30:00+02:00 0.000 \n", + "2020-01-09 23:00:00+02:00 0.025 \n", + "2019-08-05 00:00:00+03:00 NaN \n", + "2019-08-05 00:00:00+03:00 NaN \n", + "2020-01-09 00:00:00+02:00 NaN \n", + "\n", + " screen_off_durationminimum \\\n", + "2020-01-09 20:00:00+02:00 NaN \n", + "2020-01-09 20:30:00+02:00 0.005 \n", + "2020-01-09 21:00:00+02:00 0.010 \n", + "2020-01-09 21:30:00+02:00 0.006 \n", + "2020-01-09 22:00:00+02:00 0.011 \n", + "2020-01-09 22:30:00+02:00 NaN \n", + "2020-01-09 23:00:00+02:00 0.008 \n", + "2019-08-05 00:00:00+03:00 NaN \n", + "2019-08-05 00:00:00+03:00 NaN \n", + "2020-01-09 00:00:00+02:00 NaN \n", "\n", " screen_on_durationminimum ... \\\n", - "2020-01-09 19:30:00+02:00 NaN ... \n", "2020-01-09 20:00:00+02:00 NaN ... \n", "2020-01-09 20:30:00+02:00 8.253 ... \n", "2020-01-09 21:00:00+02:00 5.234 ... \n", @@ -2483,34 +2492,34 @@ "2020-01-09 22:30:00+02:00 NaN ... \n", "2020-01-09 23:00:00+02:00 2.079 ... \n", "2019-08-05 00:00:00+03:00 NaN ... \n", + "2019-08-05 00:00:00+03:00 NaN ... \n", "2020-01-09 00:00:00+02:00 NaN ... 
\n", "\n", - " screen_on_durationmean screen_off_durationmean \\\n", - "2020-01-09 19:30:00+02:00 NaN NaN \n", - "2020-01-09 20:00:00+02:00 NaN NaN \n", - "2020-01-09 20:30:00+02:00 8.253000 0.005000 \n", - "2020-01-09 21:00:00+02:00 5.579000 0.010000 \n", - "2020-01-09 21:30:00+02:00 94.232500 9.205600 \n", - "2020-01-09 22:00:00+02:00 154.643000 0.011000 \n", - "2020-01-09 22:30:00+02:00 NaN NaN \n", - "2020-01-09 23:00:00+02:00 2.310333 0.008333 \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", - "\n", - " screen_use_durationmean screen_on_durationmedian \\\n", - "2020-01-09 19:30:00+02:00 NaN NaN \n", - "2020-01-09 20:00:00+02:00 NaN NaN \n", - "2020-01-09 20:30:00+02:00 28.930 8.2530 \n", - "2020-01-09 21:00:00+02:00 39.087 5.5790 \n", - "2020-01-09 21:30:00+02:00 101.062 73.2835 \n", - "2020-01-09 22:00:00+02:00 NaN 154.6430 \n", - "2020-01-09 22:30:00+02:00 NaN NaN \n", - "2020-01-09 23:00:00+02:00 NaN 2.2620 \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", + " screen_use_durationmean screen_on_durationmean \\\n", + "2020-01-09 20:00:00+02:00 NaN NaN \n", + "2020-01-09 20:30:00+02:00 28.930 8.253000 \n", + "2020-01-09 21:00:00+02:00 39.087 5.579000 \n", + "2020-01-09 21:30:00+02:00 101.062 94.232500 \n", + "2020-01-09 22:00:00+02:00 NaN 154.643000 \n", + "2020-01-09 22:30:00+02:00 NaN NaN \n", + "2020-01-09 23:00:00+02:00 NaN 2.310333 \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2020-01-09 00:00:00+02:00 NaN NaN \n", + "\n", + " screen_off_durationmean screen_use_durationmedian \\\n", + "2020-01-09 20:00:00+02:00 NaN NaN \n", + "2020-01-09 20:30:00+02:00 0.005000 28.930 \n", + "2020-01-09 21:00:00+02:00 0.010000 39.087 \n", + "2020-01-09 21:30:00+02:00 9.205600 101.062 \n", + "2020-01-09 22:00:00+02:00 0.011000 NaN \n", + "2020-01-09 22:30:00+02:00 NaN NaN \n", + "2020-01-09 23:00:00+02:00 0.008333 NaN \n", + "2019-08-05 
00:00:00+03:00 NaN NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2020-01-09 00:00:00+02:00 NaN NaN \n", "\n", " screen_off_durationmedian \\\n", - "2020-01-09 19:30:00+02:00 NaN \n", "2020-01-09 20:00:00+02:00 NaN \n", "2020-01-09 20:30:00+02:00 0.005 \n", "2020-01-09 21:00:00+02:00 0.010 \n", @@ -2519,34 +2528,34 @@ "2020-01-09 22:30:00+02:00 NaN \n", "2020-01-09 23:00:00+02:00 0.008 \n", "2019-08-05 00:00:00+03:00 NaN \n", + "2019-08-05 00:00:00+03:00 NaN \n", "2020-01-09 00:00:00+02:00 NaN \n", "\n", - " screen_use_durationmedian screen_on_durationstd \\\n", - "2020-01-09 19:30:00+02:00 NaN NaN \n", - "2020-01-09 20:00:00+02:00 NaN NaN \n", - "2020-01-09 20:30:00+02:00 28.930 NaN \n", - "2020-01-09 21:00:00+02:00 39.087 0.487904 \n", - "2020-01-09 21:30:00+02:00 101.062 71.990324 \n", - "2020-01-09 22:00:00+02:00 NaN NaN \n", - "2020-01-09 22:30:00+02:00 NaN NaN \n", - "2020-01-09 23:00:00+02:00 NaN 0.258906 \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", - "\n", - " screen_off_durationstd screen_use_durationstd \\\n", - "2020-01-09 19:30:00+02:00 NaN NaN \n", - "2020-01-09 20:00:00+02:00 NaN NaN \n", - "2020-01-09 20:30:00+02:00 NaN NaN \n", - "2020-01-09 21:00:00+02:00 NaN NaN \n", - "2020-01-09 21:30:00+02:00 20.561987 NaN \n", - "2020-01-09 22:00:00+02:00 NaN NaN \n", - "2020-01-09 22:30:00+02:00 NaN NaN \n", - "2020-01-09 23:00:00+02:00 0.000577 NaN \n", - "2019-08-05 00:00:00+03:00 NaN NaN \n", - "2020-01-09 00:00:00+02:00 NaN NaN \n", + " screen_on_durationmedian screen_use_durationstd \\\n", + "2020-01-09 20:00:00+02:00 NaN NaN \n", + "2020-01-09 20:30:00+02:00 8.2530 NaN \n", + "2020-01-09 21:00:00+02:00 5.5790 NaN \n", + "2020-01-09 21:30:00+02:00 73.2835 NaN \n", + "2020-01-09 22:00:00+02:00 154.6430 NaN \n", + "2020-01-09 22:30:00+02:00 NaN NaN \n", + "2020-01-09 23:00:00+02:00 2.2620 NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2020-01-09 
00:00:00+02:00 NaN NaN \n", + "\n", + " screen_off_durationstd screen_on_durationstd \\\n", + "2020-01-09 20:00:00+02:00 NaN NaN \n", + "2020-01-09 20:30:00+02:00 NaN NaN \n", + "2020-01-09 21:00:00+02:00 NaN 0.487904 \n", + "2020-01-09 21:30:00+02:00 20.561987 71.990324 \n", + "2020-01-09 22:00:00+02:00 NaN NaN \n", + "2020-01-09 22:30:00+02:00 NaN NaN \n", + "2020-01-09 23:00:00+02:00 0.000577 0.258906 \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2019-08-05 00:00:00+03:00 NaN NaN \n", + "2020-01-09 00:00:00+02:00 NaN NaN \n", "\n", " first_unlock \n", - "2020-01-09 19:30:00+02:00 NaT \n", "2020-01-09 20:00:00+02:00 NaT \n", "2020-01-09 20:30:00+02:00 NaT \n", "2020-01-09 21:00:00+02:00 NaT \n", @@ -2554,13 +2563,14 @@ "2020-01-09 22:00:00+02:00 NaT \n", "2020-01-09 22:30:00+02:00 NaT \n", "2020-01-09 23:00:00+02:00 NaT \n", - "2019-08-05 00:00:00+03:00 2019-08-05 14:03:42.322000128+03:00 \n", - "2020-01-09 00:00:00+02:00 2020-01-09 02:16:19.010999808+02:00 \n", + "2019-08-05 00:00:00+03:00 2019-08-05 16:32:41.009999990+03:00 \n", + "2019-08-05 00:00:00+03:00 2019-08-05 14:03:42.322000027+03:00 \n", + "2020-01-09 00:00:00+02:00 2020-01-09 02:16:19.010999918+02:00 \n", "\n", - "[10 rows x 25 columns]" + "[10 rows x 24 columns]" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -2590,7 +2600,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "id": "e36a48ed", "metadata": {}, "outputs": [], @@ -2625,17 +2635,31 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "0ee78bf7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "argument of type 'NoneType' is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell 
\u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m customized_features \u001b[38;5;241m=\u001b[39m \u001b[43ms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mextract_features_screen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbat_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mscreen_last_unlock\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/niimpy/niimpy/preprocessing/screen.py:585\u001b[0m, in \u001b[0;36mextract_features_screen\u001b[0;34m(df, bat, features)\u001b[0m\n\u001b[1;32m 583\u001b[0m computed_features \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 584\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m feature, feature_arg \u001b[38;5;129;01min\u001b[39;00m features\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 585\u001b[0m computed_feature \u001b[38;5;241m=\u001b[39m \u001b[43mfeature\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfeature_arg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 586\u001b[0m computed_feature \u001b[38;5;241m=\u001b[39m util\u001b[38;5;241m.\u001b[39mset_conserved_index(computed_feature)\n\u001b[1;32m 587\u001b[0m computed_features\u001b[38;5;241m.\u001b[39mappend(computed_feature)\n", + "Cell \u001b[0;32mIn[25], line 2\u001b[0m, in \u001b[0;36mscreen_last_unlock\u001b[0;34m(df, bat, config)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mscreen_last_unlock\u001b[39m(df, bat, config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mscreen_column_name\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m:\n\u001b[1;32m 3\u001b[0m col_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscreen_status\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mTypeError\u001b[0m: argument of type 'NoneType' is not iterable" + ] + } + ], "source": [ "customized_features = s.extract_features_screen(data, bat_data, features={screen_last_unlock: {}})" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "babdda7a", "metadata": {}, "outputs": [ @@ -2742,7 +2766,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.12.6" } }, "nbformat": 4, diff --git a/docs/user_guide/preprocessing/tracker.ipynb b/docs/user_guide/preprocessing/tracker.ipynb index 29c7bcd7..b8ffced6 100644 --- a/docs/user_guide/preprocessing/tracker.ipynb +++ b/docs/user_guide/preprocessing/tracker.ipynb @@ -209,34 +209,34 @@ " \n", " \n", " \n", - " user\n", " median_sum_step\n", + " user\n", + " std_sum_step\n", " min_sum_step\n", " avg_sum_step\n", " max_sum_step\n", - " std_sum_step\n", " \n", " \n", " \n", " \n", " 0\n", - " wiam9xme\n", " 6480.0\n", + " wiam9xme\n", + " 3352.347745\n", " 5616\n", " 8437.383562\n", " 13025\n", - " 3352.347745\n", " \n", " \n", "\n", "" ], "text/plain": [ - " user median_sum_step min_sum_step avg_sum_step max_sum_step \\\n", - "0 wiam9xme 6480.0 5616 8437.383562 13025 \n", + " median_sum_step user std_sum_step min_sum_step avg_sum_step \\\n", + "0 6480.0 wiam9xme 3352.347745 5616 8437.383562 \n", "\n", - " std_sum_step \n", - "0 3352.347745 " + " max_sum_step \n", + "0 13025 " ] }, "execution_count": 4, @@ -269,7 +269,7 @@ 
"name": "stdout", "output_type": "stream", "text": [ - "{: {}} {}\n" + "{: {}} {}\n" ] }, { diff --git a/niimpy/preprocessing/application.py b/niimpy/preprocessing/application.py index bc6b7022..d3b396eb 100644 --- a/niimpy/preprocessing/application.py +++ b/niimpy/preprocessing/application.py @@ -363,7 +363,7 @@ def app_count(df, bat=None, screen=None, config=None): # Insert missing data due to the screen being off or battery depleated if not screen.empty: - screen = s.screen_off(screen, bat, config) + screen = s.screen_off(screen, bat, **config) if type(screen.index) == pd.MultiIndex: screen.reset_index(inplace=True) screen.set_index("index", inplace=True) @@ -372,7 +372,7 @@ def app_count(df, bat=None, screen=None, config=None): df2.fillna({"app_group": "off"}, inplace=True) if screen.empty and not bat.empty: - shutdown = b.shutdown_info(bat, config) + shutdown = b.shutdown_info(bat, **config) shutdown = shutdown.replace([-1, -2], "off") if type(shutdown.index) == pd.MultiIndex: shutdown.reset_index(inplace=True) @@ -449,7 +449,7 @@ def app_duration(df, bat=None, screen=None, config=None): # Insert missing data due to the screen being off or battery depleated if not screen.empty: - screen = s.screen_off(screen, bat, config) + screen = s.screen_off(screen, bat, **config) if type(screen.index) == pd.MultiIndex: screen.reset_index(inplace=True) screen.set_index("index", inplace=True) @@ -458,7 +458,7 @@ def app_duration(df, bat=None, screen=None, config=None): df2.fillna({"app_group": "off"}, inplace=True) if screen.empty and not bat.empty: - shutdown = b.shutdown_info(bat, config) + shutdown = b.shutdown_info(bat, **config) shutdown = shutdown.replace([-1, -2], "off") if type(shutdown.index) == pd.MultiIndex: shutdown.reset_index(inplace=True) diff --git a/niimpy/preprocessing/battery.py b/niimpy/preprocessing/battery.py index 14874913..c3dd2347 100644 --- a/niimpy/preprocessing/battery.py +++ b/niimpy/preprocessing/battery.py @@ -4,7 +4,7 @@ from 
niimpy.preprocessing import util -def shutdown_info(df, config=None): +def shutdown_info(df, battery_column_name = "battery_status", **kwargs): """ Returns a pandas DataFrame with battery information for the timestamps when the phone has shutdown. This includes both events, when the phone has shut down and when the phone @@ -22,19 +22,13 @@ def shutdown_info(df, config=None): ------- shutdown: pandas series """ - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" + df[battery_column_name] = pd.to_numeric(df[battery_column_name]) #convert to numeric in case it is not - col_name = config.get("battery_column_name", "battery_status") - - df[col_name] = pd.to_numeric(df[col_name]) #convert to numeric in case it is not - - shutdown = df[df[col_name].between(-3, 0, inclusive="neither")] + shutdown = df[df[battery_column_name].between(-3, 0, inclusive="neither")] return shutdown -def battery_mean_level(df, config=None): +def battery_mean_level(df, battery_column_name = "battery_level", resample_args = {"rule":"30min"}, **kwargs): """ This function returns the mean battery level within the specified timeframe. If there is no specified timeframe, the function sets a 30 min default time window. The function aggregates this number by user, by timewindow. 
@@ -49,24 +43,17 @@ def battery_mean_level(df, config=None): ------- result: dataframe """ - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("battery_column_name", "battery_level") - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - - df[col_name] = pd.to_numeric(df[col_name]) #convert to numeric in case it is not + df[battery_column_name] = pd.to_numeric(df[battery_column_name]) #convert to numeric in case it is not if len(df)>0: - result = util.group_data(df)[col_name].resample(**config["resample_args"]).mean() + result = util.group_data(df)[battery_column_name].resample(**resample_args).mean() result = result.to_frame(name='battery_mean_level') result = util.reset_groups(result) result = util.select_columns(result, ["battery_mean_level"]) return result -def battery_median_level(df, config=None): +def battery_median_level(df, battery_column_name = "battery_level", resample_args = {"rule":"30min"}, **kwargs): """ This function returns the median battery level within the specified timeframe. If there is no specified timeframe, the function sets a 30 min default time window. The function aggregates this number by user, by timewindow. 
@@ -81,24 +68,17 @@ def battery_median_level(df, config=None): ------- result: dataframe """ - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("battery_column_name", "battery_level") - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - - df[col_name] = pd.to_numeric(df[col_name]) #convert to numeric in case it is not + df[battery_column_name] = pd.to_numeric(df[battery_column_name]) #convert to numeric in case it is not if len(df)>0: - result = util.group_data(df)[col_name].resample(**config["resample_args"]).median() + result = util.group_data(df)[battery_column_name].resample(**resample_args).median() result = result.to_frame(name='battery_median_level') result = util.reset_groups(result) result = util.select_columns(result, ["battery_median_level"]) return result -def battery_std_level(df, config=None): +def battery_std_level(df, battery_column_name = "battery_level", resample_args = {"rule":"30min"}, **kwargs): """ This function returns the standard deviation battery level within the specified timeframe. If there is no specified timeframe, the function sets a 30 min default time window. The function aggregates this number by user, by timewindow. 
@@ -112,25 +92,18 @@ def battery_std_level(df, config=None): Returns ------- result: dataframe - """ - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("battery_column_name", "battery_level") - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - - df[col_name] = pd.to_numeric(df[col_name]) #convert to numeric in case it is not + """ + df[battery_column_name] = pd.to_numeric(df[battery_column_name]) #convert to numeric in case it is not if len(df)>0: - result = util.group_data(df)[col_name].resample(**config["resample_args"]).std() + result = util.group_data(df)[battery_column_name].resample(**resample_args).std() result = result.to_frame(name='battery_std_level') result = util.reset_groups(result) result = util.select_columns(result, ["battery_std_level"]) return result -def battery_shutdown_time(df, config=None): +def battery_shutdown_time(df, battery_column_name = "battery_status", resample_args = {"rule":"30min"}, **kwargs): """ This function returns the total time the phone has been turned off within a specified time window. If there is no specified timeframe, the function sets a 30 min default time window. The function aggregates this number by user, by timewindow. 
@@ -145,15 +118,8 @@ def battery_shutdown_time(df, config=None): ------- result: dataframe """ - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("battery_column_name", "battery_status") - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - def calculate_shutdown(df): - df['next'] = df[col_name].astype(int).astype(str)+df[col_name].shift(-1).fillna(0).astype(int).astype(str) + df['next'] = df[battery_column_name].astype(int).astype(str)+df[battery_column_name].shift(-1).fillna(0).astype(int).astype(str) ids = np.where((df.next=='-32') | (df.next=='-33') | (df.next=='-12') | (df.next=='-13') | (df.next=='-22') | (df.next=='-23'))[0] ids = ids.tolist() [ids.append(ids[i]+1) for i in range(len(ids))] @@ -166,7 +132,7 @@ def calculate_shutdown(df): duration = duration.shift(-1).iloc[:-1] duration = duration.dt.total_seconds() - result = duration.resample(**config["resample_args"]).sum() + result = duration.resample(**resample_args).sum() result = result.to_frame(name='shutdown_time') return result @@ -176,7 +142,7 @@ def calculate_shutdown(df): return result -def battery_discharge(df, config=None): +def battery_discharge(df, battery_column_name = "battery_level", resample_args = {"rule":"30min"}, **kwargs): """ This function returns the mean discharge rate of the battery within a specified time window. If there is no specified timeframe, the function sets a 30 min default time window. The function aggregates this number by user, by timewindow. 
@@ -190,14 +156,7 @@ def battery_discharge(df, config=None): Returns ------- result: dataframe - """ - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("battery_column_name", "battery_level") - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - + """ def calculate_discharge(df): df.sort_index(inplace=True) @@ -205,12 +164,12 @@ def calculate_discharge(df): df['duration'] = df.index.to_series().diff() df['duration'] = df['duration'].shift(-1) df["duration"] = df["duration"].dt.total_seconds() - df['discharge'] = (df[col_name].shift(-1) - df[col_name])/df['duration'] + df['discharge'] = (df[battery_column_name].shift(-1) - df[battery_column_name])/df['duration'] df['discharge'] = df['discharge'].shift(1) result = None if len(df)>0: - result = df['discharge'].resample(**config["resample_args"]).mean() + result = df['discharge'].resample(**resample_args).mean() result = result.to_frame(name='battery_discharge') return result @@ -220,18 +179,17 @@ def calculate_discharge(df): return result -def format_battery_data(df, config=None): +def format_battery_data(df, batterylevel_column = "battery_level"): """ Returns a DataFrame with battery data for a user. Parameters ---------- battery: DataFrame with battery data """ - - batterylevel_column = config.get("batterylevel_column", "battery_level") df[batterylevel_column] = pd.to_numeric(df[batterylevel_column]) return df -def battery_occurrences(df, config=None): + +def battery_occurrences(df, battery_status=False, battery_status_column_name="battery_status", resample_args={"rule":"30min"}, **kwargs): """ Returns a dataframe showing the amount of battery data points found within a specified time window. If there is no specified timeframe, the function sets a 30 min default time window. Parameters @@ -243,32 +201,25 @@ def battery_occurrences(df, config=None): information. Keys can be column names, other dictionaries, etc. 
""" assert isinstance(df, pd.DataFrame), "data is not a pandas DataFrame" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - battery_status = config.get("battery_status", False) - battery_status_col = config.get("battery_status_column_name", "battery_status") - occurrence_data = df.drop_duplicates(subset=['datetime', 'device', battery_status_col], keep='last') + occurrence_data = df.drop_duplicates(subset=['datetime', 'device', battery_status_column_name], keep='last') - if ((battery_status == True) & (battery_status_col in occurrence_data.columns)): + if ((battery_status == True) & (battery_status_column_name in occurrence_data.columns)): def count_alive(series): return ((series == '-1') | (series == '-2') | (series == '-3')).sum() occurrence_data["time"] = occurrence_data.index occurrences = util.group_data(occurrence_data).resample( - **config["resample_args"], include_groups=False + **resample_args, include_groups=False ).agg({ "time": "count", - battery_status_col: count_alive + battery_status_column_name: count_alive }).to_frame(name='occurrences') else: occurrence_data["time"] = occurrence_data.index occurrences = util.group_data(occurrence_data).resample( - **config["resample_args"], include_groups=False + **resample_args, include_groups=False )["time"].count() occurrences = occurrences.to_frame(name='occurrences') @@ -277,7 +228,7 @@ def count_alive(series): return occurrences -def battery_gaps(df, config=None): +def battery_gaps(df, min_duration_between = None, resample_args = {"rule":"30min"}, **kwargs): '''Returns a DataFrame with the mean time difference between consecutive battery timestamps. The mean is calculated within intervals specified in config. 
The minimum size of the considered deltas can be decided with the min_duration_between @@ -296,12 +247,6 @@ def battery_gaps(df, config=None): ''' assert isinstance(df, pd.core.frame.DataFrame), "df is not a pandas DataFrame" assert isinstance(df.index, pd.core.indexes.datetimes.DatetimeIndex), "df index is not DatetimeIndex" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - min_duration_between = config.get("min_duration_between", None) def calculate_gaps(df): tvalue = df.index.to_series() @@ -309,7 +254,7 @@ def calculate_gaps(df): if (min_duration_between is not None): delta[delta < min_duration_between] = None - delta = delta.resample(**config["resample_args"]).mean() + delta = delta.resample(**resample_args).mean() return pd.DataFrame({"battery_gap": delta}) @@ -319,7 +264,7 @@ def calculate_gaps(df): return result -def battery_charge_discharge(df, config=None): +def battery_charge_discharge(df, battery_level_column = "battery_level", resample_args = {"rule":"30min"}, **kwargs): '''Returns a DataFrame showing the mean difference in battery values and mean battery charge/discharge rate within specified time windows. If there is no specified timeframe, the function sets a 30 min default time window. 
@@ -329,12 +274,6 @@ def battery_charge_discharge(df, config=None): ''' assert isinstance(df, pd.core.frame.DataFrame), "df is not a pandas DataFrame" assert isinstance(df.index, pd.core.indexes.datetimes.DatetimeIndex), "df index is not DatetimeIndex" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - battery_level_column = config.get("battery_level_column", "battery_level") - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) def calculate_discharge(df): battery_level = pd.to_numeric(df[battery_level_column]) @@ -342,8 +281,8 @@ def calculate_discharge(df): tdelta = (tvalue - tvalue.shift()).fillna(pd.Timedelta(seconds=0)) bdelta = (battery_level - battery_level.shift()).fillna(0) delta = bdelta / (tdelta / pd.Timedelta(seconds=1)) - bdelta = bdelta.resample(**config["resample_args"]).mean() - delta = delta.resample(**config["resample_args"]).mean() + bdelta = bdelta.resample(**resample_args).mean() + delta = delta.resample(**resample_args).mean() return pd.DataFrame({ 'bdelta': bdelta, 'charge/discharge': delta @@ -355,7 +294,7 @@ def calculate_discharge(df): return discharge -def find_real_gaps(battery_df, other_df, config=None): +def find_real_gaps(battery_df, other_df, **kwargs): """ Returns a dataframe showing the gaps found both in the battery data and the other data. The default interval is 6 hours. 
Parameters @@ -370,13 +309,10 @@ def find_real_gaps(battery_df, other_df, config=None): pd.core.indexes.datetimes.DatetimeIndex), "battery_df index is not DatetimeIndex" assert isinstance(other_df.index, pd.core.indexes.datetimes.DatetimeIndex), "other_df index is not DatetimeIndex" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - battery = battery_occurrences(battery_df, config) + battery = battery_occurrences(battery_df, **kwargs) battery.rename({'occurrences': 'battery_occurrences'}, axis=1, inplace=True) - other = battery_occurrences(other_df, config) + other = battery_occurrences(other_df, **kwargs) mask = (battery['battery_occurrences'] == 0) & (other['occurrences'] == 0) gaps = pd.concat([battery[mask], other[mask]['occurrences']], axis=1, sort=False) @@ -384,7 +320,7 @@ def find_real_gaps(battery_df, other_df, config=None): return gaps -def find_non_battery_gaps(battery_df, other_df, config=None): +def find_non_battery_gaps(battery_df, other_df, **kwargs): """ Returns a dataframe showing the gaps found only in the other data. The default interval is 6 hours. 
Parameters @@ -399,20 +335,17 @@ def find_non_battery_gaps(battery_df, other_df, config=None): pd.core.indexes.datetimes.DatetimeIndex), "battery_df index is not DatetimeIndex" assert isinstance(other_df.index, pd.core.indexes.datetimes.DatetimeIndex), "other_df index is not DatetimeIndex" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - battery = battery_occurrences(battery_df, config) + battery = battery_occurrences(battery_df, **kwargs) battery.rename({'occurrences': 'battery_occurrences'}, axis=1, inplace=True) - other = battery_occurrences(other_df, config) + other = battery_occurrences(other_df, **kwargs) mask = (battery['battery_occurrences'] > 10) & (other['occurrences'] == 0) gaps = pd.concat([battery[mask], other[mask]['occurrences']], axis=1, sort=False) return gaps -def find_battery_gaps(battery_df, other_df, config): +def find_battery_gaps(battery_df, other_df, **kwargs): """ Returns a dataframe showing the gaps found only in the battery data. The default interval is 6 hours. 
Parameters @@ -427,13 +360,10 @@ def find_battery_gaps(battery_df, other_df, config): pd.core.indexes.datetimes.DatetimeIndex), "battery_df index is not DatetimeIndex" assert isinstance(other_df.index, pd.core.indexes.datetimes.DatetimeIndex), "other_df index is not DatetimeIndex" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - battery = battery_occurrences(battery_df, config) + battery = battery_occurrences(battery_df, **kwargs) battery.rename({'occurrences': 'battery_occurrences'}, axis=1, inplace=True) - other = battery_occurrences(other_df, config) + other = battery_occurrences(other_df, **kwargs) mask = (battery['battery_occurrences'] == 0) & (other['occurrences'] > 0) gaps = pd.concat([battery[mask], other[mask]['occurrences']], axis=1, sort=False) @@ -474,7 +404,7 @@ def extract_features_battery(df, features=None): features = ALL_FEATURES for features, kwargs in features.items(): print(features, kwargs) - computed_feature = features(df, kwargs) + computed_feature = features(df, **kwargs) computed_feature = util.set_conserved_index(computed_feature) computed_features.append(computed_feature) diff --git a/niimpy/preprocessing/screen.py b/niimpy/preprocessing/screen.py index 8b94e506..6b1d1547 100755 --- a/niimpy/preprocessing/screen.py +++ b/niimpy/preprocessing/screen.py @@ -5,7 +5,7 @@ from niimpy.preprocessing import util -def util_screen(df, bat=None, config=None): +def util_screen(df, bat=None, screen_column_name = "screen_status", **kwargs): """ This function is a helper function for all other screen preprocessing. The function has the option to merge information from the battery sensors to include data when the phone is shut down. 
The function also detects the missing @@ -31,24 +31,19 @@ def util_screen(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("screen_column_name", "screen_status") + id_columns = util.identifier_columns(df) - - df[col_name]=pd.to_numeric(df[col_name]) #convert to numeric in case it is not + df[screen_column_name]=pd.to_numeric(df[screen_column_name]) #convert to numeric in case it is not #Include the missing points that are due to shutting down the phone if not bat.empty: - shutdown = b.shutdown_info(bat, config) + shutdown = b.shutdown_info(bat, **kwargs) shutdown = shutdown.replace([-1,-2],0) if not shutdown.empty: df = pd.concat([df, shutdown]) df.fillna(0, inplace=True) - df = df[id_columns + [col_name]] + df = df[id_columns + [screen_column_name]] #Sort the dataframe df.sort_index(inplace=True) @@ -56,8 +51,8 @@ def util_screen(df, bat=None, config=None): #Detect missing data points df['missing']=0 - df['next']=df[col_name].shift(-1) - df['dummy']=df[col_name]-df['next'] + df['next']=df[screen_column_name].shift(-1) + df['dummy']=df[screen_column_name]-df['next'] df['missing'] = np.where(df['dummy']==0, 1, 0) #Check the missing points and label them as 1 df['missing'] = df['missing'].shift(1) df.drop(['dummy','next'], axis=1, inplace=True) @@ -68,7 +63,7 @@ def util_screen(df, bat=None, config=None): return df -def event_classification_screen(df, config=None): +def event_classification_screen(df, screen_column_name = "screen_status", **kwargs): """ This function is a helper function for other screen preprocessing. The function classifies the screen events into four transition types: on, off, in use, and undefined, based on the screen events recorded. 
For example, @@ -94,17 +89,12 @@ def event_classification_screen(df, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" assert "user" in df.columns - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - col_name = config.get("screen_column_name", "screen_status") id_columns = util.identifier_columns(df) #Classify the event df.sort_index(inplace=True) df.sort_values(by=id_columns, inplace=True) - col_as_str = df[col_name].astype(int).astype(str) + col_as_str = df[screen_column_name].astype(int).astype(str) next_as_str = col_as_str.shift(-1).fillna("0") df['next'] = col_as_str + next_as_str @@ -118,7 +108,7 @@ def event_classification_screen(df, config=None): df.loc[(df.next=='21') | (df.next=='23'), "na"]=1 #irrelevant. It seems like from 2 to 1 is from off to on (i.e. the screen goes to off and then it locks) df.loc[(df.next=='01') | (df.next=='02') | (df.next=='03'), "off"]=1 #off - df.drop(columns=["next",col_name], inplace=True) + df.drop(columns=["next", screen_column_name], inplace=True) #Discard the first and last row because they do not have all info. We do not #know what happened before or after these points. @@ -172,7 +162,7 @@ def duration_util_screen(df): return df -def screen_off(df, bat=None, config=None): +def screen_off(df, bat=None, **kwargs): """ This function returns the timestamps, within the specified timeframe, when the screen has turned off. If there is no specified timeframe, the function sets a 30 min default time window. 
The function aggregates this number @@ -195,12 +185,9 @@ def screen_off(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" id_columns = util.identifier_columns(df) - df = util_screen(df, bat, config) + df = util_screen(df, bat, **kwargs) df = df[df.screen_status == 0] #Select only those OFF events when no missing data is present df["screen_status"] = 1 df = df[id_columns + ["screen_status"]] @@ -210,7 +197,7 @@ def screen_off(df, bat=None, config=None): return df -def screen_count(df, bat=None, config=None): +def screen_count(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the number of times, within the specified timeframe, when the screen has turned off, turned on, and been in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -237,21 +224,16 @@ def screen_count(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) if len(df2)>0: - on = util.group_data(df2)["on"].resample(**config["resample_args"]).sum() + on = util.group_data(df2)["on"].resample(**resample_args).sum() on = on.to_frame(name='screen_on_count') - off = util.group_data(df2)["off"].resample(**config["resample_args"]).sum() + off = util.group_data(df2)["off"].resample(**resample_args).sum() off = off.to_frame(name='screen_off_count') - use = util.group_data(df2)["use"].resample(**config["resample_args"]).sum() + use = util.group_data(df2)["use"].resample(**resample_args).sum() use = use.to_frame(name='screen_use_count') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -259,7 +241,7 @@ def screen_count(df, bat=None, config=None): return result -def screen_duration(df, bat=None, config=None): +def screen_duration(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the duration (in seconds) of each transition, within the specified timeframe. The transitions are off, on, and in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -286,22 +268,17 @@ def screen_duration(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2 = duration_util_screen(df2) if len(df2)>0: - on = util.group_data(df2[df2.on==1])["duration"].resample(**config["resample_args"]).sum() + on = util.group_data(df2[df2.on==1])["duration"].resample(**resample_args).sum() on = on.to_frame(name='screen_on_durationtotal') - off = util.group_data(df2[df2.off==1])["duration"].resample(**config["resample_args"]).sum() + off = util.group_data(df2[df2.off==1])["duration"].resample(**resample_args).sum() off = off.to_frame(name='screen_off_durationtotal') - use = util.group_data(df2[df2.use==1])["duration"].resample(**config["resample_args"]).sum() + use = util.group_data(df2[df2.use==1])["duration"].resample(**resample_args).sum() use = use.to_frame(name='screen_use_durationtotal') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -309,7 +286,7 @@ def screen_duration(df, bat=None, config=None): return result -def screen_duration_min(df, bat=None, config=None): +def screen_duration_min(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the duration (in seconds) of each transition, within the specified timeframe. The transitions are off, on, and in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -336,22 +313,17 @@ def screen_duration_min(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2 = duration_util_screen(df2) if len(df2)>0: - on = util.group_data(df2[df2.on==1])["duration"].resample(**config["resample_args"]).min() + on = util.group_data(df2[df2.on==1])["duration"].resample(**resample_args).min() on = on.to_frame(name='screen_on_durationminimum') - off = util.group_data(df2[df2.off==1])["duration"].resample(**config["resample_args"]).min() + off = util.group_data(df2[df2.off==1])["duration"].resample(**resample_args).min() off = off.to_frame(name='screen_off_durationminimum') - use = util.group_data(df2[df2.use==1])["duration"].resample(**config["resample_args"]).min() + use = util.group_data(df2[df2.use==1])["duration"].resample(**resample_args).min() use = use.to_frame(name='screen_use_durationminimum') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -359,7 +331,7 @@ def screen_duration_min(df, bat=None, config=None): return result -def screen_duration_max(df, bat=None, config=None): +def screen_duration_max(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the duration (in seconds) of each transition, within the specified timeframe. The transitions are off, on, and in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -385,23 +357,18 @@ def screen_duration_max(df, bat=None, config=None): Resulting dataframe """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" bat = util.ensure_dataframe(bat) - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2 = duration_util_screen(df2) if len(df2)>0: - on = util.group_data(df2[df2.on==1])["duration"].resample(**config["resample_args"]).max() + on = util.group_data(df2[df2.on==1])["duration"].resample(**resample_args).max() on = on.to_frame(name='screen_on_durationmaximum') - off = util.group_data(df2[df2.off==1])["duration"].resample(**config["resample_args"]).max() + off = util.group_data(df2[df2.off==1])["duration"].resample(**resample_args).max() off = off.to_frame(name='screen_off_durationmaximum') - use = util.group_data(df2[df2.use==1])["duration"].resample(**config["resample_args"]).max() + use = util.group_data(df2[df2.use==1])["duration"].resample(**resample_args).max() use = use.to_frame(name='screen_use_durationmaximum') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -409,7 +376,7 @@ def screen_duration_max(df, bat=None, config=None): return result -def screen_duration_mean(df, bat=None, config=None): +def screen_duration_mean(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the duration (in seconds) of each transition, within the specified timeframe. The transitions are off, on, and in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -437,22 +404,17 @@ def screen_duration_mean(df, bat=None, config=None): assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2 = duration_util_screen(df2) if len(df2)>0: - on = util.group_data(df2[df2.on==1])["duration"].resample(**config["resample_args"]).mean() + on = util.group_data(df2[df2.on==1])["duration"].resample(**resample_args).mean() on = on.to_frame(name='screen_on_durationmean') - off = util.group_data(df2[df2.off==1])["duration"].resample(**config["resample_args"]).mean() + off = util.group_data(df2[df2.off==1])["duration"].resample(**resample_args).mean() off = off.to_frame(name='screen_off_durationmean') - use = util.group_data(df2[df2.use==1])["duration"].resample(**config["resample_args"]).mean() + use = util.group_data(df2[df2.use==1])["duration"].resample(**resample_args).mean() use = use.to_frame(name='screen_use_durationmean') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -460,7 +422,7 @@ def screen_duration_mean(df, bat=None, config=None): return result -def screen_duration_median(df, bat=None, config=None): +def screen_duration_median(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the duration (in seconds) of each transition, within the specified timeframe. The transitions are off, on, and in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -487,22 +449,17 @@ def screen_duration_median(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2 = duration_util_screen(df2) if len(df2)>0: - on = util.group_data(df2[df2.on==1])["duration"].resample(**config["resample_args"]).median() + on = util.group_data(df2[df2.on==1])["duration"].resample(**resample_args).median() on = on.to_frame(name='screen_on_durationmedian') - off = util.group_data(df2[df2.off==1])["duration"].resample(**config["resample_args"]).median() + off = util.group_data(df2[df2.off==1])["duration"].resample(**resample_args).median() off = off.to_frame(name='screen_off_durationmedian') - use = util.group_data(df2[df2.use==1])["duration"].resample(**config["resample_args"]).median() + use = util.group_data(df2[df2.use==1])["duration"].resample(**resample_args).median() use = use.to_frame(name='screen_use_durationmedian') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -510,7 +467,7 @@ def screen_duration_median(df, bat=None, config=None): return result -def screen_duration_std(df, bat=None, config=None): +def screen_duration_std(df, bat=None, resample_args = {"rule":"30min"}, **kwargs): """ This function returns the duration (in seconds) of each transition, within the specified timeframe. The transitions are off, on, and in use. If there is no specified timeframe, the function sets a 30 min default time window. 
The @@ -537,22 +494,17 @@ def screen_duration_std(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2 = duration_util_screen(df2) if len(df2)>0: - on = util.group_data(df2[df2.on==1])["duration"].resample(**config["resample_args"]).std() + on = util.group_data(df2[df2.on==1])["duration"].resample(**resample_args).std() on = on.to_frame(name='screen_on_durationstd') - off = util.group_data(df2[df2.off==1])["duration"].resample(**config["resample_args"]).std() + off = util.group_data(df2[df2.off==1])["duration"].resample(**resample_args).std() off = off.to_frame(name='screen_off_durationstd') - use = util.group_data(df2[df2.use==1])["duration"].resample(**config["resample_args"]).std() + use = util.group_data(df2[df2.use==1])["duration"].resample(**resample_args).std() use = use.to_frame(name='screen_use_durationstd') result = pd.concat([on, off, use], axis=1) result = util.reset_groups(result) @@ -560,7 +512,7 @@ def screen_duration_std(df, bat=None, config=None): return result -def screen_first_unlock(df, bat=None, config=None): +def screen_first_unlock(df, bat=None, **kwargs): """ This function returns the first time the phone was unlocked each day. The data is aggregated by user, by day. 
@@ -583,14 +535,9 @@ def screen_first_unlock(df, bat=None, config=None): """ assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" bat = util.ensure_dataframe(bat) - if config is None: - config = {} - assert isinstance(config, dict), "config is not a dictionary" - - config["resample_args"] = config.get("resample_args", {"rule":"30min"}) - df2 = util_screen(df, bat, config) - df2 = event_classification_screen(df2, config) + df2 = util_screen(df, bat, **kwargs) + df2 = event_classification_screen(df2, **kwargs) df2["time"] = df2.index result = util.group_data(df2[df2.on==1])["time"].resample(rule='1D').min() @@ -635,7 +582,7 @@ def extract_features_screen(df, bat=None, features=None): computed_features = [] for feature, feature_arg in features.items(): - computed_feature = feature(df, bat, feature_arg) + computed_feature = feature(df, bat, **feature_arg) computed_feature = util.set_conserved_index(computed_feature) computed_features.append(computed_feature) diff --git a/tests/preprocessing/test_battery.py b/tests/preprocessing/test_battery.py index f5cd9c74..e3cead6b 100644 --- a/tests/preprocessing/test_battery.py +++ b/tests/preprocessing/test_battery.py @@ -29,7 +29,7 @@ def test_format_battery_data(): df = df11.copy() - battery = niimpy.preprocessing.battery.format_battery_data(df, {}) + battery = niimpy.preprocessing.battery.format_battery_data(df) assert battery.loc[Timestamp('2019-01-17 09:20:14.049999872+02:00'), 'battery_level'] == 96 assert battery.loc[Timestamp('2019-01-17 09:21:26.036000+02:00'), 'battery_health'] == '2' diff --git a/tests/preprocessing/test_screen.py b/tests/preprocessing/test_screen.py index 400f168c..0e18f3d4 100644 --- a/tests/preprocessing/test_screen.py +++ b/tests/preprocessing/test_screen.py @@ -1,5 +1,4 @@ import pandas as pd -import numpy as np import niimpy import niimpy.preprocessing.screen as sc @@ -16,7 +15,7 @@ def test_audio_features(): data["extra_column"] = "extra" test = 
sc.extract_features_screen(data, bat, features=None) assert "extra_column" not in test.columns - + time = pd.Timestamp("2020-01-09 02:30:00", tz='Europe/Helsinki') test_user = test[test["user"] == "jd9INuQ5BBlW"]