Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exjobb #240

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions forest/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,12 @@

class Frequency(Enum):
"""This class enumerates possible frequencies for summary data."""
MINUTELY = 1
HOURLY = 60
THREE_HOURLY = 3 * 60
SIX_HOURLY = 6 * 60
TWELVE_HOURLY = 12 * 60
DAILY = 24 * 60
HOURLY_AND_DAILY = -1
HOURLY = 1
DAILY = 24
HOURLY_AND_DAILY = "hourly_and_daily"
THREE_HOURLY = 3
SIX_HOURLY = 6
TWELVE_HOURLY = 12


class OSMTags(Enum):
Expand Down
9 changes: 7 additions & 2 deletions forest/jasmine/mobmat2traj.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,8 @@ def calculate_delta(
displacement and time difference
between two points
"""
if flight_table.size == 0:
return 0, 0, 0
delta_x = flight_table[flight_index, 4] - flight_table[flight_index, 1]
delta_y = flight_table[flight_index, 5] - flight_table[flight_index, 2]
delta_t = flight_table[flight_index, 6] - flight_table[flight_index, 3]
Expand Down Expand Up @@ -666,8 +668,11 @@ def forward_impute(
sys.exit("Invalid method for calculate_k1.")

normalize_w = (weight + 1e-5) / float(sum(weight + 1e-5))
flight_index = np.random.choice(flight_table.shape[0], p=normalize_w)

try:
flight_index = np.random.choice(flight_table.shape[0], p=normalize_w)
except ValueError as e:
print(f"an error with flight table: {flight_table}")
flight_index = 0
delta_x, delta_y, delta_t = calculate_delta(flight_table, flight_index)

delta_x, delta_y, delta_t = adjust_delta_if_needed(
Expand Down
8 changes: 4 additions & 4 deletions forest/jasmine/tests/test_traj2stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def test_compute_window_size(sample_trajectory):
"""Testing window size is correct"""

window, _ = compute_window_and_count(
sample_trajectory[0, 3], sample_trajectory[-1, 6], 60
sample_trajectory[0, 3], sample_trajectory[-1, 6], 1
)

assert window == 3600
Expand All @@ -664,7 +664,7 @@ def test_compute_window_count(sample_trajectory):
"""Testing number of windows is correct"""

_, num_windows = compute_window_and_count(
sample_trajectory[0, 3], sample_trajectory[-1, 6], 60
sample_trajectory[0, 3], sample_trajectory[-1, 6], 1
)

assert num_windows == 24
Expand All @@ -674,7 +674,7 @@ def test_compute_window_size_6_hour(sample_trajectory):
"""Testing window size is correct 6 hour window"""

window, _ = compute_window_and_count(
sample_trajectory[0, 3], sample_trajectory[-1, 6], 360
sample_trajectory[0, 3], sample_trajectory[-1, 6], 6
)

assert window == 3600 * 6
Expand All @@ -684,7 +684,7 @@ def test_compute_window_count_6_hour(sample_trajectory):
"""Testing number of windows is correct 6 hour window"""

_, num_windows = compute_window_and_count(
sample_trajectory[0, 3], sample_trajectory[-1, 6], 360
sample_trajectory[0, 3], sample_trajectory[-1, 6], 6
)

assert num_windows == 4
107 changes: 88 additions & 19 deletions forest/jasmine/traj2stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
modules and calculate summary statistics of imputed trajectories.
"""

from ast import Attribute
from dataclasses import dataclass
from datetime import datetime
import json
Expand Down Expand Up @@ -1020,6 +1021,7 @@ def format_summary_stats(
pcr_cols = []

if frequency != Frequency.DAILY:
print("not daily")
summary_stats_df.columns = (
[
"year",
Expand Down Expand Up @@ -1125,8 +1127,8 @@ def gps_summaries(
ValueError: Frequency is not valid
"""

if frequency in [Frequency.HOURLY_AND_DAILY, Frequency.MINUTELY]:
raise ValueError(f"Frequency cannot be {frequency.name.lower()}.")
if frequency == Frequency.HOURLY_AND_DAILY:
raise ValueError("Frequency must be 'hourly' or 'daily'")

if frequency != Frequency.DAILY:
parameters.split_day_night = False
Expand Down Expand Up @@ -1161,7 +1163,7 @@ def gps_summaries(
traj, [3, 4, 5], tz_str, 3600*24
)
window, num_windows = compute_window_and_count(
start_stamp, end_stamp, 24*60, parameters.split_day_night
start_stamp, end_stamp, 24, parameters.split_day_night
)

if num_windows <= 0:
Expand Down Expand Up @@ -1484,23 +1486,23 @@ def get_time_range(


def compute_window_and_count(
start_stamp: int, end_stamp: int, window_minutes: int,
start_stamp: int, end_stamp: int, window_hours: int,
split_day_night: bool = False
) -> Tuple[int, int]:
"""Computes the window and number of windows based on given time stamps.

Args:
start_stamp: int, starting time stamp
end_stamp: int, ending time stamp
window_minutes: int, window in minutes
window_hours: int, window in hours
split_day_night: bool, True if split day and night
Returns:
A tuple of two integers (window, num_windows):
window: int, window in seconds
num_windows: int, number of windows
"""

window = window_minutes * 60
window = window_hours * 60 * 60
num_windows = (end_stamp - start_stamp) // window
if split_day_night:
num_windows *= 2
Expand Down Expand Up @@ -1533,12 +1535,44 @@ def gps_quality_check(study_folder: str, study_id: str) -> float:
# check if there are enough data for the following algorithm
quality_yes = 0.
for i, _ in enumerate(file_path_array):
df = pd.read_csv(file_path_array[i])
try:
df = pd.read_csv(file_path_array[i])
## df = pd.read_csv(file_path_array[i], error_bad_lines=False, warn_bad_lines=True) Removed "bad arguments"
except UnicodeDecodeError:
print(f"unicode error")
quality_check = 0.
#philip line
# Convert timestamp from milliseconds to seconds
if 'timestamp' in df.columns:
df['timestamp_seconds'] = df['timestamp'] / 1000
# Convert to UTC datetime and format
df['UTC time'] = df['timestamp_seconds'].apply(lambda x: datetime.utcfromtimestamp(x).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3])
cols_to_check = ['longitude', 'altitude', 'latitude', 'accuracy']
cols_present = [col for col in cols_to_check if col in df.columns]
if len(cols_present) == len(cols_to_check):
df = df[['timestamp', 'UTC time', 'latitude', 'longitude', 'altitude', 'accuracy']]
print(f"here is df:{df} ")
else :
quality_check = 0.
#end philip line

if df.shape[0] > 60:
quality_yes = quality_yes + 1.
quality_check = quality_yes / (len(file_path_array) + 0.0001)
return quality_check

def validate_trajectory(traj: np.ndarray) -> None:
    """Check that the coordinate columns of a trajectory are within bounds.

    Columns 1 and 4 must lie in [-90, 90] and columns 2 and 5 must lie in
    [-180, 180] (presumably start/end latitude and longitude in degrees --
    TODO confirm against the trajectory layout).

    Args:
        traj: 2-D numpy array with at least 6 columns, one row per
            trajectory segment.
    Raises:
        ValueError: if any value in the checked columns is out of range.
    """
    # An empty trajectory has nothing to validate; np.min/np.max would
    # otherwise fail with an unrelated "zero-size array" ValueError.
    if traj.shape[0] == 0:
        return

    bounded_90 = traj[:, [1, 4]]
    bounded_180 = traj[:, [2, 5]]
    min_90, max_90 = np.min(bounded_90), np.max(bounded_90)
    min_180, max_180 = np.min(bounded_180), np.max(bounded_180)

    if max_90 > 90 or min_90 < -90 or max_180 > 180 or min_180 < -180:
        # Report only the offending extents instead of dumping the whole
        # array into the middle of the message.
        raise ValueError(
            "Trajectory coordinates are not in the range of "
            "[-90, 90] and [-180, 180]. "
            f"Columns 1 and 4 span [{min_90}, {max_90}]; "
            f"columns 2 and 5 span [{min_180}, {max_180}]."
        )


def gps_stats_main(
study_folder: str,
Expand Down Expand Up @@ -1595,14 +1629,8 @@ def gps_stats_main(
as pickle files for future use
and a record csv file to show which users are processed
and logger csv file to show warnings and bugs during the run
Raises:
ValueError: Frequency is not valid
"""

# no minutely analysis on GPS data
if frequency == Frequency.MINUTELY:
raise ValueError("Frequency cannot be minutely.")

os.makedirs(output_folder, exist_ok=True)

if parameters is None:
Expand All @@ -1620,6 +1648,8 @@ def gps_stats_main(
# participant_ids should be a list of str
if participant_ids is None:
participant_ids = get_ids(study_folder)
participant_ids.sort()

# create a record of processed user participant_id and starting/ending time

if all_memory_dict is None:
Expand All @@ -1645,10 +1675,15 @@ def gps_stats_main(
if quality > parameters.quality_threshold:
# read data
logger.info("Read in the csv files ...")
data, _, _ = read_data(
participant_id, study_folder, "gps",
tz_str, time_start, time_end,
)
try:
data, _, _ = read_data(
participant_id, study_folder, "gps",
tz_str, time_start, time_end,
)
except Exception as e:
logger.error("Error: %s", e)
continue
print(f"data{data}")
if data.shape == (0, 0):
logger.info("No data available.")
continue
Expand All @@ -1661,14 +1696,20 @@ def gps_stats_main(
else:
params_h = parameters.h
if parameters.w is None:
params_w = np.mean(data.accuracy)
try:
params_w = np.mean(data.accuracy)
except AttributeError:
print("No accuracy column in the data.")
continue
else:
params_w = parameters.w
# process data
mobmat1 = gps_to_mobmat(
data, parameters.itrvl, parameters.accuracylim,
params_r, params_w, params_h
)
print(f"mobmat1{mobmat1}")

mobmat2 = infer_mobmat(mobmat1, parameters.itrvl, params_r)
out_dict = bv_select(
mobmat2,
Expand All @@ -1693,7 +1734,28 @@ def gps_stats_main(
traj = imp_to_traj(imp_table, mobmat2, params_w)
# raise error if traj coordinates are not in the range of
# [-90, 90] and [-180, 180]
print(f"traj before if{traj}")

print("Max of column 1 (longitude) > 90:", np.max(traj[:, 1]) > 90, "| Value:", np.max(traj[:, 1]))
print("Min of column 1 (longitude) < -90:", np.min(traj[:, 1]) < -90, "| Value:", np.min(traj[:, 1]))
print("Max of column 2 (latitude) > 180:", np.max(traj[:, 2]) > 180, "| Value:", np.max(traj[:, 2]))
print("Min of column 2 (latitude) < -180:", np.min(traj[:, 2]) < -180, "| Value:", np.min(traj[:, 2]))
print("Max of column 4 (duplicate altitude) > 90:", np.max(traj[:, 4]) > 90, "| Value:", np.max(traj[:, 4]))
print("Min of column 4 (duplicate altitude) < -90:", np.min(traj[:, 4]) < -90, "| Value:", np.min(traj[:, 4]))
print("Max of column 5 (timestamp?) > 180:", np.max(traj[:, 5]) > 180, "| Value:", np.max(traj[:, 5]))
print("Min of column 5 (timestamp?) < -180:", np.min(traj[:, 5]) < -180, "| Value:", np.min(traj[:, 5]))



if traj.shape[0] > 0:
try:
validate_trajectory(traj)
# Further processing if validation is successful
except ValueError as e:
print("trajectory failed:", e)
continue

""" philip commented this out
if (
np.max(traj[:, 1]) > 90
or np.min(traj[:, 1]) < -90
Expand All @@ -1705,9 +1767,16 @@ def gps_stats_main(
or np.min(traj[:, 5]) < -180
):
raise ValueError(
"Trajectory coordinates are not in the range of "
f"Trajectory coordinates are not in the range of {traj}"
"[-90, 90] and [-180, 180]."
)

philip lines
"""




# save all_memory_dict and all_bv_set
with open(f"{output_folder}/all_memory_dict.pkl", "wb") as f:
pickle.dump(all_memory_dict, f)
Expand Down
Loading