-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update fix/clean up #4062
Update fix/clean up #4062
Changes from all commits
c8def23
a176685
c15a9a1
c057a19
688e416
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,31 +8,47 @@ | |
class DailyDataUtils: | ||
@staticmethod | ||
def average_data(data: pd.DataFrame) -> pd.DataFrame: | ||
averaged_data = pd.DataFrame() | ||
data["timestamp"] = data["timestamp"].apply(pd.to_datetime) | ||
|
||
for _, by_tenant in data.groupby("tenant"): | ||
tenant = by_tenant.iloc[0]["tenant"] | ||
del by_tenant["tenant"] | ||
for _, by_device in by_tenant.groupby("device_id"): | ||
site_id = by_device.iloc[0]["site_id"] | ||
device_id = by_device.iloc[0]["device_id"] | ||
device_number = by_device.iloc[0]["device_number"] | ||
|
||
del by_device["site_id"] | ||
del by_device["device_id"] | ||
del by_device["device_number"] | ||
|
||
device_averages = by_device.resample("1D", on="timestamp").mean() | ||
device_averages["timestamp"] = device_averages.index | ||
device_averages["device_id"] = device_id | ||
device_averages["site_id"] = site_id | ||
device_averages["device_number"] = device_number | ||
device_averages["tenant"] = tenant | ||
|
||
averaged_data = pd.concat( | ||
[averaged_data, device_averages], ignore_index=True | ||
) | ||
""" | ||
Averages data in a pandas DataFrame on a daily basis for each device, | ||
grouped by network and device ID. The function resamples data | ||
to compute daily averages for numerical columns. | ||
|
||
Args: | ||
data (pd.DataFrame): A pandas DataFrame containing the following columns: | ||
- "timestamp": Timestamps of the data. | ||
- "network": The network the data belongs to. | ||
- "device_id": Unique identifier for the device. | ||
- "site_id": Unique identifier for the site associated with the device. | ||
- "device_number": Device number. | ||
|
||
Returns: | ||
pd.DataFrame: A DataFrame containing daily averages for each device, | ||
including metadata columns such as "tenant", "device_id", "site_id", | ||
and "device_number". | ||
""" | ||
Comment on lines +11 to +28
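There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Update docstring to replace 'tenant' with 'network' for consistency. In the `average_data` docstring, the Returns section still lists "tenant" as a metadata column, while the Args section and the code itself group by and return "network". |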
||
data["timestamp"] = pd.to_datetime(data["timestamp"]) | ||
|
||
averaged_data_list = [] | ||
|
||
for (network, device_id), group in data.groupby(["network", "device_id"]): | ||
network = group["network"].iloc[0] | ||
site_id = group["site_id"].iloc[0] | ||
device_number = group["device_number"].iloc[0] | ||
|
||
device_averages = ( | ||
group.resample("1D", on="timestamp") | ||
.mean(numeric_only=True) | ||
.reset_index() | ||
) | ||
|
||
device_averages["network"] = network | ||
device_averages["device_id"] = device_id | ||
device_averages["site_id"] = site_id | ||
device_averages["device_number"] = device_number | ||
|
||
averaged_data_list.append(device_averages) | ||
|
||
averaged_data = pd.concat(averaged_data_list, ignore_index=True) | ||
|
||
return averaged_data | ||
|
||
|
@@ -77,7 +93,7 @@ def cleanup_and_reload( | |
) | ||
|
||
bigquery_api.reload_data( | ||
tenant=Tenant.ALL, | ||
network="all", | ||
table=table, | ||
dataframe=data, | ||
start_date_time=start_date_time, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Clarify the purpose of copying the DataFrame or remove the unnecessary copy
The comment `# Not sure why this dataframe is being copied. # Memory wastage?` suggests uncertainty about the necessity of copying the DataFrame. If the copy is not required, consider removing `data = dataframe.copy()` to conserve memory. If it is necessary, please provide an explanation to clarify its purpose for future reference.