"""Find TROPOMI methane windows that contain known super-emitter events."""
# Provenance: recovered from a git format-patch whose line breaks and
# indentation had been collapsed.
#   commit  addab43addc5365feb1581093a16a8624f86002f
#   From:   Jancauskas
#   Date:   Thu, 5 Dec 2024 15:44:20 +0100
#   Subject: [PATCH] added create_dataset.py
# The patch creates this module (src/methane_super_emitters/create_dataset.py)
# and also edits src/methane_super_emitters/convert.py: it adds
# `import datetime` and replaces `time_matrix[sl][gp] = time` with
#     if time is not None:
#         time_matrix[sl][gp] = datetime.datetime(time.data[0], time.data[1], time.data[2],
#                                                 time.data[3], time.data[4], time.data[5])
# That convert.py hunk is only a fragment of its `main`; it is recorded here
# for reference and not reconstructed.

import datetime

import click
import numpy as np

# Value in the latitude matrix that marks a cell without valid data.
_FILL_VALUE = -1000
# Edge length of the square windows cut from the matrices.
_WINDOW_SIZE = 32
# Distance between the origins of neighbouring windows (half a window, so
# consecutive windows overlap).
_WINDOW_STEP = 16


def _parse_emitter(csv_line):
    """Parse one CSV record into ``(timestamp, latitude, longitude)``.

    The record is comma separated: date as ``YYYYMMDD``, time as
    ``HH:MM:SS``, latitude, longitude. Any further columns are ignored.

    Raises:
        IndexError: if the record has fewer than four columns or the time
            has fewer than three ``:``-separated parts.
        ValueError: if a field is not numeric or is not a valid date/time.
    """
    fields = csv_line.strip().split(',')
    date_text = fields[0]
    clock = fields[1].split(':')
    timestamp = datetime.datetime(
        int(date_text[0:4]), int(date_text[4:6]), int(date_text[6:8]),
        int(clock[0]), int(clock[1]), int(clock[2]),
    )
    return timestamp, float(fields[2]), float(fields[3])


def _window_bounds(lat_window, lon_window, time_window):
    """Return the extent of a window's valid cells, or ``None`` if it has none.

    A cell is valid when its latitude differs from ``_FILL_VALUE``. The result
    is ``(min_time, max_time, min_lat, max_lat, min_lon, max_lon)`` taken over
    the valid cells only.
    """
    valid = lat_window != _FILL_VALUE
    if not valid.any():
        return None
    # NOTE(review): convert.py only stores a datetime when the source time is
    # not None, so a time cell may still hold a placeholder where the latitude
    # is valid; min()/max() would then raise or mislead. Confirm against how
    # convert.py initialises its time matrix.
    times = time_window[valid]
    lats = lat_window[valid]
    lons = lon_window[valid]
    return (times.min(), times.max(),
            lats.min(), lats.max(),
            lons.min(), lons.max())


def _bounds_contain(bounds, emitter):
    """Tell whether a parsed emitter lies inside ``bounds`` (all ends inclusive)."""
    min_time, max_time, min_lat, max_lat, min_lon, max_lon = bounds
    when, lat, lon = emitter
    return bool((min_time <= when <= max_time) and
                (min_lat <= lat <= max_lat) and
                (min_lon <= lon <= max_lon))


def check_if_inside(csv_line, lat_window, lon_window, time_window):
    """Tell whether a CSV super-emitter record falls inside a data window.

    The record matches when its timestamp, latitude and longitude each lie
    within the inclusive min/max range of the window's valid cells. This is a
    bounding-box test, not a per-pixel one.

    Args:
        csv_line: one CSV record, ``YYYYMMDD,HH:MM:SS,lat,lon[,...]``.
        lat_window: array of latitudes; ``-1000`` marks cells without data.
        lon_window: array of longitudes, same shape as ``lat_window``.
        time_window: array of comparable timestamps, same shape as
            ``lat_window``.

    Returns:
        ``True`` if the record is inside the window's extent, otherwise
        ``False``. A window without any valid cell never matches, and in that
        case ``csv_line`` is not parsed at all.
    """
    bounds = _window_bounds(lat_window, lon_window, time_window)
    if bounds is None:
        return False
    return _bounds_contain(bounds, _parse_emitter(csv_line))


@click.command()
@click.option('-i', '--input-file', required=True,
              help='Input CSV with super-emitter locations')
@click.option('-m', '--matrix-file', required=True,
              help='Input NPZ file with methane data from TROPOMI')
def main(input_file, matrix_file):
    """Report every data window that contains a known super-emitter."""
    # SECURITY: allow_pickle=True lets np.load run arbitrary code stored in
    # the file. It is presumably needed because the time matrix holds Python
    # datetime objects; only load matrix files from a trusted source.
    # The context manager closes the underlying archive once the arrays have
    # been read.
    with np.load(matrix_file, allow_pickle=True) as methane_data:
        methane_matrix = methane_data['methane']
        lat_matrix = methane_data['lat']
        lon_matrix = methane_data['lon']
        time_matrix = methane_data['time']

    with open(input_file, 'r') as fd:
        next(fd, None)  # the first line is a header
        # Parse each record once instead of once per window; blank lines
        # (e.g. a trailing newline) carry no record.
        emitters = [_parse_emitter(line) for line in fd if line.strip()]

    rows, cols = methane_matrix.shape
    # The `+ 1` keeps the window that ends exactly on the last row/column;
    # when the matrix is smaller than one window the ranges are empty.
    for row in range(0, rows - _WINDOW_SIZE + 1, _WINDOW_STEP):
        for col in range(0, cols - _WINDOW_SIZE + 1, _WINDOW_STEP):
            window = (slice(row, row + _WINDOW_SIZE),
                      slice(col, col + _WINDOW_SIZE))
            bounds = _window_bounds(lat_matrix[window],
                                    lon_matrix[window],
                                    time_matrix[window])
            if bounds is None:
                continue
            for emitter in emitters:
                if _bounds_contain(bounds, emitter):
                    print("FOUND!")


if __name__ == '__main__':
    main()