Skip to content

Commit

Permalink
added create_dataset.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Jancauskas committed Dec 5, 2024
1 parent 3792fd1 commit addab43
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/methane_super_emitters/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import matplotlib.pyplot as plt
import click
import numpy.ma as ma
import datetime

@click.command()
@click.option('-i', '--input-file', help='Input netCDF file')
Expand All @@ -36,7 +37,9 @@ def main(input_file, output_file):
methane_matrix[sl][gp] = m
lat_matrix[sl][gp] = lt
lon_matrix[sl][gp] = ln
time_matrix[sl][gp] = time
if time is not None:
time_matrix[sl][gp] = datetime.datetime(time.data[0], time.data[1], time.data[2],
time.data[3], time.data[4], time.data[5])

np.savez(output_file, methane=methane_matrix, lat=lat_matrix, lon=lon_matrix, time=time_matrix)

Expand Down
54 changes: 54 additions & 0 deletions src/methane_super_emitters/create_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import click
import numpy as np
import datetime

def check_if_inside(csv_line, lat_window, lon_window, time_window, fill_value=-1000):
    """Report whether a CSV record falls inside a window of satellite pixels.

    A record matches when its timestamp lies between the earliest and latest
    timestamp found in the window and its coordinates lie inside the
    latitude/longitude bounding box of the window's valid pixels. This is a
    bounding-box test, not an exact footprint test.

    Args:
        csv_line: One CSV record whose first four comma-separated fields are
            the date (``YYYYMMDD``), the time (``HH:MM:SS``), the latitude
            and the longitude.
        lat_window: Array of pixel latitudes. Cells equal to ``fill_value``
            are treated as missing, and the same cells are ignored in
            ``lon_window`` and ``time_window``.
        lon_window: Array of pixel longitudes, same shape as ``lat_window``.
        time_window: Object array of ``datetime.datetime`` values, same shape
            as ``lat_window``. Cells that hold anything other than a
            ``datetime.datetime`` are ignored.
        fill_value: Sentinel marking a missing latitude (default ``-1000``).

    Returns:
        ``True`` if the record lies within the window's time range and
        bounding box, ``False`` otherwise (including when the window has no
        valid pixel or no usable timestamp).

    Raises:
        ValueError: If a date, time or coordinate field is not numeric.
        IndexError: If the record has fewer fields than expected.
    """
    # One mask serves both the "anything valid?" guard and the selections.
    valid = lat_window != fill_value
    if not valid.any():
        return False

    fields = csv_line.strip().split(',')
    date_field = fields[0]
    clock = fields[1].split(':')
    csv_datetime = datetime.datetime(
        int(date_field[0:4]), int(date_field[4:6]), int(date_field[6:8]),
        int(clock[0]), int(clock[1]), int(clock[2]),
    )
    csv_lat = float(fields[2])
    csv_lon = float(fields[3])

    # A pixel can carry valid coordinates but no timestamp (the converter
    # only stores a time when one is available). Mixing such placeholders
    # with datetimes would make min()/max() raise TypeError, so only real
    # datetimes take part in the time bounds.
    times = [t for t in time_window[valid] if isinstance(t, datetime.datetime)]
    if not times:
        return False

    lats = lat_window[valid]
    lons = lon_window[valid]
    return bool(
        min(times) <= csv_datetime <= max(times)
        and lats.min() <= csv_lat <= lats.max()
        and lons.min() <= csv_lon <= lons.max()
    )

@click.command()
@click.option('-i', '--input-file', help='Input CSV with super-emitter locations')
@click.option('-m', '--matrix-file', help='Input NPZ file with methane data from TROPOMI')
def main(input_file, matrix_file):
    """Slide a window over the methane matrices and report CSV matches.

    Loads the ``methane``, ``lat``, ``lon`` and ``time`` arrays from the NPZ
    file, then moves a 32x32 window across them in steps of 16 pixels. For
    every window, each CSV record is tested with ``check_if_inside`` and
    ``FOUND!`` is printed for each match.

    Args:
        input_file: Path to the CSV file. The first line is skipped as a
            header and blank lines are ignored.
        matrix_file: Path to the NPZ file holding the matrices.
    """
    window = 32
    stride = 16

    # NOTE(security): allow_pickle=True unpickles the object-dtype time
    # array and can execute arbitrary code; only load trusted NPZ files.
    # The context manager closes the underlying archive once the arrays
    # have been read into memory.
    with np.load(matrix_file, allow_pickle=True) as methane_data:
        methane_matrix = methane_data['methane']
        lat_matrix = methane_data['lat']
        lon_matrix = methane_data['lon']
        time_matrix = methane_data['time']

    with open(input_file, 'r') as fd:
        # Skip the header row; drop blank lines (e.g. a trailing newline)
        # that would otherwise fail to parse as a record.
        records = [line for line in fd.readlines()[1:] if line.strip()]

    rows, cols = methane_matrix.shape
    # "+ 1" keeps the window that ends exactly on the matrix edge; the
    # ranges are empty when the matrix is smaller than one window.
    for row in range(0, rows - window + 1, stride):
        for col in range(0, cols - window + 1, stride):
            lat_window = lat_matrix[row:row + window, col:col + window]
            lon_window = lon_matrix[row:row + window, col:col + window]
            time_window = time_matrix[row:row + window, col:col + window]
            for csv_line in records:
                if check_if_inside(csv_line, lat_window, lon_window, time_window):
                    print("FOUND!")

# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()

0 comments on commit addab43

Please sign in to comment.