Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor GCBM API #163

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified GCBM_New_Demo_Run.zip
Binary file not shown.
5 changes: 3 additions & 2 deletions local/rest_api_gcbm/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
version: "3.9"
services:
flint.example:
build: .
gcbm:
image: ghcr.io/moja-global/rest_api_gcbm:master
container_name: flint.gcbm
ports:
- "8080:8080"
volumes:
Expand Down
355 changes: 355 additions & 0 deletions local/rest_api_gcbm/gcbm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,355 @@
import os
import shutil
import pathlib
import json

import rasterio


class GCBMList:
    """
    Base class for the per-category file lists used by the GCBM pre-processing scripts.

    A list only accepts paths that contain its ``category`` string (see
    ``_append``) and keeps one JSON config file per tracked input inside
    ``dirpath``.

    Attributes
    ==========
    dirpath  : folder the JSON configs are read from / written to
    data     : file names accepted by ``_append``
    files    : mapping of file name -> config dict (kept by reference when supplied)
    config   : names of the JSON config files generated so far (kept by reference when supplied)
    category : substring a path must contain to belong to this list
    """

    def __init__(self, files=None, config=None, category=None):
        # TODO: set simulation folder as global
        self.dirpath = "input/test-run"
        self.data = []
        # Caller-supplied containers are kept by reference so several lists can
        # share one index; fresh containers are created only when nothing was
        # passed (a mutable default would be shared by every instance).
        self.files = {} if files is None else files
        self.config = [] if config is None else config
        self.category = category

    def __iter__(self):
        # __iter__ must return an iterator, not the list itself.
        return iter(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def is_category(self, path):
        """Return True when ``path`` contains this list's category string.

        Raises NotImplementedError when no category was configured.
        """
        if self.category is None:
            raise NotImplementedError(
                "Please implement `is_category` method, which is used by _append() method"
            )
        return self.category in path

    # Unlike list.append() in Python, this returns a bool - whether the append
    # was successful or not + checks if the file path is of the current category
    def _append(self, file_path):
        if self.is_category(file_path):
            self.data.append(file_path)
            return True
        return False

    def _update_config(self):
        """Create the JSON config of every tracked file, or rewrite it from ``self.files``."""
        for file in self.data:
            json_config_file = GCBMList.change_extension(file, ".json")

            if json_config_file.name not in self.config:
                self.generate_config(file, json_config_file)
                continue

            json_filepath = os.path.join(self.dirpath, json_config_file)
            # "w" truncates first, so a shorter config never leaves stale
            # bytes from the previous version behind.
            with open(json_filepath, "w") as _config:
                json.dump(self.files[file], _config, indent=4)

    def _populate_config_with_hard_coded_config(self, config, hc_config, nodata):
        """Merge ``hc_config`` (hard coded config) into ``config`` and return it.

        Keys of the form ``_<key>_<obj>`` set ``config[<key>]`` to ``nodata``
        unless their value is None; every other key is copied as-is.
        """
        for key, value in hc_config.items():
            if not key.startswith("_"):
                config[key] = value
            elif value is not None:
                # the format is: _<key>_<obj>: <index> (index is useless here, TODO: remove it)
                config[key.split("_")[1]] = nodata
        return config

    def generate_config(self, file, json_config_file):
        """Build the config for ``file``, write it to ``json_config_file`` and register it.

        An existing JSON file is used as the starting point. For ``.tiff``
        inputs the cell sizes and nodata value are read from the raster and
        the block/tile/layer defaults are derived from them.
        """
        filepath = os.path.join(self.dirpath, file)
        json_filepath = os.path.join(self.dirpath, json_config_file)

        # Start from whatever is already on disk, if anything.
        config = {}
        if os.path.exists(json_filepath):
            with open(json_filepath, "r") as _file:
                config = json.load(_file)

        # AO: hard coded values (hard_coded_values/<name>.json) are disabled
        # in favour of user defined attributes, see setattr().

        # Defaults. Grid defaults need the raster's cell size, so they are
        # applied to .tiff inputs only.
        if ".tiff" in file:
            with rasterio.open(filepath) as raster_obj:
                tr = raster_obj.transform
                config["cellLatSize"] = tr[0]
                config["cellLonSize"] = -tr[4]
                config["nodata"] = raster_obj.nodata

            config["blockLonSize"] = config["cellLonSize"] * 400
            config["blockLatSize"] = config["cellLatSize"] * 400
            config["tileLatSize"] = config["cellLatSize"] * 4000
            config["tileLonSize"] = config["cellLonSize"] * 4000
            config["layer_type"] = "GridLayer"
            config["layer_data"] = "Byte"

        # Write in a separate "w" pass: dumping through the handle that was
        # just read would append a second JSON document to the file.
        print("Dumping config: ", config)
        with open(json_filepath, "w") as _file:
            json.dump(config, _file, indent=4)

        print(file)
        self.files[file] = config
        self.config.append(pathlib.Path(json_config_file).name)

    def setattr(self, file, attributes):
        """Attach user defined ``attributes`` to ``file``'s config and persist all configs.

        ``has_year`` is switched on when the attributes carry a truthy
        ``"year"``; attributes without a year (e.g. classifiers) are accepted.
        """
        config = self.files[file]
        config["attributes"] = attributes

        if attributes.get("year"):
            config["has_year"] = True

        self.files[file] = config
        self._update_config()

    @staticmethod
    def change_extension(file_path, new_extension):
        """Return ``file_path`` as a ``pathlib.Path`` with its suffix replaced by ``new_extension``."""
        # TODO: let's use pathlib.Path everywhere, for now it's okay here
        return pathlib.Path(file_path).with_suffix(new_extension)


class GCBMDisturbanceList(GCBMList):
    """File list for the "disturbances" category."""

    def __init__(self, files, config):
        # The base initialiser sets dirpath, files and config itself, so the
        # arguments are simply forwarded together with the category name.
        super().__init__(files=files, config=config, category="disturbances")


class GCBMClassifiersList(GCBMList):
    """File list for the "classifiers" category."""

    def __init__(self, files, config):
        # files/config must be forwarded: the base initialiser assigns
        # self.files and self.config itself, so calling it with the category
        # only would replace the caller's containers with the base defaults.
        super().__init__(files=files, config=config, category="classifiers")


class GCBMMiscellaneousList(GCBMList):
    """File list for the "miscellaneous" category."""

    def __init__(self, files, config):
        # files/config must be forwarded: the base initialiser assigns
        # self.files and self.config itself, so calling it with the category
        # only would replace the caller's containers with the base defaults.
        super().__init__(files=files, config=config, category="miscellaneous")


class GCBMSimulation:
def __init__(self):
    """Create the simulation folder, index the templates and build the category lists."""
    # Global index: file name -> config contents.
    self.dirpath = "input/test-run"
    self.files = dict()

    # Sub-indices of different types.
    self.config = []
    self.parameters = list()  # this is the input_db

    # The folder has to exist before the templates are copied into it.
    self.create_simulation_folder()
    self.create_file_index()

    # Every category list receives the same index and config containers.
    shared = {"files": self.files, "config": self.config}
    self.classifiers = GCBMClassifiersList(**shared)
    self.disturbances = GCBMDisturbanceList(**shared)
    self.miscellaneous = GCBMMiscellaneousList(**shared)

def create_simulation_folder(self):
    """Create ``self.dirpath`` (including parents); a folder that already exists is left alone."""
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(self.dirpath, exist_ok=True)


def create_file_index(self):
    """Index every file below ``templates`` and copy it into the simulation folder.

    Each file is read through ``safe_read_json`` and stored in ``self.files``
    under its bare file name.
    """
    config_dir_path = "templates"
    is_config_dir = os.path.isdir(config_dir_path)
    assert is_config_dir, f"Given config directory path: {config_dir_path} either does not exist or is not a directory."

    for root, _subdirs, names in os.walk(config_dir_path):
        for name in names:
            template_path = os.path.abspath(os.path.join(root, name))

            # TODO: Discussion - should the key be the absolute path, or just the filename?
            self.files[name] = GCBMSimulation.safe_read_json(template_path)

            # AO: sync_config is a write method, saving the current config
            # state file - doing dumb copy until implemented.
            destination = os.path.join(self.dirpath, name)
            shutil.copy(template_path, destination)

# file_path: disturbances (NOT MUST), classifiers (MUST), miscellaneous (MUST)
def add_file(self, file_path: str):
    """
    Copy an input file into the simulation folder and register it.

    This function:

    1. Copies the file into the simulation folder under its base name.
    2. Checks whether the given path belongs to one of the categories:
       disturbances, classifiers, and miscellaneous (in that order).
    3. Adds the file name to the first matching list and updates that
       list's config (JSON).

    A file that matches no category is copied but not registered.

    Parameters
    ==========
    1. file_path (str), no default
    """

    # TODO: update to accept input from Flask endpoint
    filename = os.path.basename(file_path)
    shutil.copy(file_path, os.path.join(self.dirpath, filename))

    for group in (self.disturbances, self.classifiers, self.miscellaneous):
        # The category is matched against the full path: the category name is
        # usually the parent folder (e.g. ".../classifiers/Classifier1.tiff")
        # and does not have to appear in the file name itself.
        if group.is_category(file_path):
            # The list tracks names relative to the simulation folder.
            group.data.append(filename)
            group._update_config()
            return
# TODO: Add covariates here

# TODO
# self._save(file_path)

def sync_config(self, file_path):
    """Re-read ``file_path`` and refresh ``self.files[file_path]`` when the contents differ.

    The file is rewritten as indented JSON in that case. Raises KeyError
    when ``file_path`` is not a key of ``self.files``.
    """
    # NOTE(review): this syncs disk -> memory; the review notes describe
    # sync_config as a write method (memory -> disk) - confirm the direction.
    data = GCBMSimulation.safe_read_json(file_path)

    if self.files[file_path] != data:
        # Means data has changed, so update the file_path. The parsed data is
        # a dict and has to be serialised; file.write() only accepts str.
        with open(file_path, "w+") as _file:
            json.dump(data, _file, indent=4)
        # Also update the dict
        self.files[file_path] = data

# TODO (@ankitaS11): We can just have these as class methods later, this will reduce the redundancy in the code later
def update_disturbance_config(self):
    """Create or rewrite the JSON configs of the tracked disturbance files."""
    disturbance_list = self.disturbances
    disturbance_list._update_config()

def set_disturbance_attributes(self, file, payload):
    """Store ``payload`` as the attributes of the disturbance file ``file``."""
    disturbance_list = self.disturbances
    disturbance_list.setattr(file, payload)

def update_classifier_config(self):
    """Create or rewrite the JSON configs of the tracked classifier files."""
    classifier_list = self.classifiers
    classifier_list._update_config()

def set_classifier_attributes(self, file, payload):
    """Store ``payload`` as the attributes of the classifier file ``file``."""
    classifier_list = self.classifiers
    classifier_list.setattr(file, payload)

def update_miscellaneous_config(self):
    """Create or rewrite the JSON configs of the tracked miscellaneous files."""
    miscellaneous_list = self.miscellaneous
    miscellaneous_list._update_config()

def set_miscellaneous_attributes(self, file, payload):
    """Store ``payload`` as the attributes of the miscellaneous file ``file``."""
    miscellaneous_list = self.miscellaneous
    miscellaneous_list.setattr(file, payload)

@staticmethod
def safe_read_json(path):

# TODO: add read method for gcbm_config.cfg and logging.conf
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aornugent - Thanks for adding these TODOs in the code, they are extremely useful for me to track what needs to be done. However, I'm a bit confused about this one - do we want to just read these files, and store them in another folder? Or in a JSON? Or do we want to extract any information, and use it somewhere else? I'm not sure as I couldn't find a relevant part of it in app.py, so I'll really appreciate it if you can help me with this.

Copy link
Contributor Author

@aornugent aornugent Aug 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ankitaS11 - this one is a little cryptic. The two files need to be copied from templates and the default behaviour was to traverse all the config. files in the folder with safe_read_json, read their contents into the files dictionary, and then save a copy in the simulation folder.

But safe_read_json() only reads JSON, and was erroring out when given the .cfg and .conf files.

The contents of these files don't change very often (if ever!) but presently we have no way of reading them as part of the simulation configuration.

I'd appreciate your advice on how they can be handled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @aornugent. I'm not Ankita, but I think configparser library would be a good choice for parsing the .conf files. They provide easy parsing for .ini style syntax.

For .cfg files, we can prepend a pseudo-section and then use configparser.

The code would look something like this:

import configparser

# For conf files
parser = configparser.ConfigParser()
parser.read('what.conf')
log = parser['Core']['DisableLogging']

# For cfg files
parser = configparser.ConfigParser()
with open("gcbm_config.cfg") as config:
    parser.read_string("[top]\n" + config.read())

The problem is that all keys in the gcbm_config.cfg have the same name. This causes an error too.

config=localdomain.json
config=pools_cbm.json
config=modules_cbm.json
config=modules_output.json
config=spinup.json
config=variables.json
config=internal_variables.json

we can maybe rename some fields? Like localdomain_config=localdomain.json? :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @Crystalsage - that's a good suggestion. I don't think we can change the field names, because they're in the format that the GCBM understands.

Maybe we can just read these files in as strings for now?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aornugent Ah I see. In that case - what do you think about preparing a makeshift .cfg config in memory for better parsing and using that throughout the API? This way, we won't need to restructure the config file - and we can always change a few things around the API. A very rough sketch looks something like this:

import configparser

parser = configparser.ConfigParser()

gcbm_config_file = open("gcbm_config.cfg").read()
config_file_new = ""

# Build a makeshift config
for config in gcbm_config_file.splitlines():
    # Get the json file name
    file_name = config.split('=')[1]

    # Trim off the extension to get the config type
    config_name = file_name[:-5]

    # Format into .ini style
    # e.g. localdomain_config=localdomain.json
    config_file_new += f"{config_name}_config={file_name}\n"

parser.read_string("[top]\n" + config_file_new)

# Returns 'localdomain.json'
parser['top']['localdomain_config']

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @Crystalsage - sorry for the delay. Your proposal looks great!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @Crystalsage - are you able to append your suggested changes to this branch, please?

# Body of safe_read_json(path): returns the parsed JSON contents of `path`.
# .cfg / .conf files are not parsed: they are copied into the simulation
# folder under a fixed name and an empty dict is returned for them.
# NOTE(review): these are substring checks, so ".cfg"/".conf"/".json" anywhere
# in the path (not only as the extension) matches - confirm this is intended.
if ".cfg" in path:
filename = os.path.join('input/test-run', "gcbm_config.cfg")
shutil.copy(path, filename)
return {}
if ".conf" in path:
filename = os.path.join('input/test-run', "logging.conf")
shutil.copy(path, filename)
return {}

# check JSON
# Anything else that is not JSON is rejected with a UserWarning exception.
if ".json" not in path:
raise UserWarning(f"Given path {path} not a valid json file")
# NOTE(review): unreachable - the raise above always leaves the function.
return {}

# Make sure it's a file and not a directory
# The message says "skipping it", but the path is rejected with an exception.
if not os.path.isfile(path):
raise UserWarning(
f"Got a directory {path} inside the config directory path, skipping it."
)
# NOTE(review): unreachable - the raise above always leaves the function.
return {}
# json.load raises json.JSONDecodeError when the file is not valid JSON.
with open(path, "r") as json_file:
data = json.load(json_file)
return data


if __name__ == "__main__":
    # Demo run: registers the inputs of tests/GCBM_New_Demo_Run one by one.
    sim = GCBMSimulation()

    # (source path, name inside the simulation folder, attributes), in the
    # order the layers are added.
    # TODO: Check how to handle multiple attributes entries (L442-451 of `app.py:master`),
    # e.g. a second {"year": 2013, "disturbance_type": "Wildfire", "transition": 1}
    # entry for disturbances_2013.tiff.
    disturbance_layers = [
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2011.tiff",
            "disturbances_2011.tiff",
            {"year": 2011, "disturbance_type": "Wildfire", "transition": 1},
        ),
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2012.tiff",
            "disturbances_2012.tiff",
            {"year": 2012, "disturbance_type": "Wildfire", "transition": 1},
        ),
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2013.tiff",
            "disturbances_2013.tiff",
            {"year": 2013, "disturbance_type": "Mountain pine beetle — Very severe impact", "transition": 1},
        ),
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2014.tiff",
            "disturbances_2014.tiff",
            {"year": 2014, "disturbance_type": "Mountain pine beetle — Very severe impact", "transition": 1},
        ),
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2015.tiff",
            "disturbances_2015.tiff",
            {"year": 2015, "disturbance_type": "Wildfire", "transition": 1},
        ),
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2016.tiff",
            "disturbances_2016.tiff",
            {"year": 2016, "disturbance_type": "Wildfire", "transition": 1},
        ),
        (
            "tests/GCBM_New_Demo_Run/disturbances/disturbances_2018.tiff",
            "disturbances_2018.tiff",
            {"year": 2018, "disturbance_type": "Wildfire", "transition": 1},
        ),
    ]
    for source_path, layer_name, attributes in disturbance_layers:
        sim.add_file(source_path)
        sim.set_disturbance_attributes(layer_name, attributes)

    # TODO: classifiers don't have 'year' attributes, so no
    # set_classifier_attributes() call is made for them yet. Candidate payloads:
    #   Classifier1: {"1": "TA", "2": "BP", "3": "BS", "4": "JP", "5": "WS", "6": "WB", "7": "BF", "8": "GA"}
    #   Classifier2: {"1": "5", "2": "6", "3": "7", "4": "8"}
    remaining_inputs = (
        "tests/GCBM_New_Demo_Run/classifiers/Classifier1.tiff",
        "tests/GCBM_New_Demo_Run/classifiers/Classifier2.tiff",
        "tests/GCBM_New_Demo_Run/db/gcbm_input.db",
        "tests/GCBM_New_Demo_Run/miscellaneous/initial_age.tiff",
        "tests/GCBM_New_Demo_Run/miscellaneous/mean_annual_temperature.tiff",
    )
    for source_path in remaining_inputs:
        sim.add_file(source_path)
6 changes: 2 additions & 4 deletions local/rest_api_gcbm/templates/modules_cbm.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
"order": 3,
"library": "moja.modules.cbm",
"settings": {
"vars": [

]
"vars": []
}
},
"CBMDisturbanceEventModule": {
Expand Down Expand Up @@ -56,4 +54,4 @@
"library": "internal.flint"
}
}
}
}
2 changes: 1 addition & 1 deletion local/rest_api_gcbm/templates/provider_config.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"Providers": {
"SQLite": {
"path": "../input_database/gcbm_input.db",
"path": "",
"type": "SQLite"
},
"RasterTiled": {
Expand Down
Loading