Skip to content

Commit

Permalink
Merge pull request #131 from VegeWaterDynamics/130_changes_to_paper
Browse files Browse the repository at this point in the history
130 changes to paper
  • Loading branch information
rogerkuou authored Aug 20, 2024
2 parents af9c9f7 + 9792a16 commit 5738581
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 9 deletions.
8 changes: 7 additions & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,27 @@ authors:
orcid: 'https://orcid.org/0000-0002-5373-5209'
- given-names: Fakhereh (Sarah)
family-names: Alidoost
orcid: 'https://orcid.org/0000-0001-8407-6472'
affiliation: Netherlands eScience Center
- given-names: Xu
family-names: Shan
orcid: 'https://orcid.org/0000-0002-0569-4326'
affiliation: Delft University of Technology
- given-names: Pranav
family-names: Chandramouli
orcid: 'https://orcid.org/0000-0002-7896-2969'
affiliation: Netherlands eScience Center
- given-names: Sonja
family-names: Georgievska
orcid: 'https://orcid.org/0000-0002-8094-4532'
affiliation: Netherlands eScience Center
- given-names: Meiert
affiliation: Netherlands eScience Center
family-names: Grootes
orcid: 'https://orcid.org/0000-0002-5733-4795'
affiliation: Netherlands eScience Center
- given-names: Susan
family-names: Steele-Dunne
orcid: 'https://orcid.org/0000-0002-8644-3077'
affiliation: Delft University of Technology
identifiers:
- type: doi
Expand Down
72 changes: 69 additions & 3 deletions paper/paper.bib
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ @article{shan:2022
pages = {113116},
year = {2022},
issn = {0034-4257},
doi = {https://doi.org/10.1016/j.rse.2022.113116},
doi = {10.1016/j.rse.2022.113116},
url = {https://www.sciencedirect.com/science/article/pii/S0034425722002309},
author = {Xu Shan and Susan Steele-Dunne and Manuel Huber and Sebastian Hahn and Wolfgang Wagner and Bertrand Bonan and Clement Albergel and Jean-Christophe Calvet and Ou Ku and Sonja Georgievska},
keywords = {ASCAT, Scatterometry, Radar, Vegetation, Land surface model, Machine learning, Deep Neural Network, Plant water dynamics, Soil moisture},
Expand All @@ -31,7 +31,7 @@ @article{XUE:2015
pages = {153-165},
year = {2015},
issn = {0034-4257},
doi = {https://doi.org/10.1016/j.rse.2015.09.009},
doi = {10.1016/j.rse.2015.09.009},
url = {https://www.sciencedirect.com/science/article/pii/S0034425715301322},
author = {Yuan Xue and Barton A. Forman},
keywords = {Sensitivity analysis, Machine learning, Brightness temperature, Snow, Data assimilation},
Expand Down Expand Up @@ -79,5 +79,71 @@ @inproceedings{Rocklin2015DaskPC
author={Matthew Rocklin},
booktitle={SciPy},
year={2015},
url={https://api.semanticscholar.org/CorpusID:63554230}
url={https://api.semanticscholar.org/CorpusID:63554230},
doi={10.25080/majora-7b98e3ed-013}
}

@article{REICHLE20081411,
  author   = {Reichle, Rolf H.},
  title    = {Data assimilation methods in the {Earth} sciences},
  journal  = {Advances in Water Resources},
  volume   = {31},
  number   = {11},
  pages    = {1411--1418},
  year     = {2008},
  note     = {Hydrologic Remote Sensing},
  issn     = {0309-1708},
  doi      = {10.1016/j.advwatres.2008.01.001},
  url      = {https://www.sciencedirect.com/science/article/pii/S0309170808000043},
  keywords = {Data assimilation, Remote sensing, Land surface hydrology, Variational methods, Kalman filter},
  abstract = {Although remote sensing data are often plentiful, they do not usually satisfy the users’ needs directly. Data assimilation is required to extract information about geophysical fields of interest from the remote sensing observations and to make the data more accessible to users. Remote sensing may provide, for example, measurements of surface soil moisture, snow water equivalent, snow cover, or land surface (skin) temperature. Data assimilation can then be used to estimate variables that are not directly observed from space but are needed for applications, for instance root zone soil moisture or land surface fluxes. The paper provides a brief introduction to modern data assimilation methods in the Earth sciences, their applications, and pertinent research questions. Our general overview is readily accessible to hydrologic remote sensing scientists. Within the general context of Earth science data assimilation, we point to examples of the assimilation of remotely sensed observations in land surface hydrology.}
}

@article{Carrassi2018,
  author    = {Carrassi, Alberto and Bocquet, Marc and Bertino, Laurent and Evensen, Geir},
  title     = {Data assimilation in the geosciences: An overview of methods, issues, and perspectives},
  journal   = {Wiley Interdisciplinary Reviews: Climate Change},
  publisher = {Wiley Online Library},
  year      = {2018},
  volume    = {9},
  number    = {5},
  pages     = {e535},
  doi       = {10.1002/wcc.535}
}

@article{Evensen2009,
  author   = {Evensen, Geir},
  title    = {The ensemble {Kalman} filter for combined state and parameter estimation},
  journal  = {IEEE Control Systems Magazine},
  year     = {2009},
  volume   = {29},
  number   = {3},
  pages    = {83--104},
  keywords = {Parameter estimation;Particle filters;Atmospheric modeling;Covariance matrix;Nonlinear equations;Error analysis;Kalman filters;Computational modeling;Computational efficiency;Bayesian methods},
  doi      = {10.1109/MCS.2009.932223}
}

@article{Clement2018,
  author         = {Albergel, Clement and Munier, Simon and Bocher, Aymeric and Bonan, Bertrand and Zheng, Yongjun and Draper, Clara and Leroux, Delphine J. and Calvet, Jean-Christophe},
  title          = {{LDAS-Monde} Sequential Assimilation of Satellite Derived Observations Applied to the Contiguous {US}: An {ERA-5} Driven Reanalysis of the Land Surface Variables},
  journal        = {Remote Sensing},
  volume         = {10},
  year           = {2018},
  number         = {10},
  pages          = {1627},
  article-number = {1627},
  url            = {https://www.mdpi.com/2072-4292/10/10/1627},
  issn           = {2072-4292},
  abstract       = {Land data assimilation system (LDAS)-Monde, an offline land data assimilation system with global capacity, is applied over the CONtiguous US (CONUS) domain to enhance monitoring accuracy for water and energy states and fluxes. LDAS-Monde ingests satellite-derived surface soil moisture (SSM) and leaf area index (LAI) estimates to constrain the interactions between soil, biosphere, and atmosphere (ISBA) land surface model (LSM) coupled with the CNRM (Centre National de Recherches Météorologiques) version of the total runoff integrating pathways (CTRIP) continental hydrological system (ISBA-CTRIP). LDAS-Monde is forced by the ERA-5 atmospheric reanalysis from the European Center for Medium Range Weather Forecast (ECMWF) from 2010 to 2016 leading to a seven-year, quarter degree spatial resolution offline reanalysis of land surface variables (LSVs) over CONUS. The impact of assimilating LAI and SSM into LDAS-Monde is assessed over North America, by comparison to satellite-driven model estimates of land evapotranspiration from the Global Land Evaporation Amsterdam Model (GLEAM) project, and upscaled ground-based observations of gross primary productivity from the FLUXCOM project. Taking advantage of the relatively dense data networks over CONUS, we have also evaluated the impact of the assimilation against in situ measurements of soil moisture from the USCRN (US Climate Reference Network), together with river discharges from the United States Geological Survey (USGS) and the Global Runoff Data Centre (GRDC). Those data sets highlight the added value of assimilating satellite derived observations compared with an open-loop simulation (i.e., no assimilation). It is shown that LDAS-Monde has the ability not only to monitor land surface variables but also to forecast them, by providing improved initial conditions, which impacts persist through time. LDAS-Monde reanalysis also has the potential to be used to monitor extreme events like agricultural drought. 
Finally, limitations related to LDAS-Monde and current satellite-derived observations are exposed as well as several insights on how to use alternative datasets to analyze soil moisture and vegetation state.},
  doi            = {10.3390/rs10101627}
}

@article{zhou2008ensemble,
  author    = {Zhou, Yuhua and McLaughlin, Dennis and Entekhabi, Dara and Ng, Gene-Hua Crystal},
  title     = {An ensemble multiscale filter for large nonlinear data assimilation problems},
  journal   = {Monthly Weather Review},
  publisher = {American Meteorological Society},
  year      = {2008},
  volume    = {136},
  number    = {2},
  pages     = {678--698},
  doi       = {10.1175/2007MWR2064.1}
}
16 changes: 11 additions & 5 deletions paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,22 @@ authors:
orcid: 0000-0002-5373-5209
affiliation: 1
- name: Fakhereh Alidoost
orcid: 0000-0001-8407-6472
affiliation: 1
- name: Xu Shan
orcid: 0000-0002-0569-4326
affiliation: 2
- name: Pranav Chandramouli
orcid: 0000-0002-7896-2969
affiliation: 1
- name: Sonja Georgievska
orcid: 0000-0002-8094-4532
affiliation: 1
- name: Meiert W. Grootes
orcid: 0000-0002-5733-4795
affiliation: 1
- name: Susan Steele-Dunne
orcid: 0000-0002-8644-3077
corresponding: true
affiliation: 2
affiliations:
Expand All @@ -34,17 +40,17 @@ bibliography: paper.bib

## Summary

Data assimilation (DA) is an essential procedure in Earth and environmental sciences, enabling physical model states to be constrained using observational data.
Data assimilation (DA) is an essential procedure in Earth and environmental sciences, enabling physical model states to be constrained using observational data [@REICHLE20081411; @Evensen2009; @Clement2018; @Carrassi2018].

In the DA process, observations are integrated into the physical model through the application of a Measurement Operator (MO) – a connection model mapping physical model states to observations. Researchers have observed that employing a Machine-Learning (ML) model as a surrogate MO can bypass the limitations associated with using an overly simplified MO [@Forman:2014; @XUE:2015; @Forman:2017].
In the DA process, observations are integrated into the physical model through the application of a Measurement Operator (MO) – a connection model mapping physical model states to observations. Researchers have observed that employing a Machine-Learning (ML) model as a surrogate MO can bypass the limitations associated with using an overly simplified MO [@Forman:2014; @XUE:2015; @Forman:2017].

## Statement of Need

A surrogate MO, as a ML model is trained with the assumption that a single MO applies when mapping physical model states to observations. When dealing with a large spatio-temporal scale, multiple mapping processes may exist, prompting consideration for training separate MOs for distinct spatial and/or temporal partitions of the dataset. As the number of partitions increases, a challenge arises in distributing these training tasks effectively among the partitions.
A surrogate MO, trained as an ML model, is generally considered valid within a specific spatio-temporal range [@zhou2008ensemble; @REICHLE20081411; @shan:2022]. When dealing with a large spatio-temporal scale, multiple mapping processes may exist, prompting consideration for training separate MOs for distinct spatial and/or temporal partitions of the dataset. As the number of partitions increases, a challenge arises in distributing these training tasks effectively among the partitions.

To address this challenge, we developed a novel approach for distributed training of MOs. We present the open Python library `MOTrainer`, which to the best of our knowledge, is the first Python library catering to researchers requiring training independent MOs across extensive spatio-temporal coverage in a distributed manner. `MOTrainer` leverages Xarray's [@Hoyer_xarray_N-D_labeled_2017] support for multi-dimensional datasets to accommodate spatio-temporal features of input/output data of the training tasks. It provides user-friendly functionalities implemented with the Dask [@Rocklin2015DaskPC] library, facilitating the partitioning of large spatio-temporal data for independent model training tasks. Additionally, it streamlines the train-test data split based on customized spatio-temporal coordinates. The Jackknife method [@mccuen1998hydrologic] is implemented as an external Cross-Validation (CV) method for Deep Neural Network (DNN) training, with support for Dask parallelization. This feature enables the scaling of training tasks across various computational infrastructures.
To address this challenge, we developed a novel approach for distributed training of MOs. We present the open Python library `MOTrainer`, which, to the best of our knowledge, is the first Python library catering to researchers who need to train independent MOs across extensive spatio-temporal coverage in a distributed manner. `MOTrainer` leverages Xarray's [@Hoyer_xarray_N-D_labeled_2017] support for multi-dimensional datasets to accommodate spatio-temporal features of input/output data of the training tasks. It provides user-friendly functionalities implemented with the Dask [@Rocklin2015DaskPC] library, facilitating the partitioning of large spatio-temporal data for independent model training tasks. Additionally, it streamlines the train-test data split based on customized spatio-temporal coordinates. The Jackknife method [@mccuen1998hydrologic] is implemented as an external Cross-Validation method for Deep Neural Network (DNN) training, with support for Dask parallelization. This feature enables the scaling of training tasks across various computational infrastructures.

`MOTrainer` has been employed in a study of vegetation water dynamics [@shan:2022], where it facilitated the mapping of Land-Scape Model (LSM) states to satellite radar observations.
`MOTrainer` has been employed in a study of vegetation water dynamics [@shan:2022], where it facilitated the mapping of Land Surface Model states to satellite radar observations.

## Tutorial

Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ exclude = [
"node_modules",
"venv",
"docs",
"exploration",
"site",
]

line-length = 88
Expand Down

0 comments on commit 5738581

Please sign in to comment.