From 99283da50691adcccf056d12e1d1d18efaf98554 Mon Sep 17 00:00:00 2001 From: Kevin Phan <98072684+ph-kev@users.noreply.github.com> Date: Tue, 22 Oct 2024 17:04:34 -0700 Subject: [PATCH] Add leaderboard component --- docs/make.jl | 1 + docs/src/leaderboard/leaderboard.md | 123 ++++++ experiments/long_runs/land_leaderboard.jl | 403 ++++++++++++++++++ .../long_runs/leaderboard/data_sources.jl | 163 +++++++ .../long_runs/leaderboard/leaderboard.jl | 167 ++++++++ 5 files changed, 857 insertions(+) create mode 100644 docs/src/leaderboard/leaderboard.md create mode 100644 experiments/long_runs/land_leaderboard.jl create mode 100644 experiments/long_runs/leaderboard/data_sources.jl create mode 100644 experiments/long_runs/leaderboard/leaderboard.jl diff --git a/docs/make.jl b/docs/make.jl index 08d9695e4f..b1d6827fcd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -55,6 +55,7 @@ pages = Any[ "Tutorials" => tutorials, "Standalone models" => standalone_models, "Diagnostics" => diagnostics, + "Leaderboard" => "leaderboard/leaderboard.md", "Restarts" => "restarts.md", "Contribution guide" => "Contributing.md", "Repository structure" => "folderstructure.md", diff --git a/docs/src/leaderboard/leaderboard.md b/docs/src/leaderboard/leaderboard.md new file mode 100644 index 0000000000..80c5db9d7e --- /dev/null +++ b/docs/src/leaderboard/leaderboard.md @@ -0,0 +1,123 @@ +# Leaderboard + +## Long run + +### Add a new variable to compare against observations +Computing errors against observations are all contained in the `leaderboard` folder. The +files in the leaderboard folder are `data_sources.jl` and `leaderboard.jl`. Loading and +preprocessing variables of interest are done in `data_sources.jl` and computing the errors +and plotting are done in `leaderboard.jl`. To add a new variable, you ideally only need to +modify `data_sources.jl`. + +### Computation +As of now, the leaderboard produces bias plots with the global bias and global root mean +squared error (RMSE). These quantities are computed for each month with the first year of +the simulation not considered as that is the spinup time. The start date of the simulation +is 2012 which means that only the year 2013 is used to compare against observational data. +See the plots below for what this look like. + +![bias_with_custom_mask_plot](./leaderboard/images/global_rmse_and_bias_graphs.png) +![gpp_bias_plot](./leaderboard/images/gpp_bias_plot.png) + +### Add a new variable to the bias plots +There are four dictionaries that you need to modify to add a new variable which are +`sim_var_dict`, `obs_var_dict`, `mask_dict`, and `compare_vars_biases_plot_extrema`. + +To add a variable for the leaderboard, add a key-value pair to the dictionary `sim_var_dict` +whose key is the short name of the variable and the value is a function that returns a +[`OutputVar`](https://clima.github.io/ClimaAnalysis.jl/dev/var/). Any preprocessing is done +in the function which includes unit conversion and shifting the dates. + +```julia +sim_var_dict["et"] = + () -> begin + # Load in variable + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = "et", + ) + # Shift to the first day and subtract one month as preprocessing + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end +``` + +Then, add a key-value pair to the dictionary `obs_var_dict` whose key is the same short name +as before and the value is a function that takes in a start date and returns a `OutputVar`. +Any preprocessing is done in the function. + +```julia +obs_var_dict["et"] = + (start_date) -> begin + # We use ClimaArtifacts to use a dataset from ILAMB + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "evspsbl_MODIS_et_0.5x0.5.nc", + ), + "et", + # start_date is used to align the dates in the observational data + # with the simulation data + new_start_date = start_date, + # Shift dates to the first day of the month before aligning the dates + shift_by = Dates.firstdayofmonth, + ) + # More preprocessing to match the units with the simulation data + ClimaAnalysis.units(obs_var) == "kg/m2/s" && + (obs_var = ClimaAnalysis.set_units(obs_var, "kg m^-2 s^-1")) + # ClimaAnalysis cannot handle `missing` values, but does support handling NaNs + obs_var = ClimaAnalysis.replace(obs_var, missing => NaN) + return obs_var + end +``` + +!!! tip "Preprocessing" + Observational and simulational data should be preprocessed for dates and units. For + simulation data, monthly averages correspond to the first day following the month. + For instance, the monthly average corresponding to January 2010 is on the date + 2/1/2010. Preprocessing is done to shift this date to 1/1/2010. When preprocessing + data, we follow the convention that the first day corresponds to the monthly average + for that month. For observational data, you should check the convention being followed + and preprocess the dates if necessary. + + For `obs_var_dict`, the anonymous function must take in a start date. The start date is + used in `leaderboard.jl` to adjust the seconds in the `OutputVar` to match between start + date in the simulation data. + + Units should be the same between the simulation and observational data. + +Next, add a key-value pair to the dictionary `mask_dict` whose key is the same short name +as before and the value is a function that takes in a `OutputVar` representing simulation +data and a `OutputVar` representing observational data and returns a masking function or +`nothing` if no masking function is needed. The masking function is used to correctly +normalize the global bias and global RMSE. See the example below where a mask is made using +the observational data. + +```julia +mask_dict["et"] = + (sim_var, obs_var) -> begin + return ClimaAnalysis.make_lonlat_mask( + # We do this to get a `OutputVar` with only two dimensions: + # longitude and latitude + ClimaAnalysis.slice( + obs_var, + time = ClimaAnalysis.times(obs_var) |> first, + ); + # Any values that are NaN should be 0.0 + set_to_val = isnan, + true_val = 0.0 + ) + end +``` + +Finally, add a key-value pair to the dictionary `compare_vars_biases_plot_extrema` whose +key is the same short name as before and the value is a tuple of floats which determine +the range of the bias plots. + +```julia +compare_vars_biases_plot_extrema = Dict( + "et" => (-0.00001, 0.00001), + "gpp" => (-8.0, 8.0), + "lwu" => (-40.0, 40.0), +) +``` diff --git a/experiments/long_runs/land_leaderboard.jl b/experiments/long_runs/land_leaderboard.jl new file mode 100644 index 0000000000..78a1c8e05c --- /dev/null +++ b/experiments/long_runs/land_leaderboard.jl @@ -0,0 +1,403 @@ +# # Global run of land model + +# The code sets up and runs the soil/canopy model for 6 hours on a spherical domain, +# using ERA5 data. In this simulation, we have +# turned lateral flow off because horizontal boundary conditions and the +# land/sea mask are not yet supported by ClimaCore. + +# Simulation Setup +# Number of spatial elements: 101 in horizontal, 15 in vertical +# Soil depth: 50 m +# Simulation duration: 365 d +# Timestep: 450 s +# Timestepper: ARS111 +# Fixed number of iterations: 3 +# Jacobian update: every new Newton iteration +# Atmos forcing update: every 3 hours +import SciMLBase +import ClimaComms +ClimaComms.@import_required_backends +import ClimaTimeSteppers as CTS +using ClimaCore +using ClimaUtilities.ClimaArtifacts + +import ClimaDiagnostics +import ClimaAnalysis +import ClimaAnalysis.Visualize as viz +import ClimaUtilities + +import ClimaUtilities.TimeVaryingInputs: + TimeVaryingInput, LinearInterpolation, PeriodicCalendar +import ClimaUtilities.ClimaArtifacts: @clima_artifact +import ClimaParams as CP + +using ClimaLand +using ClimaLand.Soil +using ClimaLand.Canopy +import ClimaLand +import ClimaLand.Parameters as LP + +using Statistics +using CairoMakie +import GeoMakie +using Dates +import NCDatasets + +const FT = Float64; +time_interpolation_method = LinearInterpolation(PeriodicCalendar()) +context = ClimaComms.context() +device = ClimaComms.device() +device_suffix = device isa ClimaComms.CPUSingleThreaded ? "cpu" : "gpu" +root_path = "land_longrun_$(device_suffix)" +diagnostics_outdir = joinpath(root_path, "global_diagnostics") +outdir = + ClimaUtilities.OutputPathGenerator.generate_output_path(diagnostics_outdir) + +function setup_prob(t0, tf, Δt; outdir = outdir, nelements = (101, 15)) + earth_param_set = LP.LandParameters(FT) + radius = FT(6378.1e3) + depth = FT(50) + domain = ClimaLand.Domains.SphericalShell(; + radius = radius, + depth = depth, + nelements = nelements, + npolynomial = 1, + dz_tuple = FT.((10.0, 0.05)), + ) + surface_space = domain.space.surface + subsurface_space = domain.space.subsurface + + start_date = DateTime(2021) + # Forcing data + era5_artifact_path = + ClimaLand.Artifacts.era5_land_forcing_data2021_folder_path(; context) + era5_ncdata_path = joinpath(era5_artifact_path, "era5_2021_0.9x1.25.nc") + atmos, radiation = ClimaLand.prescribed_forcing_era5( + era5_ncdata_path, + surface_space, + start_date, + earth_param_set, + FT, + ) + + spatially_varying_soil_params = + ClimaLand.default_spatially_varying_soil_parameters( + subsurface_space, + surface_space, + FT, + ) + (; + ν, + ν_ss_om, + ν_ss_quartz, + ν_ss_gravel, + hydrology_cm, + K_sat, + S_s, + θ_r, + PAR_albedo, + NIR_albedo, + f_max, + ) = spatially_varying_soil_params + soil_params = Soil.EnergyHydrologyParameters( + FT; + ν, + ν_ss_om, + ν_ss_quartz, + ν_ss_gravel, + hydrology_cm, + K_sat, + S_s, + θ_r, + PAR_albedo = PAR_albedo, + NIR_albedo = NIR_albedo, + ) + f_over = FT(3.28) # 1/m + R_sb = FT(1.484e-4 / 1000) # m/s + runoff_model = ClimaLand.Soil.Runoff.TOPMODELRunoff{FT}(; + f_over = f_over, + f_max = f_max, + R_sb = R_sb, + ) + + # Spatially varying canopy parameters from CLM + clm_parameters = ClimaLand.clm_canopy_parameters(surface_space) + (; + Ω, + rooting_depth, + is_c3, + Vcmax25, + g1, + G_Function, + α_PAR_leaf, + τ_PAR_leaf, + α_NIR_leaf, + τ_NIR_leaf, + ) = clm_parameters + + # Energy Balance model + ac_canopy = FT(2.5e3) + # Plant Hydraulics and general plant parameters + SAI = FT(0.0) # m2/m2 + f_root_to_shoot = FT(3.5) + RAI = FT(1.0) + K_sat_plant = FT(5e-9) # m/s # seems much too small? + ψ63 = FT(-4 / 0.0098) # / MPa to m, Holtzman's original parameter value is -4 MPa + Weibull_param = FT(4) # unitless, Holtzman's original c param value + a = FT(0.05 * 0.0098) # Holtzman's original parameter for the bulk modulus of elasticity + conductivity_model = + Canopy.PlantHydraulics.Weibull{FT}(K_sat_plant, ψ63, Weibull_param) + retention_model = Canopy.PlantHydraulics.LinearRetentionCurve{FT}(a) + plant_ν = FT(1.44e-4) + plant_S_s = FT(1e-2 * 0.0098) # m3/m3/MPa to m3/m3/m + n_stem = 0 + n_leaf = 1 + h_stem = FT(0.0) + h_leaf = FT(1.0) + zmax = FT(0.0) + h_canopy = h_stem + h_leaf + compartment_midpoints = + n_stem > 0 ? [h_stem / 2, h_stem + h_leaf / 2] : [h_leaf / 2] + compartment_surfaces = + n_stem > 0 ? [zmax, h_stem, h_canopy] : [zmax, h_leaf] + + z0_m = FT(0.13) * h_canopy + z0_b = FT(0.1) * z0_m + + + soilco2_ps = Soil.Biogeochemistry.SoilCO2ModelParameters(FT) + + soil_args = (domain = domain, parameters = soil_params) + soil_model_type = Soil.EnergyHydrology{FT} + + # Soil microbes model + soilco2_type = Soil.Biogeochemistry.SoilCO2Model{FT} + + # soil microbes args + Csom = ClimaLand.PrescribedSoilOrganicCarbon{FT}(TimeVaryingInput((t) -> 5)) + + # Set the soil CO2 BC to being atmospheric CO2 + soilco2_top_bc = Soil.Biogeochemistry.AtmosCO2StateBC() + soilco2_bot_bc = Soil.Biogeochemistry.SoilCO2FluxBC((p, t) -> 0.0) # no flux + soilco2_sources = (Soil.Biogeochemistry.MicrobeProduction{FT}(),) + + soilco2_boundary_conditions = + (; top = soilco2_top_bc, bottom = soilco2_bot_bc) + + soilco2_args = (; + boundary_conditions = soilco2_boundary_conditions, + sources = soilco2_sources, + domain = domain, + parameters = soilco2_ps, + ) + + # Now we set up the canopy model, which we set up by component: + # Component Types + canopy_component_types = (; + autotrophic_respiration = Canopy.AutotrophicRespirationModel{FT}, + radiative_transfer = Canopy.TwoStreamModel{FT}, + photosynthesis = Canopy.FarquharModel{FT}, + conductance = Canopy.MedlynConductanceModel{FT}, + hydraulics = Canopy.PlantHydraulicsModel{FT}, + energy = Canopy.BigLeafEnergyModel{FT}, + ) + # Individual Component arguments + # Set up autotrophic respiration + autotrophic_respiration_args = + (; parameters = Canopy.AutotrophicRespirationParameters(FT)) + # Set up radiative transfer + radiative_transfer_args = (; + parameters = Canopy.TwoStreamParameters( + FT; + Ω, + α_PAR_leaf, + τ_PAR_leaf, + α_NIR_leaf, + τ_NIR_leaf, + G_Function, + ) + ) + # Set up conductance + conductance_args = + (; parameters = Canopy.MedlynConductanceParameters(FT; g1)) + # Set up photosynthesis + photosynthesis_args = + (; parameters = Canopy.FarquharParameters(FT, is_c3; Vcmax25 = Vcmax25)) + # Set up plant hydraulics + + LAIfunction = ClimaLand.prescribed_lai_era5( + era5_ncdata_path, + surface_space, + start_date; + time_interpolation_method = time_interpolation_method, + ) + ai_parameterization = + Canopy.PrescribedSiteAreaIndex{FT}(LAIfunction, SAI, RAI) + + plant_hydraulics_ps = Canopy.PlantHydraulics.PlantHydraulicsParameters(; + ai_parameterization = ai_parameterization, + ν = plant_ν, + S_s = plant_S_s, + rooting_depth = rooting_depth, + conductivity_model = conductivity_model, + retention_model = retention_model, + ) + plant_hydraulics_args = ( + parameters = plant_hydraulics_ps, + n_stem = n_stem, + n_leaf = n_leaf, + compartment_midpoints = compartment_midpoints, + compartment_surfaces = compartment_surfaces, + ) + + energy_args = (parameters = Canopy.BigLeafEnergyParameters{FT}(ac_canopy),) + + # Canopy component args + canopy_component_args = (; + autotrophic_respiration = autotrophic_respiration_args, + radiative_transfer = radiative_transfer_args, + photosynthesis = photosynthesis_args, + conductance = conductance_args, + hydraulics = plant_hydraulics_args, + energy = energy_args, + ) + + # Other info needed + shared_params = Canopy.SharedCanopyParameters{FT, typeof(earth_param_set)}( + z0_m, + z0_b, + earth_param_set, + ) + + canopy_model_args = (; + parameters = shared_params, + domain = ClimaLand.obtain_surface_domain(domain), + ) + + # Integrated plant hydraulics and soil model + land_input = ( + atmos = atmos, + radiation = radiation, + runoff = runoff_model, + soil_organic_carbon = Csom, + ) + land = SoilCanopyModel{FT}(; + soilco2_type = soilco2_type, + soilco2_args = soilco2_args, + land_args = land_input, + soil_model_type = soil_model_type, + soil_args = soil_args, + canopy_component_types = canopy_component_types, + canopy_component_args = canopy_component_args, + canopy_model_args = canopy_model_args, + ) + + Y, p, cds = initialize(land) + + init_soil(ν, θ_r) = θ_r + (ν - θ_r) / 2 + Y.soil.ϑ_l .= init_soil.(ν, θ_r) + Y.soil.θ_i .= FT(0.0) + T = FT(276.85) + ρc_s = + Soil.volumetric_heat_capacity.( + Y.soil.ϑ_l, + Y.soil.θ_i, + soil_params.ρc_ds, + soil_params.earth_param_set, + ) + Y.soil.ρe_int .= + Soil.volumetric_internal_energy.( + Y.soil.θ_i, + ρc_s, + T, + soil_params.earth_param_set, + ) + Y.soilco2.C .= FT(0.000412) # set to atmospheric co2, mol co2 per mol air + Y.canopy.hydraulics.ϑ_l.:1 .= plant_ν + evaluate!(Y.canopy.energy.T, atmos.T, t0) + + set_initial_cache! = make_set_initial_cache(land) + exp_tendency! = make_exp_tendency(land) + imp_tendency! = ClimaLand.make_imp_tendency(land) + jacobian! = ClimaLand.make_jacobian(land) + set_initial_cache!(p, Y, t0) + + # set up jacobian info + jac_kwargs = + (; jac_prototype = ImplicitEquationJacobian(Y), Wfact = jacobian!) + + prob = SciMLBase.ODEProblem( + CTS.ClimaODEFunction( + T_exp! = exp_tendency!, + T_imp! = SciMLBase.ODEFunction(imp_tendency!; jac_kwargs...), + dss! = ClimaLand.dss!, + ), + Y, + (t0, tf), + p, + ) + + updateat = Array(t0:(3600 * 3):tf) + drivers = ClimaLand.get_drivers(land) + updatefunc = ClimaLand.make_update_drivers(drivers) + + # ClimaDiagnostics + + nc_writer = ClimaDiagnostics.Writers.NetCDFWriter( + subsurface_space, + outdir; + start_date, + ) + + diags = ClimaLand.default_diagnostics( + land, + start_date; + output_writer = nc_writer, + output_vars = :short, + average_period = :monthly, + ) + + diagnostic_handler = + ClimaDiagnostics.DiagnosticsHandler(diags, Y, p, t0; dt = Δt) + + diag_cb = ClimaDiagnostics.DiagnosticsCallback(diagnostic_handler) + + driver_cb = ClimaLand.DriverUpdateCallback(updateat, updatefunc) + return prob, SciMLBase.CallbackSet(driver_cb, diag_cb) +end + +function setup_and_solve_problem(; greet = false) + + t0 = 0.0 + tf = 60 * 60.0 * 24 * 365 + Δt = 450.0 + nelements = (101, 15) + if greet + @info "Run: Global Soil-Canopy Model" + @info "Resolution: $nelements" + @info "Timestep: $Δt s" + @info "Duration: $(tf - t0) s" + end + + prob, cb = setup_prob(t0, tf, Δt; nelements) + + # Define timestepper and ODE algorithm + stepper = CTS.ARS111() + ode_algo = CTS.IMEXAlgorithm( + stepper, + CTS.NewtonsMethod( + max_iters = 3, + update_j = CTS.UpdateEvery(CTS.NewNewtonIteration), + ), + ) + SciMLBase.solve(prob, ode_algo; dt = Δt, callback = cb, adaptive = false) + return nothing +end + +setup_and_solve_problem(; greet = true); + +# Make bias plots +include("leaderboard/leaderboard.jl") +diagnostics_folder_path = outdir +leaderboard_base_path = root_path +compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) diff --git a/experiments/long_runs/leaderboard/data_sources.jl b/experiments/long_runs/leaderboard/data_sources.jl new file mode 100644 index 0000000000..2d28ae4741 --- /dev/null +++ b/experiments/long_runs/leaderboard/data_sources.jl @@ -0,0 +1,163 @@ +import ClimaAnalysis + +""" + get_data_sources(diagnostics_folder_path) + +Get the necessary data and do the preprocessing for computing the leaderboard. Return +`sim_var_dict` which contains simulation data, `obs_var_dict` which contains observational +data, `mask_dict` which contains masks, `compare_vars_biases_plot_extrema` which contains +the ranges for the colormaps. + +To add a variable for the leaderboard, add a key-value pair to the dictionary +`sim_var_dict` whose key is the short name of the variable and the value is a function +that returns a `OutputVar` Any preprocessing is done in the function which includes unit +conversion and shifting the dates. + +Then, add a key-value pair to the dictionary `obs_var_dict` whose key is the same short +name as before and the value is a function that takes in a start date and returns a +`OutputVar`. Any preprocessing is done in the function. + +Next, add a key-value pair to the dictionary `mask_dict` whose key is the same short name +as before and the value is a function that takes in a `OutputVar` representing simulation +data and a `OutputVar` representing observational data and returns a masking function or +`nothing` if no masking function is needed. The masking function is used to correctly +normalize the global bias and global RMSE. + +Finally, add a key-value pair to the dictionary `compare_vars_biases_plot_extrema` whose +key is the same short name as before and the value is a tuple of floats which determine +the range of the bias plots. +""" +function get_data_sources(diagnostics_folder_path) + # For plotting bias plots + compare_vars_biases_plot_extrema = Dict( + "et" => (-0.00001, 0.00001), + "gpp" => (-0.000005, 0.000005), + "lwu" => (-40.0, 40.0), + ) + + # Dict for loading in simulation data + sim_var_dict = Dict{String, Any}() + + for short_name in ["et", "lwu"] + sim_var_dict[short_name] = + () -> begin + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = short_name, + ) + # Remove the line below later when start_date is added to the diagnostics + # TODO TODO TODO: Remove this line, this is fixed already + haskey(sim_var.attributes, "start_date") || ( + sim_var.attributes["start_date"] = "2012-01-01T00:00:00" + ) + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end + end + + sim_var_dict["gpp"] = + () -> begin + sim_var = get( + ClimaAnalysis.SimDir(diagnostics_folder_path), + short_name = "gpp", + ) + # Remove the line below later when start_date is added to the diagnostics + haskey(sim_var.attributes, "start_date") || + (sim_var.attributes["start_date"] = "2012-01-01T00:00:00") + sim_var = + ClimaAnalysis.shift_to_start_of_previous_month(sim_var) + return sim_var + end + + # Dict for loading in observational data + obs_var_dict = Dict{String, Any}() + obs_var_dict["et"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "evspsbl_MODIS_et_0.5x0.5.nc", + ), + "et", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + ClimaAnalysis.units(obs_var) == "kg/m2/s" && + (obs_var = ClimaAnalysis.set_units(obs_var, "kg m^-2 s^-1")) + obs_var = ClimaAnalysis.replace(obs_var, missing => NaN) + return obs_var + end + + obs_var_dict["gpp"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "gpp_FLUXCOM_gpp.nc", + ), + "gpp", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + ClimaAnalysis.dim_units(obs_var, "lon") == "degree" && + (obs_var.dim_attributes["lon"]["units"] = "degrees_east") + ClimaAnalysis.dim_units(obs_var, "lat") == "degree" && + (obs_var.dim_attributes["lat"]["units"] = "degrees_north") + # converting from `g C m-2 day-1` in obs to `mol CO2 m^-2 s^-1` in sim + obs_var = ClimaAnalysis.convert_units( + obs_var, + "mol CO2 m^-2 s^-1", + conversion_function = units -> (units / 86400.0) / 12.011, + ) + obs_var = ClimaAnalysis.replace(obs_var, missing => NaN) + return obs_var + end + + obs_var_dict["lwu"] = + (start_date) -> begin + obs_var = ClimaAnalysis.OutputVar( + ClimaLand.Artifacts.ilamb_dataset_path(; + context = "rlus_CERESed4.2_rlus.nc", + ), + "rlus", + new_start_date = start_date, + shift_by = Dates.firstdayofmonth, + ) + ClimaAnalysis.units(obs_var) == "W m-2" && + (obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2")) + return obs_var + end + + # Dict for loading in masks + mask_dict = Dict{String, Any}() + + mask_dict["et"] = + (sim_var, obs_var) -> begin + return ClimaAnalysis.make_lonlat_mask( + ClimaAnalysis.slice( + obs_var, + time = ClimaAnalysis.times(obs_var) |> first, + ); + set_to_val = isnan, + ) + end + + mask_dict["gpp"] = + (sim_var, obs_var) -> begin + return ClimaAnalysis.make_lonlat_mask( + ClimaAnalysis.slice( + obs_var, + time = ClimaAnalysis.times(obs_var) |> first, + ); + set_to_val = isnan, + ) + end + + mask_dict["lwu"] = (sim_var, obs_var) -> begin + return nothing + end + + return sim_var_dict, + obs_var_dict, + mask_dict, + compare_vars_biases_plot_extrema +end diff --git a/experiments/long_runs/leaderboard/leaderboard.jl b/experiments/long_runs/leaderboard/leaderboard.jl new file mode 100644 index 0000000000..84a9fb1b57 --- /dev/null +++ b/experiments/long_runs/leaderboard/leaderboard.jl @@ -0,0 +1,167 @@ +import ClimaAnalysis +import GeoMakie +import CairoMakie +import Dates + +include("data_sources.jl") + +""" + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + +Plot the biases against observations and simulation data. + +The argument `leaderboard_base_path` is the path to save the leaderboards and bias plots and +the argument `diagnostics_folder_path` is the path to the simulation data. +""" +function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) + sim_var_dict, obs_var_dict, mask_dict, compare_vars_biases_plot_extrema = + get_data_sources(diagnostics_folder_path) + @info "Error against observations" + + # Set up dict for storing simulation and observational data after processing + sim_obs_comparsion_dict = Dict() + + # Print dates for debugging + _, var_func = first(sim_var_dict) + var = var_func() + output_dates = + Dates.DateTime(var.attributes["start_date"]) .+ + Dates.Second.(ClimaAnalysis.times(var)) + @info "Working with dates:" + @info output_dates + + short_names = keys(sim_var_dict) + for short_name in short_names + @info short_name + # Simulation data + sim_var = sim_var_dict[short_name]() + + # Observational data + obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"]) + + # Remove first spin_up_months months from simulation + spin_up_months = 12 + spinup_cutoff = spin_up_months * 30 * 86400.0 + ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff && ( + sim_var = + ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff) + ) + + # Get 12 or less months of data + num_times = length(ClimaAnalysis.times(sim_var)) + num_times > 12 && ( + sim_var = ClimaAnalysis.window( + sim_var, + "time", + right = ClimaAnalysis.times(sim_var)[12], + ) + ) + + obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var) + + sim_obs_comparsion_dict[short_name] = (sim_var, obs_var) + end + + # Get the right number of months for plotting + _, sim_obs_tuple = first(sim_obs_comparsion_dict) + num_times = length(ClimaAnalysis.times(sim_obs_tuple[1])) + months = Dates.monthname.(1:num_times) + + # Plot monthly comparsions + for short_name in short_names + fig = CairoMakie.Figure(size = (650 * ceil(num_times / 2), 450 * 2)) + sim_var, obs_var = sim_obs_comparsion_dict[short_name] + mask = mask_dict[short_name](sim_var, obs_var) + times = ClimaAnalysis.times(sim_var) |> copy + times = vcat( + times, + Array{Union{Missing, eltype(times)}}(missing, 12 - num_times), + ) + times = reshape(times, (2, 6)) + for ((indices, t), month) in zip(pairs(times), months) + layout = fig[Tuple(indices)...] = CairoMakie.GridLayout() + ClimaAnalysis.Visualize.plot_bias_on_globe!( + layout, + ClimaAnalysis.slice(sim_var, time = t), + ClimaAnalysis.slice(obs_var, time = t), + cmap_extrema = compare_vars_biases_plot_extrema[short_name], + mask = mask, + ) + CairoMakie.Label( + layout[0, 1], + month, + tellwidth = false, + fontsize = 30, + ) + end + CairoMakie.save( + joinpath(leaderboard_base_path, "$(short_name)_bias_plot.png"), + fig, + ) + end + + # Plot month (x-axis) and global bias and global RMSE (y-axis) + fig = CairoMakie.Figure(size = (450 * length(keys(sim_var_dict)), 600)) + for (col, short_name) in enumerate(keys(sim_var_dict)) + sim_var, obs_var = sim_obs_comparsion_dict[short_name] + mask = mask_dict[short_name](sim_var, obs_var) + # Compute globas bias and global rmse + rmse_vec = [] + bias_vec = [] + times = ClimaAnalysis.times(sim_var) |> copy + for t in times + g_rmse = ClimaAnalysis.global_rmse( + ClimaAnalysis.slice(sim_var, time = t), + ClimaAnalysis.slice(obs_var, time = t), + mask = mask, + ) + g_bias = ClimaAnalysis.global_bias( + ClimaAnalysis.slice(sim_var, time = t), + ClimaAnalysis.slice(obs_var, time = t), + mask = mask, + ) + push!(rmse_vec, g_rmse) + push!(bias_vec, g_bias) + end + ax_rmse = CairoMakie.Axis( + fig[1, col], + title = "Global RMSE for $short_name", + xlabel = "Month", + ylabel = "Global RMSE ($(ClimaAnalysis.units(sim_var)))", + xticks = ( + 1:num_times, + Dates.monthname.(1:num_times) .|> x -> x[1:3], + ), + ) + CairoMakie.lines!(ax_rmse, 1:num_times |> collect, rmse_vec) + CairoMakie.scatter!(ax_rmse, 1:num_times |> collect, rmse_vec) + + ax_bias = CairoMakie.Axis( + fig[2, col], + title = "Global Bias for $short_name", + xlabel = "Month", + ylabel = "Global Bias ($(ClimaAnalysis.units(sim_var)))", + xticks = ( + 1:num_times, + Dates.monthname.(1:num_times) .|> x -> x[1:3], + ), + ) + CairoMakie.lines!(ax_bias, 1:num_times |> collect, bias_vec) + CairoMakie.scatter!(ax_bias, 1:num_times |> collect, bias_vec) + end + CairoMakie.save( + joinpath(leaderboard_base_path, "global_rmse_and_bias_graphs.png"), + fig, + ) +end + +if abspath(PROGRAM_FILE) == @__FILE__ + if length(ARGS) < 2 + error( + "Usage: julia leaderboard.jl ", + ) + end + leaderboard_base_path = ARGS[begin] + diagnostics_folder_path = ARGS[begin + 1] + compute_leaderboard(leaderboard_base_path, diagnostics_folder_path) +end