Merge branch 'jgfouca/cime/model_specific_logging' into master (PR #2556)

Add model-specific logging capability

Use this capability to restore the timing dumps in case_run.

[BFB]

* jgfouca/cime/model_specific_logging:
  Add model-specific logging capability
jgfouca committed Oct 29, 2018
2 parents 7b5f24d + 29b71ee commit ae06047
Showing 2 changed files with 29 additions and 25 deletions.
50 changes: 25 additions & 25 deletions cime/scripts/lib/CIME/case/case_run.py
@@ -3,7 +3,7 @@
"""
from CIME.XML.standard_module_setup import *
from CIME.utils import gzip_existing_file, new_lid, run_and_log_case_status
-from CIME.utils import run_sub_or_cmd, append_status, safe_copy
+from CIME.utils import run_sub_or_cmd, append_status, safe_copy, model_log
from CIME.get_timing import get_timing
from CIME.provenance import save_prerun_provenance, save_postrun_provenance

@@ -62,14 +62,14 @@ def _pre_run_check(case, lid, skip_pnl=False, da_cycle=0):
# This needs to be done everytime the LID changes in order for log files to be set up correctly
# The following also needs to be called in case a user changes a user_nl_xxx file OR an env_run.xml
# variable while the job is in the queue
-logger.debug("{} NAMELIST CREATION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} NAMELIST CREATION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
if skip_pnl:
case.create_namelists(component='cpl')
else:
logger.info("Generating namelists for {}".format(caseroot))
case.create_namelists()

-logger.debug("{} NAMELIST CREATION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} NAMELIST CREATION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

logger.info("-------------------------------------------------------------------------")
logger.info(" - Prestage required restarts into {}".format(rundir))
@@ -81,9 +81,9 @@ def _pre_run_check(case, lid, skip_pnl=False, da_cycle=0):
def _run_model_impl(case, lid, skip_pnl=False, da_cycle=0):
###############################################################################

-logger.debug("{} PRE_RUN_CHECK BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} PRE_RUN_CHECK BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
_pre_run_check(case, lid, skip_pnl=skip_pnl, da_cycle=da_cycle)
-logger.debug("{} PRE_RUN_CHECK HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} PRE_RUN_CHECK HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

model = case.get_value("MODEL")

@@ -113,14 +113,14 @@ def _run_model_impl(case, lid, skip_pnl=False, da_cycle=0):
while loop:
loop = False

-logger.debug("{} SAVE_PRERUN_PROVENANCE BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} SAVE_PRERUN_PROVENANCE BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
save_prerun_provenance(case)
-logger.debug("{} SAVE_PRERUN_PROVENANCE HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} SAVE_PRERUN_PROVENANCE HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

-logger.debug("{} MODEL EXECUTION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} MODEL EXECUTION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
run_func = lambda: run_cmd(cmd, from_dir=rundir)[0]
stat = run_and_log_case_status(run_func, "model execution", caseroot=case.get_value("CASEROOT"))
-logger.debug("{} MODEL EXECUTION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} MODEL EXECUTION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

model_logfile = os.path.join(rundir, model + ".log." + lid)
# Determine if failure was due to a failed node, if so, try to restart
@@ -159,9 +159,9 @@ def _run_model_impl(case, lid, skip_pnl=False, da_cycle=0):
# We failed and we're not restarting
expect(False, "RUN FAIL: Command '{}' failed\nSee log file for details: {}".format(cmd, model_logfile))

-logger.debug("{} POST_RUN_CHECK BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} POST_RUN_CHECK BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
_post_run_check(case, lid)
-logger.debug("{} POST_RUN_CHECK HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} POST_RUN_CHECK HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

return lid

@@ -265,7 +265,7 @@ def _do_data_assimilation(da_script, caseroot, cycle, lid, rundir):
###############################################################################
def case_run(self, skip_pnl=False, set_continue_run=False, submit_resubmits=False):
###############################################################################
-logger.debug("{} CASE.RUN BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} CASE.RUN BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
# Set up the run, run the model, do the postrun steps
prerun_script = self.get_value("PRERUN_SCRIPT")
postrun_script = self.get_value("POSTRUN_SCRIPT")
@@ -279,12 +279,12 @@ def case_run(self, skip_pnl=False, set_continue_run=False, submit_resubmits=False):
lid = new_lid()

if prerun_script:
-logger.debug("{} PRERUN_SCRIPT BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} PRERUN_SCRIPT BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
self.flush()
_do_external(prerun_script, self.get_value("CASEROOT"), self.get_value("RUNDIR"),
lid, prefix="prerun")
self.read_xml()
-logger.debug("{} PRERUN_SCRIPT HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} PRERUN_SCRIPT HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

for cycle in range(data_assimilation_cycles):
# After the first DA cycle, runs are restart runs
@@ -293,37 +293,37 @@ def case_run(self, skip_pnl=False, set_continue_run=False, submit_resubmits=False):
self.set_value("CONTINUE_RUN",
self.get_value("RESUBMIT_SETS_CONTINUE_RUN"))

-logger.debug("{} RUN_MODEL BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} RUN_MODEL BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
lid = _run_model(self, lid, skip_pnl, da_cycle=cycle)
-logger.debug("{} RUN_MODEL HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} RUN_MODEL HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

if self.get_value("CHECK_TIMING") or self.get_value("SAVE_TIMING"):
-logger.debug("{} GET_TIMING BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} GET_TIMING BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
get_timing(self, lid) # Run the getTiming script
-logger.debug("{} GET_TIMING HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} GET_TIMING HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

if data_assimilation:
-logger.debug("{} DO_DATA_ASSIMILATION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} DO_DATA_ASSIMILATION BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
self.flush()
_do_data_assimilation(data_assimilation_script, self.get_value("CASEROOT"), cycle, lid,
self.get_value("RUNDIR"))
self.read_xml()
-logger.debug("{} DO_DATA_ASSIMILATION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} DO_DATA_ASSIMILATION HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

_save_logs(self, lid) # Copy log files back to caseroot

-logger.debug("{} SAVE_POSTRUN_PROVENANCE BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} SAVE_POSTRUN_PROVENANCE BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
save_postrun_provenance(self)
-logger.debug("{} SAVE_POSTRUN_PROVENANCE HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} SAVE_POSTRUN_PROVENANCE HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

if postrun_script:
-logger.debug("{} POSTRUN_SCRIPT BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} POSTRUN_SCRIPT BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")))
self.flush()
_do_external(postrun_script, self.get_value("CASEROOT"), self.get_value("RUNDIR"),
lid, prefix="postrun")
self.read_xml()
_save_logs(self, lid)
-logger.debug("{} POSTRUN_SCRIPT HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} POSTRUN_SCRIPT HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

if set_continue_run:
self.set_value("CONTINUE_RUN",
@@ -333,6 +333,6 @@ def case_run(self, skip_pnl=False, set_continue_run=False, submit_resubmits=False):
if submit_resubmits:
_resubmit_check(self)

-logger.debug("{} CASE.RUN HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))
+model_log("e3sm", logger, "{} CASE.RUN HAS FINISHED".format(time.strftime("%Y-%m-%d %H:%M:%S")))

return True
4 changes: 4 additions & 0 deletions cime/scripts/lib/CIME/utils.py
@@ -1756,3 +1756,7 @@ def run_bld_cmd_ensure_logging(cmd, arg_logger, from_dir=None):

def get_batch_script_for_job(job):
return job if "st_archive" in job else "." + job

+def model_log(model, arg_logger, msg):
+    if get_model() == model:
+        arg_logger.info(msg)
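
The helper added above emits the message at INFO level only when the currently configured CIME model matches the name passed by the caller; for any other model the call is a silent no-op, which is how the E3SM-specific timing markers stay out of other models' logs. A minimal usage sketch, assuming CIME's scripts/lib directory is on the Python path (the logger name and message text here are illustrative, not from the commit):

import logging
from CIME.utils import model_log

logger = logging.getLogger(__name__)

# Printed only when get_model() returns "e3sm"; skipped for any other model.
model_log("e3sm", logger, "MODEL EXECUTION BEGINS HERE")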
