Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port cdat_info functions #1103

Merged
merged 24 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion conda-env/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ dependencies:
- numpy=1.23.5
- cartopy=0.22.0
- matplotlib=3.7.1
- cdat_info=8.2.1
- cdms2=3.1.5
- genutil=8.2.1
- cdutil=8.2.1
Expand Down
1 change: 0 additions & 1 deletion conda-env/readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ dependencies:
- numpy=1.23.5
- cartopy=0.21.1
- matplotlib=3.7.1
- cdat_info=8.2.1
- cdms2=3.1.5
- genutil=8.2.1
- cdutil=8.2.1
Expand Down
104 changes: 35 additions & 69 deletions doc/jupyter/Demo/Demo_0_download_data.ipynb

Large diffs are not rendered by default.

273 changes: 192 additions & 81 deletions doc/jupyter/Demo/Demo_6_ENSO.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/supporting-data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ A location where you want to store the demo data locally can be set: ::

After you have set the location for the demo_output you can download it by entering the following: ::

import cdat_info
cdat_info.download_sample_data_files("data_files.txt", demo_data_directory)
from pcmdi_metrics.io.base import download_sample_data_files
download_sample_data_files("data_files.txt", demo_data_directory)

The PMP demo data is used for multiple demos and is ~300MB. The best way to run these demos is via Jupyter notebooks. Running the initial demo that downloads the sample data also creates, on the fly, demo parameter files that use the demo_data_directory selected by the user.
217 changes: 194 additions & 23 deletions pcmdi_metrics/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,20 @@
import logging
import os
import re
import shlex
import sys
from collections import OrderedDict
from collections.abc import Mapping
from datetime import datetime
from subprocess import PIPE, Popen

import cdat_info
import cdms2
import cdp.cdp_io
import cdutil
import genutil
import MV2
import numpy
import requests
import xcdat
import xcdat as xc

Expand All @@ -33,6 +37,195 @@
except Exception:
basestring = str

# Command used to invoke conda: when CONDA_PYTHON_EXE is set, use the "conda"
# entry located in the same directory as that interpreter; otherwise fall back
# to the bare command name "conda".
CONDA = os.environ.get("CONDA_PYTHON_EXE", "")
CONDA = os.path.join(os.path.dirname(CONDA), "conda") if CONDA else "conda"


def _md5_of_file(filename, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of ``filename``, read in chunks."""
    digest = hashlib.md5()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def download_sample_data_files(files_md5, path):
    """Download the sample data files listed in a checksum manifest.

    The manifest is a text file whose first line is the root download URL and
    whose remaining lines each hold ``<md5> <relative file name>``. Blank
    lines are ignored. A file that already exists under ``path`` with the
    expected MD5 is not downloaded again; otherwise up to three download
    attempts are made, each verified against the expected MD5.

    Parameters
    ----------
    files_md5 : str
        Path to the manifest file.
    path : str
        Directory under which the files are stored; sub-directories named in
        the manifest are created as needed.

    Raises
    ------
    RuntimeError
        If ``files_md5`` does not exist, is a directory, or is empty.
    """
    if not os.path.exists(files_md5) or os.path.isdir(files_md5):
        raise RuntimeError("Invalid file type for list of files: %s" % files_md5)
    with open(files_md5) as manifest:
        samples = manifest.readlines()
    if not samples:
        raise RuntimeError("Empty list of files: %s" % files_md5)
    download_url_root = samples[0].strip()
    max_attempts = 3
    for sample in samples[1:]:
        if not sample.strip():
            # Tolerate blank lines (e.g. a trailing newline) in the manifest.
            continue
        good_md5, name = sample.split()
        local_filename = os.path.join(path, name)
        directory = os.path.dirname(local_filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        if os.path.exists(local_filename) and _md5_of_file(local_filename) == good_md5:
            # Already present and intact: nothing to download.
            continue
        for _ in range(max_attempts):
            print(
                "Downloading: '%s' from '%s' in: %s"
                % (name, download_url_root, local_filename)
            )
            # A fresh digest per attempt, so only the bytes downloaded in this
            # attempt are compared against the expected checksum.
            md5 = hashlib.md5()
            url = "%s/%s" % (download_url_root, name)
            with requests.get(url, stream=True) as r, open(
                local_filename, "wb"
            ) as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        md5.update(chunk)
            if md5.hexdigest() == good_md5:
                break
        else:
            # Best-effort: report the failure and carry on with the next file.
            print(
                "Failed to download '%s' with a matching MD5 after %d attempts"
                % (name, max_attempts)
            )
    return


def populate_prov(prov, cmd, pairs, sep=None, index=1, fill_missing=False):
    """Fill ``prov`` with values parsed from the output of a command.

    ``cmd`` is split with ``shlex.split`` and executed. Every output line that
    contains one of the marker strings in ``pairs`` is split on ``sep`` and the
    field at position ``index`` (stripped) is stored in ``prov`` under the
    corresponding key. When several lines match the same marker, the last one
    wins.

    Collection is best-effort: if the command cannot be started, or writes
    anything to stderr, ``prov`` is left untouched (``fill_missing`` is not
    applied in that case either).

    Parameters
    ----------
    prov : dict
        Mapping updated in place.
    cmd : str
        Command line to run.
    pairs : dict
        ``{key_in_prov: marker_substring}``.
    sep : str or None
        Separator passed to ``str.split`` (``None`` splits on whitespace).
    index : int
        Position of the wanted field in the split line.
    fill_missing : object
        Unless it is ``False``, the value stored for every key of ``pairs``
        that was not found in the command output.
    """
    try:
        p = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE)
    except (OSError, ValueError):
        # Command missing or command line not parsable.
        return
    out, stde = p.communicate()
    if stde:
        return
    for line in out.decode("utf-8", errors="replace").splitlines():
        for key, marker in pairs.items():
            if marker not in line:
                continue
            fields = line.split(sep)
            try:
                prov[key] = fields[index].strip()
            except IndexError:
                # The line mentions the marker but has no field at ``index``.
                continue
    if fill_missing is not False:
        for key in pairs:
            prov.setdefault(key, fill_missing)
    return


def _get_login_name():
    """Return the current user's login name, or ``"unknown"`` as a last resort."""
    try:
        return os.getlogin()
    except Exception:
        pass
    try:
        import pwd

        return pwd.getpwuid(os.getuid())[0]
    except Exception:
        return os.environ.get("LOGNAME", "unknown")


def _collect_session_history():
    """Return the commands of the interactive session, one per line.

    The IPython history is tried first; if it is unavailable, fails, or is
    empty, the readline history is appended instead (when readline exists).
    """
    session_history = ""
    try:
        import IPython

        profile_hist = IPython.core.history.HistoryAccessor()
        session = profile_hist.get_last_session_id()
        cursor = profile_hist.get_range(session)
        for session_id, line, cmd in cursor.fetchall():
            session_history += "{}\n".format(cmd)
        use_readline = session_history == ""
    except Exception:
        use_readline = True
    if use_readline:
        # Fallback but does not seem to always work
        try:
            import readline
        except ImportError:
            return session_history
        for i in range(readline.get_current_history_length()):
            session_history += "{}\n".format(readline.get_history_item(i + 1))
    return session_history


def generateProvenance(extra_pairs=None, history=True):
    """Generates provenance info for PMP

    Parameters
    ----------
    extra_pairs : dict, optional
        Additional packages to look up in the ``conda list`` output, in the
        format ``{"name_in_provenance_list": "python_package"}``.
    history : bool
        When true, also record the interactive session history under
        ``"history"`` and, if it can be read, the source of the running
        script under ``"script"``.

    Returns
    -------
    OrderedDict
        Provenance record with the keys ``platform``, ``userId``,
        ``osAccess``, ``commandLine``, ``date``, ``conda``, ``packages`` and
        ``openGL`` (plus ``script``/``history`` when requested). Information
        coming from external commands (conda, glxinfo) is best-effort and is
        simply missing when the command is unavailable.
    """
    if extra_pairs is None:
        extra_pairs = {}
    # Quote the conda path so that it survives shlex.split even if it
    # contains spaces.
    conda_cmd = shlex.quote(CONDA)

    prov = OrderedDict()
    platform = os.uname()
    platfrm = OrderedDict()
    platfrm["OS"] = platform[0]
    platfrm["Version"] = platform[2]
    platfrm["Name"] = platform[1]
    prov["platform"] = platfrm
    prov["userId"] = _get_login_name()
    prov["osAccess"] = os.access("/", os.W_OK) and os.access("/", os.R_OK)
    prov["commandLine"] = " ".join(sys.argv)
    prov["date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    prov["conda"] = OrderedDict()
    pairs = {
        "Platform": "platform ",
        "Version": "conda version ",
        "IsPrivate": "conda is private ",
        "envVersion": "conda-env version ",
        "buildVersion": "conda-build version ",
        "PythonVersion": "python version ",
        "RootEnvironment": "root environment ",
        "DefaultEnvironment": "default environment ",
    }
    populate_prov(prov["conda"], conda_cmd + " info", pairs, sep=":", index=-1)
    pairs = {
        "cdp": "cdp ",
        "cdat_info": "cdat_info ",
        "cdms": "cdms2 ",
        "cdtime": "cdtime ",
        "cdutil": "cdutil ",
        "esmf": "esmf ",
        "esmpy": "esmpy ",
        "matplotlib": "matplotlib-base ",
        "numpy": "numpy ",
        "python": "python ",
        "scipy": "scipy ",
        "xcdat": "xcdat ",
        "xarray": "xarray ",
    }
    # Actual environment used
    try:
        p = Popen(shlex.split(conda_cmd + " env export"), stdout=PIPE, stderr=PIPE)
    except (OSError, ValueError):
        # conda is not available: keep the key, with an empty export.
        prov["conda"]["yaml"] = ""
    else:
        o, e = p.communicate()
        prov["conda"]["yaml"] = o.decode("utf-8", errors="replace")
    prov["packages"] = OrderedDict()
    populate_prov(prov["packages"], conda_cmd + " list", pairs, fill_missing=None)
    populate_prov(
        prov["packages"], conda_cmd + " list", extra_pairs, fill_missing=None
    )
    # Trying to capture glxinfo
    pairs = {
        "vendor": "OpenGL vendor string",
        "renderer": "OpenGL renderer string",
        "version": "OpenGL version string",
        "shading language version": "OpenGL shading language version string",
    }
    prov["openGL"] = OrderedDict()
    populate_prov(prov["openGL"], "glxinfo", pairs, sep=":", index=-1)
    prov["openGL"]["GLX"] = {"server": OrderedDict(), "client": OrderedDict()}
    pairs = {
        "version": "GLX version",
    }
    populate_prov(prov["openGL"]["GLX"], "glxinfo", pairs, sep=":", index=-1)
    pairs = {
        "vendor": "server glx vendor string",
        "version": "server glx version string",
    }
    populate_prov(prov["openGL"]["GLX"]["server"], "glxinfo", pairs, sep=":", index=-1)
    pairs = {
        "vendor": "client glx vendor string",
        "version": "client glx version string",
    }
    populate_prov(prov["openGL"]["GLX"]["client"], "glxinfo", pairs, sep=":", index=-1)

    prov["packages"]["PMP"] = pcmdi_metrics.version.__git_tag_describe__
    prov["packages"][
        "PMPObs"
    ] = "See 'References' key below, for detailed obs provenance information."

    # Now the history if requested
    if history:
        session_history = _collect_session_history()
        try:
            import __main__

            with open(__main__.__file__) as f:
                prov["script"] = f.read()
        except (AttributeError, TypeError, OSError, ValueError):
            # No script file (e.g. interactive session) or it cannot be read.
            pass
        prov["history"] = session_history
    return prov


# Convert cdms MVs to json
def MV2Json(data, dic={}, struct=None):
Expand Down Expand Up @@ -82,21 +275,6 @@ def update_dict(d, u):
return d


def generateProvenance():
    """Return the cdat_info provenance record extended with PMP entries.

    Asks ``cdat_info.generateProvenance`` to also look up matplotlib, scipy,
    xcdat and xarray, then adds the ``PMP`` and ``PMPObs`` entries to the
    ``packages`` section of the result.
    """
    requested_packages = dict(
        [
            ("matplotlib", "matplotlib "),
            ("scipy", "scipy"),
            ("xcdat", "xcdat"),
            ("xarray", "xarray"),
        ]
    )
    prov = cdat_info.generateProvenance(extra_pairs=requested_packages)
    packages = prov["packages"]
    packages["PMP"] = pcmdi_metrics.version.__git_tag_describe__
    packages["PMPObs"] = (
        "See 'References' key below, for detailed obs provenance information."
    )
    return prov


def sort_human(input_list):
lst = copy.copy(input_list)

Expand Down Expand Up @@ -235,13 +413,6 @@ def write(
f.close()

elif self.type == "nc":
"""
f = cdms2.open(file_name, "w")
f.write(data, *args, **kwargs)
f.metrics_git_sha1 = pcmdi_metrics.__git_sha1__
f.uvcdat_version = cdat_info.get_version()
f.close()
"""
data.to_netcdf(file_name)

else:
Expand Down
4 changes: 2 additions & 2 deletions pcmdi_metrics/misc/scripts/get_pmp_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import os
import tempfile

import cdat_info
import requests

from pcmdi_metrics.io.base import download_sample_data_files
from pcmdi_metrics.mean_climate.lib.pmp_parser import PMPParser


Expand Down Expand Up @@ -60,4 +60,4 @@ def download_file(download_url_root, name, local_filename):
header = f.readline().strip()
version = header.split("_")[-1]
pathout = os.path.join(p.output_path, version)
cdat_info.download_sample_data_files(file, path=pathout)
download_sample_data_files(file, pathout)
Loading