Skip to content

Commit

Permalink
Fix ProxyDatabase.to_df()
Browse files: browse the repository at this point in the history
  • Loading branch information
fzhu2e committed Aug 1, 2024
1 parent 833dd95 commit 5e1b0ec
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 12 deletions.
1 change: 1 addition & 0 deletions cfr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .ts import EnsTS
from .gcm import GCMCase, GCMCases
from . import utils
from . import psm

try:
from . import ml
Expand Down
22 changes: 16 additions & 6 deletions cfr/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,11 @@ def __init__(self, pid=None, time=None, value=None, lat=None, lon=None, elev=Non
self.ptype = ptype
self.tags = set() if tags is None else tags

self.dt = np.median(np.diff(time)) if time is not None else None
if time is not None and len(time) > 1:
self.dt = np.median(np.diff(time))
else:
self.dt = None

self.value_name = 'Proxy Value' if value_name is None else value_name
self.value_unit = value_unit
self.time_name = 'Time' if time_name is None else time_name
Expand Down Expand Up @@ -1155,7 +1159,7 @@ def fetch(self, name=None, **kwargs):

def from_df(self, df, pid_column='paleoData_pages2kID', lat_column='geo_meanLat', lon_column='geo_meanLon', elev_column='geo_meanElev',
time_column='year', value_column='paleoData_values', proxy_type_column='paleoData_proxy', archive_type_column='archiveType',
ptype_column='ptype', value_name_column='paleoData_variableName', value_unit_column='paleoData_units',
ptype_column='ptype', value_name_column='paleoData_variableName', value_unit_column='paleoData_units', R_column='R',
verbose=False):
''' Load database from a `pandas.DataFrame`. Note that in most cases, the column names have to be specified.
Expand Down Expand Up @@ -1212,11 +1216,15 @@ def from_df(self, df, pid_column='paleoData_pages2kID', lat_column='geo_meanLat'
# Optional per-record metadata: each field is read only when its own column is present in the row.
value_name=row[value_name_column] if value_name_column in row else None
# Guard on value_unit_column itself. The previous guard tested value_name_column, so the unit
# lookup was attempted or skipped based on the wrong column (presumably failing when only the
# name column exists, and dropping units when only the unit column exists -- verify against caller).
value_unit=row[value_unit_column] if value_unit_column in row else None


record = ProxyRecord(
pid=pid, lat=lat, lon=lon, elev=elev,
time=time, value=value, ptype=ptype,
value_name=value_name, value_unit=value_unit,
)
if R_column in row:
record.R = row[R_column]

records[pid] = record

# update the attributes
Expand Down Expand Up @@ -1774,8 +1782,8 @@ def get_clim(self, field, tag=None, verbose=False, load=True, **kwargs):
def to_df(self):
''' Convert the proxy database to a `pandas.DataFrame`.'''
df = pd.DataFrame(columns=['pid', 'lat', 'lon', 'elev', 'ptype', 'time', 'value'])
# df['time'] = df['time'].astype(object) # not necessary after pandas 1.5.2
# df['value'] = df['value'].astype(object) # not necessary after pandas 1.5.2
df['time'] = df['time'].astype(object)
df['value'] = df['value'].astype(object)

i = 0
for pid, pobj in self.records.items():
Expand All @@ -1784,8 +1792,10 @@ def to_df(self):
df.loc[i, 'lon'] = pobj.lon
df.loc[i, 'elev'] = pobj.elev
df.loc[i, 'ptype'] = pobj.ptype
df.loc[i, 'time'] = pobj.time
df.loc[i, 'value'] = pobj.value
df.at[i, 'time'] = np.array(pobj.time)
df.at[i, 'value'] = np.array(pobj.value)
if hasattr(pobj, 'R'):
df.loc[i, 'R'] = pobj.R
i += 1

return df
Expand Down
22 changes: 17 additions & 5 deletions cfr/psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def forward(self, vn='model.tas', no_noise=False):
value=value,
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -182,6 +183,7 @@ def forward(self, exog_name=None):
value=np.array(self.model.predict(exog=exog_dict).values),
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -322,6 +324,7 @@ def forward(self, exog1_name=None, exog2_name=None):
value=np.array(self.model.predict(exog=exog_dict).values),
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -706,6 +709,7 @@ def conv(sig, i):
value=ice_diffused[::-1],
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -829,6 +833,7 @@ def gammify(X, shape=1.5, mean=1, jitter=False, seed=None):
value=np.array(varve_res['varves'])[0],
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -865,6 +870,7 @@ def forward(self, b=10.553, a=None, seed=None):
value=SrCa,
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -1034,6 +1040,7 @@ def pseudocoral(sst, sss=None, d18O=None, species="default", lat=None, lon=None,
value=value,
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -1142,6 +1149,7 @@ def forward(self, **vsl_kwargs):
value=vsl_res['trw'],
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -1169,10 +1177,11 @@ def forward(self, seed=2333, type='SST', mode='analog', tolerance=1):

pp = ProxyRecord(
pid=self.pobj.pid,
time=self.pobj.clim[vn].da.time,
time=np.array(self.pobj.clim[vn].da.time.values),
value=pct[1], # median
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand All @@ -1199,10 +1208,11 @@ def forward(self, seed=2333):

pp = ProxyRecord(
pid=self.pobj.pid,
time=self.pobj.clim[vn].da.time,
time=np.array(self.pobj.clim[vn].da.time.values),
value=pct[1], # median
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand All @@ -1222,18 +1232,19 @@ def __init__(self, pobj=None, climate_required=['sst', 'd18Osw']):
self.pobj = pobj
self.climate_required = climate_required

def forward(self, seed=2333, species='all_sea'):
def forward(self, seed=2333, species='all'):
vn1 = f'model.{self.climate_required[0]}'
vn2 = f'model.{self.climate_required[1]}'
res, sigma = pb.d18Oc_forward(sst=self.pobj.clim[vn1].da.values, d18Osw=self.pobj.clim[vn2].da.values, seed=seed, species=species)
pct = np.percentile(res, q=[5, 50, 95], axis=1)

pp = ProxyRecord(
pid=self.pobj.pid,
time=self.pobj.clim[vn1].da.time,
time=np.array(self.pobj.clim[vn1].da.time.values),
value=pct[1], # median
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down Expand Up @@ -1263,10 +1274,11 @@ def forward(self, seed=2333, species='all', clean=1, pH=1, omega=1, sw=2, H=1, a

pp = ProxyRecord(
pid=self.pobj.pid,
time=self.pobj.clim[vn1].da.time,
time=np.array(self.pobj.clim[vn1].da.time.values),
value=np.exp(pct[1]), # median; the raw output is ln(Mg/Ca), so taking exp to recover Mg/Ca
lat=self.pobj.lat,
lon=self.pobj.lon,
elev=self.pobj.elev,
ptype=self.pobj.ptype,
value_name=self.pobj.value_name,
value_unit=self.pobj.value_unit,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='cfr', # required
version='2024.7.1',
version='2024.8.1',
description='cfr: a Python package for Climate Field Reconstruction',
long_description=long_description,
long_description_content_type='text/x-rst',
Expand Down

0 comments on commit 5e1b0ec

Please sign in to comment.