# -*- coding: utf-8 -*-
# Copyright (C) 2019 Christoph G. Hoffmann. All rights reserved.
# This file is part of mjoindices
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Contact: christoph.hoffmann@uni-greifswald.de
"""
This module provides basic functionality to handle OLR data, which is the basic input for the OMI calculation.
"""
from pathlib import Path
import numpy as np
import scipy
import scipy.interpolate
from matplotlib.figure import Figure
import scipy.io
import netCDF4 as netcdf4
import matplotlib.pyplot as plt
import mjoindices.tools as tools
[docs]class OLRData:
"""
This class serves as a container for spatially distributed and temporally resolved OLR data.
A filled object of this class has to be provided by the user in order to start the OMI calculation.
:param olr: The OLR data as a 3-dim array. The three dimensions correspond to time, latitude, and longitude, in this
order.
:param time: The temporal grid as 1-dim array of :class:`numpy.datetime64` dates.
:param lat: The latitude grid as 1-dim array.
:param long: The longitude grid as 1-dim array.
"""
def __init__(self, olr: np.ndarray, time: np.ndarray, lat: np.ndarray, long: np.ndarray) -> None:
"""
Initialization of basic variables.
"""
if olr.shape[0] != time.size:
raise ValueError('Length of time grid does not fit to first dimension of OLR data cube')
if olr.shape[1] != lat.size:
raise ValueError('Length of lat grid does not fit to second dimension of OLR data cube')
if olr.shape[2] != long.size:
raise ValueError('Length of long grid does not fit to third dimension of OLR data cube')
self._olr = olr.copy()
self._time = time.copy()
self._lat = lat.copy()
self._long = long.copy()
@property
def olr(self):
"""
The OLR data as a 3-dim array. The three dimensions correspond to time, latitude, and longitude, in this
order.
"""
return self._olr
@property
def time(self):
"""
The temporal grid as 1-dim array of :class:`numpy.datetime64` dates.
"""
return self._time
@property
def lat(self):
"""
The latitude grid as 1-dim array.
"""
return self._lat
@property
def long(self):
"""
The longitude grid as 1-dim array.
"""
return self._long
def __eq__(self, other: "OLRData") -> bool:
"""
Override the default Equals behavior
"""
return (np.all(self.lat == other.lat)
and np.all(self.long == other.long)
and np.all(self.time == other.time)
and np.all(self.olr == other.olr))
[docs] def close(self, other: "OLRData") -> bool:
"""
Checks equality of two :py:class:`OLRData` objects, but allows for numerical tolerances.
:param other: The object to compare with.
:return: Equality of all members considering the default tolerances of :py:func:`numpy.allclose`
"""
return (np.allclose(self.lat, other.lat)
and np.allclose(self.long, other.long)
and np.allclose(self.time.astype("float"), other.time.astype("float")) # allclose does not work with datetime64
and np.allclose(self.olr, other.olr))
[docs] def get_olr_for_date(self, date: np.datetime64) -> np.ndarray:
"""
Returns the spatially distributed OLR map for a particular date.
:param date: The date, which has to be exactly matched by one of the dates in the OLR time grid.
:return: The excerpt of the OLR data as a 2-dim array. The two dimensions correspond to
latitude and longitude, in this order. Returns ``None`` if the date is not contained in the OLR time series.
"""
cand = self.time == date
if not np.all(cand == False): # noqa: E712
return np.squeeze(self.olr[cand, :, :])
else:
return None
[docs] def save_to_npzfile(self, filename: Path) -> None:
"""
Saves the data arrays contained in the OLRData object to a numpy file.
:param filename: The full filename.
"""
np.savez(filename, olr=self.olr, time=self.time, lat=self.lat, long=self.long)
[docs]def interpolate_spatial_grid_to_original(olr: OLRData) -> OLRData:
"""
Convenience function that interpolates the OLR data in an :class:`OLRData` object spatially onto the spatial grid,
which was used for the original OMI calculation by :ref:`refKiladis2014`.
This original grid has the following properties:
* Latitude: 2.5 deg-sampling in the tropics from -20 to 20 deg (20 S to 20 N).
* Longitude: Whole globe with 2.5 deg-sampling.
:param olr: The OLR data
:return: A new :class:`OLRData` object with the interpolated data.
"""
# FIXME Combine with definition in empirical_or....py
orig_lat = np.arange(-20., 20.1, 2.5)
orig_long = np.arange(0., 359.9, 2.5)
return interpolate_spatial_grid(olr, orig_lat, orig_long)
[docs]def interpolate_spatial_grid(olr: OLRData, target_lat: np.ndarray, target_long: np.ndarray) -> OLRData:
"""
Interpolates the OLR data linearly onto the given grids.
No extrapolation will be done. Instead, a :py:class:`ValueError` is raised if the data does not cover the target
grid.
Note that no sophisticated resampling is provided here. If some kind of averaging, etc., is needed, it should
be performed by the user before injecting the data into the OMI calculation.
:param olr: The OLR data to resample.
:param target_lat: The new latitude grid.
:param target_long: The new longitude grid.
:return: A new :class:`OLRData` object containing the resampled OLR data.
"""
no_days = olr.time.size
olr_interpol = np.empty((no_days, target_lat.size, target_long.size))
for idx in range(0, no_days):
f = scipy.interpolate.interp2d(olr.long, olr.lat, np.squeeze(olr.olr[idx, :, :]), kind='linear', bounds_error=True)
olr_interpol[idx, :, :] = f(target_long, target_lat)
return OLRData(olr_interpol, olr.time, target_lat, target_long)
[docs]def restrict_time_coverage(olr: OLRData, start: np.datetime64, stop: np.datetime64) -> OLRData:
"""
Cuts the OLR time series at the given dates (given dates are included).
This is useful when the OLR data should be restricted for the EOF calculation, which is based on a subset.
Of course, it can also be used to limit the PC calculation to a specific period.
Note that a temporal resampling method is not provided here, since the possible resampling methods,
which the users might want to apply, are too diverse. Hence, it is assumed that the temporal spacing is already
correct (daily averages recommended) and only a restriction of the period is needed before calculation.
:param olr: The OLR data to restrict.
:param start: The beginning of the wanted period of OLR data (included).
:param stop: The ending of the wanted period (included).
:return: A new :class:`OLRData` object with restricted temporal coverage.
:raises: :py:class:`ValueError` if no OLR Data is found for the specified period
"""
window_inds = (olr.time >= start) & (olr.time <= stop)
if np.all(window_inds == False): # noqa: E712
raise ValueError("No OLR data within specified period found. Data covers the period from %s to %s."
% (str(olr.time[0]), str(olr.time[-1])))
else:
return OLRData(olr.olr[window_inds, :, :], olr.time[window_inds], olr.lat, olr.long)
[docs]def remove_leap_years(olr: OLRData) -> OLRData:
"""
Removes any leap days (any instances of February 29) and returns an :class:`OLRData` object
with the remaining OLR data
:param olr: The OLR data to restrict.
:return: A new :class:`OLRData` object with no leap days
"""
window_inds = [(i.astype(object).month != 2) | (i.astype(object).day != 29) for i in olr.time]
return OLRData(olr.olr[window_inds, :, :], olr.time[window_inds], olr.lat, olr.long)
[docs]def load_noaa_interpolated_olr(filename: Path, use_xarray: bool = False) -> OLRData:
"""
Loads the standard OLR data product provided by NOAA in NetCDF3 format.
This is mainly used to load the OLR files originally used for the OMI calculation some years ago.
ATTENTION: Note that the file format was changed by NOAA from NetCDF3 to NetCDF4 sometime
between the years 2019 and 2021. If you are using a recent download of the data and experience problems
with this loader method, you should use :py:func:`load_noaa_interpolated_olr_netcdf4` instead.
The original OLR data file is contained in the reference data package found at: https://doi.org/10.5281/zenodo.3746562
The current dataset can be obtained from
https://www.psl.noaa.gov/thredds/catalog/Datasets/interp_OLR/catalog.html?dataset=Datasets/interp_OLR/olr.day.mean.nc
A description is found at
https://psl.noaa.gov/data/gridded/data.olrcdr.interp.html
:param filename: Full filename of a local copy of OLR data file.
:param use_xarray: Option to use xarray instead of SciPy netcdf for faster loading of data and timestamps. Note that OMI values
based on this parameter have not been validated. The respective unit tests currently fail when this option is activated,
however, this is probably only caused by very small numerical differences, which are irrelevant for the actual use.
Still make sure that you trust the values when activating the option. A further advantage of this option is that is works for
NetCDF3 and NetCDF4 files, hence for the older and the newer NOAA OLR datafiles.
:return: The OLR data.
"""
# ToDo: Validate the complete OMI calculation chain with OLR data loaded using the xarray option (Quick tests showed
# that the OMI values are probably essentially the same but numerical differeces lead to failiures of the integration tests).
# If the values are ok, make the xarray option the default (which also means that Python 3.8 is at least needed) and remove the
# special function for NetCDF 4 below.
if use_xarray:
import xarray as xr
f = xr.open_dataset(filename)
lat = f.lat.data.copy()
lon = f.lon.data.copy()
olr = f.olr.data.copy()
time = np.array(f.olr.time.values, dtype='datetime64[D]')
else:
f = scipy.io.netcdf_file(str(filename), 'r')
lat = f.variables['lat'].data.copy()
lon = f.variables['lon'].data.copy()
# scaling and offset as given in meta data of nc file
olr = f.variables['olr'].data.copy() / 100. + 327.65
hours_since1800 = f.variables['time'].data.copy()
f.close()
temptime = []
for item in hours_since1800:
delta = np.timedelta64(int(item / 24), 'D')
day = np.datetime64('1800-01-01') + delta
temptime.append(day)
time = np.array(temptime, dtype=np.datetime64)
result = OLRData(np.squeeze(olr), time, lat, lon)
return result
[docs]def load_noaa_interpolated_olr_netcdf4(filename: Path, use_xarray: bool = False) -> OLRData:
"""
Loads the standard OLR data product provided by NOAA in NetCDF4 format.
ATTENTION: Whereas NetCDF4 seems to be the format of the recent NOAA OLR data files, the files originally used some
years ago were saved as NetCDF3. So, if you are going to load the original files for reference purposes, you should
use the loader function :py:func:`load_noaa_interpolated_olr` instead.
The dataset can be obtained from
https://www.psl.noaa.gov/thredds/catalog/Datasets/interp_OLR/catalog.html?dataset=Datasets/interp_OLR/olr.day.mean.nc
A description is found at
https://psl.noaa.gov/data/gridded/data.olrcdr.interp.html
:param filename: Full filename of a local copy of OLR data file.
:param use_xarray: Option to use xarray instead of NetCDF4 for faster loading of data and timestamps. If you consider
activating this parameter, you should call instead :py:func:`load_noaa_interpolated_olr`, since it works for NetCDF
3 and 4. Later on, when the xarray option is established, the present function particularly for NetCDF4 will be removed.
Please also take into account the warnings in the docs of :py:func:`load_noaa_interpolated_olr`.
:return: The OLR data.
"""
if use_xarray:
result = load_noaa_interpolated_olr(filename=filename, use_xarray=True)
else:
f = netcdf4.Dataset(filename, "r")
lat = f.variables['lat'][:].data.copy()
lon = f.variables['lon'][:].data.copy()
olr = f.variables['olr'][:].data.copy()
hours_since1800 = f.variables['time'][:].data.copy()
f.close()
temptime = []
for item in hours_since1800:
delta = np.timedelta64(int(item / 24), 'D')
day = np.datetime64('1800-01-01') + delta
temptime.append(day)
time = np.array(temptime, dtype=np.datetime64)
result = OLRData(np.squeeze(olr), time, lat, lon)
return result
[docs]def restore_from_npzfile(filename: Path) -> OLRData:
"""
Loads an :py:class:`OLRData` object from a numpy file, which has been saved with the function
:py:func:`mjoindices.olr_handling.OLRData.save_to_npzfile`
:param filename: The filename to the .npz file.
:return: The OLR data.
"""
with np.load(filename) as data:
olr = data["olr"]
time = data["time"]
lat = data["lat"]
long = data["long"]
return OLRData(olr, time, lat, long)
[docs]def plot_olr_map_for_date(olr: OLRData, date: np.datetime64) -> Figure:
"""
Plots a map pf the OLR data for a specific date.
:param olr: The complete OLR data.
:param date: The date for which the OLR data should be plotted
(has to be exactly matched by a date of the OLR time grid).
:return: The handle to the figure.
"""
# TODO: Plot underlying map
mapdata = olr.get_olr_for_date(date)
if mapdata is not None:
fig, axs = plt.subplots(1, 1, num="plot_olr_map_for_date", clear=True,
figsize=(10, 5), dpi=150, sharex=True, sharey=True)
plt.subplots_adjust(wspace=0.35, hspace=0.35)
ax = axs
c = ax.contourf(olr.long, olr.lat, mapdata)
fig.colorbar(c, ax=ax, label="OLR [W/m²]")
ax.set_title("OLR")
ax.set_ylabel("Latitude [°]")
ax.set_xlabel("Longitude [°]")
else:
raise ValueError("No OLR data found for given date.")
return fig