Source code for gldas.interface

import warnings
import numpy as np
import os

try:
    import pygrib
except ImportError:
    warnings.warn(
        "pygrib could not be imported, reading GLDAS Noah v1 grib files "
        "will not work."
    )

from pygeobase.io_base import ImageBase, MultiTemporalImageBase
from pygeobase.object_base import Image
from pynetcf.time_series import GriddedNcOrthoMultiTs

from datetime import timedelta

from gldas.grid import GLDAS025Cellgrid
from netCDF4 import Dataset
from pygeogrids.netcdf import load_grid
from gldas.utils import deprecated


class GLDAS_Noah_v2_025Img(ImageBase):
    """
    Class for reading one GLDAS Noah v2.0/v2.1 nc file in 0.25 deg grid.
    """

    def __init__(
        self,
        filename,
        mode="r",
        parameter="SoilMoi0_10cm_inst",
        subgrid=None,
        array_1D=False,
    ):
        """
        Parameters
        ----------
        filename: str
            filename of the GLDAS nc file
        mode: string, optional
            mode of opening the file, only 'r' is implemented at the moment
        parameter : string or list, optional
            one or list of parameters to read, see GLDAS v2.1 documentation
            for more information (default: 'SoilMoi0_10cm_inst').
        subgrid : Cell Grid
            Subgrid of the global GLDAS Grid to use for reading image data
            (e.g. only land points)
        array_1D: boolean, optional
            if set then the data is read into 1D arrays.
            Needed for some legacy code.
        """
        super(GLDAS_Noah_v2_025Img, self).__init__(filename, mode=mode)

        if not isinstance(parameter, list):
            parameter = [parameter]
        self.parameters = parameter
        # fill values for the southernmost 120 rows (south of 60 deg S) that
        # are not contained in the GLDAS files
        self.fill_values = np.repeat(9999.0, 1440 * 120)
        self.grid = GLDAS025Cellgrid() if not subgrid else subgrid
        self.array_1D = array_1D
    def read(self, timestamp=None):
        # Returns the selected parameters for a GLDAS image and the
        # according metadata
        return_img = {}
        return_metadata = {}

        try:
            dataset = Dataset(self.filename)
        except IOError:
            raise IOError(f"Error opening file {self.filename}")

        param_names = []
        for parameter in self.parameters:
            param_names.append(parameter)

        for parameter, variable in dataset.variables.items():
            if parameter in param_names:
                param_metadata = {}
                for attrname in variable.ncattrs():
                    if attrname in ["long_name", "units"]:
                        param_metadata.update(
                            {str(attrname): getattr(variable, attrname)}
                        )

                param_data = dataset.variables[parameter][:]
                np.ma.set_fill_value(param_data, 9999)
                param_data = np.concatenate(
                    (
                        self.fill_values,
                        np.ma.getdata(param_data.filled()).flatten(),
                    )
                )

                return_img.update(
                    {str(parameter): param_data[self.grid.activegpis]}
                )
                return_metadata.update({str(parameter): param_metadata})

                # Check for corrupt files
                try:
                    return_img[parameter]
                except KeyError:
                    path, thefile = os.path.split(self.filename)
                    print(
                        "%s in %s is corrupt - filling "
                        "image with NaN values" % (parameter, thefile)
                    )
                    return_img[parameter] = np.empty(self.grid.n_gpi)
                    return_img[parameter].fill(np.nan)
                    return_metadata.setdefault(
                        "corrupt_parameters", []
                    ).append(parameter)

        dataset.close()

        if self.array_1D:
            return Image(
                self.grid.activearrlon,
                self.grid.activearrlat,
                return_img,
                return_metadata,
                timestamp,
            )
        else:
            for key in return_img:
                return_img[key] = np.flipud(
                    return_img[key].reshape((720, 1440))
                )

            return Image(
                np.flipud(self.grid.activearrlon.reshape((720, 1440))),
                np.flipud(self.grid.activearrlat.reshape((720, 1440))),
                return_img,
                return_metadata,
                timestamp,
            )
    def write(self, data):
        raise NotImplementedError()

    def flush(self):
        pass

    def close(self):
        pass
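# Usage sketch (illustration, not part of the original module): reading one
# GLDAS Noah v2 netCDF image with the class above. The file path is a
# hypothetical placeholder; read() returns a pygeobase Image object whose
# .data dict holds one array per requested parameter.
def _example_read_v2_image():
    img_reader = GLDAS_Noah_v2_025Img(
        "/data/GLDAS_v21/2015/001/GLDAS_NOAH025_3H.A20150101.0000.021.nc4",
        parameter="SoilMoi0_10cm_inst",
    )
    image = img_reader.read()
    # with array_1D=False (default) the data comes back as a (720, 1440) grid
    soil_moisture = image.data["SoilMoi0_10cm_inst"]
    img_reader.close()
    return soil_moisture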
class GLDAS_Noah_v21_025Img(GLDAS_Noah_v2_025Img):
    def __init__(
        self,
        filename,
        mode="r",
        parameter="SoilMoi0_10cm_inst",
        subgrid=None,
        array_1D=False,
    ):
        warnings.warn(
            "GLDAS_Noah_v21_025Img is outdated and replaced by the general "
            "GLDAS_Noah_v2_025Img class to read gldas v2.0 and v2.1 "
            "0.25 DEG netcdf files. "
            "The old class will be removed soon.",
            category=DeprecationWarning,
        )
        super(GLDAS_Noah_v21_025Img, self).__init__(
            filename=filename,
            mode=mode,
            parameter=parameter,
            subgrid=subgrid,
            array_1D=array_1D,
        )
class GLDAS_Noah_v1_025Img(ImageBase):
    """
    Class for reading one GLDAS Noah v1 grib file in 0.25 deg grid.

    Parameters
    ----------
    filename: string
        filename of the GLDAS grib file
    mode: string, optional
        mode of opening the file, only 'r' is implemented at the moment
    parameter : string or list, optional
        one or list of ['001', '011', '032', '051', '057', '065', '071',
        '085_L1', '085_L2', '085_L3', '085_L4', '086_L1', '086_L2',
        '086_L3', '086_L4', '099', '111', '112', '121', '122', '131',
        '132', '138', '155', '204', '205', '234', '235']
        parameters to read, see GLDAS documentation for more information
        Default : '086_L1'
    subgrid : Cell Grid
        Subgrid of the global GLDAS Grid to use for reading image data
        (e.g. only land points)
    array_1D: boolean, optional
        if set then the data is read into 1D arrays.
        Needed for some legacy code.
    """

    @deprecated(message="GLDAS Noah v1 data is deprecated, v2 should be used.")
    def __init__(
        self,
        filename,
        mode="r",
        parameter="086_L1",
        subgrid=None,
        array_1D=False,
    ):
        super(GLDAS_Noah_v1_025Img, self).__init__(filename, mode=mode)

        if not isinstance(parameter, list):
            parameter = [parameter]
        self.parameters = parameter
        self.fill_values = np.repeat(9999.0, 1440 * 120)
        self.grid = subgrid if subgrid else GLDAS025Cellgrid()
        self.array_1D = array_1D
    def read(self, timestamp=None):
        return_img = {}
        return_metadata = {}
        # layer counters for the multi-layer soil parameters
        layers = {"085": 1, "086": 1}

        try:
            grbs = pygrib.open(self.filename)
        except IOError as e:
            print(e)
            print(" ".join([self.filename, "can not be opened"]))
            raise e

        ids = []
        for parameter in self.parameters:
            ids.append(int(parameter.split("_")[0]))
        parameter_ids = np.unique(np.array(ids))

        for message in grbs:
            if message["indicatorOfParameter"] in parameter_ids:
                parameter_id = "{:03d}".format(
                    message["indicatorOfParameter"]
                )
                # read metadata in any case
                param_metadata = {}
                param_metadata["units"] = message["units"]
                param_metadata["long_name"] = message["parameterName"]

                if parameter_id in layers.keys():
                    parameter = "_".join(
                        (parameter_id, "L" + str(layers[parameter_id]))
                    )
                    if parameter in self.parameters:
                        param_data = np.concatenate(
                            (
                                self.fill_values,
                                np.ma.getdata(message["values"]).flatten(),
                            )
                        )
                        return_img[parameter] = param_data[
                            self.grid.activegpis
                        ]
                        return_metadata[parameter] = param_metadata
                    layers[parameter_id] += 1
                else:
                    parameter = parameter_id
                    param_data = np.concatenate(
                        (
                            self.fill_values,
                            np.ma.getdata(message["values"]).flatten(),
                        )
                    )
                    return_img[parameter] = param_data[self.grid.activegpis]
                    return_metadata[parameter] = param_metadata

        grbs.close()

        for parameter in self.parameters:
            try:
                return_img[parameter]
            except KeyError:
                print(
                    self.filename[self.filename.rfind("GLDAS"):],
                    "corrupt file - filling image with nan values",
                )
                return_img[parameter] = np.empty(self.grid.n_gpi)
                return_img[parameter].fill(np.nan)

        if self.array_1D:
            return Image(
                self.grid.activearrlon,
                self.grid.activearrlat,
                return_img,
                return_metadata,
                timestamp,
            )
        else:
            for key in return_img:
                return_img[key] = np.flipud(
                    return_img[key].reshape((720, 1440))
                )

            lons = np.flipud(self.grid.activearrlon.reshape((720, 1440)))
            lats = np.flipud(self.grid.activearrlat.reshape((720, 1440)))

            return Image(lons, lats, return_img, return_metadata, timestamp)
    def write(self, data):
        raise NotImplementedError()

    def flush(self):
        pass

    def close(self):
        pass
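# Usage sketch (illustration, not part of the original module): reading one
# deprecated GLDAS Noah v1 grib image. This requires pygrib to be installed;
# the file path is a hypothetical placeholder and '086_L1' is the default
# parameter of the class above.
def _example_read_v1_image():
    img_reader = GLDAS_Noah_v1_025Img(
        "/data/GLDAS_v1/2005/001/GLDAS_NOAH025SUBP_3H.A2005001.0000.001.grb",
        parameter="086_L1",
    )
    image = img_reader.read()
    soil_moisture_l1 = image.data["086_L1"]
    img_reader.close()
    return soil_moisture_l1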
class GLDAS_Noah_v21_025Ds(MultiTemporalImageBase):
    """
    Class for reading GLDAS v2.1 images in nc format.

    Parameters
    ----------
    data_path : string
        Path to the nc files
    parameter : string or list, optional
        one or list of parameters to read, see GLDAS v2.1 documentation
        for more information (default: 'SoilMoi0_10cm_inst').
    subgrid : Cell Grid
        Subgrid of the global GLDAS Grid to use for reading image data
        (e.g. only land points)
    array_1D: boolean, optional
        If set then the data is read into 1D arrays.
        Needed for some legacy code.
    """

    def __init__(
        self,
        data_path,
        parameter="SoilMoi0_10cm_inst",
        subgrid=None,
        array_1D=False,
    ):
        ioclass_kws = {
            "parameter": parameter,
            "subgrid": subgrid,
            "array_1D": array_1D,
        }

        sub_path = ["%Y", "%j"]
        filename_templ = "GLDAS_NOAH025_3H*.A{datetime}.*.nc4"

        super(GLDAS_Noah_v21_025Ds, self).__init__(
            data_path,
            GLDAS_Noah_v21_025Img,
            fname_templ=filename_templ,
            datetime_format="%Y%m%d.%H%M",
            subpath_templ=sub_path,
            exact_templ=False,
            ioclass_kws=ioclass_kws,
        )
    def tstamps_for_daterange(self, start_date, end_date):
        """
        Return timestamps for a date range.

        Parameters
        ----------
        start_date: datetime
            start of date range
        end_date: datetime
            end of date range

        Returns
        -------
        timestamps : list
            list of datetime objects of each available image between
            start_date and end_date
        """
        img_offsets = np.array(
            [
                timedelta(hours=0),
                timedelta(hours=3),
                timedelta(hours=6),
                timedelta(hours=9),
                timedelta(hours=12),
                timedelta(hours=15),
                timedelta(hours=18),
                timedelta(hours=21),
            ]
        )

        timestamps = []
        diff = end_date - start_date
        for i in range(diff.days + 1):
            daily_dates = start_date + timedelta(days=i) + img_offsets
            timestamps.extend(daily_dates.tolist())

        return timestamps
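# Usage sketch (illustration, not part of the original module): reading all
# 3-hourly GLDAS v2.1 images in a short date range with the dataset class
# above. The data path is a hypothetical placeholder; read(timestamp) is the
# generic reading method assumed to be inherited from MultiTemporalImageBase.
def _example_read_v21_daterange():
    from datetime import datetime

    ds = GLDAS_Noah_v21_025Ds(
        "/data/GLDAS_v21", parameter="SoilMoi0_10cm_inst"
    )
    images = {}
    for ts in ds.tstamps_for_daterange(
        datetime(2016, 1, 1), datetime(2016, 1, 2)
    ):
        # one image per 3-hourly timestamp, i.e. 8 images per day
        images[ts] = ds.read(ts)
    return images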
class GLDAS_Noah_v1_025Ds(MultiTemporalImageBase):
    """
    Class for reading GLDAS images in grib format.

    Parameters
    ----------
    data_path : string
        path to the grib files
    parameter : string or list, optional
        one or list of ['001', '011', '032', '051', '057', '065', '071',
        '085_L1', '085_L2', '085_L3', '085_L4', '086_L1', '086_L2',
        '086_L3', '086_L4', '099', '111', '112', '121', '122', '131',
        '132', '138', '155', '204', '205', '234', '235']
        parameters to read, see GLDAS documentation for more information
        Default : '086_L1'
    subgrid : Cell Grid
        Subgrid of the global GLDAS Grid to use for reading image data
        (e.g. only land points)
    array_1D: boolean, optional
        if set then the data is read into 1D arrays.
        Needed for some legacy code.
    """

    @deprecated("GLDAS Noah v1 data is deprecated, v2 should be used.")
    def __init__(
        self, data_path, parameter="086_L1", subgrid=None, array_1D=False
    ):
        ioclass_kws = {
            "parameter": parameter,
            "subgrid": subgrid,
            "array_1D": array_1D,
        }

        sub_path = ["%Y", "%j"]
        filename_templ = "GLDAS_NOAH025SUBP_3H.A{datetime}.001.*.grb"

        super(GLDAS_Noah_v1_025Ds, self).__init__(
            data_path,
            GLDAS_Noah_v1_025Img,
            fname_templ=filename_templ,
            datetime_format="%Y%j.%H%M",
            subpath_templ=sub_path,
            exact_templ=False,
            ioclass_kws=ioclass_kws,
        )
    def tstamps_for_daterange(self, start_date, end_date):
        """
        Return timestamps for a date range.

        Parameters
        ----------
        start_date: datetime
            start of date range
        end_date: datetime
            end of date range

        Returns
        -------
        timestamps : list
            list of datetime objects of each available image between
            start_date and end_date
        """
        img_offsets = np.array(
            [
                timedelta(hours=0),
                timedelta(hours=3),
                timedelta(hours=6),
                timedelta(hours=9),
                timedelta(hours=12),
                timedelta(hours=15),
                timedelta(hours=18),
                timedelta(hours=21),
            ]
        )

        timestamps = []
        diff = end_date - start_date
        for i in range(diff.days + 1):
            daily_dates = start_date + timedelta(days=i) + img_offsets
            timestamps.extend(daily_dates.tolist())

        return timestamps
class GLDASTs(GriddedNcOrthoMultiTs):
    def __init__(self, ts_path, grid_path=None, **kwargs):
        """
        Class for reading GLDAS time series after reshuffling.

        Parameters
        ----------
        ts_path : str
            Directory where the netcdf time series files are stored
        grid_path : str, optional (default: None)
            Path to grid file, that is used to organize the location of time
            series to read. If None is passed, grid.nc is searched for in the
            ts_path.

        Optional keyword arguments that are passed to the Gridded Base:
        ---------------------------------------------------------------
        parameters : list, optional (default: None)
            Specific variable names to read, if None are selected, all are
            read.
        offsets : dict, optional (default: None)
            Offsets (values) that are added to the parameters (keys)
        scale_factors : dict, optional (default: None)
            Scale factors (values) that the parameters (keys) are multiplied
            with
        ioclass_kws: dict
            Optional keyword arguments to pass to OrthoMultiTs class:
            ----------------------------------------------------------
            read_bulk : boolean, optional (default: False)
                If set to True, the data of all locations is read into memory
                and subsequent calls to read_ts read from the cache and not
                from disk. This makes reading complete files faster.
            read_dates : boolean, optional (default: False)
                If False, dates will not be read automatically but only on
                specific request, usable for bulk reading because currently
                the netCDF num2date routine is very slow for big datasets.
        """
        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)
        super(GLDASTs, self).__init__(ts_path, grid, **kwargs)
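# Usage sketch (illustration, not part of the original module): reading a
# reshuffled GLDAS time series at a single location. The time series path is
# a hypothetical placeholder; read(lon, lat) is the generic reader assumed to
# be provided by the gridded pynetcf/pygeogrids base classes and to return a
# pandas DataFrame for the nearest grid point.
def _example_read_time_series():
    # read_bulk=True caches the data of whole cell files in memory
    ds = GLDASTs("/data/GLDAS_ts", ioclass_kws={"read_bulk": True})
    ts = ds.read(15.25, 48.25)
    ds.close()
    return ts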