# Source code for cmip6_downscaling.data.observations

from __future__ import annotations

import intake
import xarray as xr

from .. import config
from .utils import lon_to_180

# Module-level side effect: this sets xarray's global ``keep_attrs`` option when the
# module is imported (it is not used as a context manager), so it stays in effect for
# xarray operations executed after import, not only those in this file.
xr.set_options(keep_attrs=True)

# Lookup from the short variable names used in this module ("tasmax", "tasmin", "pr")
# to the corresponding long-form variable names.
variable_name_dict: dict[str, str] = dict(
    tasmax="air_temperature_at_2_metres_1hour_Maximum",
    tasmin="air_temperature_at_2_metres_1hour_Minimum",
    pr="precipitation_amount_1hour_Accumulation",
)


def open_era5(variables: str | list[str], time_period: slice) -> xr.Dataset:
    """Open ERA5 daily data for one or more variables for period 1979-2021

    Parameters
    ----------
    variables : str or list of str
        The variable(s) you want to grab from the ERA5 dataset. ``'ua'`` and
        ``'va'`` are read from the separate daily winds zarr store; every other
        name is selected from the catalog datasets.
    time_period : slice
        Start and end year slice. Ex: slice('2020','2020'). Both ``start`` and
        ``stop`` must be convertible with ``int()``; the end year is inclusive.

    Returns
    -------
    xarray.Dataset
        A dataset containing the requested variable(s), passed through
        ``lon_to_180`` and with latitude reordered to ascending if needed.
    """
    cat = intake.open_esm_datastore(config.get("data_catalog.era5_daily.json"))

    if isinstance(variables, str):
        variables = [variables]

    # ``stop`` is treated as inclusive, hence the ``+ 1``.
    years = list(range(int(time_period.start), int(time_period.stop) + 1))

    # Wind components live in a different store under different names.
    wind_name_dict = {'ua': 'U', 'va': 'V'}
    wind_vars = [variable for variable in variables if variable in wind_name_dict]
    non_wind_vars = [variable for variable in variables if variable not in wind_name_dict]

    # Note: hardcoded tasmax is intended. The zarr store is built to include all
    # variables per year, so variables are subset after concat.
    ds = xr.concat(
        list(cat.search(year=years, cf_variable_name=['tasmax']).to_dataset_dict().values()),
        dim='time',
    )[non_wind_vars]

    if wind_vars:
        # Open the winds store once rather than once per requested component.
        era5_winds = xr.open_zarr(config.get("data_catalog.era5_daily_winds.uri")).rename(
            {'latitude': 'lat', 'longitude': 'lon'}
        )
        # NOTE(review): the winds store is not explicitly subset to ``time_period``
        # here; presumably assignment into ``ds`` aligns it to ``ds``'s coordinates
        # -- confirm.
        for wind_var in wind_vars:
            # ``drop_vars`` replaces the deprecated ``DataArray.drop``.
            ds[wind_var] = era5_winds[wind_name_dict[wind_var]].drop_vars('level')

    if 'pr' in variables:
        # convert to mm/day - helpful to prevent rounding errors from very tiny numbers
        ds['pr'] *= 86400
        ds['pr'] = ds['pr'].astype('float32')
        ds['pr'].attrs = {
            'least_significant_digit': 4,
            'standard_name': 'precipitation_amount',
            'units': 'mm',
            'long_name': 'Total precipitation',
            'nameECMWF': 'Total precipitation',
            'shortNameECMWF': 'tp',
            'product_type': 'forecast',
        }

    # correct error in raw data attributes for tasmin/tasmax
    if 'tasmin' in variables:
        ds['tasmin'].attrs = {
            'least_significant_digit': 1,
            'standard_name': 'air_temperature',
            'units': 'K',
            'long_name': 'Minimum temperature at 2 metres since previous post-processing',
            'nameECMWF': 'Minimum temperature at 2 metres since previous post-processing',
            'shortNameECMWF': 'mn2t',
            'nameCDM': 'Minimum_temperature_at_2_metres_since_previous_post-processing_surface_1_Hour_2',
            'product_type': 'forecast',
        }
    if 'tasmax' in variables:
        ds['tasmax'].attrs = {
            'least_significant_digit': 1,
            'standard_name': 'air_temperature',
            'units': 'K',
            'long_name': 'Maximum temperature at 2 metres since previous post-processing',
            'nameECMWF': 'Maximum temperature at 2 metres since previous post-processing',
            'shortNameECMWF': 'mx2t',
            'nameCDM': 'Maximum_temperature_at_2_metres_since_previous_post-processing_surface_1_Hour_2',
            'product_type': 'forecast',
        }
    # TODO adjust attrs of other variables

    ds = lon_to_180(ds)

    # Reorders latitudes to [-90, 90]
    if ds.lat[0] > ds.lat[-1]:
        ds = ds.reindex({"lat": ds.lat[::-1]})

    return ds