# Source code for cmip6_downscaling.methods.deepsd.utils

import fsspec
import numpy as np
import xarray as xr
import xesmf as xe

EPSILON = 1e-6  # small value to add to the denominator when normalizing to avoid division by 0
INPUT_SIZE = 51  # number of pixels in a patch example used for training deepsd model (in both lat/lon (or x/y) directions)
PATCH_STRIDE = 20  # number of pixels to skip when generating patches for deepsd training
INFERENCE_BATCH_SIZE = 500  # number of timesteps in each inference iteration
# Per-dataset starting resolution, keyed by dataset/model name ('ERA5' plus GCM
# identifiers). Every value here is either 1.0 or 2.0.
# NOTE(review): presumably in degrees, matching the `{starting_resolution}d`
# placeholder in `stacked_model_path` below -- confirm against callers.
starting_resolutions = {
    'ERA5': 2.0,
    'GISS-E2-1-G': 2.0,
    'BCC-CSM2-MR': 1.0,
    'AWI-CM-1-1-MR': 1.0,
    'BCC-ESM1': 2.0,
    'SAM0-UNICON': 1.0,
    'CanESM5': 2.0,
    'MRI-ESM2-0': 1.0,
    'MPI-ESM-1-2-HAM': 2.0,
    'MPI-ESM1-2-HR': 1.0,
    'MPI-ESM1-2-LR': 2.0,
    'NESM3': 2.0,
    'NorESM2-LM': 2.0,
    'FGOALS-g3': 2.0,
    'MIROC6': 1.0,
    'ACCESS-CM2': 1.0,
    'NorESM2-MM': 1.0,
    'ACCESS-ESM1-5': 1.0,
    'AWI-ESM-1-1-LR': 2.0,
    'TaiESM1': 1.0,
    'NorCPM1': 2.0,
    'CMCC-ESM2': 1.0,
}
# Path template for a stored model file; fill the `{var}` and
# `{starting_resolution}` placeholders with str.format before use.
# NOTE(review): `{starting_resolution}` presumably expects the underscore form
# produced by `res_to_str` (e.g. '2_0'), mirroring the literal '0_25d' -- confirm.
stacked_model_path = 'az://cmip6downscaling/training/deepsd/deepsd_models/{var}_{starting_resolution}d_to_0_25d/frozen_graph.pb'
# Template for the model's output node name; fill the `{var}` placeholder.
output_node_name = '{var}_0_25/prediction:0'


def res_to_str(r):
    """
    Format a resolution as a string with underscores in place of decimal points

    Parameters
    ----------
    r : float
        Resolution value; rounded to two decimal places before formatting

    Returns
    -------
    str
        String form of the rounded value with every '.' swapped for '_'
        (e.g. 0.25 -> '0_25')
    """
    rounded_text = str(np.round(r, 2))
    # Splitting on '.' and re-joining with '_' replaces every decimal point.
    return '_'.join(rounded_text.split('.'))



# [docs]
def bilinear_interpolate(ds: xr.Dataset, output_degree: float) -> xr.Dataset:
    """
    Regrid a dataset onto a global grid using bilinear interpolation

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to regrid
    output_degree : float
        Step size of the target global grid, used for both the longitude
        and latitude spacing

    Returns
    -------
    xr.Dataset
        Dataset regridded onto the target grid, with attributes kept
    """
    # The same step is applied in both directions of the target grid.
    global_grid = xe.util.grid_global(output_degree, output_degree, cf=True)
    bilinear_regridder = xe.Regridder(
        ds,
        global_grid,
        "bilinear",
        extrap_method="nearest_s2d",
    )
    regridded = bilinear_regridder(ds, keep_attrs=True)
    return regridded




# [docs]
def conservative_interpolate(ds: xr.Dataset, output_degree: float) -> xr.Dataset:
    """
    Regrid a dataset onto a global grid using conservative interpolation

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to regrid
    output_degree : float
        Spacing of the target global grid, used for both the longitude
        and latitude spacing

    Returns
    -------
    xr.Dataset
        Dataset regridded onto the target grid, with attributes kept
    """
    global_grid = xe.util.grid_global(output_degree, output_degree, cf=True)
    # NOTE(review): conservative regridding needs cell-boundary information on
    # both grids; presumably `ds` must already carry it -- confirm with callers.
    conservative_regridder = xe.Regridder(
        ds,
        global_grid,
        "conservative",
    )
    regridded = conservative_regridder(ds, keep_attrs=True)
    return regridded




# [docs]
def normalize(
    ds: xr.Dataset, dims: list[str] = ['lat', 'lon'], epsilon: float = 1e-6
) -> xr.Dataset:
    """
    Standardize a dataset by subtracting its mean and dividing by its standard deviation

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to normalize
    dims : list
        Dimensions over which the mean and standard deviation are taken
    epsilon : float
        Value added to the standard deviation before dividing, so that a
        zero standard deviation does not cause a division by zero

    Returns
    -------
    xr.Dataset
        ``(ds - mean) / (std + epsilon)``
    """
    # Both statistics are computed eagerly before being combined with `ds`.
    center = ds.mean(dim=dims).compute()
    spread = ds.std(dim=dims).compute()
    return (ds - center) / (spread + epsilon)



def build_grid_spec(
    output_degree,
):
    """
    Build a string describing the global grid with the given spacing

    Parameters
    ----------
    output_degree : float
        Grid spacing; rounded to two decimal places before the grid is built

    Returns
    -------
    str
        ``'{nlat}x{nlon}_gridsize_{lat_spacing}_{lon_spacing}_llcorner_{min_lat}_{min_lon}'``
        where the spacings are the gap between the first two coordinates,
        rounded to one decimal and multiplied by ten, and the corner values
        are the coordinate minima rounded to one decimal and then truncated
        to integers.
    """

    def _spacing_in_tenths(coord):
        # Gap between the first two points, rounded to one decimal, times ten.
        return int(np.round(abs(coord[0] - coord[1]), 1) * 10)

    def _truncated_minimum(coord):
        # int() drops the fractional part (truncates toward zero).
        return int(np.round(coord.min(), 1))

    output_degree = np.round(output_degree, 2)
    gcm_grid = xe.util.grid_global(output_degree, output_degree, cf=True)

    nlat, nlon = len(gcm_grid.lat), len(gcm_grid.lon)
    lat_spacing = _spacing_in_tenths(gcm_grid.lat)
    lon_spacing = _spacing_in_tenths(gcm_grid.lon)
    min_lat = _truncated_minimum(gcm_grid.lat)
    min_lon = _truncated_minimum(gcm_grid.lon)

    return f'{nlat:d}x{nlon:d}_gridsize_{lat_spacing:d}_{lon_spacing:d}_llcorner_{min_lat:d}_{min_lon:d}'


def make_coarse_elev_path(output_degree):
    """
    Build the path of the elevation zarr store for a given grid spacing

    Parameters
    ----------
    output_degree : float
        Grid spacing, forwarded to ``build_grid_spec``

    Returns
    -------
    str
        Path containing the grid spec string for ``output_degree``
    """
    grid_spec = build_grid_spec(output_degree)
    elev_path = f'az://scratch/deepsd/intermediate/elev/ERA5_full_space_{grid_spec}.zarr'
    return elev_path


def get_elevation_data(output_degree):
    """
    Open the elevation zarr store for a given grid spacing

    Parameters
    ----------
    output_degree : float
        Grid spacing, forwarded to ``make_coarse_elev_path``

    Returns
    -------
    xr.Dataset
        Dataset opened from the store at the path for ``output_degree``
    """
    store = fsspec.get_mapper(make_coarse_elev_path(output_degree))
    return xr.open_zarr(store)


def initialize_empty_dataset(lats, lons, times, output_path, var, chunks, attrs=None):
    """
    Create an empty zarr store for output from inference

    Parameters
    ----------
    lats : coords
        Latitude coordinates for the new dataset
    lons : coords
        Longitude coordinates for the new dataset
    times : coords
        Time coordinates for the new dataset
    output_path : UPath
        Path to the zarr store
    var : str
        Name to give the variable in the empty dataset
    chunks : dict
        Chunking scheme for the empty dataset
    attrs : dict, optional
        Attrs passed to the DataArray that becomes the variable ``var``.
        Defaults to an empty dict.

    Returns
    -------
    None
        The store is written to ``output_path``; nothing is returned.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if attrs is None:
        attrs = {}

    # np.empty only allocates; the float32 values are uninitialized placeholders
    # with shape (time, lat, lon).
    template = xr.DataArray(
        np.empty(shape=(len(times), len(lats), len(lons)), dtype=np.float32),
        dims=["time", "lat", "lon"],
        coords=[times, lats, lons],
        attrs=attrs,
    )
    ds = template.to_dataset(name=var).chunk(chunks)

    print(output_path)
    # mode="w" overwrites any existing store. compute=False defers writing the
    # array data; the returned delayed object is intentionally discarded.
    ds.to_zarr(output_path, mode="w", compute=False)