Source code for earthkit.transforms.climatology._aggregate

# Copyright 2024-, European Centre for Medium Range Weather Forecasts.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing as T

import xarray as xr
from earthkit.utils.decorators import format_handler

from earthkit.transforms import _tools
from earthkit.transforms._aggregate import reduce as _reduce
from earthkit.transforms._tools import groupby_time
from earthkit.transforms.temporal import reduce as _temporal_reduce


@format_handler()
@_tools.time_dim_decorator
@_tools.groupby_kwargs_decorator(climatology=True)
@_tools.season_order_decorator
def reduce(
    dataarray: xr.Dataset | xr.DataArray,
    time_dim: str | None = None,
    how: str | T.Callable | None = "mean",
    groupby_kwargs: dict | None = None,
    climatology_range: tuple | list | None = None,
    **reduce_kwargs,
):
    """Reduce data along the time dimension, optionally per climatological group.

    The input is first restricted to ``climatology_range`` (only when both
    bounds are not ``None``). If ``groupby_kwargs`` holds a non-``None``
    ``frequency`` the selection is grouped with ``groupby_time`` and each group
    is reduced; otherwise the whole selection is reduced along ``time_dim``.

    Parameters
    ----------
    dataarray : xarray.Dataset or xarray.DataArray
        Data to reduce. Must contain a time dimension.
    time_dim : str (optional)
        Name of the time dimension in the data object. Documented default
        behaviour is to detect it from the input object (presumably done by
        ``_tools.time_dim_decorator`` - confirm against ``_tools``).
    how : str or callable
        Method used to reduce data. Default='mean'. If a string, it must be an
        in-built xarray reduce method, an earthkit how method or any method
        compatible with the array namespace of the data; for duplicate names the
        documented selection order is xarray, earthkit, array_namespace.
        Otherwise any function callable as `f(x, axis=axis, **kwargs)` that
        reduces an array over an integer valued axis.
    groupby_kwargs : dict (optional)
        Keyword arguments passed to `earthkit.transforms._tools.groupby_time`.
        Grouping only happens when it contains a non-``None`` ``frequency``
        (documented typical values: `dayofyear`, `weekofyear`, `month`, `year`).
        The documented `frequency` and `bin_widths` keyword arguments are
        presumably gathered into this dictionary by
        ``_tools.groupby_kwargs_decorator`` - confirm against ``_tools``.
    climatology_range : (list or tuple, optional)
        Start and end of the reference period, used as ``slice(start, end)`` on
        the time dimension. If either bound is ``None`` no selection is made and
        the entire time-series is used.
    **reduce_kwargs :
        Any other kwargs that are accepted by
        `earthkit.transforms.aggregate.reduce` (except how).

    Returns
    -------
    Whatever `earthkit.transforms._aggregate.reduce` returns for the selected
    (and possibly grouped) data.

    Raises
    ------
    ValueError
        If ``climatology_range`` is not ``None`` and cannot be unpacked into
        exactly two items.
    """
    source = dataarray

    if climatology_range is not None:
        # Unpacking doubles as validation: anything but a two-item sequence fails here.
        try:
            range_start, range_end = climatology_range
        except (TypeError, ValueError) as exc:
            raise ValueError(
                "climatology_range must be a sequence of exactly two items (start, end), or None."
            ) from exc
        # A partially specified range is ignored rather than treated as open-ended.
        if range_start is not None and range_end is not None:
            source = dataarray.sel({time_dim: slice(range_start, range_end)})

    grouping = groupby_kwargs if groupby_kwargs else {}
    if grouping.get("frequency") is not None:
        source = groupby_time(source, time_dim=time_dim, **grouping)

    return _reduce(source, how=how, dim=time_dim, **reduce_kwargs)
def mean(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the climatological mean.

    Thin wrapper around `reduce` that forces ``how="mean"``; a ``how`` supplied
    by the caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `reduce` (the data object plus options such as
        `time_dim`, `frequency`, `bin_widths`, `climatology_range` and any extra
        reduction kwargs).

    Returns
    -------
    The result of `reduce`.
    """
    return reduce(*_args, **{**_kwargs, "how": "mean"})
def median(*_args, **_kwargs) -> xr.DataArray:
    """Calculate the climatological median.

    Implemented as the 0.5 quantile: `quantiles` is called with ``q=[0.5]`` and
    the single entry along the resulting ``quantile`` dimension is selected.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `quantiles` (the data object plus options such as
        `time_dim`, `frequency`, `bin_widths` and `climatology_range`). ``q``
        must not be supplied, it is fixed by this function.

    Returns
    -------
    The output of `quantiles` indexed at position 0 of ``quantile``.
    """
    return quantiles(*_args, q=[0.5], **_kwargs).isel(quantile=0)
def min(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the climatological minimum.

    Thin wrapper around `reduce` that forces ``how="min"``; a ``how`` supplied
    by the caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `reduce` (the data object plus options such as
        `time_dim`, `frequency`, `bin_widths`, `climatology_range` and any extra
        reduction kwargs).

    Returns
    -------
    The result of `reduce`.
    """
    _kwargs.update(how="min")
    return reduce(*_args, **_kwargs)
def max(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the climatological maximum.

    Thin wrapper around `reduce` that forces ``how="max"``; a ``how`` supplied
    by the caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `reduce` (the data object plus options such as
        `time_dim`, `frequency`, `bin_widths`, `climatology_range` and any extra
        reduction kwargs).

    Returns
    -------
    The result of `reduce`.
    """
    _kwargs.update(how="max")
    return reduce(*_args, **_kwargs)
def std(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the climatological standard deviation.

    Thin wrapper around `reduce` that forces ``how="std"``; a ``how`` supplied
    by the caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `reduce` (the data object plus options such as
        `time_dim`, `frequency`, `bin_widths`, `climatology_range` and any extra
        reduction kwargs).

    Returns
    -------
    The result of `reduce`.
    """
    return reduce(*_args, **{**_kwargs, "how": "std"})
def daily_reduce(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Reduce the data to the daily climatology of the provided "how" method.

    Thin wrapper around `reduce` that forces ``frequency="dayofyear"``; a
    ``frequency`` supplied by the caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `reduce` (the data object plus options such as
        `how`, `time_dim`, `bin_widths`, `climatology_range` and any extra
        reduction kwargs).

    Returns
    -------
    The result of `reduce`.
    """
    return reduce(*_args, **{**_kwargs, "frequency": "dayofyear"})
def daily_mean(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the daily climatological mean.

    Calls `daily_reduce` with ``how="mean"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `daily_reduce` (the data object plus options such
        as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `daily_reduce`.
    """
    return daily_reduce(*_args, **{**_kwargs, "how": "mean"})
def daily_median(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the daily climatological median.

    Calls `daily_reduce` with ``how="median"``; a ``how`` supplied by the
    caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `daily_reduce` (the data object plus options such
        as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `daily_reduce`.
    """
    return daily_reduce(*_args, **{**_kwargs, "how": "median"})
def daily_min(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the daily climatological min.

    Calls `daily_reduce` with ``how="min"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `daily_reduce` (the data object plus options such
        as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `daily_reduce`.
    """
    _kwargs.update(how="min")
    return daily_reduce(*_args, **_kwargs)
def daily_max(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the daily climatological max.

    Calls `daily_reduce` with ``how="max"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `daily_reduce` (the data object plus options such
        as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `daily_reduce`.
    """
    _kwargs.update(how="max")
    return daily_reduce(*_args, **_kwargs)
def daily_std(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the daily climatological standard deviation.

    Calls `daily_reduce` with ``how="std"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `daily_reduce` (the data object plus options such
        as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `daily_reduce`.
    """
    return daily_reduce(*_args, **{**_kwargs, "how": "std"})
def monthly_reduce(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Reduce the data to the monthly climatology of the provided "how" method.

    Thin wrapper around `reduce` that forces ``frequency="month"``; a
    ``frequency`` supplied by the caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `reduce` (the data object plus options such as
        `how`, `time_dim`, `bin_widths`, `climatology_range` and any extra
        reduction kwargs).

    Returns
    -------
    The result of `reduce`.
    """
    return reduce(*_args, **{**_kwargs, "frequency": "month"})
def monthly_mean(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the monthly climatological mean.

    Calls `monthly_reduce` with ``how="mean"``; a ``how`` supplied by the
    caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `monthly_reduce` (the data object plus options
        such as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `monthly_reduce`.
    """
    return monthly_reduce(*_args, **{**_kwargs, "how": "mean"})
def monthly_median(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the monthly climatological median.

    Calls `monthly_reduce` with ``how="median"``; a ``how`` supplied by the
    caller is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `monthly_reduce` (the data object plus options
        such as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `monthly_reduce`.
    """
    return monthly_reduce(*_args, **{**_kwargs, "how": "median"})
def monthly_min(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the monthly climatological min.

    Calls `monthly_reduce` with ``how="min"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `monthly_reduce` (the data object plus options
        such as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `monthly_reduce`.
    """
    _kwargs.update(how="min")
    return monthly_reduce(*_args, **_kwargs)
def monthly_max(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the monthly climatological max.

    Calls `monthly_reduce` with ``how="max"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `monthly_reduce` (the data object plus options
        such as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `monthly_reduce`.
    """
    _kwargs.update(how="max")
    return monthly_reduce(*_args, **_kwargs)
def monthly_std(*_args, **_kwargs) -> xr.Dataset | xr.DataArray:
    """Calculate the monthly climatological standard deviation.

    Calls `monthly_reduce` with ``how="std"``; a ``how`` supplied by the caller
    is overwritten.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `monthly_reduce` (the data object plus options
        such as `time_dim`, `bin_widths` and any extra reduction kwargs).

    Returns
    -------
    The result of `monthly_reduce`.
    """
    return monthly_reduce(*_args, **{**_kwargs, "how": "std"})
@format_handler()
@_tools.time_dim_decorator
@_tools.groupby_kwargs_decorator(climatology=True)
@_tools.season_order_decorator
def quantiles(
    dataarray: xr.Dataset | xr.DataArray,
    q: float | list,
    time_dim: str | None = None,
    groupby_kwargs: dict | None = None,
    climatology_range: tuple | list | None = None,
    **reduce_kwargs,
) -> xr.DataArray:
    """Calculate a set of climatological quantiles.

    The input is restricted to ``climatology_range`` (only when both bounds are
    not ``None``), re-chunked so the time dimension is a single chunk, grouped
    with ``groupby_time`` and each requested quantile is computed per group. The
    per-quantile results are concatenated along a ``quantile`` dimension.

    Parameters
    ----------
    dataarray : xarray.Dataset or xarray.DataArray
        The data over which to calculate the climatological quantiles.
        Must contain a time dimension.
    q : float | list
        The quantile, or list/tuple of quantiles, to calculate.
    time_dim : str (optional)
        Name of the time dimension in the data object. Documented default
        behaviour is to detect it from the input object (presumably done by
        ``_tools.time_dim_decorator`` - confirm against ``_tools``).
    groupby_kwargs : dict (optional)
        Keyword arguments passed to `earthkit.transforms._tools.groupby_time`.
        If it has no ``frequency`` key, ``"year"`` is used. The dictionary
        passed in is not modified. The documented `frequency` and `bin_widths`
        keyword arguments are presumably gathered into this dictionary by
        ``_tools.groupby_kwargs_decorator`` - confirm against ``_tools``.
    climatology_range : (list or tuple, optional)
        Start and end of the reference period, used as ``slice(start, end)`` on
        the time dimension. If either bound is ``None`` no selection is made and
        the entire time-series is used.
    **reduce_kwargs :
        Extra keyword arguments passed to the grouped ``quantile`` call.

    Returns
    -------
    xarray object with a ``quantile`` dimension holding one entry per requested
    quantile.

    Raises
    ------
    ValueError
        If ``climatology_range`` is not ``None`` and cannot be unpacked into
        exactly two items, or if ``q`` is an empty list/tuple.
    """
    # Validate and normalize climatology_range if provided
    if climatology_range is not None:
        try:
            start, end = climatology_range  # expect exactly two items
        except (TypeError, ValueError) as exc:
            raise ValueError(
                "climatology_range must be a sequence of exactly two items (start, end), or None."
            ) from exc
        climatology_range = (start, end)

    if not isinstance(q, (list, tuple)):
        q = [q]
    if not q:
        # Fail before any grouping work; xr.concat would raise ValueError on an empty list anyway.
        raise ValueError("q must contain at least one quantile.")

    # If climate range is defined, use it
    if climatology_range is not None and all(c_r is not None for c_r in climatology_range):
        selection = dataarray.sel({time_dim: slice(*climatology_range)})
    else:
        selection = dataarray

    # Merge into a new dict so the caller's groupby_kwargs is never mutated;
    # "year" only applies when no "frequency" key was supplied.
    groupby_kwargs = {"frequency": "year", **(groupby_kwargs or {})}

    # The time dimension is put in a single chunk before the grouped quantile is computed.
    grouped_data = groupby_time(selection.chunk({time_dim: -1}), time_dim=time_dim, **groupby_kwargs)

    results = [grouped_data.quantile(q=quantile, dim=time_dim, **reduce_kwargs) for quantile in q]
    return xr.concat(results, dim="quantile")
def percentiles(
    dataarray: xr.Dataset | xr.DataArray,
    p: float | list,
    **_kwargs,
) -> xr.DataArray:
    """Calculate a set of climatological percentiles.

    Each percentile is scaled by ``1e-2`` and passed to `quantiles`; the
    ``quantile`` dimension of the result is then replaced by a ``percentile``
    dimension labelled with the requested percentile values.

    Parameters
    ----------
    dataarray : xarray.Dataset or xarray.DataArray
        The data over which to calculate the climatological percentiles.
        Must contain a time dimension.
    p : float | list
        The percentile, or list/tuple of percentiles, to calculate.
    **_kwargs :
        Forwarded unchanged to `quantiles` (options such as `time_dim`,
        `frequency`, `bin_widths`, `climatology_range` and extra kwargs).

    Returns
    -------
    The output of `quantiles` with ``quantile`` swapped for ``percentile``.
    """
    percentile_values = p if isinstance(p, (list, tuple)) else [p]
    fractions = [value * 1e-2 for value in percentile_values]

    by_quantile = quantiles(dataarray, fractions, **_kwargs)

    # Label the existing dimension with the percentile values, make that the
    # dimension coordinate, then discard the now redundant quantile coordinate.
    return (
        by_quantile.assign_coords(percentile=("quantile", percentile_values))
        .swap_dims({"quantile": "percentile"})
        .drop_vars("quantile")
    )
@format_handler()
def anomaly(
    dataarray: xr.Dataset | xr.DataArray,
    climatology: xr.Dataset | xr.DataArray,
    **_kwargs,
) -> xr.Dataset | xr.DataArray:
    """Calculate the anomaly from a reference climatology.

    A DataArray is handled directly by `_anomaly_dataarray`. For a Dataset the
    anomaly is calculated for each data variable in turn and the results are
    collected in a new Dataset that carries the attributes of the input.

    Parameters
    ----------
    dataarray : xarray.Dataset or xarray.DataArray
        The data for which to calculate the anomaly. Must contain a time
        dimension indicated by time_dim.
    climatology : xarray.Dataset or xarray.DataArray
        Reference climatology data against which the anomaly is to be
        calculated. This argument is required.
    **_kwargs :
        Forwarded unchanged to `_anomaly_dataarray` (e.g. `time_dim`,
        `frequency`, `bin_widths`, `relative`, `climatology_how_tag`,
        `how_label` and reduction kwargs).

    Returns
    -------
    xarray.Dataset or xarray.DataArray
        Same container type as ``dataarray``.
    """
    if not isinstance(dataarray, xr.Dataset):
        return _anomaly_dataarray(dataarray, climatology, **_kwargs)

    anomalies = xr.Dataset().assign_attrs(dataarray.attrs)
    for variable in dataarray.data_vars:
        variable_anomaly = _anomaly_dataarray(dataarray[variable], climatology, **_kwargs)
        # Stored under the name chosen by _anomaly_dataarray, which may differ
        # from the input variable name (e.g. when a how_label is given).
        anomalies[variable_anomaly.name] = variable_anomaly
    return anomalies
@_tools.time_dim_decorator
@_tools.groupby_kwargs_decorator(climatology=True)
@_tools.season_order_decorator
def _anomaly_dataarray(
    dataarray: xr.DataArray,
    climatology: xr.Dataset | xr.DataArray,
    time_dim: str | None = None,
    groupby_kwargs: dict | None = None,
    relative: bool = False,
    climatology_how_tag: str = "",
    how_label: str | None = None,
    **reduce_kwargs,
) -> xr.DataArray:
    """Calculate the anomaly of a single DataArray from a reference climatology.

    The climatology variable is located first, then the climatology frequency
    is deduced from its dimensions, and finally the anomaly is computed. The
    requested output ``frequency`` (read from ``groupby_kwargs``) selects one of
    three paths:

    * ``None``: the anomaly is returned at the resolution of the input data.
    * ``"year"``: the data are reduced with the temporal ``reduce`` first and
      the climatology is subtracted from the reduced data.
    * anything else: the anomaly is computed at the input resolution and then
      reduced with the temporal ``reduce``.

    Parameters
    ----------
    dataarray : xarray.DataArray
        The DataArray over which to calculate the anomaly from the reference
        climatology. Must contain a time dimension indicated by time_dim.
    climatology : xarray.Dataset or xarray.DataArray
        Reference climatology data against which the anomaly is to be
        calculated. When a Dataset is given the variable is chosen as follows:
        the variable with exactly the name of ``dataarray``; otherwise, among
        the data variables whose name contains that name, the only candidate,
        or the candidate named ``f"{name}_{climatology_how_tag}"``.
    time_dim : str (optional)
        Name of the time dimension in the data object. Documented default
        behaviour is to detect it from the input object (presumably done by
        ``_tools.time_dim_decorator`` - confirm against ``_tools``).
    groupby_kwargs : dict (optional)
        Grouping options. ``frequency`` selects the output frequency; the
        special value ``"climatology"`` is replaced by the frequency deduced
        from the climatology. The remaining entries are passed to
        ``groupby_time`` when grouping at the climatology frequency. The
        documented `frequency` and `bin_widths` keyword arguments are
        presumably gathered into this dictionary by
        ``_tools.groupby_kwargs_decorator`` - confirm against ``_tools``.
        NOTE(review): a dictionary passed in may be modified in place when
        ``frequency == "climatology"``.
    relative : bool (optional)
        Return the relative anomaly, i.e. the anomaly divided by the
        climatology and multiplied by 100. The ``units`` attribute is then set
        to ``"%"``.
    climatology_how_tag : str (optional)
        Tag to identify the climatology variable in the climatology dataset.
    how_label : str (optional)
        Label to append to the variable name of the anomaly dataarray.
    **reduce_kwargs :
        Passed to the temporal ``reduce`` when a ``frequency`` is requested.
        ``how`` defaults to ``"mean"``.

    Returns
    -------
    xarray.DataArray
        The anomaly, renamed and with updated attributes
        (see `_update_anomaly_array`).

    Raises
    ------
    KeyError
        If several climatology variables match and none is identified by
        ``climatology_how_tag``.
    ValueError
        If no climatology variable matches the name of ``dataarray``.
    """
    reduce_kwargs.setdefault("how", "mean")
    var_name = dataarray.name
    if isinstance(climatology, xr.Dataset):
        if var_name in climatology:
            climatology_da = climatology[var_name]
        else:
            # Fall back to a substring match on the variable names
            potential_clim_vars = [c_var for c_var in climatology.data_vars if str(var_name) in str(c_var)]
            if len(potential_clim_vars) == 1:
                climatology_da = climatology[potential_clim_vars[0]]
            elif f"{var_name}_{climatology_how_tag}" in potential_clim_vars:
                climatology_da = climatology[f"{var_name}_{climatology_how_tag}"]
            elif len(potential_clim_vars) > 1:
                raise KeyError(
                    "Multiple potential climatologies found in climatology dataset, "
                    "please identify appropriate statistic with `climatology_how_tag`.\n"
                    f"Potential climatology variables found: {potential_clim_vars}"
                )
            else:
                raise ValueError(
                    "Could not find a variable in the climatology dataset that matches "
                    f"the name of the anomaly dataarray: {var_name}"
                )
    else:
        climatology_da = climatology

    # The climatology frequency is deduced from the climatology itself: the first
    # valid climatology frequency that appears as one of its dimensions, with
    # "year" as the fallback when none is found.
    # This is somewhat hardcoded, but it is best practice, so for now it can stay here
    for clim_freq in _tools.VALID_CLIMATOLOGY_FREQUENCIES:
        if clim_freq in climatology_da.dims:
            break
    else:
        clim_freq = "year"

    groupby_kwargs = groupby_kwargs or {}
    # "climatology" is a placeholder meaning "use the frequency of the climatology"
    if groupby_kwargs.get("frequency") == "climatology":
        groupby_kwargs["frequency"] = clim_freq

    # Annual anomalies are simpler: the data are reduced first and the climatology
    # is subtracted afterwards, no grouping at the climatology frequency is needed
    frequency = groupby_kwargs.get("frequency")
    # Grouping options without the output frequency, used when grouping at clim_freq
    clim_groupby_kwargs = {k: v for k, v in groupby_kwargs.items() if k != "frequency"}
    if frequency is None:
        if clim_freq == "year":
            # If frequency is None, and clim frequency is year, then we can just take the difference
            anomaly_array = dataarray - climatology_da
            if relative:
                anomaly_array = (anomaly_array / climatology_da) * 100.0
        else:
            # If clim_freq is not year, then we need to groupby the dataarray before taking
            # the difference
            anomaly_array = (
                groupby_time(dataarray, time_dim=time_dim, frequency=clim_freq, **clim_groupby_kwargs)
                - climatology_da
            )
            if relative:
                anomaly_array = (
                    groupby_time(anomaly_array, time_dim=time_dim, frequency=clim_freq, **clim_groupby_kwargs)
                    / climatology_da
                    * 100.0
                )
            anomaly_array = anomaly_array.broadcast_like(dataarray)
    elif frequency == "year":
        anomaly_array = (
            _temporal_reduce(dataarray, time_dim=time_dim, **groupby_kwargs, **reduce_kwargs) - climatology_da
        )
        if relative:
            anomaly_array = (anomaly_array / climatology_da) * 100.0
    else:
        if clim_freq == "year":
            anomaly_array = dataarray - climatology_da
            if relative:
                anomaly_array = (anomaly_array / climatology_da) * 100.0
        else:
            # Need to group the dataarray to the same frequency as the climatology before taking the difference,
            # and then broadcast back to the original dataarray dimensions
            anomaly_array = (
                groupby_time(dataarray, time_dim=time_dim, frequency=clim_freq, **clim_groupby_kwargs)
                - climatology_da
            )
            if relative:
                anomaly_array = (
                    groupby_time(anomaly_array, time_dim=time_dim, frequency=clim_freq, **clim_groupby_kwargs)
                    / climatology_da
                    * 100.0
                )
            # The broadcast_like is probably not necessary as the _temporal_reduce should take care
            # of things, but it is a useful safeguard against any potential changes in downstream processing
            anomaly_array = anomaly_array.broadcast_like(dataarray)
        # Reduce the anomaly to the requested output frequency
        anomaly_array = _temporal_reduce(anomaly_array, time_dim=time_dim, **groupby_kwargs, **reduce_kwargs)

    if relative:
        name_tag = "relative anomaly"
        update_attrs = {"units": "%"}
    else:
        name_tag = "anomaly"
        update_attrs = {}

    return _update_anomaly_array(anomaly_array, dataarray, var_name, name_tag, update_attrs, how_label=how_label)


def _update_anomaly_array(anomaly_array, original_array, var_name, name_tag, update_attrs, how_label=None):
    """Rename an anomaly array and give it attributes derived from the original.

    Parameters
    ----------
    anomaly_array :
        The computed anomaly; must provide ``rename`` and ``assign_attrs``.
    original_array :
        The array the anomaly was computed from; its ``attrs`` are the base of
        the output attributes.
    var_name :
        Name for the output array.
    name_tag : str
        Text appended to ``standard_name`` (with spaces replaced by
        underscores) and to ``long_name`` when those attributes exist.
    update_attrs : dict
        Attributes that override those of ``original_array``.
    how_label : str (optional)
        If not None, ``f"_{how_label}"`` is appended to ``var_name``.

    Returns
    -------
    The renamed anomaly array with the merged attributes assigned.
    """
    if how_label is not None:
        var_name = f"{var_name}_{how_label}"
    # NOTE(review): the name is always formatted to a string, so a var_name of
    # None becomes the literal name "None" - confirm this is intended.
    anomaly_array = anomaly_array.rename(f"{var_name}")
    # Original attributes first so that the entries in update_attrs take precedence
    update_attrs = {**original_array.attrs, **update_attrs}
    if "standard_name" in update_attrs:
        update_attrs["standard_name"] += f"_{name_tag.replace(' ', '_')}"
    if "long_name" in update_attrs:
        update_attrs["long_name"] += f" {name_tag}"
    anomaly_array = anomaly_array.assign_attrs(update_attrs)
    return anomaly_array
@_tools.time_dim_decorator
@_tools.groupby_kwargs_decorator(climatology=True)
@_tools.season_order_decorator
def relative_anomaly(*_args, **_kwargs):
    """Calculate the relative anomaly from a reference climatology, i.e. percentage change.

    Calls `anomaly` with ``relative=True``. ``relative`` must therefore not be
    supplied by the caller.

    Parameters
    ----------
    *_args, **_kwargs :
        Forwarded unchanged to `anomaly`: the data object, the reference
        climatology and options such as `time_dim`, `frequency`, `bin_widths`
        and reduction kwargs.

    Returns
    -------
    The result of `anomaly`.
    """
    return anomaly(*_args, relative=True, **_kwargs)
@_tools.time_dim_decorator
@_tools.groupby_kwargs_decorator(climatology=True)
@format_handler()
def auto_anomaly(
    dataarray: xr.Dataset | xr.DataArray,
    *_args,
    climatology_range: tuple | None = None,
    climatology_how: str = "mean",
    climatology_frequency: str | None = None,
    relative: bool = False,
    **_kwargs,
):
    """Calculate the anomaly from a climatology derived from the data itself.

    The reference climatology is computed from ``dataarray`` with `reduce`, then
    the anomaly of ``dataarray`` against that climatology is computed with
    `anomaly`.

    Parameters
    ----------
    dataarray : xarray.Dataset or xarray.DataArray
        The data for which to calculate the anomaly. Must contain a time
        dimension.
    *_args :
        Extra positional arguments, passed to both `reduce` and `anomaly`.
    climatology_range : (list or tuple, optional)
        Start and end year of the period to be used for the reference
        climatology. Default is to use the entire time-series.
    climatology_how : string
        Method used to calculate the climatology, passed to `reduce` as
        ``how``. Default is "mean".
    climatology_frequency : str (optional)
        Frequency of the climatology, passed to `reduce` as ``frequency``.
        Documented valid options are None (default), `dayofyear`, `weekofyear`
        and `month`.
    relative : bool (optional)
        Return the relative anomaly, i.e. the percentage change w.r.t the
        climatological period.
    **_kwargs :
        Forwarded to `anomaly` (e.g. `time_dim`, `frequency`, `bin_widths` and
        reduction kwargs). A non-None ``time_dim`` found here is also used for
        the climatology calculation.

    Returns
    -------
    The result of `anomaly`.
    """
    climatology_kwargs = {
        "how": climatology_how,
        "climatology_range": climatology_range,
        "frequency": climatology_frequency,
    }
    # Use the same time dimension for the climatology as for the anomaly;
    # otherwise an explicitly requested time_dim would be ignored by the
    # climatology step. When absent or None, `reduce` resolves it as before.
    time_dim = _kwargs.get("time_dim")
    if time_dim is not None:
        climatology_kwargs["time_dim"] = time_dim

    climatology = reduce(dataarray, *_args, **climatology_kwargs)
    return anomaly(dataarray, climatology, *_args, relative=relative, **_kwargs)