Source code for spaemis.utils

"""
General utility functions
"""
import os
from collections.abc import Generator
from contextlib import contextmanager
from typing import TypeVar, cast

import geopandas  # type: ignore
import numpy as np
import pooch  # type: ignore
import rioxarray  # noqa
import xarray as xr
from numpy.typing import ArrayLike, NDArray

T = TypeVar("T", xr.DataArray, xr.Dataset)


[docs]def area_grid(lat: ArrayLike, lon: ArrayLike) -> xr.DataArray: """ Calculate the area of each grid cell Area is in square meters Parameters ---------- lat Vector of latitude in degrees lon Vector of longitude in degrees Returns ------- area: grid-cell area in square-meters with dimensions, [lat,lon] Notes ----- Based on the function in https://github.com/chadagreene/CDT/blob/master/cdt/cdtarea.m """ xlon, ylat = np.meshgrid(lon, lat) R = earth_radius(ylat) dlat = np.deg2rad(np.gradient(ylat, axis=0)) dlon = np.deg2rad(np.gradient(xlon, axis=1)) dy = dlat * R dx = dlon * R * np.cos(np.deg2rad(ylat)) area = np.abs(dy * dx) xda = xr.DataArray( area, dims=["lat", "lon"], coords={"lat": lat, "lon": lon}, attrs={ "long_name": "area_per_cell", "description": "area per cell", "units": "m^2", }, ) return xda
[docs]def earth_radius(lat: NDArray[np.float_]) -> NDArray[np.float_]: """ Calculate radius of Earth assuming oblate spheroid Defined by WGS84 Parameters ---------- lat: vector or latitudes in degrees Returns ------- Vector of radius in meters Notes ----- WGS84: https://earth-info.nga.mil/GandG/publications/tr8350.2/tr8350.2-a/Chapter%203.pdf """ # define oblate spheroid from WGS84 a = 6378137 b = 6356752.3142 e2 = 1 - (b**2 / a**2) # convert from geodecic to geocentric # see equation 3-110 in WGS84 lat = np.deg2rad(lat) lat_gc = np.arctan((1 - e2) * np.tan(lat)) # radius equation # see equation 3-107 in WGS84 r: NDArray[np.float_] = (a * (1 - e2) ** 0.5) / ( 1 - (e2 * np.cos(lat_gc) ** 2) ) ** 0.5 return r
[docs]def clip_region(da: T, boundary: geopandas.GeoDataFrame) -> T: """ Clip a region out of a larger DS Parameters ---------- da boundary Boundary to cut out GeoJSON is expected so it must have a geometry array Returns ------- Dataset which only includes the selected area """ return cast( T, da.rio.set_spatial_dims("lon", "lat") .rio.write_crs("WGS84") .rio.clip(boundary.geometry.values, all_touched=True) .drop_vars("spatial_ref"), )
[docs]def weighted_annual_mean( ds: xr.Dataset, variable: str ) -> xr.DataArray: # pragma: no cover """ Calculate a weighted temporal annual mean This method takes into account the different number of days in each month Parameters ---------- ds variable Variable to calculate the weighted mean of Returns ------- Weighted annual mean of the target variable """ # Determine the month length month_length = ds.time.dt.days_in_month weights = ( month_length.groupby("time.year") / month_length.groupby("time.year").sum() ) # Make sure the weights in each year add up to 1 np.testing.assert_allclose(weights.groupby("time.year").sum(xr.ALL_DIMS), 1.0) target = ds[variable] # Mask out any nan values ones: xr.DataArray = xr.where(target.isnull(), 0.0, 1.0) # type: ignore obs_sum = (target * weights).resample(time="AS").sum(dim="time") ones_out = (ones * weights).resample(time="AS").sum(dim="time") return cast(xr.DataArray, obs_sum / ones_out)
[docs]@contextmanager def chdir(current_directory: str) -> Generator[None, None, None]: """ Context manager to temporarily change the current working directory Should not be used in async or parallel methods as it changes the global state Parameters ---------- current_directory """ previous = os.getcwd() try: os.chdir(current_directory) yield finally: os.chdir(previous)
[docs]def load_australia_boundary() -> geopandas.GeoDataFrame: """ Load Australia boundary shapefile Returns ------- GeoDataFrame containing borders for each state """ aus_boundary_dir = pooch.retrieve( "https://www.github.com/wmgeolab/geoBoundaries/raw/c9c6efd0c2e035a5453fd8549bd1ca507a3910b4/releaseData/gbOpen/AUS/ADM1/geoBoundaries-AUS-ADM1-all.zip", known_hash="d531bbed14d9c98652b619cffa6bcdaa972ea49eff1f74b4650c0287deb5ffe9", processor=pooch.Unzip(), ) aus_boundary = geopandas.read_file( os.path.join(os.path.dirname(aus_boundary_dir[0]), "geoBoundaries-AUS-ADM1.shp") ).to_crs("EPSG:4326") return aus_boundary
[docs]def covers(dataarray: xr.DataArray, dim: str, value: float) -> bool: """ Check if a dimension of a DataArray can be interpolated for a given value If this check fails an extrapolation will be required Parameters ---------- dataarray DataArray to check dim Dimension of interest value Value to check Returns ------- True if `value` could be interpolated in `DataArray`'s dimension `dim` """ return bool(dataarray[dim].min() <= value <= dataarray[dim].max())