I have two sets of satellite data. For both sets, I have the pixel geometry (latitude and longitude of each corner of the pixel). I would like to regrid one set to the other. Thus, my goal is area-weighted regridding from an irregular grid to another irregular grid. I am aware of xESMF, but am unsure if that is the best tool for the job. Perhaps iris area weighting regrid would be appropriate?
1 Answers
I've ran into similar things in the past. I'm on Windows, and xEMSF wasn't really an option for me.
I've written this package, and added some methods for computing grid to grid weights: https://github.com/Deltares/numba_celltree
(You can pip install it.)
The data structure can deal with fully unstructured 2D meshes, and expects the data in such a format. See the code below.
You will need to make some changes: your coordinates aren't named x and y most likely. You will also need to update the ugrid2d_topology
function somewhat, since I'm assuming regular quadrilateral grids here (but they're irregular when seen in each others coordinate system).
It's still pretty straightforward, just make sure you have 2D array of vertices, and a face_node_connectivity
array of shape (n_cell, 4)
which maps for every face its four vertices. See this documention for a little more background:
https://ugrid-conventions.github.io/ugrid-conventions/
import numpy as np
import pandas as pd
import pyproj
import xarray as xr
from numba_celltree import CellTree2d
FloatArray = np.ndarray
IntArray = np.ndarray
def _coord(da, dim):
"""
Transform N xarray midpoints into N + 1 vertex edges
"""
delta_dim = "d" + dim # e.g. dx, dy, dz, etc.
# If empty array, return empty
if da[dim].size == 0:
return np.array(())
if delta_dim in da.coords: # equidistant or non-equidistant
dx = da[delta_dim].values
if dx.shape == () or dx.shape == (1,): # scalar -> equidistant
dxs = np.full(da[dim].size, dx)
else: # array -> non-equidistant
dxs = dx
_check_monotonic(dxs, dim)
else: # undefined -> equidistant
if da[dim].size == 1:
raise ValueError(
f"DataArray has size 1 along {dim}, so cellsize must be provided"
" as a coordinate."
)
dxs = np.diff(da[dim].values)
dx = dxs[0]
atolx = abs(1.0e-4 * dx)
if not np.allclose(dxs, dx, atolx):
raise ValueError(
f"DataArray has to be equidistant along {dim}, or cellsizes"
" must be provided as a coordinate."
)
dxs = np.full(da[dim].size, dx)
dxs = np.abs(dxs)
x = da[dim].values
if not da.indexes[dim].is_monotonic_increasing:
x = x[::-1]
dxs = dxs[::-1]
# This assumes the coordinate to be monotonic increasing
x0 = x[0] - 0.5 * dxs[0]
x = np.full(dxs.size + 1, x0)
x[1:] += np.cumsum(dxs)
return x
def _ugrid2d_dataset(
node_x: FloatArray,
node_y: FloatArray,
face_x: FloatArray,
face_y: FloatArray,
face_nodes: IntArray,
) -> xr.Dataset:
ds = xr.Dataset()
ds["mesh2d"] = xr.DataArray(
data=0,
attrs={
"cf_role": "mesh_topology",
"long_name": "Topology data of 2D mesh",
"topology_dimension": 2,
"node_coordinates": "node_x node_y",
"face_node_connectivity": "face_nodes",
"edge_node_connectivity": "edge_nodes",
},
)
ds = ds.assign_coords(
node_x=xr.DataArray(
data=node_x,
dims=["node"],
)
)
ds = ds.assign_coords(
node_y=xr.DataArray(
data=node_y,
dims=["node"],
)
)
ds["face_nodes"] = xr.DataArray(
data=face_nodes,
coords={
"face_x": ("face", face_x),
"face_y": ("face", face_y),
},
dims=["face", "nmax_face"],
attrs={
"cf_role": "face_node_connectivity",
"long_name": "Vertex nodes of mesh faces (counterclockwise)",
"start_index": 0,
"_FillValue": -1,
},
)
ds.attrs = {"Conventions": "CF-1.8 UGRID-1.0"}
return ds
def ugrid2d_topology(data: Union[xr.DataArray, xr.Dataset]) -> xr.Dataset:
"""
Derive the 2D-UGRID quadrilateral mesh topology from a structured DataArray
or Dataset, with (2D-dimensions) "y" and "x".
Parameters
----------
data: Union[xr.DataArray, xr.Dataset]
Structured data from which the "x" and "y" coordinate will be used to
define the UGRID-2D topology.
Returns
-------
ugrid_topology: xr.Dataset
Dataset with the required arrays describing 2D unstructured topology:
node_x, node_y, face_x, face_y, face_nodes (connectivity).
"""
# Transform midpoints into vertices
# These are always returned monotonically increasing
x = data["x"].values
xcoord = _coord(data, "x")
if not data.indexes["x"].is_monotonic_increasing:
xcoord = xcoord[::-1]
y = data["y"].values
ycoord = _coord(data, "y")
if not data.indexes["y"].is_monotonic_increasing:
ycoord = ycoord[::-1]
# Compute all vertices, these are the ugrid nodes
node_y, node_x = (a.ravel() for a in np.meshgrid(ycoord, xcoord, indexing="ij"))
face_y, face_x = (a.ravel() for a in np.meshgrid(y, x, indexing="ij"))
linear_index = np.arange(node_x.size, dtype=np.int32).reshape(
ycoord.size, xcoord.size
)
# Allocate face_node_connectivity
nfaces = (ycoord.size - 1) * (xcoord.size - 1)
face_nodes = np.empty((nfaces, 4))
# Set connectivity in counterclockwise manner
face_nodes[:, 0] = linear_index[:-1, 1:].ravel() # upper right
face_nodes[:, 1] = linear_index[:-1, :-1].ravel() # upper left
face_nodes[:, 2] = linear_index[1:, :-1].ravel() # lower left
face_nodes[:, 3] = linear_index[1:, 1:].ravel() # lower right
# Tie it together
ds = _ugrid2d_dataset(node_x, node_y, face_x, face_y, face_nodes)
return ds
def area_weighted_mean(
da: xr.DataArray,
destination_index: np.ndarray,
source_index: np.ndarray,
weights: np.ndarray,
):
"""
Area weighted mean.
Parameters
----------
da: xr.DataArray
Contains source data.
destination_index: np.ndarray
In which destination the overlap is located.
source_index: np.ndarray
In which source cell the overlap is located.
weights: np.ndarray
Area of each overlap.
Returns
-------
destination_index: np.ndarray
values: np.ndarray
"""
values = da.data.ravel()[source_index]
df = pd.DataFrame(
{"dst": destination_index, "area": weights, "av": weights * values}
)
aggregated = df.groupby("dst").sum("sum", min_count=1)
out = aggregated["av"] / aggregated["area"]
return out.index.values, out.values
class Regridder:
"""
Regridder to reproject and/or regrid rasters. When no ``crs_source`` and
``crs_destination`` are provided, it is assumed that ``source`` and
``destination`` share the same coordinate system.
Note that an area weighted regridding method only makes sense for projected
(Cartesian!) coordinate systems.
Parameters
----------
source: xr.DataArray
Source example. Must have dimensions ("y", "x").
destination: xr.DataArray
Destination example. Must have dimensions ("y", "x").
crs_source: optional, default: None
crs_destination: optional, default: None
"""
def __init__(
self,
source: xr.DataArray,
destination: xr.DataArray,
crs_source=None,
crs_destination=None,
):
src = ugrid2d_topology(source)
dst = ugrid2d_topology(destination)
src_yy = src["node_y"].values
src_xx = src["node_x"].values
if crs_source and crs_destination:
transformer = pyproj.Transformer.from_crs(
crs_from=crs_source, crs_to=crs_destination, always_xy=True
)
src_xx, src_yy = transformer.transform(xx=src_xx, yy=src_yy)
elif crs_source ^ crs_destination:
raise ValueError("Received only one of (crs_source, crs_destination)")
src_vertices = np.column_stack([src_xx, src_yy])
src_faces = src["face_nodes"].values.astype(int)
dst_vertices = np.column_stack((dst["node_x"].values, dst["node_y"].values))
dst_faces = dst["face_nodes"].values
celltree = CellTree2d(src_vertices, src_faces, fill_value=-1)
self.source = source.copy()
self.destination = destination.copy()
(
self.destination_index,
self.source_index,
self.weights,
) = celltree.intersect_faces(
dst_vertices,
dst_faces,
fill_value=-1,
)
def regrid(self, da: xr.DataArray, fill_value=np.nan):
"""
Parameters
----------
da: xr.DataArray
Data to regrid.
fill_value: optional, default: np.nan
Default value of the output grid, e.g. where no overlap occurs.
Returns
-------
regridded: xr.DataArray
Data of da, regridded using an area weighted mean.
"""
src = self.source
if not (np.allclose(da["y"], src["y"]) and np.allclose(da["x"], src["x"])):
raise ValueError("da does not match source")
index, values = area_weighted_mean(
da,
self.destination_index,
self.source_index,
self.weights,
)
data = np.full(self.destination.shape, fill_value)
data.ravel()[index] = values
out = self.destination.copy(data=data)
out.name = da.name
return out
# Example use
da = xr.open_dataarray("gw_abstraction_sum.nc")
like = xr.open_dataarray("example.nc")
regridder = Regridder(
source=da, destination=like, crs_source=4326, crs_destination=3035
)
result = regridder.regrid(da)
result.to_netcdf("area-weighted_sum.nc")

- 451
- 2
- 6