Commit 03d9c61 (forked from pydata/xarray)

Refactor resampling.
Toward pydata#8510
1. Rename ResampleGrouper to Resampler.
2. Move code from common.resample to TimeResampler.
dcherian committed Jan 3, 2024
1 parent 41d33f5 commit 03d9c61
Showing 2 changed files with 84 additions and 64 deletions.
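
For orientation (not part of the diff): the classes renamed here are internal plumbing behind the public resample entry point. A minimal, hypothetical sketch of a user-level call that ends up in common._resample, which after this commit builds a TimeResampler and wraps it in a ResolvedTimeResampler:

>>> import numpy as np
>>> import pandas as pd
>>> import xarray as xr
>>> ds = xr.Dataset(
...     {"t2m": ("time", np.arange(365.0))},
...     coords={"time": pd.date_range("2000-01-01", periods=365, freq="D")},
... )
>>> ds.resample(time="MS").mean()  # public API; the grouper classes below are internal
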
42 changes: 10 additions & 32 deletions xarray/core/common.py
@@ -18,7 +18,6 @@
from xarray.core.utils import (
Frozen,
either_dict_or_kwargs,
emit_user_level_warning,
is_scalar,
)
from xarray.namedarray.core import _raise_if_any_duplicate_dimensions
@@ -984,8 +983,7 @@ def _resample(
# TODO support non-string indexer after removing the old API.

from xarray.core.dataarray import DataArray
from xarray.core.groupby import ResolvedTimeResampleGrouper, TimeResampleGrouper
from xarray.core.pdcompat import _convert_base_to_offset
from xarray.core.groupby import ResolvedTimeResampler, TimeResampler
from xarray.core.resample import RESAMPLE_DIM

# note: the second argument (now 'skipna') use to be 'dim'
@@ -1008,44 +1006,24 @@
dim_name: Hashable = dim
dim_coord = self[dim]

if loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample is deprecated. "
"Switch to updating the resampled dataset time coordinate using "
"time offset arithmetic. For example:\n"
" >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n"
' >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset',
FutureWarning,
)

if base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if base is not None and offset is not None:
raise ValueError("base and offset cannot be present at the same time")

if base is not None:
index = self._indexes[dim_name].to_pandas_index()
offset = _convert_base_to_offset(base, freq, index)
group = DataArray(
dim_coord,
coords=dim_coord.coords,
dims=dim_coord.dims,
name=RESAMPLE_DIM,
)

grouper = TimeResampleGrouper(
grouper = TimeResampler(
freq=freq,
closed=closed,
label=label,
origin=origin,
offset=offset,
loffset=loffset,
base=base,
)

group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)

rgrouper = ResolvedTimeResampleGrouper(grouper, group, self)
rgrouper = ResolvedTimeResampler(grouper, group, self)

return resample_cls(
self,
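
The loffset handling dropped from common.py above is not gone; the warning moves next to the grouper classes (see groupby.py below), and it steers users toward plain time-offset arithmetic instead. A minimal sketch of that replacement, with a made-up 6-hourly dataset (not from the commit):

>>> import numpy as np
>>> import pandas as pd
>>> import xarray as xr
>>> ds = xr.Dataset(
...     {"x": ("time", np.arange(48.0))},
...     coords={"time": pd.date_range("2000-01-01", periods=48, freq="h")},
... )
>>> resampled_ds = ds.resample(time="6h").mean()
>>> # shift the bin labels by half a bin, instead of passing loffset="3h"
>>> offset = pd.tseries.frequencies.to_offset("6h") / 2
>>> resampled_ds["time"] = resampled_ds.get_index("time") + offset
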
106 changes: 74 additions & 32 deletions xarray/core/groupby.py
@@ -38,6 +38,7 @@
from xarray.core.utils import (
FrozenMappingWarningOnValuesAccess,
either_dict_or_kwargs,
emit_user_level_warning,
hashable,
is_scalar,
maybe_wrap_array,
@@ -482,43 +483,66 @@ def _factorize(self, squeeze: bool) -> T_FactorizeOut:


@dataclass
class ResolvedTimeResampleGrouper(ResolvedGrouper):
grouper: TimeResampleGrouper
class ResolvedTimeResampler(ResolvedGrouper):
grouper: TimeResampler
index_grouper: CFTimeGrouper | pd.Grouper = field(init=False)
group_as_index: pd.Index = field(init=False)

def __post_init__(self):
if self.loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample will be deprecated "
"in a future version of xarray. Switch to using time offset arithmetic.",
FutureWarning,
)

def __post_init__(self) -> None:
super().__post_init__()
if self.base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if self.base is not None and self.offset is not None:
raise ValueError("base and offset cannot be present at the same time")

def _init_properties(self, group):
from xarray import CFTimeIndex
from xarray.core.pdcompat import _convert_base_to_offset

group_as_index = safe_cast_to_index(self.group)
self._group_as_index = group_as_index
group_as_index = safe_cast_to_index(group)

if self.base is not None:
# grouper constructor verifies that grouper.offset is None at this point
offset = _convert_base_to_offset(self.base, self.freq, group_as_index)
else:
offset = self.offset

if not group_as_index.is_monotonic_increasing:
# TODO: sort instead of raising an error
raise ValueError("index must be monotonic for resampling")

grouper = self.grouper
if isinstance(group_as_index, CFTimeIndex):
from xarray.core.resample_cftime import CFTimeGrouper

index_grouper = CFTimeGrouper(
freq=grouper.freq,
closed=grouper.closed,
label=grouper.label,
origin=grouper.origin,
offset=grouper.offset,
loffset=grouper.loffset,
freq=self.freq,
closed=self.closed,
label=self.label,
origin=self.origin,
offset=offset,
loffset=self.loffset,
)
else:
index_grouper = pd.Grouper(
freq=grouper.freq,
closed=grouper.closed,
label=grouper.label,
origin=grouper.origin,
offset=grouper.offset,
freq=self.freq,
closed=self.closed,
label=self.label,
origin=self.origin,
offset=offset,
)
self.index_grouper = index_grouper
self.group_as_index = group_as_index

def _get_index_and_items(self) -> tuple[pd.Index, pd.Series, np.ndarray]:
first_items, codes = self.first_items()
Expand All @@ -543,22 +567,21 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
# So for _flox_reduce we avoid one reindex and copy by avoiding
# _maybe_restore_empty_groups
codes = np.repeat(np.arange(len(first_items)), counts)
if self.grouper.loffset is not None:
_apply_loffset(self.grouper.loffset, first_items)
if self.loffset is not None:
_apply_loffset(self.loffset, first_items)
return first_items, codes

def _factorize(self, squeeze: bool) -> T_FactorizeOut:
def _factorize(self, group) -> T_FactorizeOut:
self._init_properties(group)
full_index, first_items, codes_ = self._get_index_and_items()
sbins = first_items.values.astype(np.int64)
group_indices: T_GroupIndices = [
slice(i, j) for i, j in zip(sbins[:-1], sbins[1:])
]
group_indices += [slice(sbins[-1], None)]

unique_coord = IndexVariable(
self.group.name, first_items.index, self.group.attrs
)
codes = self.group.copy(data=codes_)
unique_coord = IndexVariable(group.name, first_items.index, group.attrs)
codes = group.copy(data=codes_)

return codes, group_indices, unique_coord, full_index

@@ -583,13 +606,32 @@ def __post_init__(self) -> None:


@dataclass
class TimeResampleGrouper(Grouper):
class TimeResampler(Grouper):
freq: str
closed: SideOptions | None
label: SideOptions | None
origin: str | DatetimeLike | None
offset: pd.Timedelta | datetime.timedelta | str | None
loffset: datetime.timedelta | str | None
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
loffset: datetime.timedelta | str | None = field(default=None)
base: str | None = field(default=None)

def __post_init__(self):
if self.loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample will be deprecated "
"in a future version of xarray. Switch to using time offset arithmetic.",
FutureWarning,
)

if self.base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if self.base is not None and self.offset is not None:
raise ValueError("base and offset cannot be present at the same time")


def _validate_groupby_squeeze(squeeze: bool) -> None:
@@ -936,7 +978,7 @@ def _maybe_restore_empty_groups(self, combined):
"""
(grouper,) = self.groupers
if (
isinstance(grouper, (ResolvedBinGrouper, ResolvedTimeResampleGrouper))
isinstance(grouper, (ResolvedBinGrouper, ResolvedTimeResampler))
and grouper.name in combined.dims
):
indexers = {grouper.name: grouper.full_index}
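
base support also survives: TimeResampler keeps a base field, and ResolvedTimeResampler._init_properties converts it into an offset via _convert_base_to_offset before building the pandas or CFTime grouper. At the user level, the warning's suggested migration looks like the following sketch (data and frequencies are made up; base=2 with a 24-hour frequency corresponds to a two-hour offset):

>>> import numpy as np
>>> import pandas as pd
>>> import xarray as xr
>>> ds = xr.Dataset(
...     {"x": ("time", np.arange(96.0))},
...     coords={"time": pd.date_range("2000-01-01", periods=96, freq="h")},
... )
>>> # deprecated spelling, still accepted but warns: ds.resample(time="24h", base=2)
>>> ds.resample(time="24h", offset="2h").mean()  # recommended equivalent
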
