Coverage for src/basanos/math/_engine_ic.py: 100%
59 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 05:58 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 05:58 +0000
"""Signal-evaluation mixin for BasanosEngine.

Provides information-coefficient (IC) metrics as a reusable mixin so that
``optimizer.py`` stays focused on the core position-solving logic.

Classes in this module are **private implementation details**. The public API
is `BasanosEngine`, which inherits from
`_SignalEvaluatorMixin`.
"""
11from __future__ import annotations
13from typing import TYPE_CHECKING
15import numpy as np
16import polars as pl
17from scipy.stats import spearmanr
19if TYPE_CHECKING:
20 from ._engine_protocol import _EngineProtocol
def _finite_values(series: pl.Series) -> np.ndarray:
    """Return the finite entries of an IC column as a NumPy array.

    Nulls are dropped first, then ``NaN`` and ``±inf`` entries are filtered
    out, so the summary statistics only ever see usable observations.
    """
    values = series.drop_nulls().to_numpy()
    return values[np.isfinite(values)]


def _finite_mean(values: np.ndarray) -> float:
    """Return the arithmetic mean of *values*, or ``NaN`` when it is empty."""
    return float(np.mean(values)) if len(values) > 0 else float("nan")


def _finite_std(values: np.ndarray) -> float:
    """Return the sample (``ddof=1``) standard deviation, or ``NaN`` below 2 values."""
    return float(np.std(values, ddof=1)) if len(values) > 1 else float("nan")


def _cross_sectional_corr(signal: np.ndarray, fwd_ret: np.ndarray, use_rank: bool) -> float:
    """Correlate one cross-section of signals with its forward returns.

    Args:
        signal: Signal values for a single timestamp, one entry per asset.
        fwd_ret: Forward returns for the same timestamp and assets.
        use_rank: ``True`` for Spearman rank correlation, ``False`` for
            Pearson correlation.

    Returns:
        float: The correlation over the assets where both inputs are finite,
        or ``NaN`` when fewer than two such assets exist or when either
        side is constant (the correlation is then undefined).
    """
    mask = np.isfinite(signal) & np.isfinite(fwd_ret)
    if int(mask.sum()) < 2:
        return float("nan")

    x = signal[mask]
    y = fwd_ret[mask]

    # A constant vector has zero variance, so the correlation is undefined.
    # Returning NaN explicitly avoids the library warnings and, for Pearson,
    # the spurious finite value that rounding of the mean can produce.
    if x.min() == x.max() or y.min() == y.max():
        return float("nan")

    if use_rank:
        corr, _ = spearmanr(x, y)
        return float(corr)
    return float(np.corrcoef(x, y)[0, 1])


class _SignalEvaluatorMixin:
    """Mixin providing cross-sectional information-coefficient (IC) metrics.

    The consuming class must satisfy `_EngineProtocol`,
    i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    """

    def _ic_series(self: _EngineProtocol, use_rank: bool, h: int = 1) -> pl.DataFrame:
        """Compute the cross-sectional IC time series.

        For each timestamp *t* (from 0 to T-1-h), correlates the signal vector
        ``mu[t, :]`` with the *h*-period forward return vector
        ``prices[t+h, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite. When fewer than two valid asset pairs are
        available, or when the valid signals or returns are all identical,
        the IC value is set to ``NaN``.

        Args:
            use_rank: When ``True`` the Spearman rank correlation is used
                (Rank IC); when ``False`` the Pearson correlation is used (IC).
            h: Forward-return horizon in periods. ``h=1`` (default) gives the
                classic one-period IC; ``h=5`` evaluates signal quality against
                five-period returns. Must be >= 1.

        Returns:
            pl.DataFrame: Two-column frame with ``date`` (signal date) and
            either ``ic`` or ``rank_ic``. The ``date`` column is a slice of
            ``prices['date']`` and therefore keeps its dtype, also when the
            result is empty.

        Raises:
            ValueError: If *h* < 1.
        """
        if h < 1:
            msg = f"h must be >= 1, got {h}"
            raise ValueError(msg)

        assets = self.assets
        prices_np = self.prices.select(assets).to_numpy().astype(float)
        mu_np = self.mu.select(assets).to_numpy().astype(float)

        date_col = self.prices["date"]
        # A horizon longer than the history leaves nothing to evaluate.
        n_obs = max(len(date_col) - h, 0)

        # All forward returns at once. Zero or missing prices yield inf/NaN
        # entries that are masked out per row, so the warnings are noise.
        with np.errstate(divide="ignore", invalid="ignore"):
            fwd_returns = prices_np[h:] / prices_np[:-h] - 1.0

        ic_values = [_cross_sectional_corr(mu_np[t], fwd_returns[t], use_rank) for t in range(n_obs)]

        col_name = "rank_ic" if use_rank else "ic"
        return pl.DataFrame(
            {
                "date": date_col.head(n_obs),
                col_name: pl.Series(ic_values, dtype=pl.Float64),
            }
        )

    def ic(self: _EngineProtocol, h: int = 1) -> pl.DataFrame:
        """Cross-sectional Pearson Information Coefficient (IC) time series.

        For each timestamp *t*, computes the Pearson correlation between the
        signal ``mu[t, :]`` and the *h*-period forward return
        ``prices[t+h, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite.

        An IC value close to +1 means the signal was almost perfectly
        linearly aligned with forward returns; close to -1 means the
        opposite; near 0 means no linear predictive relationship.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'ic']``. ``date`` is
            the timestamp at which the signal was observed. ``ic`` is a
            ``Float64`` series (``NaN`` when the correlation is undefined
            for a given timestamp, e.g. fewer than 2 valid asset pairs).

        See Also:
            `rank_ic` — Spearman variant, more robust to outliers.
            `ic_mean`, `ic_std`, `icir` — summary
            statistics.
        """
        return self._ic_series(use_rank=False, h=h)

    def rank_ic(self: _EngineProtocol, h: int = 1) -> pl.DataFrame:
        """Cross-sectional Spearman Rank Information Coefficient time series.

        Identical to `ic` but uses the Spearman rank correlation
        instead of the Pearson correlation, making it more robust to fat-tailed
        return distributions and outliers.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'rank_ic']``.
            ``rank_ic`` is a ``Float64`` series.

        See Also:
            `ic` — Pearson variant.
            `rank_ic_mean`, `rank_ic_std` — summary
            statistics.
        """
        return self._ic_series(use_rank=True, h=h)

    def ic_mean(self: _EngineProtocol, h: int = 1) -> float:
        """Mean of the IC time series, ignoring NaN values.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Arithmetic mean of all finite IC values, or ``NaN`` if
            no finite values exist.
        """
        return _finite_mean(_finite_values(self._ic_series(use_rank=False, h=h)["ic"]))

    def ic_std(self: _EngineProtocol, h: int = 1) -> float:
        """Standard deviation of the IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Sample standard deviation of all finite IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return _finite_std(_finite_values(self._ic_series(use_rank=False, h=h)["ic"]))

    def icir(self: _EngineProtocol, h: int = 1) -> float:
        """Information Coefficient Information Ratio (ICIR).

        Defined as ``IC mean / IC std``. A higher absolute ICIR indicates a
        more consistent signal: the mean IC is large relative to its
        variability.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: ``ic_mean / ic_std``, or ``NaN`` when ``ic_std`` is zero
            or non-finite.
        """
        # Build the series once and derive both moments from the same values.
        finite = _finite_values(self._ic_series(use_rank=False, h=h)["ic"])
        mean = _finite_mean(finite)
        std = _finite_std(finite)
        if not np.isfinite(std) or std == 0.0:
            return float("nan")
        return float(mean / std)

    def rank_ic_mean(self: _EngineProtocol, h: int = 1) -> float:
        """Mean of the Rank IC time series, ignoring NaN values.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Arithmetic mean of all finite Rank IC values, or ``NaN``
            if no finite values exist.
        """
        return _finite_mean(_finite_values(self._ic_series(use_rank=True, h=h)["rank_ic"]))

    def rank_ic_std(self: _EngineProtocol, h: int = 1) -> float:
        """Standard deviation of the Rank IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Sample standard deviation of all finite Rank IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return _finite_std(_finite_values(self._ic_series(use_rank=True, h=h)["rank_ic"]))