Coverage for src / basanos / math / _engine_ic.py: 100%
60 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 17:47 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 17:47 +0000
1"""Signal-evaluation mixin for BasanosEngine.
3Provides information-coefficient (IC) metrics as a reusable mixin so that
4``optimizer.py`` stays focused on the core position-solving logic.
6Classes in this module are **private implementation details**. The public API
7is :class:`~basanos.math.optimizer.BasanosEngine`, which inherits from
8:class:`_SignalEvaluatorMixin`.
9"""
11from __future__ import annotations
13from typing import TYPE_CHECKING
15import numpy as np
16import polars as pl
17from scipy.stats import spearmanr
19if TYPE_CHECKING:
20 from ._engine_protocol import _EngineProtocol
class _SignalEvaluatorMixin:
    """Mixin providing cross-sectional information-coefficient (IC) metrics.

    The consuming class must satisfy :class:`~._engine_protocol._EngineProtocol`,
    i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals

    ``mu`` is indexed with the same row positions as ``prices``, so the two
    frames are expected to be row-aligned.
    """

    # ------------------------------------------------------------------
    # Private helpers shared by the summary-statistic properties
    # ------------------------------------------------------------------

    @staticmethod
    def _finite_values(column: pl.Series) -> np.ndarray:
        """Return the finite entries of *column* as a NumPy array.

        Nulls are dropped first, then ``NaN`` / ``±inf`` entries are removed.
        """
        arr = column.drop_nulls().to_numpy()
        return arr[np.isfinite(arr)]

    @staticmethod
    def _finite_mean(finite: np.ndarray) -> float:
        """Return the arithmetic mean of *finite*, or ``NaN`` when it is empty."""
        return float(np.mean(finite)) if len(finite) > 0 else float("nan")

    @staticmethod
    def _finite_std(finite: np.ndarray) -> float:
        """Return the sample (``ddof=1``) std of *finite*, or ``NaN`` below 2 values."""
        return float(np.std(finite, ddof=1)) if len(finite) > 1 else float("nan")

    # ------------------------------------------------------------------
    # IC time series
    # ------------------------------------------------------------------

    def _ic_series(self: _EngineProtocol, use_rank: bool) -> pl.DataFrame:
        """Compute the cross-sectional IC time series.

        For each timestamp *t* (from 0 to T-2), correlates the signal vector
        ``mu[t, :]`` with the one-period forward return vector
        ``prices[t+1, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite. When fewer than two valid asset pairs are
        available, the IC value is set to ``NaN``. The IC is also ``NaN``
        when the correlation itself is undefined, e.g. when either vector is
        constant across the valid assets.

        Args:
            use_rank: When ``True`` the Spearman rank correlation is used
                (Rank IC); when ``False`` the Pearson correlation is used (IC).

        Returns:
            pl.DataFrame: Two-column frame with ``date`` (signal date) and
            either ``ic`` or ``rank_ic``. The frame has one row per price
            row except the last, which has no forward return.
        """
        assets = self.assets
        prices_np = self.prices.select(assets).to_numpy().astype(float)
        mu_np = self.mu.select(assets).to_numpy().astype(float)
        dates = self.prices["date"].to_list()

        col_name = "rank_ic" if use_rank else "ic"

        # All one-period forward returns at once. Zero or NaN prices yield
        # inf/NaN here; those entries are removed by the finite-mask below,
        # so the NumPy floating-point warnings would only be noise.
        with np.errstate(divide="ignore", invalid="ignore"):
            fwd_rets = prices_np[1:] / prices_np[:-1] - 1.0

        ic_values: list[float] = []
        for t in range(len(dates) - 1):
            fwd_ret = fwd_rets[t]
            signal = mu_np[t]

            # Both signal and forward return must be finite
            mask = np.isfinite(signal) & np.isfinite(fwd_ret)
            n_valid = int(mask.sum())

            if n_valid < 2:
                # A correlation needs at least two observations.
                ic_values.append(float("nan"))
            elif use_rank:
                corr, _ = spearmanr(signal[mask], fwd_ret[mask])
                ic_values.append(float(corr))
            else:
                ic_values.append(float(np.corrcoef(signal[mask], fwd_ret[mask])[0, 1]))

        # The last date has no forward return and therefore no IC value.
        return pl.DataFrame({"date": dates[:-1], col_name: pl.Series(ic_values, dtype=pl.Float64)})

    @property
    def ic(self: _EngineProtocol) -> pl.DataFrame:
        """Cross-sectional Pearson Information Coefficient (IC) time series.

        For each timestamp *t* (excluding the last), computes the Pearson
        correlation between the signal ``mu[t, :]`` and the one-period forward
        return ``prices[t+1, :] / prices[t, :] - 1`` across all assets where
        both quantities are finite.

        An IC value close to +1 means the signal ranked assets in the same
        order as forward returns; close to -1 means the opposite; near 0 means
        no predictive relationship.

        The series is recomputed on every access.

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'ic']``. ``date`` is
            the timestamp at which the signal was observed. ``ic`` is a
            ``Float64`` series (``NaN`` when fewer than 2 valid asset pairs
            are available for a given timestamp).

        See Also:
            :py:attr:`rank_ic` — Spearman variant, more robust to outliers.
            :py:attr:`ic_mean`, :py:attr:`ic_std`, :py:attr:`icir` — summary
            statistics.
        """
        return self._ic_series(use_rank=False)

    @property
    def rank_ic(self: _EngineProtocol) -> pl.DataFrame:
        """Cross-sectional Spearman Rank Information Coefficient time series.

        Identical to :py:attr:`ic` but uses the Spearman rank correlation
        instead of the Pearson correlation, making it more robust to fat-tailed
        return distributions and outliers.

        The series is recomputed on every access.

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'rank_ic']``.
            ``rank_ic`` is a ``Float64`` series.

        See Also:
            :py:attr:`ic` — Pearson variant.
            :py:attr:`rank_ic_mean`, :py:attr:`rank_ic_std` — summary
            statistics.
        """
        return self._ic_series(use_rank=True)

    # ------------------------------------------------------------------
    # Summary statistics
    # ------------------------------------------------------------------

    @property
    def ic_mean(self) -> float:
        """Mean of the IC time series, ignoring NaN values.

        Returns:
            float: Arithmetic mean of all finite IC values, or ``NaN`` if
            no finite values exist.
        """
        return self._finite_mean(self._finite_values(self.ic["ic"]))

    @property
    def ic_std(self) -> float:
        """Standard deviation of the IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Returns:
            float: Sample standard deviation of all finite IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return self._finite_std(self._finite_values(self.ic["ic"]))

    @property
    def icir(self) -> float:
        """Information Coefficient Information Ratio (ICIR).

        Defined as ``IC mean / IC std``. A higher absolute ICIR indicates a
        more consistent signal: the mean IC is large relative to its
        variability.

        Returns:
            float: IC mean divided by IC sample standard deviation, or
            ``NaN`` when the standard deviation is zero or non-finite.
        """
        # Build the IC series once and derive both moments from it, rather
        # than recomputing the whole series for the mean and again for the std.
        finite = self._finite_values(self.ic["ic"])
        std = self._finite_std(finite)
        if not np.isfinite(std) or std == 0.0:
            return float("nan")
        return float(self._finite_mean(finite) / std)

    @property
    def rank_ic_mean(self) -> float:
        """Mean of the Rank IC time series, ignoring NaN values.

        Returns:
            float: Arithmetic mean of all finite Rank IC values, or ``NaN``
            if no finite values exist.
        """
        return self._finite_mean(self._finite_values(self.rank_ic["rank_ic"]))

    @property
    def rank_ic_std(self) -> float:
        """Standard deviation of the Rank IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Returns:
            float: Sample standard deviation of all finite Rank IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return self._finite_std(self._finite_values(self.rank_ic["rank_ic"]))