Coverage for src/basanos/math/_engine_ic.py

1"""Signal-evaluation mixin for BasanosEngine.

3Provides information-coefficient (IC) metrics as a reusable mixin so that

4``optimizer.py`` stays focused on the core position-solving logic.

6Classes in this module are **private implementation details**. The public API

7is `BasanosEngine`, which inherits from

8`_SignalEvaluatorMixin`.

9"""

11from __future__ import annotations

13from typing import TYPE_CHECKING

15import numpy as np

16import polars as pl

17from scipy.stats import spearmanr

19if TYPE_CHECKING:

20 from ._engine_protocol import _EngineProtocol

class _SignalEvaluatorMixin:
    """Mixin providing cross-sectional information-coefficient (IC) metrics.

    The consuming class must satisfy `_EngineProtocol`, i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    """

    def _ic_series(self: _EngineProtocol, use_rank: bool, h: int = 1) -> pl.DataFrame:
        """Compute the cross-sectional IC time series.

        For each timestamp *t* (from 0 to T-1-h), correlates the signal vector
        ``mu[t, :]`` with the *h*-period forward return vector
        ``prices[t+h, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite. When fewer than two valid asset pairs are
        available, the IC value is set to ``NaN``.

        Args:
            use_rank: When ``True`` the Spearman rank correlation is used
                (Rank IC); when ``False`` the Pearson correlation is used (IC).
            h: Forward-return horizon in periods. ``h=1`` (default) gives the
                classic one-period IC; ``h=5`` evaluates signal quality against
                five-period returns. Must be >= 1.

        Returns:
            pl.DataFrame: Two-column frame with ``date`` (signal date) and
            either ``ic`` or ``rank_ic``. The ``date`` column is sliced
            directly from ``prices['date']`` so it keeps that column's dtype,
            even when the result is empty (``h`` >= number of rows).

        Raises:
            ValueError: If *h* < 1.
        """
        if h < 1:
            msg = f"h must be >= 1, got {h}"
            raise ValueError(msg)

        assets = self.assets
        prices_np = self.prices.select(assets).to_numpy().astype(float)
        mu_np = self.mu.select(assets).to_numpy().astype(float)
        date_col = self.prices["date"]

        # Only timestamps that still have a price row h steps ahead can be
        # scored; clamp at zero so an over-long horizon yields an empty frame.
        n_obs = max(len(date_col) - h, 0)

        # Forward returns for every scorable timestamp in one vectorised step:
        # row t holds prices[t + h] / prices[t] - 1.
        fwd_returns = prices_np[h:] / prices_np[:n_obs] - 1.0

        ic_values: list[float] = []
        for t in range(n_obs):
            signal = mu_np[t]
            fwd_ret = fwd_returns[t]

            # Both signal and forward return must be finite
            mask = np.isfinite(signal) & np.isfinite(fwd_ret)

            if int(mask.sum()) < 2:
                # A correlation needs at least two observations.
                ic_values.append(float("nan"))
            elif use_rank:
                corr, _ = spearmanr(signal[mask], fwd_ret[mask])
                ic_values.append(float(corr))
            else:
                ic_values.append(float(np.corrcoef(signal[mask], fwd_ret[mask])[0, 1]))

        col_name = "rank_ic" if use_rank else "ic"
        # Slice the original Series instead of round-tripping through a Python
        # list: this keeps the date dtype intact (an empty list would become a
        # Null-typed column, and nanosecond datetimes would be downgraded).
        return pl.DataFrame(
            {
                "date": date_col.head(n_obs),
                col_name: pl.Series(ic_values, dtype=pl.Float64),
            }
        )

    def ic(self: _EngineProtocol, h: int = 1) -> pl.DataFrame:
        """Cross-sectional Pearson Information Coefficient (IC) time series.

        For each timestamp *t*, computes the Pearson correlation between the
        signal ``mu[t, :]`` and the *h*-period forward return
        ``prices[t+h, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite.

        An IC value close to +1 means the signal moved with forward returns
        across assets; close to -1 means the opposite; near 0 means no
        predictive relationship.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'ic']``. ``date`` is
            the timestamp at which the signal was observed. ``ic`` is a
            ``Float64`` series (``NaN`` when fewer than 2 valid asset pairs
            are available for a given timestamp).

        See Also:
            `rank_ic` — Spearman variant, more robust to outliers.
            `ic_mean`, `ic_std`, `icir` — summary statistics.
        """
        return self._ic_series(use_rank=False, h=h)

    def rank_ic(self: _EngineProtocol, h: int = 1) -> pl.DataFrame:
        """Cross-sectional Spearman Rank Information Coefficient time series.

        Identical to `ic` but uses the Spearman rank correlation
        instead of the Pearson correlation, making it more robust to fat-tailed
        return distributions and outliers.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'rank_ic']``.
            ``rank_ic`` is a ``Float64`` series.

        See Also:
            `ic` — Pearson variant.
            `rank_ic_mean`, `rank_ic_std` — summary statistics.
        """
        return self._ic_series(use_rank=True, h=h)

    def ic_mean(self: _EngineProtocol, h: int = 1) -> float:
        """Mean of the IC time series, ignoring NaN values.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Arithmetic mean of all finite IC values, or ``NaN`` if
            no finite values exist.
        """
        arr = self._ic_series(use_rank=False, h=h)["ic"].drop_nulls().to_numpy()
        return _finite_mean(arr)

    def ic_std(self: _EngineProtocol, h: int = 1) -> float:
        """Standard deviation of the IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Sample standard deviation of all finite IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        arr = self._ic_series(use_rank=False, h=h)["ic"].drop_nulls().to_numpy()
        return _finite_std(arr)

    def icir(self: _EngineProtocol, h: int = 1) -> float:
        """Information Coefficient Information Ratio (ICIR).

        Defined as ``IC mean / IC std``. A higher absolute ICIR indicates a
        more consistent signal: the mean IC is large relative to its
        variability.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: ``ic_mean / ic_std``, or ``NaN`` when ``ic_std`` is zero
            or non-finite.
        """
        # Build the series once and derive both statistics from it.
        arr = self._ic_series(use_rank=False, h=h)["ic"].drop_nulls().to_numpy()
        mean = _finite_mean(arr)
        std = _finite_std(arr)
        if not np.isfinite(std) or std == 0.0:
            return float("nan")
        return float(mean / std)

    def rank_ic_mean(self: _EngineProtocol, h: int = 1) -> float:
        """Mean of the Rank IC time series, ignoring NaN values.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Arithmetic mean of all finite Rank IC values, or ``NaN``
            if no finite values exist.
        """
        arr = self._ic_series(use_rank=True, h=h)["rank_ic"].drop_nulls().to_numpy()
        return _finite_mean(arr)

    def rank_ic_std(self: _EngineProtocol, h: int = 1) -> float:
        """Standard deviation of the Rank IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Sample standard deviation of all finite Rank IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        arr = self._ic_series(use_rank=True, h=h)["rank_ic"].drop_nulls().to_numpy()
        return _finite_std(arr)


def _finite_mean(values: np.ndarray) -> float:
    """Return the arithmetic mean of the finite entries of *values*.

    NaN and infinite entries are discarded first. Returns ``NaN`` when no
    finite entry remains.
    """
    arr = np.asarray(values, dtype=float)
    finite = arr[np.isfinite(arr)]
    return float(np.mean(finite)) if len(finite) > 0 else float("nan")


def _finite_std(values: np.ndarray) -> float:
    """Return the sample standard deviation (``ddof=1``) of the finite entries.

    NaN and infinite entries are discarded first. Returns ``NaN`` when fewer
    than two finite entries remain, since the sample estimate is undefined.
    """
    arr = np.asarray(values, dtype=float)
    finite = arr[np.isfinite(arr)]
    return float(np.std(finite, ddof=1)) if len(finite) > 1 else float("nan")

Coverage for src/basanos/math/_engine_ic.py: 100%

59 statements