Coverage for src/basanos/math/_engine_ic.py: 100%

59 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 05:58 +0000

1"""Signal-evaluation mixin for BasanosEngine. 

2 

3Provides information-coefficient (IC) metrics as a reusable mixin so that 

4``optimizer.py`` stays focused on the core position-solving logic. 

5 

6Classes in this module are **private implementation details**. The public API 

7is `BasanosEngine`, which inherits from 

8`_SignalEvaluatorMixin`. 

9""" 

10 

11from __future__ import annotations 

12 

13from typing import TYPE_CHECKING 

14 

15import numpy as np 

16import polars as pl 

17from scipy.stats import spearmanr 

18 

19if TYPE_CHECKING: 

20 from ._engine_protocol import _EngineProtocol 

21 

22 

def _finite_values(frame: pl.DataFrame, column: str) -> np.ndarray:
    """Return the finite entries of ``frame[column]`` as a NumPy array.

    Nulls are dropped first; ``NaN`` and infinite values are then filtered out.
    """
    values = frame[column].drop_nulls().to_numpy()
    return values[np.isfinite(values)]


def _mean_or_nan(values: np.ndarray) -> float:
    """Return the arithmetic mean of *values*, or ``NaN`` when it is empty."""
    return float(np.mean(values)) if len(values) > 0 else float("nan")


def _sample_std_or_nan(values: np.ndarray) -> float:
    """Return the ``ddof=1`` standard deviation, or ``NaN`` for fewer than 2 values."""
    return float(np.std(values, ddof=1)) if len(values) > 1 else float("nan")


class _SignalEvaluatorMixin:
    """Mixin providing cross-sectional information-coefficient (IC) metrics.

    The consuming class must satisfy `_EngineProtocol`,
    i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    """

    def _ic_series(self: _EngineProtocol, use_rank: bool, h: int = 1) -> pl.DataFrame:
        """Compute the cross-sectional IC time series.

        For each timestamp *t* (from 0 to T-1-h), correlates the signal vector
        ``mu[t, :]`` with the *h*-period forward return vector
        ``prices[t+h, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite. When fewer than two valid asset pairs are
        available, the IC value is set to ``NaN``.

        Args:
            use_rank: When ``True`` the Spearman rank correlation is used
                (Rank IC); when ``False`` the Pearson correlation is used (IC).
            h: Forward-return horizon in periods. ``h=1`` (default) gives the
                classic one-period IC; ``h=5`` evaluates signal quality against
                five-period returns. Must be >= 1.

        Returns:
            pl.DataFrame: Two-column frame with ``date`` (signal date) and
            either ``ic`` or ``rank_ic``.

        Raises:
            ValueError: If *h* < 1.
        """
        if h < 1:
            msg = f"h must be >= 1, got {h}"
            raise ValueError(msg)

        assets = self.assets
        prices_np = self.prices.select(assets).to_numpy().astype(float)
        mu_np = self.mu.select(assets).to_numpy().astype(float)
        dates = self.prices["date"].to_list()

        col_name = "rank_ic" if use_rank else "ic"
        n_obs = max(len(dates) - h, 0)

        # All forward returns are computed in one vectorised step. Zero, NaN or
        # extreme prices legitimately produce non-finite returns, which are
        # masked out below, so the floating-point warnings are suppressed
        # rather than surfaced (they would turn into errors under ``-W error``).
        with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
            fwd_returns = prices_np[h:] / prices_np[:-h] - 1.0

        ic_values: list[float] = []
        for t in range(n_obs):
            signal = mu_np[t]
            fwd_ret = fwd_returns[t]

            # Both signal and forward return must be finite
            mask = np.isfinite(signal) & np.isfinite(fwd_ret)

            if int(mask.sum()) < 2:
                # A correlation is undefined for fewer than two pairs.
                ic_values.append(float("nan"))
            elif use_rank:
                corr, _ = spearmanr(signal[mask], fwd_ret[mask])
                ic_values.append(float(corr))
            else:
                ic_values.append(float(np.corrcoef(signal[mask], fwd_ret[mask])[0, 1]))

        # The last h dates have no forward return and are therefore omitted.
        ic_dates = dates[:n_obs]
        return pl.DataFrame({"date": ic_dates, col_name: pl.Series(ic_values, dtype=pl.Float64)})

    def ic(self: _EngineProtocol, h: int = 1) -> pl.DataFrame:
        """Cross-sectional Pearson Information Coefficient (IC) time series.

        For each timestamp *t*, computes the Pearson correlation between the
        signal ``mu[t, :]`` and the *h*-period forward return
        ``prices[t+h, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite.

        An IC value close to +1 means the signal ranked assets in the same
        order as forward returns; close to -1 means the opposite; near 0 means
        no predictive relationship.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'ic']``. ``date`` is
            the timestamp at which the signal was observed. ``ic`` is a
            ``Float64`` series (``NaN`` when fewer than 2 valid asset pairs
            are available for a given timestamp).

        Raises:
            ValueError: If *h* < 1.

        See Also:
            `rank_ic` — Spearman variant, more robust to outliers.
            `ic_mean`, `ic_std`, `icir` — summary
            statistics.
        """
        return self._ic_series(use_rank=False, h=h)

    def rank_ic(self: _EngineProtocol, h: int = 1) -> pl.DataFrame:
        """Cross-sectional Spearman Rank Information Coefficient time series.

        Identical to `ic` but uses the Spearman rank correlation
        instead of the Pearson correlation, making it more robust to fat-tailed
        return distributions and outliers.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'rank_ic']``.
            ``rank_ic`` is a ``Float64`` series.

        Raises:
            ValueError: If *h* < 1.

        See Also:
            `ic` — Pearson variant.
            `rank_ic_mean`, `rank_ic_std` — summary
            statistics.
        """
        return self._ic_series(use_rank=True, h=h)

    def ic_mean(self: _EngineProtocol, h: int = 1) -> float:
        """Mean of the IC time series, ignoring NaN values.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Arithmetic mean of all finite IC values, or ``NaN`` if
            no finite values exist.
        """
        return _mean_or_nan(_finite_values(self._ic_series(use_rank=False, h=h), "ic"))

    def ic_std(self: _EngineProtocol, h: int = 1) -> float:
        """Standard deviation of the IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Sample standard deviation of all finite IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return _sample_std_or_nan(_finite_values(self._ic_series(use_rank=False, h=h), "ic"))

    def icir(self: _EngineProtocol, h: int = 1) -> float:
        """Information Coefficient Information Ratio (ICIR).

        Defined as ``IC mean / IC std``. A higher absolute ICIR indicates a
        more consistent signal: the mean IC is large relative to its
        variability.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: ``ic_mean / ic_std``, or ``NaN`` when ``ic_std`` is zero
            or non-finite.
        """
        # The series is built once and reused for both statistics instead of
        # calling ``ic_mean`` and ``ic_std``, which would each recompute it.
        finite = _finite_values(self._ic_series(use_rank=False, h=h), "ic")
        mean = _mean_or_nan(finite)
        std = _sample_std_or_nan(finite)
        if not np.isfinite(std) or std == 0.0:
            return float("nan")
        return float(mean / std)

    def rank_ic_mean(self: _EngineProtocol, h: int = 1) -> float:
        """Mean of the Rank IC time series, ignoring NaN values.

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Arithmetic mean of all finite Rank IC values, or ``NaN``
            if no finite values exist.
        """
        return _mean_or_nan(_finite_values(self._ic_series(use_rank=True, h=h), "rank_ic"))

    def rank_ic_std(self: _EngineProtocol, h: int = 1) -> float:
        """Standard deviation of the Rank IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Args:
            h: Forward-return horizon in periods (default 1).

        Returns:
            float: Sample standard deviation of all finite Rank IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return _sample_std_or_nan(_finite_values(self._ic_series(use_rank=True, h=h), "rank_ic"))