Coverage for src / basanos / math / _engine_ic.py: 100%

60 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 17:47 +0000

1"""Signal-evaluation mixin for BasanosEngine. 

2 

3Provides information-coefficient (IC) metrics as a reusable mixin so that 

4``optimizer.py`` stays focused on the core position-solving logic. 

5 

6Classes in this module are **private implementation details**. The public API 

7is :class:`~basanos.math.optimizer.BasanosEngine`, which inherits from 

8:class:`_SignalEvaluatorMixin`. 

9""" 

10 

11from __future__ import annotations 

12 

13from typing import TYPE_CHECKING 

14 

15import numpy as np 

16import polars as pl 

17from scipy.stats import spearmanr 

18 

19if TYPE_CHECKING: 

20 from ._engine_protocol import _EngineProtocol 

21 

22 

def _cross_section_corr(signal: np.ndarray, fwd_ret: np.ndarray, use_rank: bool) -> float:
    """Correlate one row of signals with the matching row of forward returns.

    Only positions where both inputs are finite take part.

    Args:
        signal: 1-D array of signal values for a single timestamp.
        fwd_ret: 1-D array of forward returns, aligned with ``signal``.
        use_rank: ``True`` for Spearman rank correlation, ``False`` for Pearson.

    Returns:
        float: The correlation, or ``NaN`` when fewer than two finite pairs
        exist or when either side is constant across the finite pairs.
    """
    mask = np.isfinite(signal) & np.isfinite(fwd_ret)
    if int(mask.sum()) < 2:
        return float("nan")

    x = signal[mask]
    y = fwd_ret[mask]

    # A constant vector has zero variance, so neither Pearson nor Spearman is
    # defined. Answer NaN directly instead of letting NumPy/SciPy warn about
    # it (or hand back rounding noise).
    if np.all(x == x[0]) or np.all(y == y[0]):
        return float("nan")

    if use_rank:
        corr, _ = spearmanr(x, y)
        return float(corr)
    return float(np.corrcoef(x, y)[0, 1])


def _ic_values(prices_np: np.ndarray, mu_np: np.ndarray, use_rank: bool) -> list[float]:
    """Compute one IC value for every row of ``prices_np`` except the last.

    Args:
        prices_np: 2-D float array of prices (rows are timestamps, columns are assets).
        mu_np: 2-D float array of signals, indexed the same way as ``prices_np``.
        use_rank: ``True`` for Spearman rank correlation, ``False`` for Pearson.

    Returns:
        list[float]: ``max(T - 1, 0)`` values where ``T`` is the number of
        rows in ``prices_np``; entry ``t`` correlates ``mu_np[t]`` with
        ``prices_np[t + 1] / prices_np[t] - 1``.
    """
    n_periods = max(prices_np.shape[0] - 1, 0)

    # Zero or NaN prices yield inf/NaN returns that are masked out on purpose
    # further down, so the floating-point warnings they trigger are only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        fwd_ret = prices_np[1:] / prices_np[:-1] - 1.0
        return [_cross_section_corr(mu_np[t], fwd_ret[t], use_rank) for t in range(n_periods)]


def _finite_values(series: pl.Series) -> np.ndarray:
    """Return the finite entries of ``series`` as a NumPy array (nulls dropped first)."""
    values = series.drop_nulls().to_numpy()
    return values[np.isfinite(values)]


def _mean_or_nan(finite: np.ndarray) -> float:
    """Return the arithmetic mean of ``finite``, or ``NaN`` when it is empty."""
    return float(np.mean(finite)) if len(finite) > 0 else float("nan")


def _sample_std_or_nan(finite: np.ndarray) -> float:
    """Return the ``ddof=1`` standard deviation of ``finite``, or ``NaN`` below two values."""
    return float(np.std(finite, ddof=1)) if len(finite) > 1 else float("nan")


class _SignalEvaluatorMixin:
    """Mixin providing cross-sectional information-coefficient (IC) metrics.

    The consuming class must satisfy :class:`~._engine_protocol._EngineProtocol`,
    i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    """

    def _ic_series(self: _EngineProtocol, use_rank: bool) -> pl.DataFrame:
        """Compute the cross-sectional IC time series.

        For each timestamp *t* (from 0 to T-2), correlates the signal vector
        ``mu[t, :]`` with the one-period forward return vector
        ``prices[t+1, :] / prices[t, :] - 1`` across all assets where both
        quantities are finite. The IC value is ``NaN`` when fewer than two
        valid asset pairs are available, or when the signal or the return is
        constant across the valid pairs (the correlation is undefined).

        Args:
            use_rank: When ``True`` the Spearman rank correlation is used
                (Rank IC); when ``False`` the Pearson correlation is used (IC).

        Returns:
            pl.DataFrame: Two-column frame with ``date`` (signal date) and
            either ``ic`` or ``rank_ic``.
        """
        assets = self.assets
        prices_np = self.prices.select(assets).to_numpy().astype(float)
        mu_np = self.mu.select(assets).to_numpy().astype(float)
        dates = self.prices["date"].to_list()

        col_name = "rank_ic" if use_rank else "ic"
        ic_values = _ic_values(prices_np, mu_np, use_rank)

        # The last date has no forward return, so it carries no IC value.
        return pl.DataFrame({"date": dates[:-1], col_name: pl.Series(ic_values, dtype=pl.Float64)})

    @property
    def ic(self: _EngineProtocol) -> pl.DataFrame:
        """Cross-sectional Pearson Information Coefficient (IC) time series.

        For each timestamp *t* (excluding the last), computes the Pearson
        correlation between the signal ``mu[t, :]`` and the one-period forward
        return ``prices[t+1, :] / prices[t, :] - 1`` across all assets where
        both quantities are finite.

        An IC value close to +1 means the signal ranked assets in the same
        order as forward returns; close to -1 means the opposite; near 0 means
        no predictive relationship.

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'ic']``. ``date`` is
            the timestamp at which the signal was observed. ``ic`` is a
            ``Float64`` series (``NaN`` when the correlation is undefined for
            a given timestamp, e.g. fewer than 2 valid asset pairs).

        See Also:
            :py:attr:`rank_ic` — Spearman variant, more robust to outliers.
            :py:attr:`ic_mean`, :py:attr:`ic_std`, :py:attr:`icir` — summary
            statistics.
        """
        return self._ic_series(use_rank=False)

    @property
    def rank_ic(self: _EngineProtocol) -> pl.DataFrame:
        """Cross-sectional Spearman Rank Information Coefficient time series.

        Identical to :py:attr:`ic` but uses the Spearman rank correlation
        instead of the Pearson correlation, making it more robust to fat-tailed
        return distributions and outliers.

        Returns:
            pl.DataFrame: Frame with columns ``['date', 'rank_ic']``.
            ``rank_ic`` is a ``Float64`` series.

        See Also:
            :py:attr:`ic` — Pearson variant.
            :py:attr:`rank_ic_mean`, :py:attr:`rank_ic_std` — summary
            statistics.
        """
        return self._ic_series(use_rank=True)

    @property
    def ic_mean(self) -> float:
        """Mean of the IC time series, ignoring NaN values.

        Returns:
            float: Arithmetic mean of all finite IC values, or ``NaN`` if
            no finite values exist.
        """
        return _mean_or_nan(_finite_values(self.ic["ic"]))

    @property
    def ic_std(self) -> float:
        """Standard deviation of the IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Returns:
            float: Sample standard deviation of all finite IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return _sample_std_or_nan(_finite_values(self.ic["ic"]))

    @property
    def icir(self) -> float:
        """Information Coefficient Information Ratio (ICIR).

        Defined as ``IC mean / IC std``. A higher absolute ICIR indicates a
        more consistent signal: the mean IC is large relative to its
        variability.

        Returns:
            float: IC mean divided by IC sample standard deviation, or
            ``NaN`` when the standard deviation is zero or non-finite.
        """
        # Build the IC series once; both statistics come from the same values.
        finite = _finite_values(self.ic["ic"])
        std = _sample_std_or_nan(finite)
        if not np.isfinite(std) or std == 0.0:
            return float("nan")
        return float(_mean_or_nan(finite) / std)

    @property
    def rank_ic_mean(self) -> float:
        """Mean of the Rank IC time series, ignoring NaN values.

        Returns:
            float: Arithmetic mean of all finite Rank IC values, or ``NaN``
            if no finite values exist.
        """
        return _mean_or_nan(_finite_values(self.rank_ic["rank_ic"]))

    @property
    def rank_ic_std(self) -> float:
        """Standard deviation of the Rank IC time series, ignoring NaN values.

        Uses ``ddof=1`` (sample standard deviation).

        Returns:
            float: Sample standard deviation of all finite Rank IC values, or
            ``NaN`` if fewer than 2 finite values exist.
        """
        return _sample_std_or_nan(_finite_values(self.rank_ic["rank_ic"]))

193 return float(np.std(finite, ddof=1)) if len(finite) > 1 else float("nan")