Coverage for src/basanos/math/_engine_diagnostics.py: 100%

93 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 05:58 +0000

1"""Diagnostics mixin for BasanosEngine. 

2 

3Provides matrix-quality and solver-quality properties as a reusable mixin so 

4that ``optimizer.py`` stays focused on the core position-solving logic. 

5 

6Classes in this module are **private implementation details**. The public API 

7is `BasanosEngine`, which inherits from 

8`_DiagnosticsMixin`. 

9""" 

10 

11from __future__ import annotations 

12 

13import logging 

14from typing import TYPE_CHECKING 

15 

16import numpy as np 

17import polars as pl 

18from cvx.linalg import SingularMatrixError, solve, valid 

19 

20if TYPE_CHECKING: 

21 from ._engine_protocol import _EngineProtocol 

22 

23_logger = logging.getLogger(__name__) 

24 

25 

class _DiagnosticsMixin:
    """Read-only diagnostics describing matrix quality and solver quality.

    Designed to be mixed into a class that satisfies `_EngineProtocol`.
    The properties below rely on the host object providing:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    * ``_iter_matrices()`` — generator yielding ``(i, t, mask, bundle)``;
      ``bundle`` may be ``None`` and otherwise exposes a ``matrix`` attribute
    """

    @property
    def condition_number(self: _EngineProtocol) -> pl.DataFrame:
        """Condition number κ of the effective correlation matrix per timestamp.

        The matrix follows the same covariance mode as `cash_position`: the
        shrunk EWMA matrix for ``ewma_shrink`` and the factor-model covariance
        for ``sliding_window``. Only the sub-matrix of assets with finite
        prices at a timestamp enters the computation; timestamps without any
        finite prices yield ``NaN``.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'condition_number': ...}``.
        """
        kappas: list[float] = []
        for _, _, _, bundle in self._iter_matrices():
            kappa = np.nan  # stays NaN when there is no bundle or no usable entry
            if bundle is not None:
                usable, sub = valid(bundle.matrix)
                if usable.any():
                    kappa = float(np.linalg.cond(sub))
            kappas.append(kappa)

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "condition_number": pl.Series(kappas, dtype=pl.Float64),
            }
        )

    @property
    def effective_rank(self: _EngineProtocol) -> pl.DataFrame:
        r"""Entropy-based effective rank of the effective correlation matrix.

        For every timestamp the eigenvalues $\lambda_i$ of the effective
        correlation matrix (restricted to assets with finite prices at that
        timestamp) are normalised into a probability vector and the
        exponential of its Shannon entropy is reported:

        $$
        \text{eff\_rank} = \exp\!\left(-\sum_i p_i \ln p_i\right),
        \quad p_i = \frac{\lambda_i}{\sum_j \lambda_j}
        $$

        Uses the same covariance mode as `cash_position`. A value equal to
        the number of assets indicates a perfectly uniform spectrum; a value
        of 1 indicates a rank-1 matrix.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'effective_rank': ...}``.
        """

        def _entropy_rank(matrix) -> float:
            # Effective rank of one matrix, or NaN when it cannot be computed.
            usable, sub = valid(matrix)
            if not usable.any():
                return np.nan
            # Negative eigenvalues are clipped to zero before normalising.
            spectrum = np.clip(np.linalg.eigvalsh(sub), 0.0, None)
            mass = spectrum.sum()
            if mass <= 0.0:
                return np.nan
            weights = spectrum / mass
            support = weights[weights > 0.0]  # skip zeros: log(0) is undefined
            entropy = float(-np.sum(support * np.log(support)))
            return float(np.exp(entropy))

        ranks: list[float] = [
            np.nan if bundle is None else _entropy_rank(bundle.matrix)
            for _, _, _, bundle in self._iter_matrices()
        ]

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "effective_rank": pl.Series(ranks, dtype=pl.Float64),
            }
        )

    @property
    def solver_residual(self: _EngineProtocol) -> pl.DataFrame:
        r"""Euclidean residual ``‖C·x - μ‖₂`` of the solve at each timestamp.

        The normalised linear system ``C · x = μ`` is solved per timestamp
        and the norm of the residual is reported. For a well-posed,
        well-conditioned system the residual is near machine epsilon; large
        values flag numerical difficulties (near-singular matrices, extreme
        condition numbers, or solver fall-back to LU). Uses the same
        covariance mode as `cash_position`.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'residual': ...}``.
            Zero is returned when ``μ`` is the zero vector (no solve is
            performed). ``NaN`` is returned when no asset has finite prices.
        """
        names = self.assets
        signals = self.mu.select(names).to_numpy()

        def _residual(row, stamp, active, bundle) -> float:
            # Residual norm for a single timestamp; NaN where it is undefined.
            if bundle is None:
                return np.nan
            corr = bundle.matrix
            rhs = np.nan_to_num(signals[row][active])
            if np.allclose(rhs, 0.0):
                return 0.0
            try:
                sol = solve(corr, rhs)
            except SingularMatrixError:
                # The covariance matrix is degenerate — residual is undefined.
                _logger.warning(
                    "solver_residual: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; residual set to NaN.",
                    stamp,
                )
                return np.nan
            ok = np.isfinite(sol)
            if not ok.any():
                return np.nan
            # Only the finite components of the solution enter the residual.
            gap = corr[np.ix_(ok, ok)] @ sol[ok] - rhs[ok]
            return float(np.linalg.norm(gap))

        residuals: list[float] = [
            _residual(row, stamp, active, bundle)
            for row, stamp, active, bundle in self._iter_matrices()
        ]

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "residual": pl.Series(residuals, dtype=pl.Float64),
            }
        )

    @property
    def signal_utilisation(self: _EngineProtocol) -> pl.DataFrame:
        r"""Fraction of each asset's signal μ_i that survives the correlation filter.

        For each asset *i* and timestamp *t*, computes

        $$
        u_i = \frac{(C^{-1}\,\mu)_i}{\mu_i}
        $$

        where $C^{-1}\,\mu$ is the unnormalised solve result using the
        effective correlation matrix for the current `covariance_mode`.
        When $C = I$ (identity) all assets have utilisation 1. Off-diagonal
        correlations attenuate some assets ($u_i < 1$) and may amplify
        negatively correlated ones ($u_i > 1$).

        A value of ``0.0`` is returned when the entire signal vector $\mu$
        is near zero at that timestamp (no solve is performed). ``NaN`` is
        returned for individual assets where $|\mu_i|$ is below
        machine-epsilon precision or where prices are unavailable.

        Returns:
            pl.DataFrame: DataFrame with columns ``['date'] + assets``.
        """
        names = self.assets
        signals = self.mu.select(names).to_numpy()

        tiny = 1e-14  # treat |μ_i| below this as zero to avoid spurious large ratios
        # Every cell starts as NaN; only timestamps that can be evaluated are overwritten.
        table = np.full((self.prices.height, len(names)), np.nan)

        for row, stamp, active, bundle in self._iter_matrices():
            if bundle is None:
                continue
            corr = bundle.matrix
            rhs = np.nan_to_num(signals[row][active])
            if np.allclose(rhs, 0.0):
                table[row, active] = 0.0
                continue
            try:
                sol = solve(corr, rhs)
            except SingularMatrixError:
                # The covariance matrix is degenerate — utilisation is undefined.
                _logger.warning(
                    "signal_utilisation: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; utilisation set to NaN.",
                    stamp,
                )
                continue
            meaningful = np.abs(rhs) > tiny
            # The division runs on every entry; warnings for the masked-out ones are silenced.
            with np.errstate(divide="ignore", invalid="ignore"):
                share = np.where(meaningful, sol / rhs, np.nan)
            table[row, active] = share

        columns = [pl.lit(values).alias(name) for name, values in zip(names, table.T)]
        return self.prices.with_columns(columns)