Coverage for src/basanos/math/_engine_diagnostics.py: 100%

94 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 17:47 +0000

1"""Diagnostics mixin for BasanosEngine. 

2 

3Provides matrix-quality and solver-quality properties as a reusable mixin so 

4that ``optimizer.py`` stays focused on the core position-solving logic. 

5 

6Classes in this module are **private implementation details**. The public API 

7is :class:`~basanos.math.optimizer.BasanosEngine`, which inherits from 

8:class:`_DiagnosticsMixin`. 

9""" 

10 

11from __future__ import annotations 

12 

13import logging 

14from typing import TYPE_CHECKING 

15 

16import numpy as np 

17import polars as pl 

18 

19from ..exceptions import SingularMatrixError 

20from ._linalg import solve, valid 

21 

22if TYPE_CHECKING: 

23 from ._engine_protocol import _EngineProtocol 

24 

25_logger = logging.getLogger(__name__) 

26 

27 

class _DiagnosticsMixin:
    """Matrix-quality and solver-quality diagnostics, packaged as a mixin.

    Every property annotates ``self`` as
    :class:`~._engine_protocol._EngineProtocol`, so the host class is expected
    to provide:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    * ``_iter_matrices()`` — generator yielding ``(i, t, mask, bundle)``, where
      ``bundle`` is either ``None`` or an object carrying a ``matrix`` attribute
    """

    @property
    def condition_number(self: _EngineProtocol) -> pl.DataFrame:
        """Condition number κ of the effective correlation matrix, one value per timestamp.

        The matrix is taken from the same covariance mode that drives
        :attr:`cash_position`: the shrunk EWMA matrix for ``ewma_shrink`` and
        the factor-model covariance for ``sliding_window``. It is reduced with
        ``valid`` to the sub-matrix of assets with finite prices at that
        timestamp before ``numpy.linalg.cond`` is applied.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'condition_number': ...}``.
            The value is ``NaN`` when no matrix bundle exists for the timestamp
            or when ``valid`` reports no usable entry.
        """
        nan = float("nan")

        def _kappa_of(bundle) -> float:
            # A missing bundle means there is nothing to measure at this timestamp.
            if bundle is None:
                return nan
            usable, sub_matrix = valid(bundle.matrix)
            if not usable.any():
                return nan
            return float(np.linalg.cond(sub_matrix))

        values = [_kappa_of(bundle) for _idx, _stamp, _sel, bundle in self._iter_matrices()]
        dates = self.prices["date"]
        return pl.DataFrame({"date": dates, "condition_number": pl.Series(values, dtype=pl.Float64)})

    @property
    def effective_rank(self: _EngineProtocol) -> pl.DataFrame:
        r"""Entropy-based effective rank of the effective correlation matrix per timestamp.

        The effective rank summarises how many dimensions the spectrum really
        occupies:

        .. math::

            \text{eff\_rank} = \exp\!\left(-\sum_i p_i \ln p_i\right),
            \quad p_i = \frac{\lambda_i}{\sum_j \lambda_j}

        Here :math:`\lambda_i` are the eigenvalues of the effective correlation
        matrix (restricted to assets with finite prices at that timestamp),
        clipped at zero before normalisation. The matrix follows the same
        covariance mode as :attr:`cash_position`. A result equal to the number
        of assets corresponds to a perfectly uniform spectrum; a result of 1
        corresponds to a rank-1 matrix.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'effective_rank': ...}``.
            The value is ``NaN`` when no matrix bundle exists, when ``valid``
            reports no usable entry, or when the clipped eigenvalues sum to a
            non-positive number.
        """
        nan = float("nan")

        def _rank_of(bundle) -> float:
            if bundle is None:
                return nan
            usable, sub_matrix = valid(bundle.matrix)
            if not usable.any():
                return nan
            # Tiny negative eigenvalues are numerical noise; clip them so the
            # normalised spectrum is a proper probability vector.
            spectrum = np.clip(np.linalg.eigvalsh(sub_matrix), 0.0, None)
            mass = spectrum.sum()
            if mass <= 0.0:
                return nan
            weights = spectrum / mass
            # Zero weights contribute nothing to the entropy and would hit log(0).
            nonzero = weights[weights > 0.0]
            entropy = float(-np.sum(nonzero * np.log(nonzero)))
            return float(np.exp(entropy))

        values = [_rank_of(bundle) for _idx, _stamp, _sel, bundle in self._iter_matrices()]
        dates = self.prices["date"]
        return pl.DataFrame({"date": dates, "effective_rank": pl.Series(values, dtype=pl.Float64)})

    @property
    def solver_residual(self: _EngineProtocol) -> pl.DataFrame:
        """Euclidean residual ``‖C·x - μ‖₂`` of the linear solve at each timestamp.

        At every timestamp the normalised system ``C · x = μ`` is solved and
        the norm of ``C·x - μ`` is reported, evaluated over the entries of
        ``x`` that came back finite. For a well-posed, well-conditioned system
        the residual sits near machine epsilon; large values flag numerical
        difficulties (near-singular matrices, extreme condition numbers, or
        solver fall-back to LU). The matrix follows the same covariance mode
        as :attr:`cash_position`.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'residual': ...}``.
            ``0.0`` is reported when ``μ`` is (numerically) the zero vector, in
            which case no solve is performed. ``NaN`` is reported when no
            matrix bundle exists for the timestamp, when the solver raises
            ``SingularMatrixError`` (a warning is logged), or when no entry of
            the solution is finite.
        """
        nan = float("nan")
        names = self.assets
        signals = self.mu.select(names).to_numpy()

        def _residual_at(row, stamp, mask, bundle) -> float:
            if bundle is None:
                return nan
            corr = bundle.matrix
            # Missing signals are treated as zero for the masked assets.
            rhs = np.nan_to_num(signals[row][mask])
            if np.allclose(rhs, 0.0):
                return 0.0
            try:
                solution = solve(corr, rhs)
            except SingularMatrixError:
                # The covariance matrix is degenerate — residual is undefined.
                _logger.warning(
                    "solver_residual: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; residual set to NaN.",
                    stamp,
                )
                return nan
            finite = np.isfinite(solution)
            if not finite.any():
                return nan
            # Only the finite part of the solution enters the residual.
            gap = corr[np.ix_(finite, finite)] @ solution[finite] - rhs[finite]
            return float(np.linalg.norm(gap))

        values = [_residual_at(row, stamp, mask, bundle) for row, stamp, mask, bundle in self._iter_matrices()]
        dates = self.prices["date"]
        return pl.DataFrame({"date": dates, "residual": pl.Series(values, dtype=pl.Float64)})

    @property
    def signal_utilisation(self: _EngineProtocol) -> pl.DataFrame:
        r"""Fraction of each asset's signal μ_i that survives the correlation filter.

        For every asset *i* and timestamp *t* the ratio

        .. math::

            u_i = \frac{(C^{-1}\,\mu)_i}{\mu_i}

        is computed, where :math:`C^{-1}\,\mu` is the unnormalised solve result
        obtained with the effective correlation matrix of the current
        :attr:`~BasanosConfig.covariance_mode`. With :math:`C = I` (identity)
        every asset has utilisation 1. Off-diagonal correlations attenuate
        some assets (:math:`u_i < 1`) and may amplify negatively correlated
        ones (:math:`u_i > 1`).

        ``0.0`` is written for the masked assets when the whole signal vector
        :math:`\mu` is near zero at a timestamp (no solve is performed).
        ``NaN`` is left in place for individual assets whose :math:`|\mu_i|`
        is below the internal tolerance, for assets outside the timestamp's
        mask, for timestamps without a matrix bundle, and for timestamps where
        the solver raises ``SingularMatrixError`` (a warning is logged).

        Returns:
            pl.DataFrame: ``prices`` with each asset column replaced by its
            utilisation values, i.e. columns ``['date'] + assets``.
        """
        names = self.assets
        signals = self.mu.select(names).to_numpy()

        # |μ_i| at or below this threshold counts as zero, which avoids
        # spurious huge ratios from dividing by a vanishing signal.
        tiny = 1e-14
        # Start from all-NaN; only entries that are actually computed get overwritten.
        table = np.full((self.prices.height, len(names)), np.nan)

        for row, stamp, mask, bundle in self._iter_matrices():
            if bundle is None:
                continue
            corr = bundle.matrix
            rhs = np.nan_to_num(signals[row][mask])
            if np.allclose(rhs, 0.0):
                table[row, mask] = 0.0
                continue
            try:
                solution = solve(corr, rhs)
            except SingularMatrixError:
                # The covariance matrix is degenerate — utilisation is undefined.
                _logger.warning(
                    "signal_utilisation: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; utilisation set to NaN.",
                    stamp,
                )
                continue
            # The division is evaluated for every entry before np.where picks
            # the usable ones, so silence divide-by-zero / invalid warnings.
            with np.errstate(divide="ignore", invalid="ignore"):
                table[row, mask] = np.where(np.abs(rhs) > tiny, solution / rhs, np.nan)

        replacements = [pl.lit(table[:, col]).alias(name) for col, name in enumerate(names)]
        return self.prices.with_columns(replacements)