Coverage for src/basanos/math/_engine_diagnostics.py

1"""Diagnostics mixin for BasanosEngine.

3Provides matrix-quality and solver-quality properties as a reusable mixin so

4that ``optimizer.py`` stays focused on the core position-solving logic.

6Classes in this module are **private implementation details**. The public API

7is :class:`~basanos.math.optimizer.BasanosEngine`, which inherits from

8:class:`_DiagnosticsMixin`.

9"""

11from __future__ import annotations

13import logging

14from typing import TYPE_CHECKING

16import numpy as np

17import polars as pl

19from ..exceptions import SingularMatrixError

20from ._linalg import solve, valid

22if TYPE_CHECKING:

23 from ._engine_protocol import _EngineProtocol

25_logger = logging.getLogger(__name__)

class _DiagnosticsMixin:
    """Mixin that adds matrix-quality and solver-quality diagnostics to an engine.

    The host class is expected to satisfy
    :class:`~._engine_protocol._EngineProtocol`. The properties defined here
    read the following members of ``self``:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    * ``_iter_matrices()`` — generator yielding ``(i, t, mask, bundle)``
    """

    @property
    def condition_number(self: _EngineProtocol) -> pl.DataFrame:
        """Condition number κ of the per-timestamp matrix from ``_iter_matrices()``.

        For every timestamp the bundle's ``matrix`` is passed through
        ``valid`` and ``numpy.linalg.cond`` is evaluated on the matrix that
        ``valid`` returns (presumably the sub-matrix of assets with finite
        prices — confirm against ``_linalg.valid``).

        Returns:
            pl.DataFrame: Two columns, ``'date'`` (taken from ``self.prices``)
            and ``'condition_number'`` (Float64). The value is ``NaN`` when
            the bundle is ``None`` or when ``valid`` flags no entry as usable.
        """
        nan = float(np.nan)

        def _kappa_of(bundle) -> float:
            # No matrix bundle for this timestamp: nothing to measure.
            if bundle is None:
                return nan
            usable_flags, sub_matrix = valid(bundle.matrix)
            # ``valid`` marked nothing as usable: condition number is undefined.
            if not usable_flags.any():
                return nan
            return float(np.linalg.cond(sub_matrix))

        kappa_values: list[float] = [_kappa_of(bundle) for _row, _stamp, _sel, bundle in self._iter_matrices()]

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "condition_number": pl.Series(kappa_values, dtype=pl.Float64),
            }
        )

    @property
    def effective_rank(self: _EngineProtocol) -> pl.DataFrame:
        r"""Entropy-based effective rank of the per-timestamp matrix.

        The eigenvalues :math:`\lambda_i` of the matrix returned by ``valid``
        are clipped at zero, normalised to sum to one and turned into

        .. math::

            \text{eff\_rank} = \exp\!\left(-\sum_i p_i \ln p_i\right),
            \quad p_i = \frac{\lambda_i}{\sum_j \lambda_j}

        A perfectly uniform spectrum gives a value equal to the matrix
        dimension; a rank-1 matrix gives 1.

        Returns:
            pl.DataFrame: Two columns, ``'date'`` (taken from ``self.prices``)
            and ``'effective_rank'`` (Float64). The value is ``NaN`` when the
            bundle is ``None``, when ``valid`` flags no entry as usable, or
            when the clipped eigenvalues sum to zero or less.
        """
        nan = float(np.nan)

        def _rank_of(bundle) -> float:
            if bundle is None:
                return nan
            usable_flags, sub_matrix = valid(bundle.matrix)
            if not usable_flags.any():
                return nan
            # Negative eigenvalues are clipped to zero so that the normalised
            # spectrum can be treated as a probability vector.
            spectrum = np.clip(np.linalg.eigvalsh(sub_matrix), 0.0, None)
            mass = spectrum.sum()
            if mass <= 0.0:
                return nan
            weights = spectrum / mass
            # Zero weights are dropped before the log; they contribute nothing
            # to the entropy.
            positive = weights[weights > 0.0]
            entropy = float(-np.sum(positive * np.log(positive)))
            return float(np.exp(entropy))

        rank_values: list[float] = [_rank_of(bundle) for _row, _stamp, _sel, bundle in self._iter_matrices()]

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "effective_rank": pl.Series(rank_values, dtype=pl.Float64),
            }
        )

    @property
    def solver_residual(self: _EngineProtocol) -> pl.DataFrame:
        r"""Euclidean residual ``‖C·x - μ‖₂`` of the linear solve at each timestamp.

        At every timestamp the signal row is restricted to ``mask``, its NaNs
        are replaced by zero, and ``solve(C, μ)`` is called with the bundle's
        ``matrix``. The residual norm is then evaluated on the entries of the
        solution that are finite.

        Returns:
            pl.DataFrame: Two columns, ``'date'`` (taken from ``self.prices``)
            and ``'residual'`` (Float64). The value is

            * ``0.0`` when the masked signal is all close to zero (no solve is
              performed);
            * ``NaN`` when the bundle is ``None``, when ``solve`` raises
              :class:`SingularMatrixError` (a warning is logged), or when the
              solution has no finite entry.
        """
        nan = float(np.nan)
        asset_names = self.assets
        signal = self.mu.select(asset_names).to_numpy()

        def _residual_at(row, stamp, mask, bundle) -> float:
            if bundle is None:
                return nan
            corr = bundle.matrix
            rhs = np.nan_to_num(signal[row][mask])
            # An all-zero right-hand side is reported as an exact zero residual
            # without calling the solver.
            if np.allclose(rhs, 0.0):
                return 0.0
            try:
                solution = solve(corr, rhs)
            except SingularMatrixError:
                # Degenerate matrix: the residual is undefined.
                _logger.warning(
                    "solver_residual: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; residual set to NaN.",
                    stamp,
                )
                return nan
            usable = np.isfinite(solution)
            if not usable.any():
                return nan
            # Evaluate C·x - μ only on the rows/columns whose solution is finite.
            defect = corr[np.ix_(usable, usable)] @ solution[usable] - rhs[usable]
            return float(np.linalg.norm(defect))

        residual_values: list[float] = [
            _residual_at(row, stamp, mask, bundle) for row, stamp, mask, bundle in self._iter_matrices()
        ]

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "residual": pl.Series(residual_values, dtype=pl.Float64),
            }
        )

    @property
    def signal_utilisation(self: _EngineProtocol) -> pl.DataFrame:
        r"""Per-asset ratio of the solved signal to the raw signal.

        For each timestamp and each asset *i* selected by ``mask`` this is

        .. math::

            u_i = \frac{(C^{-1}\,\mu)_i}{\mu_i}

        where :math:`C^{-1}\,\mu` is the result of ``solve`` on the bundle's
        ``matrix`` and the NaN-filled, masked signal row. With
        :math:`C = I` every ratio equals 1.

        Special values:

        * ``0.0`` for all masked assets when the whole masked signal is close
          to zero (no solve is performed).
        * ``NaN`` for an asset whose :math:`|\mu_i|` does not exceed ``1e-14``,
          for assets outside ``mask``, for timestamps whose bundle is ``None``,
          and for timestamps where ``solve`` raises
          :class:`SingularMatrixError` (a warning is logged).

        Returns:
            pl.DataFrame: ``self.prices`` with every asset column replaced by
            its utilisation series (presumably the frame holds only ``'date'``
            plus the asset columns — confirm against the engine).
        """
        asset_names = self.assets
        signal = self.mu.select(asset_names).to_numpy()

        # |μ_i| at or below this threshold is treated as zero so that the
        # division does not produce spuriously large ratios.
        tiny_signal = 1e-14
        asset_count = len(asset_names)
        # Start from NaN everywhere; only entries that are actually computed
        # get overwritten.
        utilisation = np.full((self.prices.height, asset_count), np.nan)

        for row, stamp, mask, bundle in self._iter_matrices():
            if bundle is None:
                continue
            corr = bundle.matrix
            rhs = np.nan_to_num(signal[row][mask])
            if np.allclose(rhs, 0.0):
                utilisation[row, mask] = 0.0
                continue
            try:
                solution = solve(corr, rhs)
            except SingularMatrixError:
                # Degenerate matrix: leave this row at NaN.
                _logger.warning(
                    "signal_utilisation: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; utilisation set to NaN.",
                    stamp,
                )
                continue
            significant = np.abs(rhs) > tiny_signal
            # The quotient is computed for every entry; division warnings are
            # silenced because insignificant entries are replaced by NaN.
            with np.errstate(divide="ignore", invalid="ignore"):
                quotient = np.where(significant, solution / rhs, np.nan)
            utilisation[row, mask] = quotient

        replacement_columns = [pl.lit(utilisation[:, col]).alias(name) for col, name in enumerate(asset_names)]
        return self.prices.with_columns(replacement_columns)

Coverage for src / basanos / math / _engine_diagnostics.py: 100%

94 statements