# Coverage for src/basanos/math/_engine_diagnostics.py: 100% (93 statements);
# report generated by coverage.py v7.14.1, created at 2026-06-23 05:58 +0000.
"""Diagnostics mixin for BasanosEngine.

Provides matrix-quality and solver-quality properties as a reusable mixin so
that ``optimizer.py`` stays focused on the core position-solving logic.

Classes in this module are **private implementation details**. The public API
is `BasanosEngine`, which inherits from
`_DiagnosticsMixin`.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING

import numpy as np
import polars as pl
from cvx.linalg import SingularMatrixError, solve, valid

if TYPE_CHECKING:
    # Needed only for the ``self: _EngineProtocol`` annotations below; with
    # ``from __future__ import annotations`` they are never evaluated at runtime.
    # NOTE(review): presumably guarded to avoid an import cycle — confirm.
    from ._engine_protocol import _EngineProtocol

# Module-level logger; the diagnostics use it to warn when a solve raises
# ``SingularMatrixError``.
_logger = logging.getLogger(__name__)
class _DiagnosticsMixin:
    """Mixin providing matrix-quality and solver-quality diagnostic properties.

    The consuming class must satisfy `_EngineProtocol`,
    i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    * ``_iter_matrices()`` — generator yielding ``(i, t, mask, bundle)``

    Every property walks ``_iter_matrices()`` once and produces one value (or
    one row of values) per yielded item.
    NOTE(review): the results are paired with ``prices`` row by row, so this
    assumes exactly one yielded item per row of ``prices`` — confirm against
    the engine.
    """

    @property
    def condition_number(self: _EngineProtocol) -> pl.DataFrame:
        """Condition number κ of the effective correlation matrix at each timestamp.

        Uses the same covariance mode as `cash_position`: for
        ``ewma_shrink`` this is the shrunk EWMA matrix; for ``sliding_window``
        it is the factor-model covariance. Only the sub-matrix corresponding
        to assets with finite prices at that timestamp is used; rows with no
        finite prices yield ``NaN``.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'condition_number': ...}``.
        """
        kappas: list[float] = []
        for _i, _t, _mask, bundle in self._iter_matrices():
            if bundle is None:
                # No matrix was produced for this timestamp.
                kappas.append(np.nan)
                continue
            # ``valid`` returns a boolean selector and the reduced matrix.
            # NOTE(review): presumably it keeps rows/columns with a finite
            # diagonal — confirm against cvx.linalg.
            keep, mat = valid(bundle.matrix)
            if not keep.any():
                # Nothing usable is left, so κ is undefined.
                kappas.append(np.nan)
                continue
            kappas.append(float(np.linalg.cond(mat)))

        return pl.DataFrame({"date": self.prices["date"], "condition_number": pl.Series(kappas, dtype=pl.Float64)})

    @property
    def effective_rank(self: _EngineProtocol) -> pl.DataFrame:
        r"""Effective rank of the effective correlation matrix at each timestamp.

        Measures the true dimensionality of the portfolio by computing the
        entropy-based effective rank:

        $$
        \text{eff\_rank} = \exp\!\left(-\sum_i p_i \ln p_i\right),
        \quad p_i = \frac{\lambda_i}{\sum_j \lambda_j}
        $$

        where $\lambda_i$ are the eigenvalues of the effective
        correlation matrix (restricted to assets with finite prices at that
        timestamp). Uses the same covariance mode as `cash_position`.
        A value equal to the number of assets indicates a perfectly uniform
        spectrum; a value of 1 indicates a rank-1 matrix.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'effective_rank': ...}``.
            ``NaN`` is reported when no matrix is available, when no valid
            sub-matrix remains, or when the clipped eigenvalues sum to zero.
        """
        ranks: list[float] = []
        for _i, _t, _mask, bundle in self._iter_matrices():
            if bundle is None:
                ranks.append(np.nan)
                continue
            keep, mat = valid(bundle.matrix)
            if not keep.any():
                ranks.append(np.nan)
                continue
            # Symmetric eigen-solver; negative eigenvalues are clipped to zero
            # so that the normalised spectrum below is a probability vector.
            eigvals = np.clip(np.linalg.eigvalsh(mat), 0.0, None)
            total = eigvals.sum()
            if total <= 0.0:
                # All-zero spectrum: p_i would be 0/0.
                ranks.append(np.nan)
                continue
            p = eigvals / total
            # Drop exact zeros: 0 * log(0) would evaluate to NaN, whereas its
            # limit (and intended contribution to the entropy) is 0.
            p_pos = p[p > 0.0]
            entropy = float(-np.sum(p_pos * np.log(p_pos)))
            ranks.append(float(np.exp(entropy)))

        return pl.DataFrame({"date": self.prices["date"], "effective_rank": pl.Series(ranks, dtype=pl.Float64)})

    @property
    def solver_residual(self: _EngineProtocol) -> pl.DataFrame:
        r"""Per-timestamp solver residual ``‖C·x - μ‖₂``.

        After solving the normalised linear system ``C · x = μ`` at
        each timestamp, this property reports the Euclidean residual norm.
        For a well-posed, well-conditioned system the residual is near machine
        epsilon; large values flag numerical difficulties (near-singular
        matrices, extreme condition numbers, or solver fall-back to LU).
        Uses the same covariance mode as `cash_position`.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'residual': ...}``.
            Zero is returned when ``μ`` is the zero vector (no solve is
            performed). ``NaN`` is returned when no asset has finite prices,
            when the solve raises ``SingularMatrixError`` (a warning is
            logged), or when the solution has no finite entry.
        """
        assets = self.assets
        mu_np = self.mu.select(assets).to_numpy()

        residuals: list[float] = []
        for i, t, mask, bundle in self._iter_matrices():
            if bundle is None:
                residuals.append(np.nan)
                continue
            matrix = bundle.matrix
            # Signal restricted to the masked assets; NaN signals count as 0.
            expected_mu = np.nan_to_num(mu_np[i][mask])
            if np.allclose(expected_mu, 0.0):
                # Zero right-hand side: x = 0 solves the system exactly.
                residuals.append(0.0)
                continue
            try:
                x = solve(matrix, expected_mu)
            except SingularMatrixError:
                # The covariance matrix is degenerate — residual is undefined.
                _logger.warning(
                    "solver_residual: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; residual set to NaN.",
                    t,
                )
                residuals.append(np.nan)
                continue
            # Evaluate the residual only where the solver produced a finite
            # value; non-finite entries would otherwise poison the norm.
            finite_x = np.isfinite(x)
            if not finite_x.any():
                residuals.append(np.nan)
                continue
            residuals.append(
                float(np.linalg.norm(matrix[np.ix_(finite_x, finite_x)] @ x[finite_x] - expected_mu[finite_x]))
            )

        return pl.DataFrame({"date": self.prices["date"], "residual": pl.Series(residuals, dtype=pl.Float64)})

    @property
    def signal_utilisation(self: _EngineProtocol) -> pl.DataFrame:
        r"""Per-asset signal utilisation: fraction of μ_i surviving the correlation filter.

        For each asset *i* and timestamp *t*, computes

        $$
        u_i = \frac{(C^{-1}\,\mu)_i}{\mu_i}
        $$

        where $C^{-1}\,\mu$ is the unnormalised solve result using
        the effective correlation matrix for the current
        `covariance_mode`. When $C = I$
        (identity) all assets have utilisation 1. Off-diagonal correlations
        attenuate some assets ($u_i < 1$) and may amplify negatively
        correlated ones ($u_i > 1$).

        A value of ``0.0`` is returned when the entire signal vector
        $\mu$ is near zero at that timestamp (no solve is performed).
        ``NaN`` is returned for individual assets where $|\mu_i|$ is
        below machine-epsilon precision (``1e-14``) or where prices are
        unavailable, and for every asset of a timestamp whose solve raises
        ``SingularMatrixError`` (a warning is logged).

        Returns:
            pl.DataFrame: DataFrame with columns ``['date'] + assets``.
        """
        assets = self.assets
        mu_np = self.mu.select(assets).to_numpy()

        _mu_tol = 1e-14  # treat |μ_i| below this as zero to avoid spurious large ratios
        n_assets = len(assets)
        # Start from all-NaN so every skipped timestamp or asset stays NaN.
        util_np = np.full((self.prices.height, n_assets), np.nan)

        for i, t, mask, bundle in self._iter_matrices():
            if bundle is None:
                continue
            matrix = bundle.matrix
            expected_mu = np.nan_to_num(mu_np[i][mask])
            if np.allclose(expected_mu, 0.0):
                util_np[i, mask] = 0.0
                continue
            try:
                x = solve(matrix, expected_mu)
            except SingularMatrixError:
                # The covariance matrix is degenerate — utilisation is undefined.
                _logger.warning(
                    "signal_utilisation: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; utilisation set to NaN.",
                    t,
                )
                continue
            # ``np.where`` evaluates the division for every entry, including
            # the ones that are then masked out, hence the silenced warnings.
            with np.errstate(divide="ignore", invalid="ignore"):
                ratio = np.where(np.abs(expected_mu) > _mu_tol, x / expected_mu, np.nan)
            util_np[i, mask] = ratio

        # Overwrite the asset columns of ``prices`` with the utilisation values;
        # the remaining columns of ``prices`` are carried through unchanged.
        return self.prices.with_columns([pl.lit(util_np[:, j]).alias(asset) for j, asset in enumerate(assets)])