Coverage for src / basanos / math / _engine_diagnostics.py: 100%
94 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 17:47 +0000
1"""Diagnostics mixin for BasanosEngine.
3Provides matrix-quality and solver-quality properties as a reusable mixin so
4that ``optimizer.py`` stays focused on the core position-solving logic.
6Classes in this module are **private implementation details**. The public API
7is :class:`~basanos.math.optimizer.BasanosEngine`, which inherits from
8:class:`_DiagnosticsMixin`.
9"""
11from __future__ import annotations
13import logging
14from typing import TYPE_CHECKING
16import numpy as np
17import polars as pl
19from ..exceptions import SingularMatrixError
20from ._linalg import solve, valid
22if TYPE_CHECKING:
23 from ._engine_protocol import _EngineProtocol
25_logger = logging.getLogger(__name__)
class _DiagnosticsMixin:
    """Mixin providing matrix-quality and solver-quality diagnostic properties.

    The consuming class must satisfy :class:`~._engine_protocol._EngineProtocol`,
    i.e. it must expose:

    * ``assets`` — list of asset column names
    * ``prices`` — Polars DataFrame with a ``'date'`` column
    * ``mu`` — Polars DataFrame of expected-return signals
    * ``_iter_matrices()`` — generator yielding ``(i, t, mask, bundle)``

    Every property walks ``_iter_matrices()`` once and produces one output row
    per yielded item, aligned with ``prices['date']``.
    """

    @property
    def condition_number(self: _EngineProtocol) -> pl.DataFrame:
        """Condition number κ of the effective correlation matrix at each timestamp.

        Uses the same covariance mode as :attr:`cash_position`: for
        ``ewma_shrink`` this is the shrunk EWMA matrix; for ``sliding_window``
        it is the factor-model covariance.  Only the sub-matrix corresponding
        to assets with finite prices at that timestamp is used; rows with no
        finite prices yield ``NaN``.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'condition_number': ...}``.
        """
        kappas: list[float] = []
        for _i, _t, _mask, bundle in self._iter_matrices():
            if bundle is None:
                # No matrix was produced for this timestamp.
                kappas.append(np.nan)
                continue
            # NOTE(review): ``valid`` is assumed to return a boolean validity
            # mask plus the matrix restricted to the valid entries — confirm
            # against ``_linalg.valid``.
            usable, mat = valid(bundle.matrix)
            if not usable.any():
                kappas.append(np.nan)
                continue
            kappas.append(float(np.linalg.cond(mat)))

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "condition_number": pl.Series(kappas, dtype=pl.Float64),
            }
        )

    @property
    def effective_rank(self: _EngineProtocol) -> pl.DataFrame:
        r"""Effective rank of the effective correlation matrix at each timestamp.

        Measures the true dimensionality of the portfolio by computing the
        entropy-based effective rank:

        .. math::

            \text{eff\_rank} = \exp\!\left(-\sum_i p_i \ln p_i\right),
            \quad p_i = \frac{\lambda_i}{\sum_j \lambda_j}

        where :math:`\lambda_i` are the eigenvalues of the effective
        correlation matrix (restricted to assets with finite prices at that
        timestamp).  Uses the same covariance mode as :attr:`cash_position`.
        A value equal to the number of assets indicates a perfectly uniform
        spectrum; a value of 1 indicates a rank-1 matrix.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'effective_rank': ...}``.
            ``NaN`` is reported when no matrix is available, when no entry is
            valid, or when the clipped eigenvalues sum to zero.
        """
        ranks: list[float] = []
        for _i, _t, _mask, bundle in self._iter_matrices():
            if bundle is None:
                ranks.append(np.nan)
                continue
            usable, mat = valid(bundle.matrix)
            if not usable.any():
                ranks.append(np.nan)
                continue
            # Negative eigenvalues are clipped to zero so that the normalised
            # spectrum below is a proper probability distribution.
            eigvals = np.clip(np.linalg.eigvalsh(mat), 0.0, None)
            total = eigvals.sum()
            if total <= 0.0:
                # All-zero spectrum: the distribution p is undefined.
                ranks.append(np.nan)
                continue
            p = eigvals / total
            # Drop zero weights; they contribute nothing to the entropy and
            # would otherwise evaluate log(0).
            p_pos = p[p > 0.0]
            entropy = float(-np.sum(p_pos * np.log(p_pos)))
            ranks.append(float(np.exp(entropy)))

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "effective_rank": pl.Series(ranks, dtype=pl.Float64),
            }
        )

    @property
    def solver_residual(self: _EngineProtocol) -> pl.DataFrame:
        r"""Per-timestamp solver residual ``‖C·x - μ‖₂``.

        After solving the normalised linear system ``C · x = μ`` at
        each timestamp, this property reports the Euclidean residual norm.
        For a well-posed, well-conditioned system the residual is near machine
        epsilon; large values flag numerical difficulties (near-singular
        matrices, extreme condition numbers, or solver fall-back to LU).
        Uses the same covariance mode as :attr:`cash_position`.

        Returns:
            pl.DataFrame: Two-column DataFrame ``{'date': ..., 'residual': ...}``.
            Zero is returned when ``μ`` is the zero vector (no solve is
            performed).  ``NaN`` is returned when no asset has finite prices,
            when the solve raises :class:`SingularMatrixError`, or when the
            solution contains no finite entry.
        """
        assets = self.assets
        mu_np = self.mu.select(assets).to_numpy()

        residuals: list[float] = []
        for i, t, mask, bundle in self._iter_matrices():
            if bundle is None:
                residuals.append(np.nan)
                continue
            matrix = bundle.matrix
            # Missing signal values are treated as zero.
            expected_mu = np.nan_to_num(mu_np[i][mask])
            if np.allclose(expected_mu, 0.0):
                # Nothing to solve for: the residual is zero by definition.
                residuals.append(0.0)
                continue
            try:
                x = solve(matrix, expected_mu)
            except SingularMatrixError:
                # The covariance matrix is degenerate — residual is undefined.
                _logger.warning(
                    "solver_residual: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; residual set to NaN.",
                    t,
                )
                residuals.append(np.nan)
                continue
            finite_x = np.isfinite(x)
            if not finite_x.any():
                residuals.append(np.nan)
                continue
            # Evaluate the residual only on the entries the solver actually
            # produced, using the matching sub-matrix.
            sub_matrix = matrix[np.ix_(finite_x, finite_x)]
            defect = sub_matrix @ x[finite_x] - expected_mu[finite_x]
            residuals.append(float(np.linalg.norm(defect)))

        return pl.DataFrame(
            {
                "date": self.prices["date"],
                "residual": pl.Series(residuals, dtype=pl.Float64),
            }
        )

    @property
    def signal_utilisation(self: _EngineProtocol) -> pl.DataFrame:
        r"""Per-asset signal utilisation: fraction of μ_i surviving the correlation filter.

        For each asset *i* and timestamp *t*, computes

        .. math::

            u_i = \frac{(C^{-1}\,\mu)_i}{\mu_i}

        where :math:`C^{-1}\,\mu` is the unnormalised solve result using
        the effective correlation matrix for the current
        :attr:`~BasanosConfig.covariance_mode`.  When :math:`C = I`
        (identity) all assets have utilisation 1.  Off-diagonal correlations
        attenuate some assets (:math:`u_i < 1`) and may amplify negatively
        correlated ones (:math:`u_i > 1`).

        A value of ``0.0`` is returned when the entire signal vector
        :math:`\mu` is near zero at that timestamp (no solve is performed).
        ``NaN`` is returned for individual assets where :math:`|\mu_i|` is
        below machine-epsilon precision or where prices are unavailable, and
        for every asset at a timestamp whose solve raises
        :class:`SingularMatrixError`.

        Returns:
            pl.DataFrame: ``self.prices`` with one utilisation column per
            asset, named after the asset (an existing column of that name is
            overwritten).  All other columns of ``prices`` are carried through
            unchanged, so the result is ``['date'] + assets`` when ``prices``
            holds only those columns.
        """
        assets = self.assets
        mu_np = self.mu.select(assets).to_numpy()

        _mu_tol = 1e-14  # treat |μ_i| below this as zero to avoid spurious large ratios
        n_assets = len(assets)
        # Start from NaN everywhere; only entries that are computed get overwritten.
        util_np = np.full((self.prices.height, n_assets), np.nan)

        for i, t, mask, bundle in self._iter_matrices():
            if bundle is None:
                continue
            matrix = bundle.matrix
            # Missing signal values are treated as zero.
            expected_mu = np.nan_to_num(mu_np[i][mask])
            if np.allclose(expected_mu, 0.0):
                util_np[i, mask] = 0.0
                continue
            try:
                x = solve(matrix, expected_mu)
            except SingularMatrixError:
                # The covariance matrix is degenerate — utilisation is undefined.
                _logger.warning(
                    "signal_utilisation: SingularMatrixError at t=%s - covariance matrix is "
                    "degenerate; utilisation set to NaN.",
                    t,
                )
                continue
            # np.where evaluates the division for every entry, including the
            # ones that are masked out, so silence the resulting warnings.
            with np.errstate(divide="ignore", invalid="ignore"):
                ratio = np.where(np.abs(expected_mu) > _mu_tol, x / expected_mu, np.nan)
            util_np[i, mask] = ratio

        return self.prices.with_columns(
            [pl.lit(util_np[:, j]).alias(asset) for j, asset in enumerate(assets)]
        )