Coverage for src/basanos/math/_factor_model.py: 97%
78 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 05:58 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 05:58 +0000
1r"""Factor risk model decomposition (Section 4.1 of basanos.pdf).
3This private module provides the `FactorModel` frozen dataclass, which
4encapsulates the three-component factor model
6$$
7\\bm{\\Sigma} = \\mathbf{B}\\mathbf{F}\\mathbf{B}^\\top + \\mathbf{D}
8$$
10and a class method for fitting the model from a return matrix via the
11Singular Value Decomposition (Section 4.2).
12"""
14from __future__ import annotations
16import dataclasses
17from typing import cast
19import numpy as np
20from cvx.linalg import DimensionMismatchError, SingularMatrixError
21from cvx.linalg import check_and_warn_condition as _check_and_warn_condition
22from cvx.linalg import inv as _inv
23from cvx.linalg import solve as _solve
25try: # cvx-linalg >= 0.7 exposes this constant as a public top-level name
26 from cvx.linalg import DEFAULT_COND_THRESHOLD as _DEFAULT_COND_THRESHOLD
27except ImportError: # older cvx-linalg only exposes the private name
28 from cvx.linalg.solve import ( # type: ignore[attr-defined,no-redef]
29 _DEFAULT_COND_THRESHOLD, # ty: ignore[unresolved-import]
30 )
32from basanos.exceptions import FactorModelError
35@dataclasses.dataclass(frozen=True)
36class FactorModel:
37 r"""Frozen dataclass for a factor risk model decomposition (Section 4.1).
39 Encapsulates the three components of the factor model
41 $$
42 \bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top + \mathbf{D}
43 $$
45 where
47 - $\mathbf{B} \in \mathbb{R}^{n \times k}$ is the *factor loading
48 matrix*: column $j$ gives the sensitivity of each asset to
49 factor $j$.
50 - $\mathbf{F} \in \mathbb{R}^{k \times k}$ is the *factor covariance
51 matrix* (positive definite), capturing how the $k$ factors
52 co-vary.
53 - $\mathbf{D} = \operatorname{diag}(d_1, \dots, d_n)$ with
54 $d_i > 0$ is the *idiosyncratic variance* diagonal, capturing
55 the asset-specific variance unexplained by the common factors.
57 The central assumption is $k \ll n$: the dominant systematic sources
58 of risk are captured by a handful of factors while the idiosyncratic
59 component is, by construction, uncorrelated across assets.
61 Attributes:
62 factor_loadings: Factor loading matrix $\mathbf{B}$,
63 shape ``(n, k)``.
64 factor_covariance: Factor covariance matrix $\mathbf{F}$,
65 shape ``(k, k)``.
66 idiosyncratic_var: Idiosyncratic variance vector
67 $(d_1, \dots, d_n)$, shape ``(n,)``. All entries must be
68 strictly positive.
70 Examples:
71 >>> import numpy as np
72 >>> loadings = np.eye(3, 2)
73 >>> cov = np.eye(2) * 0.5
74 >>> idio = np.array([0.5, 0.5, 1.0])
75 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
76 >>> fm.n_assets
77 3
78 >>> fm.n_factors
79 2
80 >>> fm.covariance.shape
81 (3, 3)
82 """
84 factor_loadings: np.ndarray
85 factor_covariance: np.ndarray
86 idiosyncratic_var: np.ndarray
88 def __post_init__(self) -> None:
89 """Validate shape consistency and strict positivity after initialization.
91 Raises:
92 FactorModelError: If ``factor_loadings`` is not 2-D.
93 FactorModelError: If ``factor_covariance`` shape does not
94 match the number of factors inferred from ``factor_loadings``.
95 FactorModelError: If ``idiosyncratic_var`` length does
96 not match the number of assets inferred from ``factor_loadings``.
97 FactorModelError: If any element of
98 ``idiosyncratic_var`` is not strictly positive.
99 """
100 if self.factor_loadings.ndim != 2:
101 raise FactorModelError(f"factor_loadings must be 2-D, got ndim={self.factor_loadings.ndim}.") # noqa: TRY003
102 n, k = self.factor_loadings.shape
103 if self.factor_covariance.shape != (k, k):
104 raise FactorModelError( # noqa: TRY003
105 f"factor_covariance must have shape ({k}, {k}) to match "
106 f"factor_loadings columns, got {self.factor_covariance.shape}."
107 )
108 if self.idiosyncratic_var.shape != (n,):
109 raise FactorModelError( # noqa: TRY003
110 f"idiosyncratic_var must have shape ({n},) to match factor_loadings rows, "
111 f"got {self.idiosyncratic_var.shape}."
112 )
113 if not np.all(self.idiosyncratic_var > 0):
114 raise FactorModelError("All entries of idiosyncratic_var must be strictly positive.") # noqa: TRY003
116 @property
117 def n_assets(self) -> int:
118 """Number of assets *n* (rows of ``factor_loadings``)."""
119 return int(self.factor_loadings.shape[0])
121 @property
122 def n_factors(self) -> int:
123 """Number of factors *k* (columns of ``factor_loadings``)."""
124 return int(self.factor_loadings.shape[1])
126 @property
127 def covariance(self) -> np.ndarray:
128 r"""Reconstruct the full $n \times n$ covariance matrix.
130 Computes $\bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top +
131 \mathbf{D}$ by combining the low-rank systematic component with the
132 diagonal idiosyncratic component.
134 Returns:
135 np.ndarray: Shape ``(n, n)`` symmetric covariance matrix.
137 Examples:
138 >>> import numpy as np
139 >>> loadings = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])
140 >>> cov = np.eye(2)
141 >>> idio = np.ones(3)
142 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
143 >>> fm.covariance.diagonal().tolist()
144 [2.0, 2.0, 1.0]
145 """
146 return cast(
147 "np.ndarray",
148 self.factor_loadings @ self.factor_covariance @ self.factor_loadings.T + np.diag(self.idiosyncratic_var),
149 )
151 @property
152 def woodbury_condition_number(self) -> float:
153 r"""Condition number of the inner $k \times k$ Woodbury matrix.
155 Returns the condition number of the matrix
157 $$
158 \mathbf{M} = \mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B}
159 $$
161 which is the matrix actually inverted during `solve`. A large
162 value (above ``_DEFAULT_COND_THRESHOLD`` ≈ 1e12) indicates that the
163 Woodbury solve is numerically unreliable.
165 This property gives callers a way to inspect the numerical health of
166 the model without performing a full solve. Unlike the condition number
167 of the full $n \times n$ covariance matrix, this measure is
168 specific to the $k \times k$ inner system solved inside the
169 Woodbury identity.
171 Returns:
172 float: Condition number $\kappa(\mathbf{M})$. Returns
173 ``inf`` when $\mathbf{F}$ is not positive-definite (e.g.
174 singular or indefinite), as the Cholesky decomposition used to
175 form $\mathbf{F}^{-1}$ fails in that case.
177 Examples:
178 >>> import numpy as np
179 >>> loadings = np.eye(3, 1)
180 >>> cov = np.eye(1)
181 >>> idio = np.ones(3)
182 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
183 >>> fm.woodbury_condition_number > 0
184 True
185 """
186 d_inv = 1.0 / self.idiosyncratic_var # (n,)
187 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k)
188 try:
189 f_inv = _inv(self.factor_covariance)
190 mid = f_inv + self.factor_loadings.T @ d_inv_b_mat # (k, k)
191 except (np.linalg.LinAlgError, SingularMatrixError):
192 return float("inf")
193 return float(np.linalg.cond(mid))
195 def solve(
196 self,
197 rhs: np.ndarray,
198 cond_threshold: float = _DEFAULT_COND_THRESHOLD,
199 ) -> np.ndarray:
200 r"""Solve $\bm{\Sigma}\,\mathbf{x} = \mathbf{b}$ via the Woodbury identity.
202 Applies the Sherman--Morrison--Woodbury formula (Section 4.3 of
203 basanos.pdf) to avoid forming or factorising the full
204 $n \times n$ covariance matrix:
206 $$
207 (\mathbf{D} + \mathbf{B}\mathbf{F}\mathbf{B}^\top)^{-1}
208 = \mathbf{D}^{-1}
209 - \mathbf{D}^{-1}\mathbf{B}
210 \bigl(\mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B}\bigr)^{-1}
211 \mathbf{B}^\top\mathbf{D}^{-1}.
212 $$
214 Because $\mathbf{D}$ is diagonal, $\mathbf{D}^{-1}$ is
215 free. The inner matrix is $k \times k$ with cost
216 $O(k^3)$, and the surrounding multiplications cost
217 $O(kn)$. Total cost is $O(k^3 + kn)$ rather than
218 $O(n^3)$.
220 Args:
221 rhs: Right-hand side vector $\mathbf{b}$, shape ``(n,)``.
222 cond_threshold: Condition-number threshold above which an
223 `IllConditionedMatrixWarning` is
224 emitted. The check is applied to both ``factor_covariance``
225 ($\mathbf{F}$) and to the inner $k \times k$
226 Woodbury matrix $\mathbf{F}^{-1} + \mathbf{B}^\top
227 \mathbf{D}^{-1}\mathbf{B}$. Defaults to ``1e12``.
229 Returns:
230 np.ndarray: Solution vector $\mathbf{x}$, shape ``(n,)``.
232 Raises:
233 DimensionMismatchError: If ``rhs`` length does not match
234 ``n_assets``.
235 SingularMatrixError: If the inner $k \times k$ matrix is
236 singular.
238 Examples:
239 >>> import numpy as np
240 >>> loadings = np.eye(3, 1)
241 >>> cov = np.eye(1)
242 >>> idio = np.ones(3)
243 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
244 >>> rhs = np.array([1.0, 2.0, 3.0])
245 >>> x = fm.solve(rhs)
246 >>> np.allclose(fm.covariance @ x, rhs)
247 True
248 """
249 n = self.n_assets
250 if rhs.shape != (n,):
251 raise DimensionMismatchError(rhs.size, n)
253 # D^{-1} is free because D is diagonal
254 d_inv = 1.0 / self.idiosyncratic_var # (n,)
255 d_inv_rhs = d_inv * rhs # D^{-1} b, shape (n,)
256 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k)
258 # Solve mid * w = B^T D^{-1} b, where mid = F^{-1} + B^T D^{-1} B.
259 # F^{-1} is obtained via a Cholesky solve rather than an explicit
260 # inversion, consistent with the Cholesky-first discipline in _linalg.py.
261 # A condition-number check on factor_covariance is applied first so
262 # that ill-conditioned F is flagged before its inverse enters mid.
263 rhs_k = self.factor_loadings.T @ d_inv_rhs # (k,)
264 try:
265 _check_and_warn_condition(self.factor_covariance, cond_threshold)
266 mid = _inv(self.factor_covariance, cond_threshold) + self.factor_loadings.T @ d_inv_b_mat # (k, k)
267 _check_and_warn_condition(mid, cond_threshold)
268 w = _solve(mid, rhs_k, cond_threshold) # (k,)
269 except np.linalg.LinAlgError as exc:
270 raise SingularMatrixError(str(exc)) from exc
272 # x = D^{-1} b - D^{-1} B w
273 return cast("np.ndarray", d_inv_rhs - d_inv_b_mat @ w)
275 @classmethod
276 def from_returns(cls, returns: np.ndarray, k: int) -> FactorModel:
277 r"""Fit a rank-*k* factor model from a return matrix via truncated SVD.
279 Extracts latent factors from the return matrix
280 $\mathbf{R} \in \mathbb{R}^{T \times n}$ using the Singular
281 Value Decomposition (SVD). The top-*k* singular triplets define the
282 factor model components:
284 $$
285 \mathbf{B} = \mathbf{V}_k, \quad
286 \mathbf{F} = \bm{\Sigma}_k^2 / T, \quad
287 \hat{d}_i = 1 - \bigl(\mathbf{B}\mathbf{F}\mathbf{B}^\top\bigr)_{ii}
288 $$
290 where $\mathbf{V}_k$ and $\bm{\Sigma}_k$ are the top-*k*
291 right singular vectors and singular values of $\mathbf{R}$
292 respectively. When *returns* contains unit-variance columns (as
293 produced by `vol_adj`), the sample
294 covariance has unit diagonal; the idiosyncratic term
295 $\hat{d}_i = 1 - (\mathbf{B}\mathbf{F}\mathbf{B}^\top)_{ii}$
296 absorbs the residual so the full covariance $\hat{\mathbf{C}}^{(k)}$
297 also has unit diagonal. Each $\hat{d}_i$ is clamped from below
298 at machine epsilon to guarantee strict positivity.
300 Args:
301 returns: Return matrix of shape ``(T, n)``, typically
302 volatility-adjusted log returns with rows as timestamps and
303 columns as assets.
304 k: Number of factors to retain. Must satisfy
305 ``1 <= k <= min(T, n)``.
307 Returns:
308 FactorModel: Fitted factor model with ``n_assets = n`` and
309 ``n_factors = k``.
311 Raises:
312 FactorModelError: If *returns* is not 2-D.
313 FactorModelError: If *k* is outside the range ``[1, min(T, n)]``.
315 Examples:
316 >>> import numpy as np
317 >>> rng = np.random.default_rng(0)
318 >>> ret = rng.standard_normal((50, 5))
319 >>> fm = FactorModel.from_returns(ret, k=2)
320 >>> fm.n_factors
321 2
322 >>> fm.n_assets
323 5
324 >>> fm.covariance.shape
325 (5, 5)
326 """
327 if returns.ndim != 2:
328 raise FactorModelError(f"Return matrix must be 2-D, got ndim={returns.ndim}.") # noqa: TRY003
329 t_len, n = returns.shape
330 if not (1 <= k <= min(t_len, n)):
331 raise FactorModelError(f"k must satisfy 1 <= k <= min(T, n) = {min(t_len, n)}, got k={k}.") # noqa: TRY003
333 _, s, vt = np.linalg.svd(returns, full_matrices=False)
335 # Top-k right singular vectors as columns: shape (n, k)
336 v_k = vt[:k].T
337 s_k = s[:k]
339 # Factor covariance: diagonal matrix with entries s_j**2 / T
340 factor_cov = np.diag(s_k**2 / t_len)
342 # Diagonal of B*F*B^T = sum_j (s_j**2/T) * B[:,j]**2
343 factor_diag = (v_k**2) @ (s_k**2 / t_len)
345 # Idiosyncratic variance: target diagonal is 1.0 (unit-variance columns
346 # assumed); residual = 1.0 - systematic contribution, clamped to (0, inf)
347 _unit_variance = 1.0
348 d = np.maximum(_unit_variance - factor_diag, np.finfo(float).eps)
350 return cls(factor_loadings=v_k, factor_covariance=factor_cov, idiosyncratic_var=d)