Coverage for src / basanos / math / _factor_model.py: 100%
69 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 17:47 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 17:47 +0000
1r"""Factor risk model decomposition (Section 4.1 of basanos.pdf).
3This private module provides the :class:`FactorModel` frozen dataclass, which
4encapsulates the three-component factor model
6.. math::
8 \\bm{\\Sigma} = \\mathbf{B}\\mathbf{F}\\mathbf{B}^\\top + \\mathbf{D}
10and a class method for fitting the model from a return matrix via the
11Singular Value Decomposition (Section 4.2).
12"""
14from __future__ import annotations
16import dataclasses
18import numpy as np
20from basanos.exceptions import (
21 DimensionMismatchError,
22 FactorModelError,
23 SingularMatrixError,
24)
25from basanos.math._linalg import (
26 _DEFAULT_COND_THRESHOLD,
27 _check_and_warn_condition,
28 _cholesky_solve,
29)
32@dataclasses.dataclass(frozen=True)
33class FactorModel:
34 r"""Frozen dataclass for a factor risk model decomposition (Section 4.1).
36 Encapsulates the three components of the factor model
38 .. math::
40 \bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top + \mathbf{D}
42 where
44 - :math:`\mathbf{B} \in \mathbb{R}^{n \times k}` is the *factor loading
45 matrix*: column :math:`j` gives the sensitivity of each asset to
46 factor :math:`j`.
47 - :math:`\mathbf{F} \in \mathbb{R}^{k \times k}` is the *factor covariance
48 matrix* (positive definite), capturing how the :math:`k` factors
49 co-vary.
50 - :math:`\mathbf{D} = \operatorname{diag}(d_1, \dots, d_n)` with
51 :math:`d_i > 0` is the *idiosyncratic variance* diagonal, capturing
52 the asset-specific variance unexplained by the common factors.
54 The central assumption is :math:`k \ll n`: the dominant systematic sources
55 of risk are captured by a handful of factors while the idiosyncratic
56 component is, by construction, uncorrelated across assets.
58 Attributes:
59 factor_loadings: Factor loading matrix :math:`\mathbf{B}`,
60 shape ``(n, k)``.
61 factor_covariance: Factor covariance matrix :math:`\mathbf{F}`,
62 shape ``(k, k)``.
63 idiosyncratic_var: Idiosyncratic variance vector
64 :math:`(d_1, \dots, d_n)`, shape ``(n,)``. All entries must be
65 strictly positive.
67 Examples:
68 >>> import numpy as np
69 >>> loadings = np.eye(3, 2)
70 >>> cov = np.eye(2) * 0.5
71 >>> idio = np.array([0.5, 0.5, 1.0])
72 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
73 >>> fm.n_assets
74 3
75 >>> fm.n_factors
76 2
77 >>> fm.covariance.shape
78 (3, 3)
79 """
81 factor_loadings: np.ndarray
82 factor_covariance: np.ndarray
83 idiosyncratic_var: np.ndarray
85 def __post_init__(self) -> None:
86 """Validate shape consistency and strict positivity after initialization.
88 Raises:
89 FactorModelError: If ``factor_loadings`` is not 2-D.
90 FactorModelError: If ``factor_covariance`` shape does not
91 match the number of factors inferred from ``factor_loadings``.
92 FactorModelError: If ``idiosyncratic_var`` length does
93 not match the number of assets inferred from ``factor_loadings``.
94 FactorModelError: If any element of
95 ``idiosyncratic_var`` is not strictly positive.
96 """
97 if self.factor_loadings.ndim != 2:
98 raise FactorModelError(f"factor_loadings must be 2-D, got ndim={self.factor_loadings.ndim}.") # noqa: TRY003
99 n, k = self.factor_loadings.shape
100 if self.factor_covariance.shape != (k, k):
101 raise FactorModelError( # noqa: TRY003
102 f"factor_covariance must have shape ({k}, {k}) to match "
103 f"factor_loadings columns, got {self.factor_covariance.shape}."
104 )
105 if self.idiosyncratic_var.shape != (n,):
106 raise FactorModelError( # noqa: TRY003
107 f"idiosyncratic_var must have shape ({n},) to match factor_loadings rows, "
108 f"got {self.idiosyncratic_var.shape}."
109 )
110 if not np.all(self.idiosyncratic_var > 0):
111 raise FactorModelError("All entries of idiosyncratic_var must be strictly positive.") # noqa: TRY003
113 @property
114 def n_assets(self) -> int:
115 """Number of assets *n* (rows of ``factor_loadings``)."""
116 return self.factor_loadings.shape[0]
118 @property
119 def n_factors(self) -> int:
120 """Number of factors *k* (columns of ``factor_loadings``)."""
121 return self.factor_loadings.shape[1]
123 @property
124 def covariance(self) -> np.ndarray:
125 r"""Reconstruct the full :math:`n \times n` covariance matrix.
127 Computes :math:`\bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top +
128 \mathbf{D}` by combining the low-rank systematic component with the
129 diagonal idiosyncratic component.
131 Returns:
132 np.ndarray: Shape ``(n, n)`` symmetric covariance matrix.
134 Examples:
135 >>> import numpy as np
136 >>> loadings = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])
137 >>> cov = np.eye(2)
138 >>> idio = np.ones(3)
139 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
140 >>> fm.covariance.diagonal().tolist()
141 [2.0, 2.0, 1.0]
142 """
143 return self.factor_loadings @ self.factor_covariance @ self.factor_loadings.T + np.diag(self.idiosyncratic_var)
145 @property
146 def woodbury_condition_number(self) -> float:
147 r"""Condition number of the inner :math:`k \times k` Woodbury matrix.
149 Returns the condition number of the matrix
151 .. math::
153 \mathbf{M} = \mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B}
155 which is the matrix actually inverted during :meth:`solve`. A large
156 value (above ``_DEFAULT_COND_THRESHOLD`` ≈ 1e12) indicates that the
157 Woodbury solve is numerically unreliable.
159 This property gives callers a way to inspect the numerical health of
160 the model without performing a full solve. Unlike the condition number
161 of the full :math:`n \times n` covariance matrix, this measure is
162 specific to the :math:`k \times k` inner system solved inside the
163 Woodbury identity.
165 Returns:
166 float: Condition number :math:`\kappa(\mathbf{M})`. Returns
167 ``inf`` when :math:`\mathbf{F}` is not positive-definite (e.g.
168 singular or indefinite), as the Cholesky decomposition used to
169 form :math:`\mathbf{F}^{-1}` fails in that case.
171 Examples:
172 >>> import numpy as np
173 >>> loadings = np.eye(3, 1)
174 >>> cov = np.eye(1)
175 >>> idio = np.ones(3)
176 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
177 >>> fm.woodbury_condition_number > 0
178 True
179 """
180 d_inv = 1.0 / self.idiosyncratic_var # (n,)
181 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k)
182 try:
183 mid = (
184 _cholesky_solve(self.factor_covariance, np.eye(self.n_factors)) + self.factor_loadings.T @ d_inv_b_mat
185 ) # (k, k)
186 except np.linalg.LinAlgError:
187 return float("inf")
188 return float(np.linalg.cond(mid))
190 def solve(
191 self,
192 rhs: np.ndarray,
193 cond_threshold: float = _DEFAULT_COND_THRESHOLD,
194 ) -> np.ndarray:
195 r"""Solve :math:`\bm{\Sigma}\,\mathbf{x} = \mathbf{b}` via the Woodbury identity.
197 Applies the Sherman--Morrison--Woodbury formula (Section 4.3 of
198 basanos.pdf) to avoid forming or factorising the full
199 :math:`n \times n` covariance matrix:
201 .. math::
203 (\mathbf{D} + \mathbf{B}\mathbf{F}\mathbf{B}^\top)^{-1}
204 = \mathbf{D}^{-1}
205 - \mathbf{D}^{-1}\mathbf{B}
206 \bigl(\mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B}\bigr)^{-1}
207 \mathbf{B}^\top\mathbf{D}^{-1}.
209 Because :math:`\mathbf{D}` is diagonal, :math:`\mathbf{D}^{-1}` is
210 free. The inner matrix is :math:`k \times k` with cost
211 :math:`O(k^3)`, and the surrounding multiplications cost
212 :math:`O(kn)`. Total cost is :math:`O(k^3 + kn)` rather than
213 :math:`O(n^3)`.
215 Args:
216 rhs: Right-hand side vector :math:`\mathbf{b}`, shape ``(n,)``.
217 cond_threshold: Condition-number threshold above which an
218 :class:`~basanos.exceptions.IllConditionedMatrixWarning` is
219 emitted. The check is applied to both ``factor_covariance``
220 (:math:`\mathbf{F}`) and to the inner :math:`k \times k`
221 Woodbury matrix :math:`\mathbf{F}^{-1} + \mathbf{B}^\top
222 \mathbf{D}^{-1}\mathbf{B}`. Defaults to ``1e12``.
224 Returns:
225 np.ndarray: Solution vector :math:`\mathbf{x}`, shape ``(n,)``.
227 Raises:
228 DimensionMismatchError: If ``rhs`` length does not match
229 ``n_assets``.
230 SingularMatrixError: If the inner :math:`k \times k` matrix is
231 singular.
233 Examples:
234 >>> import numpy as np
235 >>> loadings = np.eye(3, 1)
236 >>> cov = np.eye(1)
237 >>> idio = np.ones(3)
238 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio)
239 >>> rhs = np.array([1.0, 2.0, 3.0])
240 >>> x = fm.solve(rhs)
241 >>> np.allclose(fm.covariance @ x, rhs)
242 True
243 """
244 n = self.n_assets
245 if rhs.shape != (n,):
246 raise DimensionMismatchError(rhs.size, n)
248 # D^{-1} is free because D is diagonal
249 d_inv = 1.0 / self.idiosyncratic_var # (n,)
250 d_inv_rhs = d_inv * rhs # D^{-1} b, shape (n,)
251 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k)
253 # Solve mid * w = B^T D^{-1} b, where mid = F^{-1} + B^T D^{-1} B.
254 # F^{-1} is obtained via a Cholesky solve rather than an explicit
255 # inversion, consistent with the Cholesky-first discipline in _linalg.py.
256 # A condition-number check on factor_covariance is applied first so
257 # that ill-conditioned F is flagged before its inverse enters mid.
258 rhs_k = self.factor_loadings.T @ d_inv_rhs # (k,)
259 try:
260 _check_and_warn_condition(self.factor_covariance, cond_threshold)
261 mid = (
262 _cholesky_solve(self.factor_covariance, np.eye(self.n_factors)) + self.factor_loadings.T @ d_inv_b_mat
263 ) # (k, k)
264 _check_and_warn_condition(mid, cond_threshold)
265 w = _cholesky_solve(mid, rhs_k) # (k,)
266 except np.linalg.LinAlgError as exc:
267 raise SingularMatrixError(str(exc)) from exc
269 # x = D^{-1} b - D^{-1} B w
270 return d_inv_rhs - d_inv_b_mat @ w
272 @classmethod
273 def from_returns(cls, returns: np.ndarray, k: int) -> FactorModel:
274 r"""Fit a rank-*k* factor model from a return matrix via truncated SVD.
276 Extracts latent factors from the return matrix
277 :math:`\mathbf{R} \in \mathbb{R}^{T \times n}` using the Singular
278 Value Decomposition (SVD). The top-*k* singular triplets define the
279 factor model components:
281 .. math::
283 \mathbf{B} = \mathbf{V}_k, \quad
284 \mathbf{F} = \bm{\Sigma}_k^2 / T, \quad
285 \hat{d}_i = 1 - \bigl(\mathbf{B}\mathbf{F}\mathbf{B}^\top\bigr)_{ii}
287 where :math:`\mathbf{V}_k` and :math:`\bm{\Sigma}_k` are the top-*k*
288 right singular vectors and singular values of :math:`\mathbf{R}`
289 respectively. When *returns* contains unit-variance columns (as
290 produced by :func:`~basanos.math._signal.vol_adj`), the sample
291 covariance has unit diagonal; the idiosyncratic term
292 :math:`\hat{d}_i = 1 - (\mathbf{B}\mathbf{F}\mathbf{B}^\top)_{ii}`
293 absorbs the residual so the full covariance :math:`\hat{\mathbf{C}}^{(k)}`
294 also has unit diagonal. Each :math:`\hat{d}_i` is clamped from below
295 at machine epsilon to guarantee strict positivity.
297 Args:
298 returns: Return matrix of shape ``(T, n)``, typically
299 volatility-adjusted log returns with rows as timestamps and
300 columns as assets.
301 k: Number of factors to retain. Must satisfy
302 ``1 <= k <= min(T, n)``.
304 Returns:
305 FactorModel: Fitted factor model with ``n_assets = n`` and
306 ``n_factors = k``.
308 Raises:
309 FactorModelError: If *returns* is not 2-D.
310 FactorModelError: If *k* is outside the range ``[1, min(T, n)]``.
312 Examples:
313 >>> import numpy as np
314 >>> rng = np.random.default_rng(0)
315 >>> ret = rng.standard_normal((50, 5))
316 >>> fm = FactorModel.from_returns(ret, k=2)
317 >>> fm.n_factors
318 2
319 >>> fm.n_assets
320 5
321 >>> fm.covariance.shape
322 (5, 5)
323 """
324 if returns.ndim != 2:
325 raise FactorModelError(f"Return matrix must be 2-D, got ndim={returns.ndim}.") # noqa: TRY003
326 t_len, n = returns.shape
327 if not (1 <= k <= min(t_len, n)):
328 raise FactorModelError(f"k must satisfy 1 <= k <= min(T, n) = {min(t_len, n)}, got k={k}.") # noqa: TRY003
330 _, s, vt = np.linalg.svd(returns, full_matrices=False)
332 # Top-k right singular vectors as columns: shape (n, k)
333 v_k = vt[:k].T
334 s_k = s[:k]
336 # Factor covariance: diagonal matrix with entries s_j**2 / T
337 factor_cov = np.diag(s_k**2 / t_len)
339 # Diagonal of B*F*B^T = sum_j (s_j**2/T) * B[:,j]**2
340 factor_diag = (v_k**2) @ (s_k**2 / t_len)
342 # Idiosyncratic variance: target diagonal is 1.0 (unit-variance columns
343 # assumed); residual = 1.0 - systematic contribution, clamped to (0, inf)
344 _unit_variance = 1.0
345 d = np.maximum(_unit_variance - factor_diag, np.finfo(float).eps)
347 return cls(factor_loadings=v_k, factor_covariance=factor_cov, idiosyncratic_var=d)