Coverage for src/basanos/math/_factor_model.py: 97%

78 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 05:58 +0000

1r"""Factor risk model decomposition (Section 4.1 of basanos.pdf). 

2 

3This private module provides the `FactorModel` frozen dataclass, which 

4encapsulates the three-component factor model 

5 

6$$ 

7\\bm{\\Sigma} = \\mathbf{B}\\mathbf{F}\\mathbf{B}^\\top + \\mathbf{D} 

8$$ 

9 

10and a class method for fitting the model from a return matrix via the 

11Singular Value Decomposition (Section 4.2). 

12""" 

13 

14from __future__ import annotations 

15 

16import dataclasses 

17from typing import cast 

18 

19import numpy as np 

20from cvx.linalg import DimensionMismatchError, SingularMatrixError 

21from cvx.linalg import check_and_warn_condition as _check_and_warn_condition 

22from cvx.linalg import inv as _inv 

23from cvx.linalg import solve as _solve 

24 

25try: # cvx-linalg >= 0.7 exposes this constant as a public top-level name 

26 from cvx.linalg import DEFAULT_COND_THRESHOLD as _DEFAULT_COND_THRESHOLD 

27except ImportError: # older cvx-linalg only exposes the private name 

28 from cvx.linalg.solve import ( # type: ignore[attr-defined,no-redef] 

29 _DEFAULT_COND_THRESHOLD, # ty: ignore[unresolved-import] 

30 ) 

31 

32from basanos.exceptions import FactorModelError 

33 

34 

35@dataclasses.dataclass(frozen=True) 

36class FactorModel: 

37 r"""Frozen dataclass for a factor risk model decomposition (Section 4.1). 

38 

39 Encapsulates the three components of the factor model 

40 

41 $$ 

42 \bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top + \mathbf{D} 

43 $$ 

44 

45 where 

46 

47 - $\mathbf{B} \in \mathbb{R}^{n \times k}$ is the *factor loading 

48 matrix*: column $j$ gives the sensitivity of each asset to 

49 factor $j$. 

50 - $\mathbf{F} \in \mathbb{R}^{k \times k}$ is the *factor covariance 

51 matrix* (positive definite), capturing how the $k$ factors 

52 co-vary. 

53 - $\mathbf{D} = \operatorname{diag}(d_1, \dots, d_n)$ with 

54 $d_i > 0$ is the *idiosyncratic variance* diagonal, capturing 

55 the asset-specific variance unexplained by the common factors. 

56 

57 The central assumption is $k \ll n$: the dominant systematic sources 

58 of risk are captured by a handful of factors while the idiosyncratic 

59 component is, by construction, uncorrelated across assets. 

60 

61 Attributes: 

62 factor_loadings: Factor loading matrix $\mathbf{B}$, 

63 shape ``(n, k)``. 

64 factor_covariance: Factor covariance matrix $\mathbf{F}$, 

65 shape ``(k, k)``. 

66 idiosyncratic_var: Idiosyncratic variance vector 

67 $(d_1, \dots, d_n)$, shape ``(n,)``. All entries must be 

68 strictly positive. 

69 

70 Examples: 

71 >>> import numpy as np 

72 >>> loadings = np.eye(3, 2) 

73 >>> cov = np.eye(2) * 0.5 

74 >>> idio = np.array([0.5, 0.5, 1.0]) 

75 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

76 >>> fm.n_assets 

77 3 

78 >>> fm.n_factors 

79 2 

80 >>> fm.covariance.shape 

81 (3, 3) 

82 """ 

83 

84 factor_loadings: np.ndarray 

85 factor_covariance: np.ndarray 

86 idiosyncratic_var: np.ndarray 

87 

88 def __post_init__(self) -> None: 

89 """Validate shape consistency and strict positivity after initialization. 

90 

91 Raises: 

92 FactorModelError: If ``factor_loadings`` is not 2-D. 

93 FactorModelError: If ``factor_covariance`` shape does not 

94 match the number of factors inferred from ``factor_loadings``. 

95 FactorModelError: If ``idiosyncratic_var`` length does 

96 not match the number of assets inferred from ``factor_loadings``. 

97 FactorModelError: If any element of 

98 ``idiosyncratic_var`` is not strictly positive. 

99 """ 

100 if self.factor_loadings.ndim != 2: 

101 raise FactorModelError(f"factor_loadings must be 2-D, got ndim={self.factor_loadings.ndim}.") # noqa: TRY003 

102 n, k = self.factor_loadings.shape 

103 if self.factor_covariance.shape != (k, k): 

104 raise FactorModelError( # noqa: TRY003 

105 f"factor_covariance must have shape ({k}, {k}) to match " 

106 f"factor_loadings columns, got {self.factor_covariance.shape}." 

107 ) 

108 if self.idiosyncratic_var.shape != (n,): 

109 raise FactorModelError( # noqa: TRY003 

110 f"idiosyncratic_var must have shape ({n},) to match factor_loadings rows, " 

111 f"got {self.idiosyncratic_var.shape}." 

112 ) 

113 if not np.all(self.idiosyncratic_var > 0): 

114 raise FactorModelError("All entries of idiosyncratic_var must be strictly positive.") # noqa: TRY003 

115 

116 @property 

117 def n_assets(self) -> int: 

118 """Number of assets *n* (rows of ``factor_loadings``).""" 

119 return int(self.factor_loadings.shape[0]) 

120 

121 @property 

122 def n_factors(self) -> int: 

123 """Number of factors *k* (columns of ``factor_loadings``).""" 

124 return int(self.factor_loadings.shape[1]) 

125 

126 @property 

127 def covariance(self) -> np.ndarray: 

128 r"""Reconstruct the full $n \times n$ covariance matrix. 

129 

130 Computes $\bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top + 

131 \mathbf{D}$ by combining the low-rank systematic component with the 

132 diagonal idiosyncratic component. 

133 

134 Returns: 

135 np.ndarray: Shape ``(n, n)`` symmetric covariance matrix. 

136 

137 Examples: 

138 >>> import numpy as np 

139 >>> loadings = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]]) 

140 >>> cov = np.eye(2) 

141 >>> idio = np.ones(3) 

142 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

143 >>> fm.covariance.diagonal().tolist() 

144 [2.0, 2.0, 1.0] 

145 """ 

146 return cast( 

147 "np.ndarray", 

148 self.factor_loadings @ self.factor_covariance @ self.factor_loadings.T + np.diag(self.idiosyncratic_var), 

149 ) 

150 

151 @property 

152 def woodbury_condition_number(self) -> float: 

153 r"""Condition number of the inner $k \times k$ Woodbury matrix. 

154 

155 Returns the condition number of the matrix 

156 

157 $$ 

158 \mathbf{M} = \mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B} 

159 $$ 

160 

161 which is the matrix actually inverted during `solve`. A large 

162 value (above ``_DEFAULT_COND_THRESHOLD`` ≈ 1e12) indicates that the 

163 Woodbury solve is numerically unreliable. 

164 

165 This property gives callers a way to inspect the numerical health of 

166 the model without performing a full solve. Unlike the condition number 

167 of the full $n \times n$ covariance matrix, this measure is 

168 specific to the $k \times k$ inner system solved inside the 

169 Woodbury identity. 

170 

171 Returns: 

172 float: Condition number $\kappa(\mathbf{M})$. Returns 

173 ``inf`` when $\mathbf{F}$ is not positive-definite (e.g. 

174 singular or indefinite), as the Cholesky decomposition used to 

175 form $\mathbf{F}^{-1}$ fails in that case. 

176 

177 Examples: 

178 >>> import numpy as np 

179 >>> loadings = np.eye(3, 1) 

180 >>> cov = np.eye(1) 

181 >>> idio = np.ones(3) 

182 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

183 >>> fm.woodbury_condition_number > 0 

184 True 

185 """ 

186 d_inv = 1.0 / self.idiosyncratic_var # (n,) 

187 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k) 

188 try: 

189 f_inv = _inv(self.factor_covariance) 

190 mid = f_inv + self.factor_loadings.T @ d_inv_b_mat # (k, k) 

191 except (np.linalg.LinAlgError, SingularMatrixError): 

192 return float("inf") 

193 return float(np.linalg.cond(mid)) 

194 

195 def solve( 

196 self, 

197 rhs: np.ndarray, 

198 cond_threshold: float = _DEFAULT_COND_THRESHOLD, 

199 ) -> np.ndarray: 

200 r"""Solve $\bm{\Sigma}\,\mathbf{x} = \mathbf{b}$ via the Woodbury identity. 

201 

202 Applies the Sherman--Morrison--Woodbury formula (Section 4.3 of 

203 basanos.pdf) to avoid forming or factorising the full 

204 $n \times n$ covariance matrix: 

205 

206 $$ 

207 (\mathbf{D} + \mathbf{B}\mathbf{F}\mathbf{B}^\top)^{-1} 

208 = \mathbf{D}^{-1} 

209 - \mathbf{D}^{-1}\mathbf{B} 

210 \bigl(\mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B}\bigr)^{-1} 

211 \mathbf{B}^\top\mathbf{D}^{-1}. 

212 $$ 

213 

214 Because $\mathbf{D}$ is diagonal, $\mathbf{D}^{-1}$ is 

215 free. The inner matrix is $k \times k$ with cost 

216 $O(k^3)$, and the surrounding multiplications cost 

217 $O(kn)$. Total cost is $O(k^3 + kn)$ rather than 

218 $O(n^3)$. 

219 

220 Args: 

221 rhs: Right-hand side vector $\mathbf{b}$, shape ``(n,)``. 

222 cond_threshold: Condition-number threshold above which an 

223 `IllConditionedMatrixWarning` is 

224 emitted. The check is applied to both ``factor_covariance`` 

225 ($\mathbf{F}$) and to the inner $k \times k$ 

226 Woodbury matrix $\mathbf{F}^{-1} + \mathbf{B}^\top 

227 \mathbf{D}^{-1}\mathbf{B}$. Defaults to ``1e12``. 

228 

229 Returns: 

230 np.ndarray: Solution vector $\mathbf{x}$, shape ``(n,)``. 

231 

232 Raises: 

233 DimensionMismatchError: If ``rhs`` length does not match 

234 ``n_assets``. 

235 SingularMatrixError: If the inner $k \times k$ matrix is 

236 singular. 

237 

238 Examples: 

239 >>> import numpy as np 

240 >>> loadings = np.eye(3, 1) 

241 >>> cov = np.eye(1) 

242 >>> idio = np.ones(3) 

243 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

244 >>> rhs = np.array([1.0, 2.0, 3.0]) 

245 >>> x = fm.solve(rhs) 

246 >>> np.allclose(fm.covariance @ x, rhs) 

247 True 

248 """ 

249 n = self.n_assets 

250 if rhs.shape != (n,): 

251 raise DimensionMismatchError(rhs.size, n) 

252 

253 # D^{-1} is free because D is diagonal 

254 d_inv = 1.0 / self.idiosyncratic_var # (n,) 

255 d_inv_rhs = d_inv * rhs # D^{-1} b, shape (n,) 

256 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k) 

257 

258 # Solve mid * w = B^T D^{-1} b, where mid = F^{-1} + B^T D^{-1} B. 

259 # F^{-1} is obtained via a Cholesky solve rather than an explicit 

260 # inversion, consistent with the Cholesky-first discipline in _linalg.py. 

261 # A condition-number check on factor_covariance is applied first so 

262 # that ill-conditioned F is flagged before its inverse enters mid. 

263 rhs_k = self.factor_loadings.T @ d_inv_rhs # (k,) 

264 try: 

265 _check_and_warn_condition(self.factor_covariance, cond_threshold) 

266 mid = _inv(self.factor_covariance, cond_threshold) + self.factor_loadings.T @ d_inv_b_mat # (k, k) 

267 _check_and_warn_condition(mid, cond_threshold) 

268 w = _solve(mid, rhs_k, cond_threshold) # (k,) 

269 except np.linalg.LinAlgError as exc: 

270 raise SingularMatrixError(str(exc)) from exc 

271 

272 # x = D^{-1} b - D^{-1} B w 

273 return cast("np.ndarray", d_inv_rhs - d_inv_b_mat @ w) 

274 

275 @classmethod 

276 def from_returns(cls, returns: np.ndarray, k: int) -> FactorModel: 

277 r"""Fit a rank-*k* factor model from a return matrix via truncated SVD. 

278 

279 Extracts latent factors from the return matrix 

280 $\mathbf{R} \in \mathbb{R}^{T \times n}$ using the Singular 

281 Value Decomposition (SVD). The top-*k* singular triplets define the 

282 factor model components: 

283 

284 $$ 

285 \mathbf{B} = \mathbf{V}_k, \quad 

286 \mathbf{F} = \bm{\Sigma}_k^2 / T, \quad 

287 \hat{d}_i = 1 - \bigl(\mathbf{B}\mathbf{F}\mathbf{B}^\top\bigr)_{ii} 

288 $$ 

289 

290 where $\mathbf{V}_k$ and $\bm{\Sigma}_k$ are the top-*k* 

291 right singular vectors and singular values of $\mathbf{R}$ 

292 respectively. When *returns* contains unit-variance columns (as 

293 produced by `vol_adj`), the sample 

294 covariance has unit diagonal; the idiosyncratic term 

295 $\hat{d}_i = 1 - (\mathbf{B}\mathbf{F}\mathbf{B}^\top)_{ii}$ 

296 absorbs the residual so the full covariance $\hat{\mathbf{C}}^{(k)}$ 

297 also has unit diagonal. Each $\hat{d}_i$ is clamped from below 

298 at machine epsilon to guarantee strict positivity. 

299 

300 Args: 

301 returns: Return matrix of shape ``(T, n)``, typically 

302 volatility-adjusted log returns with rows as timestamps and 

303 columns as assets. 

304 k: Number of factors to retain. Must satisfy 

305 ``1 <= k <= min(T, n)``. 

306 

307 Returns: 

308 FactorModel: Fitted factor model with ``n_assets = n`` and 

309 ``n_factors = k``. 

310 

311 Raises: 

312 FactorModelError: If *returns* is not 2-D. 

313 FactorModelError: If *k* is outside the range ``[1, min(T, n)]``. 

314 

315 Examples: 

316 >>> import numpy as np 

317 >>> rng = np.random.default_rng(0) 

318 >>> ret = rng.standard_normal((50, 5)) 

319 >>> fm = FactorModel.from_returns(ret, k=2) 

320 >>> fm.n_factors 

321 2 

322 >>> fm.n_assets 

323 5 

324 >>> fm.covariance.shape 

325 (5, 5) 

326 """ 

327 if returns.ndim != 2: 

328 raise FactorModelError(f"Return matrix must be 2-D, got ndim={returns.ndim}.") # noqa: TRY003 

329 t_len, n = returns.shape 

330 if not (1 <= k <= min(t_len, n)): 

331 raise FactorModelError(f"k must satisfy 1 <= k <= min(T, n) = {min(t_len, n)}, got k={k}.") # noqa: TRY003 

332 

333 _, s, vt = np.linalg.svd(returns, full_matrices=False) 

334 

335 # Top-k right singular vectors as columns: shape (n, k) 

336 v_k = vt[:k].T 

337 s_k = s[:k] 

338 

339 # Factor covariance: diagonal matrix with entries s_j**2 / T 

340 factor_cov = np.diag(s_k**2 / t_len) 

341 

342 # Diagonal of B*F*B^T = sum_j (s_j**2/T) * B[:,j]**2 

343 factor_diag = (v_k**2) @ (s_k**2 / t_len) 

344 

345 # Idiosyncratic variance: target diagonal is 1.0 (unit-variance columns 

346 # assumed); residual = 1.0 - systematic contribution, clamped to (0, inf) 

347 _unit_variance = 1.0 

348 d = np.maximum(_unit_variance - factor_diag, np.finfo(float).eps) 

349 

350 return cls(factor_loadings=v_k, factor_covariance=factor_cov, idiosyncratic_var=d)