Coverage for src / basanos / math / _factor_model.py: 100%

69 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 17:47 +0000

1r"""Factor risk model decomposition (Section 4.1 of basanos.pdf). 

2 

3This private module provides the :class:`FactorModel` frozen dataclass, which 

4encapsulates the three-component factor model 

5 

6.. math:: 

7 

8 \\bm{\\Sigma} = \\mathbf{B}\\mathbf{F}\\mathbf{B}^\\top + \\mathbf{D} 

9 

10and a class method for fitting the model from a return matrix via the 

11Singular Value Decomposition (Section 4.2). 

12""" 

13 

14from __future__ import annotations 

15 

16import dataclasses 

17 

18import numpy as np 

19 

20from basanos.exceptions import ( 

21 DimensionMismatchError, 

22 FactorModelError, 

23 SingularMatrixError, 

24) 

25from basanos.math._linalg import ( 

26 _DEFAULT_COND_THRESHOLD, 

27 _check_and_warn_condition, 

28 _cholesky_solve, 

29) 

30 

31 

32@dataclasses.dataclass(frozen=True) 

33class FactorModel: 

34 r"""Frozen dataclass for a factor risk model decomposition (Section 4.1). 

35 

36 Encapsulates the three components of the factor model 

37 

38 .. math:: 

39 

40 \bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top + \mathbf{D} 

41 

42 where 

43 

44 - :math:`\mathbf{B} \in \mathbb{R}^{n \times k}` is the *factor loading 

45 matrix*: column :math:`j` gives the sensitivity of each asset to 

46 factor :math:`j`. 

47 - :math:`\mathbf{F} \in \mathbb{R}^{k \times k}` is the *factor covariance 

48 matrix* (positive definite), capturing how the :math:`k` factors 

49 co-vary. 

50 - :math:`\mathbf{D} = \operatorname{diag}(d_1, \dots, d_n)` with 

51 :math:`d_i > 0` is the *idiosyncratic variance* diagonal, capturing 

52 the asset-specific variance unexplained by the common factors. 

53 

54 The central assumption is :math:`k \ll n`: the dominant systematic sources 

55 of risk are captured by a handful of factors while the idiosyncratic 

56 component is, by construction, uncorrelated across assets. 

57 

58 Attributes: 

59 factor_loadings: Factor loading matrix :math:`\mathbf{B}`, 

60 shape ``(n, k)``. 

61 factor_covariance: Factor covariance matrix :math:`\mathbf{F}`, 

62 shape ``(k, k)``. 

63 idiosyncratic_var: Idiosyncratic variance vector 

64 :math:`(d_1, \dots, d_n)`, shape ``(n,)``. All entries must be 

65 strictly positive. 

66 

67 Examples: 

68 >>> import numpy as np 

69 >>> loadings = np.eye(3, 2) 

70 >>> cov = np.eye(2) * 0.5 

71 >>> idio = np.array([0.5, 0.5, 1.0]) 

72 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

73 >>> fm.n_assets 

74 3 

75 >>> fm.n_factors 

76 2 

77 >>> fm.covariance.shape 

78 (3, 3) 

79 """ 

80 

81 factor_loadings: np.ndarray 

82 factor_covariance: np.ndarray 

83 idiosyncratic_var: np.ndarray 

84 

85 def __post_init__(self) -> None: 

86 """Validate shape consistency and strict positivity after initialization. 

87 

88 Raises: 

89 FactorModelError: If ``factor_loadings`` is not 2-D. 

90 FactorModelError: If ``factor_covariance`` shape does not 

91 match the number of factors inferred from ``factor_loadings``. 

92 FactorModelError: If ``idiosyncratic_var`` length does 

93 not match the number of assets inferred from ``factor_loadings``. 

94 FactorModelError: If any element of 

95 ``idiosyncratic_var`` is not strictly positive. 

96 """ 

97 if self.factor_loadings.ndim != 2: 

98 raise FactorModelError(f"factor_loadings must be 2-D, got ndim={self.factor_loadings.ndim}.") # noqa: TRY003 

99 n, k = self.factor_loadings.shape 

100 if self.factor_covariance.shape != (k, k): 

101 raise FactorModelError( # noqa: TRY003 

102 f"factor_covariance must have shape ({k}, {k}) to match " 

103 f"factor_loadings columns, got {self.factor_covariance.shape}." 

104 ) 

105 if self.idiosyncratic_var.shape != (n,): 

106 raise FactorModelError( # noqa: TRY003 

107 f"idiosyncratic_var must have shape ({n},) to match factor_loadings rows, " 

108 f"got {self.idiosyncratic_var.shape}." 

109 ) 

110 if not np.all(self.idiosyncratic_var > 0): 

111 raise FactorModelError("All entries of idiosyncratic_var must be strictly positive.") # noqa: TRY003 

112 

113 @property 

114 def n_assets(self) -> int: 

115 """Number of assets *n* (rows of ``factor_loadings``).""" 

116 return self.factor_loadings.shape[0] 

117 

118 @property 

119 def n_factors(self) -> int: 

120 """Number of factors *k* (columns of ``factor_loadings``).""" 

121 return self.factor_loadings.shape[1] 

122 

123 @property 

124 def covariance(self) -> np.ndarray: 

125 r"""Reconstruct the full :math:`n \times n` covariance matrix. 

126 

127 Computes :math:`\bm{\Sigma} = \mathbf{B}\mathbf{F}\mathbf{B}^\top + 

128 \mathbf{D}` by combining the low-rank systematic component with the 

129 diagonal idiosyncratic component. 

130 

131 Returns: 

132 np.ndarray: Shape ``(n, n)`` symmetric covariance matrix. 

133 

134 Examples: 

135 >>> import numpy as np 

136 >>> loadings = np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]]) 

137 >>> cov = np.eye(2) 

138 >>> idio = np.ones(3) 

139 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

140 >>> fm.covariance.diagonal().tolist() 

141 [2.0, 2.0, 1.0] 

142 """ 

143 return self.factor_loadings @ self.factor_covariance @ self.factor_loadings.T + np.diag(self.idiosyncratic_var) 

144 

145 @property 

146 def woodbury_condition_number(self) -> float: 

147 r"""Condition number of the inner :math:`k \times k` Woodbury matrix. 

148 

149 Returns the condition number of the matrix 

150 

151 .. math:: 

152 

153 \mathbf{M} = \mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B} 

154 

155 which is the matrix actually inverted during :meth:`solve`. A large 

156 value (above ``_DEFAULT_COND_THRESHOLD`` ≈ 1e12) indicates that the 

157 Woodbury solve is numerically unreliable. 

158 

159 This property gives callers a way to inspect the numerical health of 

160 the model without performing a full solve. Unlike the condition number 

161 of the full :math:`n \times n` covariance matrix, this measure is 

162 specific to the :math:`k \times k` inner system solved inside the 

163 Woodbury identity. 

164 

165 Returns: 

166 float: Condition number :math:`\kappa(\mathbf{M})`. Returns 

167 ``inf`` when :math:`\mathbf{F}` is not positive-definite (e.g. 

168 singular or indefinite), as the Cholesky decomposition used to 

169 form :math:`\mathbf{F}^{-1}` fails in that case. 

170 

171 Examples: 

172 >>> import numpy as np 

173 >>> loadings = np.eye(3, 1) 

174 >>> cov = np.eye(1) 

175 >>> idio = np.ones(3) 

176 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

177 >>> fm.woodbury_condition_number > 0 

178 True 

179 """ 

180 d_inv = 1.0 / self.idiosyncratic_var # (n,) 

181 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k) 

182 try: 

183 mid = ( 

184 _cholesky_solve(self.factor_covariance, np.eye(self.n_factors)) + self.factor_loadings.T @ d_inv_b_mat 

185 ) # (k, k) 

186 except np.linalg.LinAlgError: 

187 return float("inf") 

188 return float(np.linalg.cond(mid)) 

189 

190 def solve( 

191 self, 

192 rhs: np.ndarray, 

193 cond_threshold: float = _DEFAULT_COND_THRESHOLD, 

194 ) -> np.ndarray: 

195 r"""Solve :math:`\bm{\Sigma}\,\mathbf{x} = \mathbf{b}` via the Woodbury identity. 

196 

197 Applies the Sherman--Morrison--Woodbury formula (Section 4.3 of 

198 basanos.pdf) to avoid forming or factorising the full 

199 :math:`n \times n` covariance matrix: 

200 

201 .. math:: 

202 

203 (\mathbf{D} + \mathbf{B}\mathbf{F}\mathbf{B}^\top)^{-1} 

204 = \mathbf{D}^{-1} 

205 - \mathbf{D}^{-1}\mathbf{B} 

206 \bigl(\mathbf{F}^{-1} + \mathbf{B}^\top\mathbf{D}^{-1}\mathbf{B}\bigr)^{-1} 

207 \mathbf{B}^\top\mathbf{D}^{-1}. 

208 

209 Because :math:`\mathbf{D}` is diagonal, :math:`\mathbf{D}^{-1}` is 

210 free. The inner matrix is :math:`k \times k` with cost 

211 :math:`O(k^3)`, and the surrounding multiplications cost 

212 :math:`O(kn)`. Total cost is :math:`O(k^3 + kn)` rather than 

213 :math:`O(n^3)`. 

214 

215 Args: 

216 rhs: Right-hand side vector :math:`\mathbf{b}`, shape ``(n,)``. 

217 cond_threshold: Condition-number threshold above which an 

218 :class:`~basanos.exceptions.IllConditionedMatrixWarning` is 

219 emitted. The check is applied to both ``factor_covariance`` 

220 (:math:`\mathbf{F}`) and to the inner :math:`k \times k` 

221 Woodbury matrix :math:`\mathbf{F}^{-1} + \mathbf{B}^\top 

222 \mathbf{D}^{-1}\mathbf{B}`. Defaults to ``1e12``. 

223 

224 Returns: 

225 np.ndarray: Solution vector :math:`\mathbf{x}`, shape ``(n,)``. 

226 

227 Raises: 

228 DimensionMismatchError: If ``rhs`` length does not match 

229 ``n_assets``. 

230 SingularMatrixError: If the inner :math:`k \times k` matrix is 

231 singular. 

232 

233 Examples: 

234 >>> import numpy as np 

235 >>> loadings = np.eye(3, 1) 

236 >>> cov = np.eye(1) 

237 >>> idio = np.ones(3) 

238 >>> fm = FactorModel(factor_loadings=loadings, factor_covariance=cov, idiosyncratic_var=idio) 

239 >>> rhs = np.array([1.0, 2.0, 3.0]) 

240 >>> x = fm.solve(rhs) 

241 >>> np.allclose(fm.covariance @ x, rhs) 

242 True 

243 """ 

244 n = self.n_assets 

245 if rhs.shape != (n,): 

246 raise DimensionMismatchError(rhs.size, n) 

247 

248 # D^{-1} is free because D is diagonal 

249 d_inv = 1.0 / self.idiosyncratic_var # (n,) 

250 d_inv_rhs = d_inv * rhs # D^{-1} b, shape (n,) 

251 d_inv_b_mat = d_inv[:, None] * self.factor_loadings # D^{-1} B, shape (n, k) 

252 

253 # Solve mid * w = B^T D^{-1} b, where mid = F^{-1} + B^T D^{-1} B. 

254 # F^{-1} is obtained via a Cholesky solve rather than an explicit 

255 # inversion, consistent with the Cholesky-first discipline in _linalg.py. 

256 # A condition-number check on factor_covariance is applied first so 

257 # that ill-conditioned F is flagged before its inverse enters mid. 

258 rhs_k = self.factor_loadings.T @ d_inv_rhs # (k,) 

259 try: 

260 _check_and_warn_condition(self.factor_covariance, cond_threshold) 

261 mid = ( 

262 _cholesky_solve(self.factor_covariance, np.eye(self.n_factors)) + self.factor_loadings.T @ d_inv_b_mat 

263 ) # (k, k) 

264 _check_and_warn_condition(mid, cond_threshold) 

265 w = _cholesky_solve(mid, rhs_k) # (k,) 

266 except np.linalg.LinAlgError as exc: 

267 raise SingularMatrixError(str(exc)) from exc 

268 

269 # x = D^{-1} b - D^{-1} B w 

270 return d_inv_rhs - d_inv_b_mat @ w 

271 

272 @classmethod 

273 def from_returns(cls, returns: np.ndarray, k: int) -> FactorModel: 

274 r"""Fit a rank-*k* factor model from a return matrix via truncated SVD. 

275 

276 Extracts latent factors from the return matrix 

277 :math:`\mathbf{R} \in \mathbb{R}^{T \times n}` using the Singular 

278 Value Decomposition (SVD). The top-*k* singular triplets define the 

279 factor model components: 

280 

281 .. math:: 

282 

283 \mathbf{B} = \mathbf{V}_k, \quad 

284 \mathbf{F} = \bm{\Sigma}_k^2 / T, \quad 

285 \hat{d}_i = 1 - \bigl(\mathbf{B}\mathbf{F}\mathbf{B}^\top\bigr)_{ii} 

286 

287 where :math:`\mathbf{V}_k` and :math:`\bm{\Sigma}_k` are the top-*k* 

288 right singular vectors and singular values of :math:`\mathbf{R}` 

289 respectively. When *returns* contains unit-variance columns (as 

290 produced by :func:`~basanos.math._signal.vol_adj`), the sample 

291 covariance has unit diagonal; the idiosyncratic term 

292 :math:`\hat{d}_i = 1 - (\mathbf{B}\mathbf{F}\mathbf{B}^\top)_{ii}` 

293 absorbs the residual so the full covariance :math:`\hat{\mathbf{C}}^{(k)}` 

294 also has unit diagonal. Each :math:`\hat{d}_i` is clamped from below 

295 at machine epsilon to guarantee strict positivity. 

296 

297 Args: 

298 returns: Return matrix of shape ``(T, n)``, typically 

299 volatility-adjusted log returns with rows as timestamps and 

300 columns as assets. 

301 k: Number of factors to retain. Must satisfy 

302 ``1 <= k <= min(T, n)``. 

303 

304 Returns: 

305 FactorModel: Fitted factor model with ``n_assets = n`` and 

306 ``n_factors = k``. 

307 

308 Raises: 

309 FactorModelError: If *returns* is not 2-D. 

310 FactorModelError: If *k* is outside the range ``[1, min(T, n)]``. 

311 

312 Examples: 

313 >>> import numpy as np 

314 >>> rng = np.random.default_rng(0) 

315 >>> ret = rng.standard_normal((50, 5)) 

316 >>> fm = FactorModel.from_returns(ret, k=2) 

317 >>> fm.n_factors 

318 2 

319 >>> fm.n_assets 

320 5 

321 >>> fm.covariance.shape 

322 (5, 5) 

323 """ 

324 if returns.ndim != 2: 

325 raise FactorModelError(f"Return matrix must be 2-D, got ndim={returns.ndim}.") # noqa: TRY003 

326 t_len, n = returns.shape 

327 if not (1 <= k <= min(t_len, n)): 

328 raise FactorModelError(f"k must satisfy 1 <= k <= min(T, n) = {min(t_len, n)}, got k={k}.") # noqa: TRY003 

329 

330 _, s, vt = np.linalg.svd(returns, full_matrices=False) 

331 

332 # Top-k right singular vectors as columns: shape (n, k) 

333 v_k = vt[:k].T 

334 s_k = s[:k] 

335 

336 # Factor covariance: diagonal matrix with entries s_j**2 / T 

337 factor_cov = np.diag(s_k**2 / t_len) 

338 

339 # Diagonal of B*F*B^T = sum_j (s_j**2/T) * B[:,j]**2 

340 factor_diag = (v_k**2) @ (s_k**2 / t_len) 

341 

342 # Idiosyncratic variance: target diagonal is 1.0 (unit-variance columns 

343 # assumed); residual = 1.0 - systematic contribution, clamped to (0, inf) 

344 _unit_variance = 1.0 

345 d = np.maximum(_unit_variance - factor_diag, np.finfo(float).eps) 

346 

347 return cls(factor_loadings=v_k, factor_covariance=factor_cov, idiosyncratic_var=d)