Coverage for src / cvx / linalg / pca.py: 100%
21 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 05:40 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 05:40 +0000
1"""PCA analysis (pure NumPy implementation).
3This module provides Principal Component Analysis (PCA) for dimensionality
4reduction of return data. PCA is commonly used to construct factor models
5for portfolio optimization.
7Example:
8 Perform PCA on stock returns:
10 >>> import numpy as np
11 >>> from cvx.linalg import pca
12 >>> np.random.seed(42)
13 >>> returns = np.random.randn(100, 5)
14 >>> result = pca(returns, n_components=3)
15 >>> len(result.explained_variance)
16 3
17 >>> result.factors.shape
18 (100, 3)
19 >>> result.exposure.shape
20 (3, 5)
22"""
24from __future__ import annotations
26from collections import namedtuple
28import numpy as np
30from .svd import svd
Matrix = np.ndarray

PCA = namedtuple(
    "PCA",
    (
        "explained_variance",
        "factors",
        "exposure",
        "cov",
        "systematic",
        "idiosyncratic",
    ),
)
"""Result container returned by :func:`pca`.

Fields, in positional order:
    explained_variance: Share of the total variance captured by each retained
        component. Shape (n_components,).
    factors: Factor returns, i.e. the principal component scores.
        Shape (n_samples, n_components).
    exposure: Loadings of every asset on every factor.
        Shape (n_components, n_assets).
    cov: Covariance matrix of the factor returns.
        Shape (n_components, n_components).
    systematic: Part of the returns reproduced by the factors.
        Shape (n_samples, n_assets).
    idiosyncratic: Residual part of the returns left unexplained by the
        factors. Shape (n_samples, n_assets).

Example:
    >>> import numpy as np
    >>> from cvx.linalg import pca
    >>> np.random.seed(42)
    >>> returns = np.random.randn(50, 4)
    >>> result = pca(returns, n_components=2)
    >>> result.explained_variance.sum() < 1
    True
    >>> np.allclose(result.systematic + result.idiosyncratic, returns, atol=1e-10)
    True

"""
def pca(returns: Matrix, n_components: int = 10) -> PCA:
    """Compute the first n principal components for a return matrix using SVD.

    The returns are centered column by column, decomposed with ``svd`` and
    truncated to the leading ``n_components`` singular values and vectors.

    Args:
        returns: Array of asset returns with shape (n_samples, n_assets).
        n_components: Number of principal components to extract. Defaults to 10.
            If the decomposition yields fewer components, all of them are kept.

    Returns:
        PCA named tuple containing:
            - explained_variance: Ratio of variance explained by each component
            - factors: Factor returns (scores)
            - exposure: Factor exposures (loadings)
            - cov: Factor covariance matrix, always two-dimensional
            - systematic: Returns explained by factors
            - idiosyncratic: Residual returns

    Example:
        >>> import numpy as np
        >>> from cvx.linalg import pca
        >>> np.random.seed(42)
        >>> returns = np.random.randn(100, 10)
        >>> result = pca(returns, n_components=3)
        >>> bool(result.explained_variance[0] > result.explained_variance[1])
        True
        >>> factor_corr = np.corrcoef(result.factors.T)
        >>> bool(np.allclose(factor_corr, np.eye(3), atol=0.1))
        True
        >>> VtV = result.exposure @ result.exposure.T
        >>> bool(np.allclose(VtV, np.eye(3), atol=1e-10))
        True
        >>> all(result.explained_variance[i] >= result.explained_variance[i+1]
        ...     for i in range(len(result.explained_variance)-1))
        True
        >>> reconstructed = result.factors @ result.exposure
        >>> centered_systematic = result.systematic - returns.mean(axis=0)
        >>> bool(np.allclose(reconstructed, centered_systematic, atol=1e-10))
        True
        >>> pca(returns, n_components=1).cov.shape
        (1, 1)

    """
    # Center every column so the decomposition describes variation around the mean.
    x_mean = returns.mean(axis=0)
    x_centered = returns - x_mean

    u, s_full, vt = svd(x_centered)

    # Keep only the leading components; s_full is retained for the variance total.
    u = u[:, :n_components]
    s = s_full[:n_components]
    vt = vt[:n_components, :]

    factors: Matrix = u * s
    exposure: Matrix = vt
    # Normalise by the full spectrum so the ratios refer to the total variance.
    explained_variance: Matrix = (s**2) / np.sum(s_full**2)
    # np.cov squeezes its output, so a single factor would come back as a 0-d
    # array; atleast_2d restores the documented (n_components, n_components) shape.
    cov: Matrix = np.atleast_2d(np.cov(factors.T))
    # Compute the factor reconstruction once and reuse it for both parts.
    explained: Matrix = factors @ exposure
    systematic: Matrix = explained + x_mean
    idiosyncratic: Matrix = x_centered - explained

    return PCA(
        explained_variance=explained_variance,
        factors=factors,
        exposure=exposure,
        cov=cov,
        systematic=systematic,
        idiosyncratic=idiosyncratic,
    )