Coverage for src / cvx / linalg / pca.py: 100%

21 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 05:40 +0000

1"""PCA analysis (pure NumPy implementation). 

2 

3This module provides Principal Component Analysis (PCA) for dimensionality 

4reduction of return data. PCA is commonly used to construct factor models 

5for portfolio optimization. 

6 

7Example: 

8 Perform PCA on stock returns: 

9 

10 >>> import numpy as np 

11 >>> from cvx.linalg import pca 

12 >>> np.random.seed(42) 

13 >>> returns = np.random.randn(100, 5) 

14 >>> result = pca(returns, n_components=3) 

15 >>> len(result.explained_variance) 

16 3 

17 >>> result.factors.shape 

18 (100, 3) 

19 >>> result.exposure.shape 

20 (3, 5) 

21 

22""" 

23 

24from __future__ import annotations 

25 

26from collections import namedtuple 

27 

28import numpy as np 

29 

30from .svd import svd 

31 

# Alias used for the array annotations in this module.
Matrix = np.ndarray

PCA = namedtuple(
    "PCA",
    (
        "explained_variance",
        "factors",
        "exposure",
        "cov",
        "systematic",
        "idiosyncratic",
    ),
)
"""Result container returned by :func:`pca`.

Fields (in positional order):
    explained_variance: Share of the total variance captured by each
        retained component. Shape (n_components,).
    factors: Factor returns, i.e. the principal-component scores.
        Shape (n_samples, n_components).
    exposure: Factor exposures (loadings) of every asset.
        Shape (n_components, n_assets).
    cov: Covariance matrix of the factor returns.
        Shape (n_components, n_components).
    systematic: Part of the returns explained by the factors.
        Shape (n_samples, n_assets).
    idiosyncratic: Residual returns left unexplained by the factors.
        Shape (n_samples, n_assets).

Example:
    >>> import numpy as np
    >>> from cvx.linalg import pca
    >>> np.random.seed(42)
    >>> returns = np.random.randn(50, 4)
    >>> result = pca(returns, n_components=2)
    >>> bool(result.explained_variance.sum() < 1)
    True
    >>> np.allclose(result.systematic + result.idiosyncratic, returns, atol=1e-10)
    True

"""

61 

62 

def pca(returns: Matrix, n_components: int = 10) -> PCA:
    """Compute the first n principal components for a return matrix using SVD.

    The returns are centered column-wise, decomposed with the package's
    ``svd`` helper, and truncated to the leading ``n_components`` components.
    If fewer components are available than requested, all available
    components are returned.

    Args:
        returns: Array of asset returns with shape (n_samples, n_assets).
        n_components: Number of principal components to extract. Defaults to 10.
            Must be non-negative.

    Returns:
        PCA named tuple containing:
            - explained_variance: Ratio of variance explained by each component
            - factors: Factor returns (scores)
            - exposure: Factor exposures (loadings)
            - cov: Factor covariance matrix, always two-dimensional
            - systematic: Returns explained by factors
            - idiosyncratic: Residual returns

    Raises:
        ValueError: If ``n_components`` is negative.

    Example:
        >>> import numpy as np
        >>> from cvx.linalg import pca
        >>> np.random.seed(42)
        >>> returns = np.random.randn(100, 10)
        >>> result = pca(returns, n_components=3)
        >>> bool(result.explained_variance[0] > result.explained_variance[1])
        True
        >>> factor_corr = np.corrcoef(result.factors.T)
        >>> bool(np.allclose(factor_corr, np.eye(3), atol=0.1))
        True
        >>> VtV = result.exposure @ result.exposure.T
        >>> bool(np.allclose(VtV, np.eye(3), atol=1e-10))
        True
        >>> all(result.explained_variance[i] >= result.explained_variance[i+1]
        ...     for i in range(len(result.explained_variance)-1))
        True
        >>> reconstructed = result.factors @ result.exposure
        >>> centered_systematic = result.systematic - returns.mean(axis=0)
        >>> bool(np.allclose(reconstructed, centered_systematic, atol=1e-10))
        True
        >>> pca(returns, n_components=1).cov.shape
        (1, 1)

    """
    if n_components < 0:
        # A negative count would be read as a negative slice bound below and
        # silently drop trailing components instead of selecting leading ones.
        msg = f"n_components must be non-negative, got {n_components}"
        raise ValueError(msg)

    x_mean = returns.mean(axis=0)
    x_centered = returns - x_mean

    # NOTE(review): assumes svd returns (u, s, vt) with singular values in
    # descending order, as the doctests above rely on — confirm in .svd.
    u, s_full, vt = svd(x_centered)

    # Keep only the leading components; slicing clips to what is available.
    u = u[:, :n_components]
    s = s_full[:n_components]
    vt = vt[:n_components, :]

    factors: Matrix = u * s
    exposure: Matrix = vt
    # Normalize by the full spectrum so the ratios refer to total variance.
    explained_variance: Matrix = (s**2) / np.sum(s_full**2)
    # np.cov squeezes its result, so a single factor would yield a 0-d array;
    # atleast_2d keeps the documented (n_components, n_components) shape.
    cov: Matrix = np.atleast_2d(np.cov(factors.T))

    # Rank-truncated reconstruction of the centered data, computed once.
    projection: Matrix = factors @ vt
    systematic: Matrix = projection + x_mean
    idiosyncratic: Matrix = x_centered - projection

    return PCA(
        explained_variance=explained_variance,
        factors=factors,
        exposure=exposure,
        cov=cov,
        systematic=systematic,
        idiosyncratic=idiosyncratic,
    )