Coverage for src/cvx/linalg/pca.py: 100%

1"""PCA analysis (pure NumPy implementation).

3This module provides Principal Component Analysis (PCA) for dimensionality

4reduction of return data. PCA is commonly used to construct factor models

5for portfolio optimization.

7Example:

8 Perform PCA on stock returns:

10 >>> import numpy as np

11 >>> from cvx.linalg import pca

12 >>> np.random.seed(42)

13 >>> returns = np.random.randn(100, 5)

14 >>> result = pca(returns, n_components=3)

15 >>> len(result.explained_variance)

16 3

17 >>> result.factors.shape

18 (100, 3)

19 >>> result.exposure.shape

20 (3, 5)

22"""

24from __future__ import annotations

26from collections import namedtuple

28import numpy as np

30from .svd import svd

# Alias used in this module's annotations for NumPy array arguments and results.
Matrix = np.ndarray

PCA = namedtuple(
    "PCA",
    ["explained_variance", "factors", "exposure", "cov", "systematic", "idiosyncratic"],
)
# A string literal placed after the assignment is just a discarded expression
# statement, so the documentation is attached explicitly. This makes it visible
# to help() and lets doctest collect the example.
PCA.__doc__ = """Named tuple containing the results of PCA analysis.

Attributes:
    explained_variance: Explained variance ratio for each component.
        Shape (n_components,).
    factors: Factor returns (principal components). Shape (n_samples, n_components).
    exposure: Factor exposures (loadings). Shape (n_components, n_assets).
    cov: Covariance matrix of the factors. Shape (n_components, n_components).
    systematic: Returns explained by the factors. Shape (n_samples, n_assets).
    idiosyncratic: Residual returns not explained by factors. Shape (n_samples, n_assets).

Example:
    >>> import numpy as np
    >>> from cvx.linalg import pca
    >>> np.random.seed(42)
    >>> returns = np.random.randn(50, 4)
    >>> result = pca(returns, n_components=2)
    >>> bool(result.explained_variance.sum() < 1)
    True
    >>> bool(np.allclose(result.systematic + result.idiosyncratic, returns, atol=1e-10))
    True

"""

def pca(returns: Matrix, n_components: int = 10) -> PCA:
    """Compute the first n principal components for a return matrix using SVD.

    The returns are centered column-wise, decomposed with ``svd`` and
    truncated to the leading ``n_components`` singular triplets.

    Args:
        returns: Array of asset returns with shape (n_samples, n_assets).
        n_components: Number of principal components to extract. Must not be
            negative. Defaults to 10. If more components are requested than
            the decomposition provides, slicing keeps all that are available.

    Returns:
        PCA named tuple containing:
            - explained_variance: Ratio of variance explained by each component
            - factors: Factor returns (scores)
            - exposure: Factor exposures (loadings)
            - cov: Factor covariance matrix (always two-dimensional)
            - systematic: Returns explained by factors
            - idiosyncratic: Residual returns

    Raises:
        ValueError: If ``n_components`` is negative.

    Example:
        >>> import numpy as np
        >>> from cvx.linalg import pca
        >>> np.random.seed(42)
        >>> returns = np.random.randn(100, 10)
        >>> result = pca(returns, n_components=3)
        >>> bool(result.explained_variance[0] > result.explained_variance[1])
        True
        >>> factor_corr = np.corrcoef(result.factors.T)
        >>> bool(np.allclose(factor_corr, np.eye(3), atol=0.1))
        True
        >>> VtV = result.exposure @ result.exposure.T
        >>> bool(np.allclose(VtV, np.eye(3), atol=1e-10))
        True
        >>> all(result.explained_variance[i] >= result.explained_variance[i+1]
        ...     for i in range(len(result.explained_variance)-1))
        True
        >>> reconstructed = result.factors @ result.exposure
        >>> centered_systematic = result.systematic - returns.mean(axis=0)
        >>> bool(np.allclose(reconstructed, centered_systematic, atol=1e-10))
        True
        >>> pca(returns, n_components=1).cov.shape
        (1, 1)

    """
    # A negative count would turn the slices below into "drop the last k"
    # instead of "keep the first k", silently returning the wrong components.
    if n_components < 0:
        raise ValueError(f"n_components must be non-negative, got {n_components}")

    x_mean = returns.mean(axis=0)
    x_centered = returns - x_mean

    # NOTE(review): assumes svd returns (u, s, vt) with singular values in
    # descending order, as the doctests above imply - confirm against .svd.
    u, s_full, vt = svd(x_centered)

    # Keep only the leading components.
    u = u[:, :n_components]
    s = s_full[:n_components]
    vt = vt[:n_components, :]

    factors: Matrix = u * s
    exposure: Matrix = vt
    # The ratio is taken against the total over all singular values, not only
    # the retained ones, so it sums to less than one when truncating.
    explained_variance: Matrix = (s**2) / np.sum(s_full**2)
    # np.cov collapses a single variable to a 0-d array; restore the
    # documented (n_components, n_components) shape.
    cov: Matrix = np.atleast_2d(np.cov(factors.T))

    # Low-rank reconstruction of the centered data, shared by both outputs.
    low_rank: Matrix = factors @ vt
    systematic: Matrix = low_rank + x_mean
    idiosyncratic: Matrix = x_centered - low_rank

    return PCA(
        explained_variance=explained_variance,
        factors=factors,
        exposure=exposure,
        cov=cov,
        systematic=systematic,
        idiosyncratic=idiosyncratic,
    )

Coverage for src / cvx / linalg / pca.py: 100%

21 statements