Coverage for src/fast_minimum_variance/data/_simulate.py: 100%

18 statements  

« prev     ^ index     » next       coverage.py v7.15.0, created at 2026-07-02 13:28 +0000

1"""Simulate equity returns from a latent factor model.""" 

2 

3import numpy as np 

4 

5__all__ = ["simulate_equity_returns"] 

6 

7 

8def simulate_equity_returns( 

9 n: int, 

10 T: int, # noqa: N803 

11 *, 

12 k: int | None = None, 

13 rng: np.random.Generator | int | None = None, 

14) -> np.ndarray: 

15 """Simulate a TxN demeaned equity return matrix with latent factor structure. 

16 

17 Returns are generated from the model 

18 

19 X = F @ B.T + E 

20 

21 where F (TxK) are factor returns, B (NxK) are factor loadings, and E (TxN) 

22 is idiosyncratic noise. The first factor is a market factor with universally 

23 positive loadings and high variance; the remaining k-1 factors are 

24 style/industry factors with sparse loadings. This produces a covariance 

25 spectrum qualitatively similar to equity universes: a dominant market 

26 eigenvalue, a handful of secondary factor eigenvalues, and a long tail of 

27 near-equal idiosyncratic eigenvalues. 

28 

29 Parameters 

30 ---------- 

31 n: 

32 Number of assets. 

33 T: 

34 Number of time periods (trading days). 

35 k: 

36 Number of latent factors. Defaults to ``max(3, n // 10)``. 

37 rng: 

38 Random state — a :class:`numpy.random.Generator`, an integer seed, 

39 or ``None`` (non-reproducible). 

40 

41 Returns: 

42 ------- 

43 X : ndarray of shape (T, n) 

44 Demeaned return matrix. Each column has zero mean. 

45 

46 Examples: 

47 -------- 

48 >>> X = simulate_equity_returns(100, 200, k=5, rng=0) 

49 >>> X.shape 

50 (200, 100) 

51 >>> bool(abs(X.mean(axis=0)).max() < 1e-14) 

52 True 

53 """ 

54 rng = np.random.default_rng(rng) 

55 if k is None: 

56 k = max(3, n // 10) 

57 

58 # Factor volatilities (daily): market ~1 %, style factors ~0.5 % 

59 factor_vols = np.concatenate([[0.01], np.full(k - 1, 0.005)]) 

60 

61 # Factor returns: T x k 

62 F = rng.standard_normal((T, k)) * factor_vols # noqa: N806 

63 

64 # Factor loadings: n x k 

65 # Market: all assets have positive exposure in [0.4, 0.8] 

66 # Style: sparse (~50 % non-zero), drawn from N(0, 0.2) 

67 B = np.zeros((n, k)) # noqa: N806 

68 B[:, 0] = rng.uniform(0.4, 0.8, size=n) 

69 for j in range(1, k): 

70 mask = rng.random(n) < 0.5 

71 B[mask, j] = rng.standard_normal(int(mask.sum())) * 0.2 

72 

73 # Idiosyncratic volatility: uniform in [0.5 %, 1.5 %] per asset 

74 idio_vols = rng.uniform(0.005, 0.015, size=n) 

75 E = rng.standard_normal((T, n)) * idio_vols # noqa: N806 

76 

77 X: np.ndarray = F @ B.T + E # noqa: N806 

78 X -= X.mean(axis=0) # noqa: N806 

79 return X