Coverage for src/jquantstats/_stats/_montecarlo.py: 100%

72 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 06:13 +0000

1"""Monte Carlo simulation statistics.""" 

2 

3from __future__ import annotations 

4 

5import math 

6from typing import TYPE_CHECKING 

7 

8import numpy as np 

9import polars as pl 

10 

11if TYPE_CHECKING: 

12 from ..data import Data 

13 

14 

15class _MonteCarloStatsMixin: 

16 """Mixin providing Monte Carlo simulation statistics for return series.""" 

17 

18 _data: Data 

19 all: pl.DataFrame 

20 

21 if TYPE_CHECKING: 

22 from .._protocol import DataLike 

23 

24 data: DataLike 

25 

26 @staticmethod 

27 def _validate_positive_integer(name: str, value: int) -> int: 

28 """Validate that *value* is a positive integer.""" 

29 if isinstance(value, bool) or not isinstance(value, int) or value <= 0: 

30 raise ValueError(f"{name} must be a positive integer") # noqa: TRY003 

31 return value 

32 

33 @staticmethod 

34 def _block_bootstrap_paths(values: np.ndarray, n: int, period: int, block_size: int) -> np.ndarray: 

35 """Generate *n* return paths via block bootstrap, returning an ``(n, period)`` array. 

36 

37 All *n* paths are sampled simultaneously using fully vectorised numpy 

38 indexing — no Python-level loop is required. 

39 

40 Args: 

41 values: 1-D array of historical returns. 

42 n: Number of simulation paths. 

43 period: Length of each simulated path (number of return observations). 

44 block_size: Block length for the block bootstrap resampling. 

45 

46 Returns: 

47 np.ndarray: Float64 array of shape ``(n, period)``. 

48 

49 """ 

50 n_obs = values.size 

51 if n_obs == 0: # pragma: no cover 

52 return np.full((n, period), np.nan, dtype=np.float64) 

53 

54 n_blocks = math.ceil(period / block_size) 

55 max_start = max(1, n_obs - block_size + 1) 

56 # Draw all block starting indices at once: shape (n, n_blocks) 

57 starts = np.random.randint(0, max_start, size=(n, n_blocks)) 

58 # Build full index array: (n, n_blocks, block_size) 

59 idx = starts[:, :, np.newaxis] + np.arange(block_size)[np.newaxis, np.newaxis, :] 

60 idx = np.clip(idx, 0, n_obs - 1) 

61 # Flatten and trim to the requested period length 

62 return np.asarray(values[idx].reshape(n, -1)[:, :period]) 

63 

64 def _simulate_distribution(self, n: int, period: int) -> dict[str, np.ndarray]: 

65 """Prepare validated inputs and sample *n* block-bootstrap paths per asset. 

66 

67 Returns a dict mapping each asset column name to an ``(n, period)`` 

68 float64 array of simulated return paths. Assets with no usable data 

69 map to an ``(n, period)`` array of NaN. 

70 

71 Args: 

72 n: Number of simulation paths (must be a positive integer). 

73 period: Path length in return observations (must be a positive integer). 

74 

75 Returns: 

76 dict[str, np.ndarray]: Asset name → ``(n, period)`` paths array. 

77 

78 """ 

79 n = self._validate_positive_integer("n", n) 

80 period = self._validate_positive_integer("period", period) 

81 block_size = max(1, round(period**0.5)) 

82 

83 paths: dict[str, np.ndarray] = {} 

84 for col, series in self._data.items(): 

85 clean = series.cast(pl.Float64).drop_nulls().drop_nans() 

86 values = np.asarray(clean.to_numpy(), dtype=np.float64) 

87 if values.size == 0: 

88 paths[col] = np.full((n, period), np.nan, dtype=np.float64) 

89 else: 

90 block = min(block_size, values.size) 

91 paths[col] = self._block_bootstrap_paths(values, n, period, block) 

92 return paths 

93 

94 def montecarlo(self, n: int = 1000, period: int = 252) -> pl.DataFrame: 

95 """Simulate cumulative returns across *n* block-bootstrap paths. 

96 

97 Args: 

98 n: Number of Monte Carlo paths. Defaults to 1000. 

99 period: Simulation horizon in return observations. Defaults to 252. 

100 

101 Returns: 

102 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated terminal 

103 cumulative return per path and asset. 

104 

105 

106 Returns NaN when: 

107 Entries are NaN for assets with no usable (non-null, non-NaN) 

108 observations. 

109 """ 

110 paths = self._simulate_distribution(n=n, period=period) 

111 result = {col: np.prod(1.0 + arr, axis=1) - 1.0 for col, arr in paths.items()} 

112 return pl.DataFrame(result) 

113 

114 def montecarlo_sharpe( 

115 self, 

116 n: int = 1000, 

117 period: int = 252, 

118 periods_per_year: int | float | None = None, 

119 ) -> pl.DataFrame: 

120 """Simulate the Sharpe-ratio distribution across block-bootstrap paths. 

121 

122 Args: 

123 n: Number of Monte Carlo paths. Defaults to 1000. 

124 period: Simulation horizon in return observations. Defaults to 252. 

125 periods_per_year: Annualisation factor. Defaults to the value 

126 inferred from the data. 

127 

128 Returns: 

129 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated annualised 

130 Sharpe ratio per path and asset. 

131 

132 

133 Returns NaN when: 

134 Entries are NaN when a path's standard deviation is zero or the asset 

135 has no usable observations. 

136 """ 

137 ppy = self._data._periods_per_year if periods_per_year is None else periods_per_year 

138 if ppy <= 0: 

139 raise ValueError("periods_per_year must be positive") # noqa: TRY003 

140 scale = math.sqrt(ppy) 

141 paths = self._simulate_distribution(n=n, period=period) 

142 result: dict[str, np.ndarray] = {} 

143 for col, arr in paths.items(): 

144 means = arr.mean(axis=1) 

145 stds = arr.std(axis=1, ddof=1) 

146 with np.errstate(invalid="ignore", divide="ignore"): 

147 result[col] = np.where(stds == 0.0, np.nan, means / stds * scale) 

148 return pl.DataFrame(result) 

149 

150 def montecarlo_drawdown(self, n: int = 1000, period: int = 252) -> pl.DataFrame: 

151 """Simulate the maximum-drawdown distribution across block-bootstrap paths. 

152 

153 Args: 

154 n: Number of Monte Carlo paths. Defaults to 1000. 

155 period: Simulation horizon in return observations. Defaults to 252. 

156 

157 Returns: 

158 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated maximum 

159 drawdown per path and asset (values in ``[-1, 0]``). 

160 

161 

162 Returns NaN when: 

163 Entries are NaN for assets with no usable (non-null, non-NaN) 

164 observations. 

165 """ 

166 paths = self._simulate_distribution(n=n, period=period) 

167 result: dict[str, np.ndarray] = {} 

168 for col, arr in paths.items(): 

169 nav = np.cumprod(1.0 + arr, axis=1) 

170 hwm = np.maximum.accumulate(nav, axis=1) 

171 result[col] = np.min(nav / hwm - 1.0, axis=1) 

172 return pl.DataFrame(result) 

173 

174 def montecarlo_cagr( 

175 self, 

176 n: int = 1000, 

177 period: int = 252, 

178 periods_per_year: int | float | None = None, 

179 ) -> pl.DataFrame: 

180 """Simulate the CAGR distribution across block-bootstrap paths. 

181 

182 Args: 

183 n: Number of Monte Carlo paths. Defaults to 1000. 

184 period: Simulation horizon in return observations. Defaults to 252. 

185 periods_per_year: Annualisation factor. Defaults to the value 

186 inferred from the data. 

187 

188 Returns: 

189 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated annualised 

190 CAGR per path and asset. 

191 

192 

193 Returns NaN when: 

194 Entries are NaN when a path's terminal compound return is non-positive 

195 or the asset has no usable observations. 

196 """ 

197 ppy = self._data._periods_per_year if periods_per_year is None else periods_per_year 

198 if ppy <= 0: 

199 raise ValueError("periods_per_year must be positive") # noqa: TRY003 

200 years = period / ppy 

201 paths = self._simulate_distribution(n=n, period=period) 

202 result: dict[str, np.ndarray] = {} 

203 for col, arr in paths.items(): 

204 totals = np.prod(1.0 + arr, axis=1) 

205 with np.errstate(invalid="ignore"): 

206 result[col] = np.where(totals > 0, totals ** (1.0 / years) - 1.0, np.nan) 

207 return pl.DataFrame(result)