Coverage for src/jquantstats/_stats/

1"""Monte Carlo simulation statistics."""

3from __future__ import annotations

5import math

6from typing import TYPE_CHECKING

8import numpy as np

9import polars as pl

11if TYPE_CHECKING:

12 from ..data import Data

15class _MonteCarloStatsMixin:

16 """Mixin providing Monte Carlo simulation statistics for return series."""

18 _data: Data

19 all: pl.DataFrame

21 if TYPE_CHECKING:

22 from .._protocol import DataLike

24 data: DataLike

26 @staticmethod

27 def _validate_positive_integer(name: str, value: int) -> int:

28 """Validate that *value* is a positive integer."""

29 if isinstance(value, bool) or not isinstance(value, int) or value <= 0:

30 raise ValueError(f"{name} must be a positive integer") # noqa: TRY003

31 return value

33 @staticmethod

34 def _block_bootstrap_paths(values: np.ndarray, n: int, period: int, block_size: int) -> np.ndarray:

35 """Generate *n* return paths via block bootstrap, returning an ``(n, period)`` array.

37 All *n* paths are sampled simultaneously using fully vectorised numpy

38 indexing — no Python-level loop is required.

40 Args:

41 values: 1-D array of historical returns.

42 n: Number of simulation paths.

43 period: Length of each simulated path (number of return observations).

44 block_size: Block length for the block bootstrap resampling.

46 Returns:

47 np.ndarray: Float64 array of shape ``(n, period)``.

49 """

50 n_obs = values.size

51 if n_obs == 0: # pragma: no cover

52 return np.full((n, period), np.nan, dtype=np.float64)

54 n_blocks = math.ceil(period / block_size)

55 max_start = max(1, n_obs - block_size + 1)

56 # Draw all block starting indices at once: shape (n, n_blocks)

57 starts = np.random.randint(0, max_start, size=(n, n_blocks))

58 # Build full index array: (n, n_blocks, block_size)

59 idx = starts[:, :, np.newaxis] + np.arange(block_size)[np.newaxis, np.newaxis, :]

60 idx = np.clip(idx, 0, n_obs - 1)

61 # Flatten and trim to the requested period length

62 return np.asarray(values[idx].reshape(n, -1)[:, :period])

64 def _simulate_distribution(self, n: int, period: int) -> dict[str, np.ndarray]:

65 """Prepare validated inputs and sample *n* block-bootstrap paths per asset.

67 Returns a dict mapping each asset column name to an ``(n, period)``

68 float64 array of simulated return paths. Assets with no usable data

69 map to an ``(n, period)`` array of NaN.

71 Args:

72 n: Number of simulation paths (must be a positive integer).

73 period: Path length in return observations (must be a positive integer).

75 Returns:

76 dict[str, np.ndarray]: Asset name → ``(n, period)`` paths array.

78 """

79 n = self._validate_positive_integer("n", n)

80 period = self._validate_positive_integer("period", period)

81 block_size = max(1, round(period**0.5))

83 paths: dict[str, np.ndarray] = {}

84 for col, series in self._data.items():

85 clean = series.cast(pl.Float64).drop_nulls().drop_nans()

86 values = np.asarray(clean.to_numpy(), dtype=np.float64)

87 if values.size == 0:

88 paths[col] = np.full((n, period), np.nan, dtype=np.float64)

89 else:

90 block = min(block_size, values.size)

91 paths[col] = self._block_bootstrap_paths(values, n, period, block)

92 return paths

94 def montecarlo(self, n: int = 1000, period: int = 252) -> pl.DataFrame:

95 """Simulate cumulative returns across *n* block-bootstrap paths.

97 Args:

98 n: Number of Monte Carlo paths. Defaults to 1000.

99 period: Simulation horizon in return observations. Defaults to 252.

100

101 Returns:

102 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated terminal

103 cumulative return per path and asset.

104

105

106 Returns NaN when:

107 Entries are NaN for assets with no usable (non-null, non-NaN)

108 observations.

109 """

110 paths = self._simulate_distribution(n=n, period=period)

111 result = {col: np.prod(1.0 + arr, axis=1) - 1.0 for col, arr in paths.items()}

112 return pl.DataFrame(result)

113

114 def montecarlo_sharpe(

115 self,

116 n: int = 1000,

117 period: int = 252,

118 periods_per_year: int | float | None = None,

119 ) -> pl.DataFrame:

120 """Simulate the Sharpe-ratio distribution across block-bootstrap paths.

121

122 Args:

123 n: Number of Monte Carlo paths. Defaults to 1000.

124 period: Simulation horizon in return observations. Defaults to 252.

125 periods_per_year: Annualisation factor. Defaults to the value

126 inferred from the data.

127

128 Returns:

129 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated annualised

130 Sharpe ratio per path and asset.

131

132

133 Returns NaN when:

134 Entries are NaN when a path's standard deviation is zero or the asset

135 has no usable observations.

136 """

137 ppy = self._data._periods_per_year if periods_per_year is None else periods_per_year

138 if ppy <= 0:

139 raise ValueError("periods_per_year must be positive") # noqa: TRY003

140 scale = math.sqrt(ppy)

141 paths = self._simulate_distribution(n=n, period=period)

142 result: dict[str, np.ndarray] = {}

143 for col, arr in paths.items():

144 means = arr.mean(axis=1)

145 stds = arr.std(axis=1, ddof=1)

146 with np.errstate(invalid="ignore", divide="ignore"):

147 result[col] = np.where(stds == 0.0, np.nan, means / stds * scale)

148 return pl.DataFrame(result)

149

150 def montecarlo_drawdown(self, n: int = 1000, period: int = 252) -> pl.DataFrame:

151 """Simulate the maximum-drawdown distribution across block-bootstrap paths.

152

153 Args:

154 n: Number of Monte Carlo paths. Defaults to 1000.

155 period: Simulation horizon in return observations. Defaults to 252.

156

157 Returns:

158 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated maximum

159 drawdown per path and asset (values in ``[-1, 0]``).

160

161

162 Returns NaN when:

163 Entries are NaN for assets with no usable (non-null, non-NaN)

164 observations.

165 """

166 paths = self._simulate_distribution(n=n, period=period)

167 result: dict[str, np.ndarray] = {}

168 for col, arr in paths.items():

169 nav = np.cumprod(1.0 + arr, axis=1)

170 hwm = np.maximum.accumulate(nav, axis=1)

171 result[col] = np.min(nav / hwm - 1.0, axis=1)

172 return pl.DataFrame(result)

173

174 def montecarlo_cagr(

175 self,

176 n: int = 1000,

177 period: int = 252,

178 periods_per_year: int | float | None = None,

179 ) -> pl.DataFrame:

180 """Simulate the CAGR distribution across block-bootstrap paths.

181

182 Args:

183 n: Number of Monte Carlo paths. Defaults to 1000.

184 period: Simulation horizon in return observations. Defaults to 252.

185 periods_per_year: Annualisation factor. Defaults to the value

186 inferred from the data.

187

188 Returns:

189 pl.DataFrame: Shape ``(n, n_assets)`` — one simulated annualised

190 CAGR per path and asset.

191

192

193 Returns NaN when:

194 Entries are NaN when a path's terminal compound return is non-positive

195 or the asset has no usable observations.

196 """

197 ppy = self._data._periods_per_year if periods_per_year is None else periods_per_year

198 if ppy <= 0:

199 raise ValueError("periods_per_year must be positive") # noqa: TRY003

200 years = period / ppy

201 paths = self._simulate_distribution(n=n, period=period)

202 result: dict[str, np.ndarray] = {}

203 for col, arr in paths.items():

204 totals = np.prod(1.0 + arr, axis=1)

205 with np.errstate(invalid="ignore"):

206 result[col] = np.where(totals > 0, totals ** (1.0 / years) - 1.0, np.nan)

207 return pl.DataFrame(result)

Coverage for src/jquantstats/_stats/_montecarlo.py: 100%

72 statements