Coverage for src/jquantstats/_stats/_montecarlo.py: 100%
72 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 06:13 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 06:13 +0000
1"""Monte Carlo simulation statistics."""
3from __future__ import annotations
5import math
6from typing import TYPE_CHECKING
8import numpy as np
9import polars as pl
11if TYPE_CHECKING:
12 from ..data import Data
class _MonteCarloStatsMixin:
    """Mixin providing Monte Carlo simulation statistics for return series.

    Every public method resamples the historical returns held in ``_data``
    with a block bootstrap and reduces each simulated path to one statistic,
    returning a frame with one row per path and one column per asset.

    Sampling draws from NumPy's global random state (``np.random.randint``),
    so seed it with ``np.random.seed`` when reproducible results are needed.
    """

    _data: Data
    all: pl.DataFrame

    if TYPE_CHECKING:
        from .._protocol import DataLike

        data: DataLike

    @staticmethod
    def _validate_positive_integer(name: str, value: int) -> int:
        """Validate that *value* is a positive integer and return it as ``int``.

        Builtin ``int`` and NumPy integer scalars (e.g. ``np.int64``) are
        accepted; booleans, floats and every other type are rejected.

        Args:
            name: Parameter name used in the error message.
            value: Candidate value to check.

        Returns:
            int: *value* converted to a builtin ``int``.

        Raises:
            ValueError: If *value* is not an integer type or is not > 0.

        """
        # bool is a subclass of int, so it has to be excluded explicitly.
        is_integer = isinstance(value, (int, np.integer)) and not isinstance(value, bool)
        if not is_integer or value <= 0:
            raise ValueError(f"{name} must be a positive integer")  # noqa: TRY003
        return int(value)

    @staticmethod
    def _block_bootstrap_paths(values: np.ndarray, n: int, period: int, block_size: int) -> np.ndarray:
        """Generate *n* return paths via block bootstrap, returning an ``(n, period)`` array.

        All *n* paths are sampled simultaneously using fully vectorised numpy
        indexing — no Python-level loop is required.

        Args:
            values: 1-D array of historical returns.
            n: Number of simulation paths.
            period: Length of each simulated path (number of return observations).
            block_size: Block length for the block bootstrap resampling.

        Returns:
            np.ndarray: Float64 array of shape ``(n, period)``.

        """
        n_obs = values.size
        if n_obs == 0:  # pragma: no cover
            return np.full((n, period), np.nan, dtype=np.float64)

        # Enough whole blocks to cover the horizon; the surplus is trimmed below.
        n_blocks = math.ceil(period / block_size)
        # Exclusive upper bound for a block start so a full block fits in the
        # history; never below 1 so randint always has a valid range.
        max_start = max(1, n_obs - block_size + 1)
        # Draw all block starting indices at once: shape (n, n_blocks)
        starts = np.random.randint(0, max_start, size=(n, n_blocks))
        # Build full index array: (n, n_blocks, block_size)
        idx = starts[:, :, np.newaxis] + np.arange(block_size)[np.newaxis, np.newaxis, :]
        # Guard for block_size > n_obs: out-of-range offsets repeat the last observation.
        idx = np.clip(idx, 0, n_obs - 1)
        # Flatten and trim to the requested period length
        return np.asarray(values[idx].reshape(n, -1)[:, :period])

    @staticmethod
    def _sharpe_from_paths(arr: np.ndarray, scale: float) -> np.ndarray:
        """Compute one scaled Sharpe ratio per row of an ``(n, period)`` array.

        Args:
            arr: Simulated return paths, one path per row.
            scale: Annualisation multiplier applied to ``mean / std``.

        Returns:
            np.ndarray: 1-D array of length ``n``; NaN for rows without dispersion.

        """
        means = arr.mean(axis=1)
        stds = arr.std(axis=1, ddof=1)
        # A constant path can yield a tiny non-zero std through rounding in the
        # mean (e.g. a row of 0.1s), so flat rows are also detected exactly by
        # comparing the row maximum with the row minimum.
        flat = (arr.max(axis=1) == arr.min(axis=1)) | (stds == 0.0)
        with np.errstate(invalid="ignore", divide="ignore"):
            return np.where(flat, np.nan, means / stds * scale)

    def _resolve_periods_per_year(self, periods_per_year: int | float | None) -> int | float:
        """Return the annualisation factor to use, validating that it is positive.

        Args:
            periods_per_year: Explicit factor, or ``None`` to fall back to
                ``self._data._periods_per_year``.

        Returns:
            int | float: The resolved, strictly positive factor.

        Raises:
            ValueError: If the resolved factor is not greater than zero
                (this includes NaN).

        """
        ppy = self._data._periods_per_year if periods_per_year is None else periods_per_year
        # ``not ppy > 0`` instead of ``ppy <= 0`` so that NaN is rejected as well.
        if not ppy > 0:
            raise ValueError("periods_per_year must be positive")  # noqa: TRY003
        return ppy

    def _simulate_distribution(self, n: int, period: int) -> dict[str, np.ndarray]:
        """Prepare validated inputs and sample *n* block-bootstrap paths per asset.

        Returns a dict mapping each asset column name to an ``(n, period)``
        float64 array of simulated return paths. Assets with no usable data
        map to an ``(n, period)`` array of NaN.

        Args:
            n: Number of simulation paths (must be a positive integer).
            period: Path length in return observations (must be a positive integer).

        Returns:
            dict[str, np.ndarray]: Asset name → ``(n, period)`` paths array.

        Raises:
            ValueError: If *n* or *period* is not a positive integer.

        """
        n = self._validate_positive_integer("n", n)
        period = self._validate_positive_integer("period", period)
        # Block length defaults to the rounded square root of the horizon.
        block_size = max(1, round(period**0.5))

        paths: dict[str, np.ndarray] = {}
        for col, series in self._data.items():
            clean = series.cast(pl.Float64).drop_nulls().drop_nans()
            values = np.asarray(clean.to_numpy(), dtype=np.float64)
            if values.size == 0:
                paths[col] = np.full((n, period), np.nan, dtype=np.float64)
            else:
                # A block can never be longer than the available history.
                block = min(block_size, values.size)
                paths[col] = self._block_bootstrap_paths(values, n, period, block)
        return paths

    def montecarlo(self, n: int = 1000, period: int = 252) -> pl.DataFrame:
        """Simulate cumulative returns across *n* block-bootstrap paths.

        Args:
            n: Number of Monte Carlo paths. Defaults to 1000.
            period: Simulation horizon in return observations. Defaults to 252.

        Returns:
            pl.DataFrame: Shape ``(n, n_assets)`` — one simulated terminal
                cumulative return per path and asset.

        Raises:
            ValueError: If *n* or *period* is not a positive integer.

        Returns NaN when:
            Entries are NaN for assets with no usable (non-null, non-NaN)
            observations.
        """
        paths = self._simulate_distribution(n=n, period=period)
        result = {col: np.prod(1.0 + arr, axis=1) - 1.0 for col, arr in paths.items()}
        return pl.DataFrame(result)

    def montecarlo_sharpe(
        self,
        n: int = 1000,
        period: int = 252,
        periods_per_year: int | float | None = None,
    ) -> pl.DataFrame:
        """Simulate the Sharpe-ratio distribution across block-bootstrap paths.

        Args:
            n: Number of Monte Carlo paths. Defaults to 1000.
            period: Simulation horizon in return observations. Defaults to 252.
            periods_per_year: Annualisation factor. Defaults to the value
                inferred from the data.

        Returns:
            pl.DataFrame: Shape ``(n, n_assets)`` — one simulated annualised
                Sharpe ratio per path and asset.

        Raises:
            ValueError: If *periods_per_year* is not positive, or *n* or
                *period* is not a positive integer.

        Returns NaN when:
            Entries are NaN when a path has no dispersion (all simulated returns
            identical, which includes ``period == 1``) or the asset has no
            usable observations.
        """
        scale = math.sqrt(self._resolve_periods_per_year(periods_per_year))
        paths = self._simulate_distribution(n=n, period=period)
        result = {col: self._sharpe_from_paths(arr, scale) for col, arr in paths.items()}
        return pl.DataFrame(result)

    def montecarlo_drawdown(self, n: int = 1000, period: int = 252) -> pl.DataFrame:
        """Simulate the maximum-drawdown distribution across block-bootstrap paths.

        The drawdown of each path is measured against the running maximum of
        its simulated compounded NAV.

        Args:
            n: Number of Monte Carlo paths. Defaults to 1000.
            period: Simulation horizon in return observations. Defaults to 252.

        Returns:
            pl.DataFrame: Shape ``(n, n_assets)`` — one simulated maximum
                drawdown per path and asset (values in ``[-1, 0]`` as long as
                no single return is below -100%).

        Raises:
            ValueError: If *n* or *period* is not a positive integer.

        Returns NaN when:
            Entries are NaN for assets with no usable (non-null, non-NaN)
            observations.
        """
        paths = self._simulate_distribution(n=n, period=period)
        result: dict[str, np.ndarray] = {}
        for col, arr in paths.items():
            nav = np.cumprod(1.0 + arr, axis=1)
            hwm = np.maximum.accumulate(nav, axis=1)
            result[col] = np.min(nav / hwm - 1.0, axis=1)
        return pl.DataFrame(result)

    def montecarlo_cagr(
        self,
        n: int = 1000,
        period: int = 252,
        periods_per_year: int | float | None = None,
    ) -> pl.DataFrame:
        """Simulate the CAGR distribution across block-bootstrap paths.

        Args:
            n: Number of Monte Carlo paths. Defaults to 1000.
            period: Simulation horizon in return observations. Defaults to 252.
            periods_per_year: Annualisation factor. Defaults to the value
                inferred from the data.

        Returns:
            pl.DataFrame: Shape ``(n, n_assets)`` — one simulated annualised
                CAGR per path and asset.

        Raises:
            ValueError: If *periods_per_year* is not positive, or *n* or
                *period* is not a positive integer.

        Returns NaN when:
            Entries are NaN when a path's terminal compound return is non-positive
            or the asset has no usable observations.
        """
        ppy = self._resolve_periods_per_year(periods_per_year)
        paths = self._simulate_distribution(n=n, period=period)
        # Derived only after ``period`` has been validated, so a bad ``period``
        # surfaces as the documented ValueError rather than failing here.
        years = period / ppy
        result: dict[str, np.ndarray] = {}
        for col, arr in paths.items():
            totals = np.prod(1.0 + arr, axis=1)
            # A non-positive total has no real fractional power; mask it as NaN.
            with np.errstate(invalid="ignore"):
                result[col] = np.where(totals > 0, totals ** (1.0 / years) - 1.0, np.nan)
        return pl.DataFrame(result)