Coverage for src / basanos / analytics / _stats.py: 100%

279 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-19 05:23 +0000

1"""Statistical metrics and ratios for financial returns. 

2 

3This module defines the Stats class which operates on a Data instance to 

4compute per-asset statistics like skew, kurtosis, volatility, Sharpe, 

5VaR/CVaR, and more. 

6""" 

7 

8import dataclasses 

9from collections.abc import Callable, Iterable 

10from datetime import timedelta 

11from functools import wraps 

12from typing import cast 

13 

14import numpy as np 

15import polars as pl 

16from scipy.stats import norm 

17 

18 

def _drawdown_series(series: pl.Series) -> pl.Series:
    """Drawdown fractions for an additive returns series.

    The series is interpreted as additive daily returns; a NAV curve is
    built starting at 1.0 and compared against its running maximum (the
    high-water mark). The result is the fraction the NAV sits below that
    mark.

    Args:
        series: A Polars Series of additive returns (profit / AUM).

    Returns:
        A Polars Float64 Series with values in [0, 1]: 0 at an all-time
        high, 0.2 when the NAV is 20 % under its previous peak.

    Examples:
        >>> import polars as pl
        >>> s = pl.Series([0.0, -0.1, 0.2])
        >>> [round(x, 10) for x in _drawdown_series(s).to_list()]
        [0.0, 0.1, 0.0]
    """
    nav_curve = series.cast(pl.Float64).cum_sum() + 1.0
    high_water = nav_curve.cum_max()
    # The NAV starts at 1.0, so the high-water mark only approaches zero
    # after catastrophic cumulative losses; flooring the denominator at
    # 1e-10 keeps the division defined in that degenerate case while
    # having no effect on normal data.
    denominator = high_water.clip(lower_bound=1e-10)
    underwater = (high_water - nav_curve) / denominator
    return underwater.clip(lower_bound=0.0)

48 

49 

50def _to_float(value: object) -> float: 

51 """Safely convert a Polars aggregation result to float. 

52 

53 Examples: 

54 >>> _to_float(2.0) 

55 2.0 

56 >>> _to_float(None) 

57 0.0 

58 """ 

59 if value is None: 

60 return 0.0 

61 if isinstance(value, timedelta): 

62 return value.total_seconds() 

63 return float(cast(float, value)) 

64 

65 

66def _to_float_or_none(value: object) -> float | None: 

67 """Safely convert a Polars aggregation result to float or None.""" 

68 if value is None: 

69 return None 

70 if isinstance(value, timedelta): 

71 return value.total_seconds() 

72 return float(cast(float, value)) 

73 

74 

@dataclasses.dataclass(frozen=True)
class Stats:
    """Statistical analysis tools for financial returns data.

    This class provides a comprehensive set of methods for calculating various
    financial metrics and statistics on returns data, including:

    - Basic statistics (mean, skew, kurtosis)
    - Risk metrics (volatility, value-at-risk, drawdown)
    - Performance ratios (Sharpe, information ratio)
    - Win/loss metrics (win rate, profit factor, payoff ratio)

    The class is designed to work with the _Data class and operates on Polars
    DataFrames for efficient computation.

    Attributes:
        data: The _Data object containing returns data.

    Examples:
        >>> import polars as pl
        >>> from datetime import date
        >>> data = pl.DataFrame({
        ...     "date": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],
        ...     "returns": [0.01, -0.02, 0.03],
        ... })
        >>> stats = Stats(data=data)
        >>> stats.assets
        ['returns']
    """

    # Wide frame of returns: an optional 'date' column plus one numeric
    # column per asset.
    data: pl.DataFrame

    def __post_init__(self) -> None:
        """Validate the input data frame after initialization.

        Raises:
            TypeError: If ``data`` is not a Polars DataFrame.
            ValueError: If ``data`` contains no rows.
        """
        # Error messages make validation failures diagnosable; the raised
        # types are unchanged, so existing except clauses still match.
        if not isinstance(self.data, pl.DataFrame):
            raise TypeError("data must be a polars DataFrame")
        if self.data.height == 0:
            raise ValueError("data must contain at least one row")

117 

118 @property 

119 def assets(self) -> list[str]: 

120 """List of asset column names (numeric columns excluding 'date').""" 

121 return [c for c in self.data.columns if c != "date" and self.data[c].dtype.is_numeric()] 

122 

123 @staticmethod 

124 def _mean_positive_expr(series: pl.Series) -> float: 

125 """Return the mean of strictly positive values, or 0.0 if none exist.""" 

126 result = series.filter(series > 0).mean() 

127 return _to_float(result) 

128 

129 @staticmethod 

130 def _mean_negative_expr(series: pl.Series) -> float: 

131 """Return the mean of strictly negative values, or 0.0 if none exist.""" 

132 result = series.filter(series < 0).mean() 

133 return _to_float(result) 

134 

135 @staticmethod 

136 def columnwise_stat(func: Callable[..., float | int | None]) -> Callable[..., dict[str, float | int | None]]: 

137 """Apply a column-wise statistical function to all numeric columns. 

138 

139 Args: 

140 func (Callable): The function to decorate. 

141 

142 Returns: 

143 Callable: The decorated function. 

144 

145 """ 

146 

147 @wraps(func) 

148 def wrapper(self: "Stats", *args: object, **kwargs: object) -> dict[str, float | int | None]: 

149 """Apply the wrapped stat function to each asset column and return results as a dict.""" 

150 return {asset: func(self, self.data[asset], *args, **kwargs) for asset in self.assets} 

151 

152 return wrapper 

153 

154 @columnwise_stat 

155 def skew(self, series: pl.Series) -> float | None: 

156 """Calculate skewness (asymmetry) for each numeric column. 

157 

158 Args: 

159 series (pl.Series): The series to calculate skewness for. 

160 

161 Returns: 

162 float: The skewness value. 

163 

164 """ 

165 return _to_float_or_none(series.skew(bias=False)) 

166 

    @columnwise_stat
    def kurtosis(self, series: pl.Series) -> float | None:
        """Calculate the excess kurtosis of returns (Fisher definition).

        Uses the biased moment estimator (``bias=True``). Polars returns
        None when the series has too few non-null observations for the
        statistic to be defined; that None is propagated to the caller.
        """
        # Drop nulls to match test expectations (ignore missing values)
        s = series.drop_nulls()
        # Biased moment estimator; the Fisher convention subtracts 3 so a
        # normal distribution scores 0.
        return _to_float_or_none(s.kurtosis(bias=True))

181 

182 @columnwise_stat 

183 def avg_return(self, series: pl.Series) -> float: 

184 """Calculate average return per non-zero, non-null value. 

185 

186 Args: 

187 series (pl.Series): The series to calculate average return for. 

188 

189 Returns: 

190 float: The average return value. 

191 

192 """ 

193 result = series.filter(series.is_not_null() & (series != 0)).mean() 

194 return _to_float(result) 

195 

196 @columnwise_stat 

197 def avg_win(self, series: pl.Series) -> float: 

198 """Calculate the average winning return/trade for an asset. 

199 

200 Args: 

201 series (pl.Series): The series to calculate average win for. 

202 

203 Returns: 

204 float: The average winning return. 

205 

206 """ 

207 return self._mean_positive_expr(series) 

208 

209 @columnwise_stat 

210 def avg_loss(self, series: pl.Series) -> float: 

211 """Calculate the average loss return/trade for a period. 

212 

213 Args: 

214 series (pl.Series): The series to calculate average loss for. 

215 

216 Returns: 

217 float: The average loss return. 

218 

219 """ 

220 return self._mean_negative_expr(series) 

221 

222 @columnwise_stat 

223 def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float: 

224 """Calculate the volatility of returns. 

225 

226 - Std dev of returns 

227 - Annualized by sqrt(periods) if `annualize` is True. 

228 

229 Args: 

230 series (pl.Series): The series to calculate volatility for. 

231 periods (int, optional): Number of periods per year. Defaults to 252. 

232 annualize (bool, optional): Whether to annualize the result. Defaults to True. 

233 

234 Returns: 

235 float: The volatility value. 

236 

237 """ 

238 raw_periods = periods or self.periods_per_year 

239 

240 # Ensure it's numeric 

241 if not isinstance(raw_periods, int | float): 

242 raise TypeError 

243 

244 factor = np.sqrt(raw_periods) if annualize else 1.0 

245 return _to_float(series.std()) * factor 

246 

247 @columnwise_stat 

248 def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float: 

249 """Calculate the daily value-at-risk. 

250 

251 Uses variance-covariance calculation with confidence level. 

252 

253 Args: 

254 series (pl.Series): The series to calculate value at risk for. 

255 alpha (float, optional): Confidence level. Defaults to 0.05. 

256 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0. 

257 

258 Returns: 

259 float: The value at risk. 

260 

261 """ 

262 mu = _to_float(series.mean()) 

263 sigma *= _to_float(series.std()) 

264 

265 return float(norm.ppf(alpha, mu, sigma)) 

266 

267 @columnwise_stat 

268 def conditional_value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float: 

269 """Calculate the conditional value-at-risk. 

270 

271 Also known as CVaR or expected shortfall, calculated for each numeric column. 

272 

273 Args: 

274 series (pl.Series): The series to calculate conditional value at risk for. 

275 alpha (float, optional): Confidence level. Defaults to 0.05. 

276 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0. 

277 

278 Returns: 

279 float: The conditional value at risk. 

280 

281 """ 

282 mu = _to_float(series.mean()) 

283 sigma *= _to_float(series.std()) 

284 

285 var = norm.ppf(alpha, mu, sigma) 

286 

287 # Compute mean of returns less than or equal to VaR. 

288 # Return NaN when no empirical observations fall below the parametric 

289 # VaR threshold (empty filter), rather than the misleading 0.0 that 

290 # _to_float(None) would otherwise produce. 

291 mask = cast(Iterable[bool], series < var) 

292 filtered = series.filter(mask) 

293 if filtered.is_empty(): 

294 return float("nan") 

295 return _to_float(filtered.mean()) 

296 

297 @columnwise_stat 

298 def best(self, series: pl.Series) -> float | None: 

299 """Find the maximum return per column (best period). 

300 

301 Args: 

302 series (pl.Series): The series to find the best return for. 

303 

304 Returns: 

305 float: The maximum return value. 

306 

307 """ 

308 return _to_float_or_none(series.max()) 

309 

310 @columnwise_stat 

311 def worst(self, series: pl.Series) -> float | None: 

312 """Find the minimum return per column (worst period). 

313 

314 Args: 

315 series (pl.Series): The series to find the worst return for. 

316 

317 Returns: 

318 float: The minimum return value. 

319 

320 """ 

321 return _to_float_or_none(series.min()) 

322 

323 @columnwise_stat 

324 def win_rate(self, series: pl.Series) -> float: 

325 """Calculate the win rate (fraction of profitable periods). 

326 

327 Counts the proportion of non-null periods where the return is strictly 

328 positive. 

329 

330 Args: 

331 series (pl.Series): The series to calculate win rate for. 

332 

333 Returns: 

334 float: Win rate in [0, 1], or NaN when the series contains no 

335 non-null observations. 

336 

337 """ 

338 non_null = series.drop_nulls() 

339 if non_null.is_empty(): 

340 return float("nan") 

341 n_positive = int((non_null > 0).sum()) 

342 return n_positive / len(non_null) 

343 

344 @columnwise_stat 

345 def profit_factor(self, series: pl.Series) -> float: 

346 """Calculate the profit factor (gross wins / absolute gross losses). 

347 

348 A profit factor greater than 1.0 indicates the strategy produces more 

349 gross profit than gross loss. Returns ``inf`` when there are no losing 

350 periods, ``0.0`` when there are no winning periods, and ``nan`` when 

351 there are neither wins nor losses (and no losses). 

352 

353 Args: 

354 series (pl.Series): The series to calculate profit factor for. 

355 

356 Returns: 

357 float: The profit factor. 

358 

359 """ 

360 gross_wins = _to_float(series.filter(series > 0).sum()) 

361 gross_losses = abs(_to_float(series.filter(series < 0).sum())) 

362 if gross_losses == 0.0: 

363 return float("inf") if gross_wins > 0 else float("nan") 

364 return gross_wins / gross_losses 

365 

366 @columnwise_stat 

367 def payoff_ratio(self, series: pl.Series) -> float: 

368 """Calculate the payoff ratio (average win / absolute average loss). 

369 

370 Separates edge type — a high payoff ratio implies the strategy wins 

371 infrequently but with large magnitude; a low payoff ratio implies 

372 frequent small wins. Returns ``nan`` when either the average win or 

373 the average loss is zero (no profitable / no losing periods). 

374 

375 Args: 

376 series (pl.Series): The series to calculate payoff ratio for. 

377 

378 Returns: 

379 float: The payoff ratio. 

380 

381 """ 

382 avg_w = self._mean_positive_expr(series) 

383 avg_l = self._mean_negative_expr(series) 

384 if avg_l == 0.0: 

385 return float("nan") 

386 return avg_w / abs(avg_l) 

387 

    def monthly_win_rate(self) -> dict[str, float]:
        """Calculate the monthly win rate (fraction of profitable months).

        Groups the daily returns data by calendar month, computes the
        compounded return for each month via ``prod(1 + r) - 1``, then
        returns the fraction of months with a positive compounded return.
        NOTE(review): this compounds returns, whereas the drawdown helpers
        treat returns as additive — confirm this mix is intentional.

        Requires a ``date`` column in ``self.data``. When no ``date`` column
        is present, each asset entry is ``nan``.

        Returns:
            dict[str, float]: Monthly win rate in [0, 1] per asset, or
                ``nan`` when an asset has no non-null observations.

        """
        if "date" not in self.data.columns:
            return {asset: float("nan") for asset in self.assets}

        result: dict[str, float] = {}
        for asset in self.assets:
            # Keep only the rows where this asset has data, and tag each
            # row with its calendar (year, month) bucket.
            df = (
                self.data.select(["date", asset])
                .drop_nulls()
                .with_columns(
                    [
                        pl.col("date").dt.year().alias("_year"),
                        pl.col("date").dt.month().alias("_month"),
                    ]
                )
            )
            # Compound the daily returns within each month:
            # gross = prod(1 + r); monthly_return = gross - 1.
            monthly = (
                df.group_by(["_year", "_month"])
                .agg((pl.col(asset) + 1.0).product().alias("gross"))
                .with_columns((pl.col("gross") - 1.0).alias("monthly_return"))
            )
            n_total = len(monthly)
            if n_total == 0:
                # All rows were null for this asset -> no months to score.
                result[asset] = float("nan")
            else:
                n_positive = int((monthly["monthly_return"] > 0).sum())
                result[asset] = n_positive / n_total
        return result

429 

430 def worst_n_periods(self, n: int = 5) -> dict[str, list[float | None]]: 

431 """Return the N worst return periods per asset. 

432 

433 Sorts each asset's returns in ascending order and returns the first 

434 ``n`` values. If the series has fewer than ``n`` non-null 

435 observations the list is padded with ``None`` on the right. 

436 

437 Args: 

438 n (int, optional): Number of worst periods to return. Defaults to 5. 

439 

440 Returns: 

441 dict[str, list[float | None]]: Sorted worst returns per asset. 

442 

443 """ 

444 result: dict[str, list[float | None]] = {} 

445 for asset in self.assets: 

446 series = self.data[asset].drop_nulls() 

447 worst: list[float | None] = series.sort(descending=False).head(n).to_list() 

448 while len(worst) < n: 

449 worst.append(None) 

450 result[asset] = worst 

451 return result 

452 

453 def up_capture(self, benchmark: pl.Series) -> dict[str, float]: 

454 """Calculate the up-market capture ratio relative to a benchmark. 

455 

456 Measures the fraction of the benchmark's upside that the strategy 

457 captures. Uses geometric means over benchmark up-periods 

458 (benchmark > 0). A value greater than 1.0 means the strategy 

459 outperformed the benchmark in rising markets. 

460 

461 Args: 

462 benchmark (pl.Series): Benchmark return series aligned row-by-row 

463 with ``self.data``. 

464 

465 Returns: 

466 dict[str, float]: Up capture ratio per asset. 

467 

468 """ 

469 result: dict[str, float] = {} 

470 up_mask = benchmark > 0 

471 bench_up = benchmark.filter(up_mask).drop_nulls() 

472 if bench_up.is_empty(): 

473 return {asset: float("nan") for asset in self.assets} 

474 

475 bench_geom = float((bench_up + 1.0).product()) ** (1.0 / len(bench_up)) - 1.0 

476 if bench_geom == 0.0: 

477 return {asset: float("nan") for asset in self.assets} 

478 

479 for asset in self.assets: 

480 strat_up = self.data[asset].filter(up_mask).drop_nulls() 

481 if strat_up.is_empty(): 

482 result[asset] = float("nan") 

483 else: 

484 strat_geom = float((strat_up + 1.0).product()) ** (1.0 / len(strat_up)) - 1.0 

485 result[asset] = strat_geom / bench_geom 

486 return result 

487 

488 def down_capture(self, benchmark: pl.Series) -> dict[str, float]: 

489 """Calculate the down-market capture ratio relative to a benchmark. 

490 

491 Measures the fraction of the benchmark's downside that the strategy 

492 captures. Uses geometric means over benchmark down-periods 

493 (benchmark < 0). A value less than 1.0 means the strategy lost less 

494 than the benchmark in falling markets (a desirable property). 

495 

496 Args: 

497 benchmark (pl.Series): Benchmark return series aligned row-by-row 

498 with ``self.data``. 

499 

500 Returns: 

501 dict[str, float]: Down capture ratio per asset. 

502 

503 """ 

504 result: dict[str, float] = {} 

505 down_mask = benchmark < 0 

506 bench_down = benchmark.filter(down_mask).drop_nulls() 

507 if bench_down.is_empty(): 

508 return {asset: float("nan") for asset in self.assets} 

509 

510 bench_geom = float((bench_down + 1.0).product()) ** (1.0 / len(bench_down)) - 1.0 

511 if bench_geom == 0.0: 

512 return {asset: float("nan") for asset in self.assets} 

513 

514 for asset in self.assets: 

515 strat_down = self.data[asset].filter(down_mask).drop_nulls() 

516 if strat_down.is_empty(): 

517 result[asset] = float("nan") 

518 else: 

519 strat_geom = float((strat_down + 1.0).product()) ** (1.0 / len(strat_down)) - 1.0 

520 result[asset] = strat_geom / bench_geom 

521 return result 

522 

    @columnwise_stat
    def sharpe(self, series: pl.Series, periods: int | float | None = None) -> float:
        """Calculate the Sharpe ratio of asset returns.

        Args:
            series (pl.Series): The series to calculate Sharpe ratio for.
            periods (int | float | None): Number of periods per year used
                for annualisation. When None (the default), the
                ``periods_per_year`` estimate is used.

        Returns:
            float: The annualised Sharpe ratio, or NaN when the series has
                (numerically) zero variance.

        """
        periods = periods or self.periods_per_year

        mean_val = _to_float(series.mean())
        # Sample standard deviation (ddof=1).
        divisor = _to_float(series.std(ddof=1))

        # Treat as zero-variance if divisor is zero or indistinguishable from
        # floating-point noise (i.e. smaller than 10x machine epsilon x |mean|).
        _eps = np.finfo(np.float64).eps
        if divisor <= _eps * max(abs(mean_val), _eps) * 10:
            return float("nan")

        res = mean_val / divisor
        # `periods` is already truthy at this point; `or 1` is a defensive
        # fallback only.
        factor = periods or 1
        return float(res * np.sqrt(factor))

549 

550 @columnwise_stat 

551 def max_drawdown(self, series: pl.Series) -> float: 

552 """Maximum drawdown as a fraction of the high-water mark. 

553 

554 Computes the largest peak-to-trough decline in the cumulative additive 

555 NAV (starting at 1.0) expressed as a percentage of the peak. 

556 

557 Args: 

558 series (pl.Series): Series of additive daily returns. 

559 

560 Returns: 

561 float: Maximum drawdown in the range [0, 1]. 

562 

563 """ 

564 return _to_float(_drawdown_series(series).max()) 

565 

566 @columnwise_stat 

567 def avg_drawdown(self, series: pl.Series) -> float: 

568 """Average drawdown across all underwater periods. 

569 

570 Computes the mean drawdown percentage for every observation where the 

571 portfolio is below its previous peak. Returns 0.0 if there are no 

572 underwater periods. 

573 

574 Args: 

575 series (pl.Series): Series of additive daily returns. 

576 

577 Returns: 

578 float: Mean drawdown in the range [0, 1]. 

579 

580 """ 

581 dd = _drawdown_series(series) 

582 in_dd = dd.filter(dd > 0) 

583 if in_dd.is_empty(): 

584 return 0.0 

585 return _to_float(in_dd.mean()) 

586 

    def max_drawdown_duration(self) -> dict[str, float | int | None]:
        """Maximum drawdown duration in calendar days (or periods) per asset.

        Identifies consecutive runs of observations where the portfolio NAV is
        below its high-water mark and returns the length of the longest such
        run.

        When a ``date`` column is present the duration is expressed as the
        number of calendar days spanned by the run (inclusive of both
        endpoints). When no ``date`` column exists each row counts as one
        period, so the result is a count of consecutive underwater periods.

        Returns:
            dict[str, float | int | None]: Mapping from asset name to maximum
                drawdown duration. Returns 0 when there are no underwater
                periods.

        """
        has_date = "date" in self.data.columns
        result: dict[str, float | int | None] = {}
        for asset in self.assets:
            series = self.data[asset]
            # Additive NAV starting at 1.0; underwater wherever it trades
            # strictly below its running maximum.
            nav = 1.0 + series.cast(pl.Float64).cum_sum()
            hwm = nav.cum_max()
            in_dd = nav < hwm

            if not in_dd.any():
                # Never underwater -> duration 0 by definition.
                result[asset] = 0
                continue

            # Without real dates, substitute the row index so the same
            # start/end arithmetic yields a period count below.
            if has_date:
                frame = pl.DataFrame({"date": self.data["date"], "in_dd": in_dd})
            else:
                frame = pl.DataFrame({"date": pl.Series(list(range(len(series))), dtype=pl.Int64), "in_dd": in_dd})

            # rle_id assigns one id per run of equal consecutive values, so
            # each maximal underwater stretch gets its own run_id.
            frame = frame.with_columns(pl.col("in_dd").rle_id().alias("run_id"))

            # Keep only underwater rows and find each run's first/last date.
            dd_runs = (
                frame.filter(pl.col("in_dd"))
                .group_by("run_id")
                .agg(
                    [
                        pl.col("date").min().alias("start"),
                        pl.col("date").max().alias("end"),
                    ]
                )
            )

            # +1 makes the span inclusive of both endpoints.
            if has_date:
                dd_runs = dd_runs.with_columns(
                    ((pl.col("end") - pl.col("start")).dt.total_days() + 1).alias("duration")
                )
            else:
                dd_runs = dd_runs.with_columns((pl.col("end") - pl.col("start") + 1).alias("duration"))

            result[asset] = int(_to_float(dd_runs["duration"].max()))

        return result

645 

646 @columnwise_stat 

647 def calmar(self, series: pl.Series, periods: int | float | None = None) -> float: 

648 """Calmar ratio (annualized return divided by maximum drawdown). 

649 

650 A standard complement to the Sharpe ratio for trend-following and 

651 momentum strategies. Returns ``nan`` when the maximum drawdown is 

652 zero (no drawdown observed). 

653 

654 Args: 

655 series (pl.Series): Series of additive daily returns. 

656 periods (int | float | None): Annualisation factor (observations 

657 per year). Defaults to ``periods_per_year``. 

658 

659 Returns: 

660 float: Calmar ratio, or ``nan`` if max drawdown is zero. 

661 

662 """ 

663 raw_periods = periods or self.periods_per_year 

664 max_dd = _to_float(_drawdown_series(series).max()) 

665 if max_dd <= 0: 

666 return float("nan") 

667 ann_return = _to_float(series.mean()) * raw_periods 

668 return ann_return / max_dd 

669 

670 @columnwise_stat 

671 def recovery_factor(self, series: pl.Series) -> float: 

672 """Recovery factor (total return divided by maximum drawdown). 

673 

674 A robustness signal for systematic strategies: values well above 1 

675 indicate that cumulative profits are large relative to the worst 

676 historical loss. Returns ``nan`` when the maximum drawdown is zero. 

677 

678 Args: 

679 series (pl.Series): Series of additive daily returns. 

680 

681 Returns: 

682 float: Recovery factor, or ``nan`` if max drawdown is zero. 

683 

684 """ 

685 max_dd = _to_float(_drawdown_series(series).max()) 

686 if max_dd <= 0: 

687 return float("nan") 

688 total_return = _to_float(series.sum()) 

689 return total_return / max_dd 

690 

691 def rolling_sharpe(self, window: int = 63, periods: int | float | None = None) -> pl.DataFrame: 

692 """Compute rolling annualised Sharpe ratio over a sliding window. 

693 

694 Args: 

695 window: Number of periods in the rolling window. Defaults to 63. 

696 periods: Number of periods per year for annualisation. Defaults to 

697 ``periods_per_year``. 

698 

699 Returns: 

700 pl.DataFrame: A DataFrame with the date column (when present) and 

701 one column per asset. The first ``window - 1`` rows will be 

702 null. 

703 

704 Raises: 

705 ValueError: If ``window`` is not a positive integer. 

706 

707 """ 

708 if not isinstance(window, int) or window <= 0: 

709 raise ValueError 

710 

711 scale = np.sqrt(periods or self.periods_per_year) 

712 

713 exprs = [ 

714 ( 

715 pl.col(asset).rolling_mean(window_size=window) / pl.col(asset).rolling_std(window_size=window) * scale 

716 ).alias(asset) 

717 for asset in self.assets 

718 ] 

719 

720 cols: list[str | pl.Expr] = (["date"] if "date" in self.data.columns else []) + exprs 

721 return self.data.select(cols) 

722 

723 def rolling_volatility( 

724 self, window: int = 63, periods: int | float | None = None, annualize: bool = True 

725 ) -> pl.DataFrame: 

726 """Compute rolling volatility over a sliding window. 

727 

728 Args: 

729 window: Number of periods in the rolling window. Defaults to 63. 

730 periods: Number of periods per year for annualisation. Defaults to 

731 ``periods_per_year``. 

732 annualize: Whether to annualise the result by multiplying by 

733 ``sqrt(periods)``. Defaults to True. 

734 

735 Returns: 

736 pl.DataFrame: A DataFrame with the date column (when present) and 

737 one column per asset. The first ``window - 1`` rows will be 

738 null. 

739 

740 Raises: 

741 ValueError: If ``window`` is not a positive integer. 

742 TypeError: If ``periods`` is not numeric. 

743 

744 """ 

745 if not isinstance(window, int) or window <= 0: 

746 raise ValueError 

747 

748 raw_periods = periods or self.periods_per_year 

749 if not isinstance(raw_periods, int | float): 

750 raise TypeError 

751 

752 factor = np.sqrt(raw_periods) if annualize else 1.0 

753 

754 exprs = [(pl.col(asset).rolling_std(window_size=window) * factor).alias(asset) for asset in self.assets] 

755 

756 cols: list[str | pl.Expr] = (["date"] if "date" in self.data.columns else []) + exprs 

757 return self.data.select(cols) 

758 

759 def annual_breakdown(self) -> pl.DataFrame: 

760 """Return summary statistics broken down by calendar year. 

761 

762 Groups the data by calendar year using the ``date`` column, computes 

763 a full :py:meth:`summary` for each year, and stacks the results into 

764 a single DataFrame with an additional ``year`` column. 

765 

766 Returns: 

767 pl.DataFrame: A DataFrame with columns ``year``, ``metric``, and 

768 one column per asset, sorted by ``year``. 

769 

770 Raises: 

771 ValueError: If the DataFrame has no ``date`` column. 

772 

773 """ 

774 if "date" not in self.data.columns: 

775 raise ValueError 

776 

777 years = self.data["date"].dt.year().unique().sort().to_list() 

778 

779 frames: list[pl.DataFrame] = [] 

780 for year in years: 

781 year_data = self.data.filter(self.data["date"].dt.year() == year) 

782 if year_data.height < 2: 

783 continue 

784 year_summary = Stats(year_data).summary() 

785 year_summary = year_summary.with_columns(pl.lit(year).alias("year")) 

786 frames.append(year_summary) 

787 

788 if not frames: 

789 # Build empty DataFrame with expected schema 

790 schema = {"year": pl.Int32, "metric": pl.String, **dict.fromkeys(self.assets, pl.Float64)} 

791 return pl.DataFrame(schema=schema) 

792 

793 result = pl.concat(frames) 

794 # Move 'year' to front 

795 ordered = ["year", "metric", *[c for c in result.columns if c not in ("year", "metric")]] 

796 return result.select(ordered) 

797 

    def summary(self) -> pl.DataFrame:
        """Return a DataFrame summarising all statistics for each asset.

        Each row corresponds to one statistical metric; each column (beyond
        the ``metric`` column) corresponds to one asset in the portfolio.

        Returns:
            pl.DataFrame: A DataFrame with a ``metric`` column followed by one
                column per asset, containing the computed statistic values.

        """
        # Every entry maps metric name -> {asset: value}. All statistics are
        # evaluated with their defaults (e.g. the periods_per_year estimate
        # for annualised metrics).
        metrics: dict[str, dict[str, float | int | None] | dict[str, float | int]] = {
            "avg_return": self.avg_return(),
            "avg_win": self.avg_win(),
            "avg_loss": self.avg_loss(),
            "win_rate": self.win_rate(),
            "profit_factor": self.profit_factor(),
            "payoff_ratio": self.payoff_ratio(),
            "monthly_win_rate": self.monthly_win_rate(),
            "best": self.best(),
            "worst": self.worst(),
            "volatility": self.volatility(),
            "sharpe": self.sharpe(),
            "skew": self.skew(),
            "kurtosis": self.kurtosis(),
            "value_at_risk": self.value_at_risk(),
            "conditional_value_at_risk": self.conditional_value_at_risk(),
            "max_drawdown": self.max_drawdown(),
            "avg_drawdown": self.avg_drawdown(),
            "max_drawdown_duration": self.max_drawdown_duration(),
            "calmar": self.calmar(),
            "recovery_factor": self.recovery_factor(),
        }

        # One row per metric: {"metric": name, asset_a: value, asset_b: ...}.
        rows: list[dict[str, object]] = [
            {"metric": name, **{asset: values[asset] for asset in self.assets}} for name, values in metrics.items()
        ]

        return pl.DataFrame(rows)

837 

838 @property 

839 def periods_per_year(self) -> float: 

840 """Estimate the number of periods per year from timestamp spacing. 

841 

842 Computes the average spacing (in seconds) between consecutive timestamps using 

843 plain Python datetimes to avoid ambiguity around Polars Duration arithmetic, 

844 then returns 365 * 24 * 3600 divided by that spacing. 

845 

846 Returns: 

847 float: Estimated number of observations per calendar year. 

848 """ 

849 # Extract datetime values as Python objects (assuming a single datetime column) 

850 col_name = self.data.columns[0] 

851 dates = self.data[col_name] 

852 

853 # Index is guaranteed to have at least two rows by __post_init__, 

854 # so we can compute gaps directly after sorting. 

855 dates = dates.sort() 

856 # Compute successive differences in seconds 

857 gaps = dates.diff().drop_nulls() 

858 

859 mean_diff = gaps.mean() 

860 

861 # Convert Duration (timedelta) to seconds 

862 if isinstance(mean_diff, timedelta): 

863 seconds = mean_diff.total_seconds() 

864 elif mean_diff is not None: 

865 seconds = _to_float(mean_diff) 

866 else: 

867 # Fallback to daily if mean_diff is None 

868 seconds = 86400.0 

869 

870 return (365.0 * 24.0 * 60.0 * 60.0) / seconds