Coverage for src/jquantstats/_stats/_basic.py: 100%

272 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 06:13 +0000

1"""Basic statistical metrics for financial returns data.""" 

2 

3from __future__ import annotations 

4 

5import math 

6from collections.abc import Iterable 

7from typing import TYPE_CHECKING, cast 

8 

9import numpy as np 

10import polars as pl 

11from scipy.stats import norm 

12 

13from ._core import _mean, columnwise_stat 

14from ._internals import _annualization_factor, _comp_return 

15 

16if TYPE_CHECKING: 

17 from ..data import Data 

18 

19# ── Basic statistics mixin ─────────────────────────────────────────────────── 

20 

21 

22class _BasicStatsMixin: 

23 """Mixin providing basic return/risk and win/loss financial statistics. 

24 

25 Covers: basic statistics (skew, kurtosis, avg return/win/loss), volatility, 

26 win/loss metrics (payoff ratio, profit factor), and risk metrics (VaR, CVaR, 

27 win rate, kelly criterion, best/worst, exposure). 

28 """ 

29 

30 _data: Data 

31 all: pl.DataFrame 

32 

33 if TYPE_CHECKING: 

34 from .._protocol import DataLike 

35 

36 data: DataLike 

37 

@staticmethod
def _positive(series: pl.Series) -> pl.Series:
    """Keep the strictly positive entries of *series* and discard the rest."""
    is_gain = series > 0
    return series.filter(is_gain)

42 

@staticmethod
def _negative(series: pl.Series) -> pl.Series:
    """Keep the strictly negative entries of *series* and discard the rest."""
    is_loss = series < 0
    return series.filter(is_loss)

47 

@staticmethod
def _mean_positive_expr(series: pl.Series) -> float:
    """Average the strictly positive values of *series*.

    The result is whatever ``_mean`` yields for the filtered series; per the
    existing contract that is NaN when no positive value exists.
    """
    gains = _BasicStatsMixin._positive(series)
    return _mean(gains)

52 

@staticmethod
def _mean_negative_expr(series: pl.Series) -> float:
    """Average the strictly negative values of *series*.

    The result is whatever ``_mean`` yields for the filtered series; per the
    existing contract that is NaN when no negative value exists.
    """
    losses = _BasicStatsMixin._negative(series)
    return _mean(losses)

57 

58 @staticmethod 

59 def _gaussian_quantile(alpha: float, mu: float, sigma: float) -> float: 

60 """Gaussian inverse-CDF (``norm.ppf``) returning NaN for a zero-scale input. 

61 

62 ``norm.ppf(alpha, mu, 0.0)`` already returns ``nan`` for a degenerate 

63 (zero-variance) distribution — but it emits an ``invalid value 

64 encountered in multiply`` RuntimeWarning while doing so (``inf * 0`` 

65 internally). Degenerate scale arises for a single observation (undefined 

66 std) or a constant series. Short-circuiting to ``float("nan")`` keeps the 

67 exact same result while suppressing the spurious warning; downstream 

68 masking relies on this NaN (Polars treats ``x < nan`` as ``True``). 

69 """ 

70 return float("nan") if sigma == 0.0 else float(norm.ppf(alpha, mu, sigma)) 

71 

72 # ── Basic statistics ────────────────────────────────────────────────────── 

73 

@columnwise_stat
def skew(self, series: pl.Series) -> int | float | None:
    """Measure the asymmetry of the return distribution.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Skewness computed by Polars with ``bias=False``.

    """
    asymmetry = series.skew(bias=False)
    return asymmetry

86 

@columnwise_stat
def kurtosis(self, series: pl.Series) -> int | float | None:
    """Measure the kurtosis of returns.

    Kurtosis describes how peaked (heavy-tailed) a distribution is compared
    to a normal distribution.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Kurtosis computed by Polars with ``bias=False``.

    """
    peakedness = series.kurtosis(bias=False)
    return peakedness

101 

@columnwise_stat
def avg_return(self, series: pl.Series) -> float:
    """Average the returns that are neither null nor exactly zero.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Mean of the non-null, non-zero returns.

    """
    is_active = series.is_not_null() & (series != 0)
    active_returns = series.filter(is_active)
    return _mean(active_returns)

114 

@columnwise_stat
def avg_win(self, series: pl.Series) -> float:
    """Average the winning (strictly positive) returns.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Mean of the positive returns, as computed by
            ``_mean_positive_expr``.

    """
    mean_gain = self._mean_positive_expr(series)
    return mean_gain

127 

@columnwise_stat
def avg_loss(self, series: pl.Series) -> float:
    """Average the losing (strictly negative) returns.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Mean of the negative returns, as computed by
            ``_mean_negative_expr``.

    """
    mean_loss = self._mean_negative_expr(series)
    return mean_loss

140 

@columnwise_stat
def comp(self, series: pl.Series) -> float:
    """Compound the returns over the whole period.

    Delegates to ``_comp_return``; the documented formula is
    ``product(1 + r) - 1``.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Total compounded return.

    """
    total_return = _comp_return(series)
    return total_return

155 

@columnwise_stat
def geometric_mean(self, series: pl.Series, periods: int | float | None = None, annualize: bool = False) -> float:
    """Compute the geometric mean of returns.

    Without annualization the result is the per-period geometric average
    ``(∏(1 + rᵢ))^(1/n) - 1``. With ``annualize=True`` the compounded growth
    is raised to ``periods / n`` instead, where ``n`` is the number of
    non-null observations.

    Args:
        series (pl.Series): Returns of a single column.
        periods (int | float, optional): Periods per year used when
            annualizing. A falsy value (``None`` or ``0``) falls back to
            ``self._data._periods_per_year``.
        annualize (bool): Whether to annualize the result. Defaults to False.

    Returns:
        float: The geometric mean return.


    Returns NaN when:
        ``float("nan")`` when the series has no non-null observations or the
        compounded growth ``product(1 + r)`` is non-positive.
    """
    observed = series.drop_nulls().cast(pl.Float64)
    count = observed.len()
    if count == 0:
        return float("nan")  # indeterminate: no observations

    growth = float((1.0 + observed).product())
    if growth <= 0:
        return float("nan")  # indeterminate: non-positive compound return

    if annualize:
        power = (periods or self._data._periods_per_year) / count
    else:
        power = 1.0 / count
    return float(growth**power) - 1.0

185 

186 # ── Volatility & risk ───────────────────────────────────────────────────── 

187 

@columnwise_stat
def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:
    """Compute the volatility (standard deviation) of returns.

    The standard deviation is multiplied by ``_annualization_factor(periods)``
    when ``annualize`` is True, and by 1.0 otherwise.

    Args:
        series (pl.Series): Returns of a single column.
        periods (int | float, optional): Number of periods per year. A falsy
            value falls back to ``self._data._periods_per_year``.
        annualize (bool, optional): Whether to annualize the result.
            Defaults to True.

    Returns:
        float: The volatility; an undefined standard deviation counts as 0.0.

    Raises:
        TypeError: If the resolved periods value is not an int or float.

    """
    raw_periods = periods or self._data._periods_per_year

    # Reject non-numeric periods before any computation happens.
    if not isinstance(raw_periods, int | float):
        raise TypeError(f"Expected int or float for periods, got {type(raw_periods).__name__}")  # noqa: TRY003

    scale = 1.0
    if annualize:
        scale = _annualization_factor(raw_periods)

    dispersion = cast(float, series.std())
    if dispersion is None:
        dispersion = 0.0
    return dispersion * scale

213 

214 # ── Win / loss metrics ──────────────────────────────────────────────────── 

215 

@columnwise_stat
def payoff_ratio(self, series: pl.Series) -> float:
    """Compute the payoff ratio: average win divided by the absolute average loss.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The payoff ratio.

    """
    mean_gain = self._mean_positive_expr(series)
    mean_loss_size = float(np.abs(self._mean_negative_expr(series)))
    return mean_gain / mean_loss_size

232 

@columnwise_stat
def profit_ratio(self, series: pl.Series) -> float:
    """Compute the profit ratio (win ratio divided by loss ratio).

    The win ratio is ``|mean(non-negative returns) / count|`` and the loss
    ratio is ``|mean(negative returns) / count|``. Zero returns count as wins
    here.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The profit ratio.


    Returns NaN when:
        ``float("nan")`` when the series has no wins or no losses.
    """
    non_negative = series.filter(series >= 0)
    negative = self._negative(series)

    # One-sided return series leave one of the two groups empty.
    if non_negative.is_empty() or negative.is_empty():
        return float("nan")  # indeterminate: no wins or no losses

    gain_mean = _mean(non_negative)
    loss_mean = _mean(negative)
    gain_per_count = float(np.abs(gain_mean / non_negative.count()))
    loss_per_count = float(np.abs(loss_mean / negative.count()))
    return gain_per_count / loss_per_count

262 

@columnwise_stat
def profit_factor(self, series: pl.Series) -> float:
    """Compute the profit factor: ``|sum(wins) / sum(losses)|``.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The profit factor.


    Returns NaN when:
        ``float("nan")`` when there are no losing periods (the denominator is
        zero), matching ``gain_to_pain_ratio``.
    """
    gains_total = float(self._positive(series).sum())
    losses_total = float(self._negative(series).sum())
    # Plain float division raises ZeroDivisionError on a zero denominator,
    # which would abort the whole per-column computation.
    if losses_total == 0:
        return float("nan")  # indeterminate: no losses (denominator is zero)
    return float(np.abs(gains_total / losses_total))

282 

283 # ── Risk metrics ────────────────────────────────────────────────────────── 

284 

@columnwise_stat
def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
    """Compute the parametric (variance-covariance) value-at-risk.

    The result is the ``alpha`` quantile of a normal distribution whose mean
    is the mean of the returns and whose scale is ``sigma`` times their
    standard deviation.

    Args:
        series (pl.Series): Returns of a single column.
        sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
        alpha (float, optional): Probability passed to the normal inverse-CDF
            (lower-tail mass). Defaults to 0.05.

    Returns:
        float: The value at risk; NaN when the resulting scale is zero.

    """
    center = _mean(series)
    dispersion = cast(float, series.std())
    # An undefined standard deviation is treated as zero scale.
    scale = sigma * (dispersion if dispersion is not None else 0.0)
    return self._gaussian_quantile(alpha, center, scale)

306 

@columnwise_stat
def _conditional_value_at_risk_impl(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
    """Per-series conditional value-at-risk: mean of returns strictly below the VaR."""
    center = _mean(series)
    dispersion = cast(float, series.std())
    # An undefined standard deviation is treated as zero scale.
    scale = sigma * (dispersion if dispersion is not None else 0.0)

    threshold = self._gaussian_quantile(alpha, center, scale)

    # Keep only the returns strictly below the VaR threshold; the cast only
    # satisfies the type checker and does nothing at runtime.
    tail_mask = cast(Iterable[bool], series < threshold)
    tail_returns = series.filter(tail_mask)
    return _mean(tail_returns)

322 

def conditional_value_at_risk(
    self, sigma: float = 1.0, confidence: float = 0.95, **kwargs: float
) -> dict[str, float]:
    """Calculate the conditional value-at-risk (CVaR / Expected Shortfall).

    Also known as expected shortfall, calculated for each numeric column.

    Args:
        sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
        confidence (float, optional): Confidence level (e.g. 0.95 for 95 %).
            Converted internally to ``alpha = 1 - confidence``. Defaults to 0.95.
        **kwargs: Only ``alpha`` is accepted. ``alpha`` is the probability
            mass in the lower (loss) tail; when supplied it is used as given
            and ``confidence`` is ignored. A 95 % confidence level corresponds
            to ``alpha = 0.05``.

    Returns:
        dict[str, float]: The conditional value at risk per asset column.

    Raises:
        TypeError: If keyword arguments other than ``alpha`` are passed.

    """
    # ``alpha`` used to be swallowed by **kwargs and silently dropped, so the
    # caller's tail probability never reached the implementation.
    alpha = kwargs.pop("alpha", None)
    if kwargs:
        unexpected = ", ".join(sorted(kwargs))
        msg = f"conditional_value_at_risk() got unexpected keyword argument(s): {unexpected}"
        raise TypeError(msg)
    if alpha is None:
        alpha = 1.0 - confidence
    return self._conditional_value_at_risk_impl(sigma=sigma, alpha=alpha)

351 

@staticmethod
def _drawdown_with_baseline(series: pl.Series) -> pl.Series:
    """Build the drawdown series relative to a phantom starting point.

    A zero return is prepended before compounding so that a negative first
    return registers as a drawdown from the initial capital of 1.0 instead of
    becoming the new high-water mark (the quantstats convention). The phantom
    point is removed again before returning.
    """
    returns_f64 = series.cast(pl.Float64)
    padded = pl.concat([pl.Series([0.0]), returns_f64])
    equity = (1.0 + padded).cum_prod()
    peak = equity.cum_max()
    # The phantom point pins equity[0] at 1.0, so the peak never drops below
    # 1.0 and the 1e-10 floor is purely defensive; a -100 % return therefore
    # reports as a full drawdown of 1.0.
    shortfall = (peak - equity) / peak.clip(lower_bound=1e-10)
    drawdown = shortfall.clip(lower_bound=0.0)
    return drawdown[1:]  # drop phantom point

367 

@staticmethod
def _ulcer_index_series(series: pl.Series) -> float:
    """Compute the ulcer index ``sqrt(sum(dd²) / (n - 1))`` for one returns series.

    ``n`` is the length of *series*. With fewer than two observations the
    ``n - 1`` denominator is zero or negative, so NaN is returned instead of
    raising ZeroDivisionError (n == 1) or yielding a negative zero (n == 0).
    """
    n = series.len()
    if n < 2:
        return float("nan")  # indeterminate: not enough observations
    dd = _BasicStatsMixin._drawdown_with_baseline(series)
    squared_sum = float((dd**2).sum())
    return float(np.sqrt(squared_sum / (n - 1)))

374 

@columnwise_stat
def ulcer_index(self, series: pl.Series) -> float:
    """Compute the Ulcer Index, a drawdown-based downside risk measure.

    Captures both depth and duration of drawdowns as
    ``sqrt(sum(dd²) / (n - 1))``; the work is done by ``_ulcer_index_series``.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Ulcer Index value.

    """
    ulcer = self._ulcer_index_series(series)
    return ulcer

390 

@columnwise_stat
def ulcer_performance_index(self, series: pl.Series, rf: float = 0.0) -> float:
    """Compute the Ulcer Performance Index (UPI).

    Risk-adjusted return with the Ulcer Index as the risk measure:
    ``(compounded_return - rf) / ulcer_index``.

    Args:
        series (pl.Series): Returns of a single column.
        rf (float): Risk-free rate. Defaults to 0.

    Returns:
        float: Ulcer Performance Index.


    Returns NaN when:
        ``float("nan")`` when the ulcer index is zero (no drawdowns).
    """
    total_return = _comp_return(series)
    ulcer = self._ulcer_index_series(series)
    if ulcer == 0:
        return float("nan")
    return (total_return - rf) / ulcer

412 

@columnwise_stat
def serenity_index(self, series: pl.Series, rf: float = 0.0) -> float:
    """Compute the Serenity Index.

    Combines the Ulcer Index with a CVaR-based pitfall measure:
    ``(sum_returns - rf) / (ulcer_index * pitfall)``, where
    ``pitfall = -CVaR(drawdowns) / std(returns)``. The CVaR is the mean of the
    (negated) drawdowns lying strictly below their Gaussian 5 % quantile.

    Args:
        series (pl.Series): Returns of a single column.
        rf (float): Risk-free rate. Defaults to 0.

    Returns:
        float: Serenity Index.


    Returns NaN when:
        ``float("nan")`` when the returns have zero (or undefined) standard
        deviation or the denominator ``ulcer_index * pitfall`` is zero.
    """
    returns_std = cast(float, series.std())
    if not returns_std:
        return float("nan")  # indeterminate: zero variance

    # Flip the sign so drawdowns are negative, as in quantstats (below peak < 0).
    underwater = -self._drawdown_with_baseline(series)
    dd_mean = _mean(underwater)
    dd_std = cast(float, underwater.std())
    cutoff = self._gaussian_quantile(0.05, dd_mean, dd_std)
    tail_mask = cast(Iterable[bool], underwater < cutoff)
    tail_mean = _mean(underwater.filter(tail_mask))

    pitfall = -tail_mean / returns_std
    ulcer = self._ulcer_index_series(series)
    denominator = ulcer * pitfall
    if denominator == 0:
        return float("nan")
    return (float(series.sum()) - rf) / denominator

449 

@columnwise_stat
def win_rate(self, series: pl.Series) -> float:
    """Compute the share of non-zero returns that are positive.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Positive-return count divided by non-zero-return count.


    Returns NaN when:
        ``float("nan")`` when the series has no non-zero returns (the
        denominator is zero).
    """
    num_pos = self._positive(series).count()
    num_nonzero = series.filter(series != 0).count()
    # Both counts are plain ints, so a flat or empty series would otherwise
    # raise ZeroDivisionError.
    if num_nonzero == 0:
        return float("nan")  # indeterminate: no non-zero returns
    return float(num_pos / num_nonzero)

464 

@columnwise_stat
def autocorr_penalty(self, series: pl.Series) -> float:
    """Compute the autocorrelation penalty for risk-adjusted metrics.

    Autocorrelated returns can inflate Sharpe and Sortino ratios. The penalty
    is ``sqrt(1 + 2 * Σ ((n - k) / n) * ρ^k)`` for ``k = 1 .. n - 1``, where
    ``ρ`` is the absolute lag-1 correlation of the non-null returns and ``n``
    their count.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: Autocorrelation penalty factor (>= 1).

    """
    values = series.drop_nulls().to_numpy()
    size = len(values)
    rho = float(np.abs(np.corrcoef(values[:-1], values[1:])[0, 1]))
    lags = np.arange(1, size)
    weights = (size - lags) / size
    weighted = weights * (rho**lags)
    return float(np.sqrt(1 + 2 * np.sum(weighted)))

485 

@staticmethod
def _max_consecutive(mask: pl.Series) -> int:
    """Find the length of the longest unbroken run of True values.

    Each row gets its 1-based position inside its run (runs are identified by
    ``rle_id``); multiplying by the 0/1 mask zeroes out the False runs, so the
    maximum is the longest True run.

    Args:
        mask (pl.Series): Boolean series (True = qualifying period).

    Returns:
        int: Length of the longest consecutive True run, 0 when the maximum
            is undefined.

    """
    run_id = mask.rle_id()
    frame = pl.DataFrame({"v": mask.cast(pl.Int32), "g": run_id})
    position_in_run = (pl.int_range(pl.len()).over("g") + 1).alias("rank")
    longest = (
        frame.with_columns(position_in_run)
        .select((pl.col("v") * pl.col("rank")).max())
        .item()
    )
    if longest is None:
        return 0
    return int(longest)

505 

@columnwise_stat
def consecutive_wins(self, series: pl.Series) -> int:
    """Find the longest streak of winning (strictly positive) periods.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        int: Maximum number of consecutive winning periods.

    """
    winning = series > 0
    return self._max_consecutive(winning)

518 

@columnwise_stat
def consecutive_losses(self, series: pl.Series) -> int:
    """Find the longest streak of losing (strictly negative) periods.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        int: Maximum number of consecutive losing periods.

    """
    losing = series < 0
    return self._max_consecutive(losing)

531 

@columnwise_stat
def risk_of_ruin(self, series: pl.Series) -> float:
    """Calculate the risk of ruin (probability of losing all capital).

    Uses the formula ``((1 - win_rate) / (1 + win_rate)) ^ n``, where the win
    rate is the share of non-zero returns that are positive and ``n`` is the
    length of the series.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The risk of ruin probability.


    Returns NaN when:
        ``float("nan")`` when the series has no non-zero returns, because the
        win rate is then undefined.
    """
    num_pos = self._positive(series).count()
    num_nonzero = series.filter(series != 0).count()
    # Both counts are plain ints, so a flat or empty series would otherwise
    # raise ZeroDivisionError.
    if num_nonzero == 0:
        return float("nan")  # indeterminate: no non-zero returns
    wins = float(num_pos / num_nonzero)
    n = series.len()
    return ((1 - wins) / (1 + wins)) ** n

551 

@columnwise_stat
def tail_ratio(self, series: pl.Series, cutoff: float = 0.95) -> float:
    """Compute the tail ratio (right tail over left tail).

    Relates the upper and lower tails of the return distribution:
    ``abs(quantile(cutoff) / quantile(1 - cutoff))``, both with linear
    interpolation.

    Args:
        series (pl.Series): Returns of a single column.
        cutoff (float): Percentile cutoff for tail analysis. Defaults to 0.95.

    Returns:
        float: Tail ratio.


    Returns NaN when:
        ``float("nan")`` when either quantile is missing or the lower quantile
        is zero.
    """
    right_tail = cast(float, series.quantile(cutoff, interpolation="linear"))
    left_tail = cast(float, series.quantile(1 - cutoff, interpolation="linear"))
    if right_tail is not None and left_tail is not None and left_tail != 0:
        return float(np.abs(right_tail / left_tail))
    return float("nan")  # indeterminate: zero or missing quantile

576 

def cpc_index(self) -> dict[str, float]:
    """Compute the CPC Index: Profit Factor * Win Rate * Win-Loss (payoff) Ratio.

    Returns:
        dict[str, float]: CPC Index per asset, keyed like the profit-factor result.

    """
    factor = self.profit_factor()
    rate = self.win_rate()
    ratio = self.payoff_ratio()

    index: dict[str, float] = {}
    for name in factor:
        index[name] = factor[name] * rate[name] * ratio[name]
    return index

588 

def common_sense_ratio(self) -> dict[str, float]:
    """Compute the Common Sense Ratio: Profit Factor * Tail Ratio.

    Returns:
        dict[str, float]: Common Sense Ratio per asset, keyed like the
            profit-factor result.

    """
    factor = self.profit_factor()
    tails = self.tail_ratio()

    ratios: dict[str, float] = {}
    for name in factor:
        ratios[name] = factor[name] * tails[name]
    return ratios

599 

def outliers(self, quantile: float = 0.95) -> dict[str, pl.Series]:
    """Select, per asset, the returns strictly above a quantile threshold.

    Args:
        quantile (float): Upper quantile threshold. Defaults to 0.95.

    Returns:
        dict[str, pl.Series]: For each asset, the non-null returns greater
            than that asset's own quantile (linear interpolation).

    """

    def _above_threshold(values: pl.Series) -> pl.Series:
        cutoff = cast(float, values.quantile(quantile, interpolation="linear"))
        return values.filter(values > cutoff).drop_nulls()

    return {col: _above_threshold(series) for col, series in self._data.items()}

616 

def remove_outliers(self, quantile: float = 0.95) -> dict[str, pl.Series]:
    """Select, per asset, the returns strictly below a quantile threshold.

    Args:
        quantile (float): Upper quantile threshold. Defaults to 0.95.

    Returns:
        dict[str, pl.Series]: For each asset, the returns smaller than that
            asset's own quantile (linear interpolation).

    """

    def _below_threshold(values: pl.Series) -> pl.Series:
        cutoff = cast(float, values.quantile(quantile, interpolation="linear"))
        return values.filter(values < cutoff)

    return {col: _below_threshold(series) for col, series in self._data.items()}

633 

@columnwise_stat
def outlier_win_ratio(self, series: pl.Series, quantile: float = 0.99) -> float:
    """Compute the outlier winners ratio.

    Divides the high-quantile return by the mean of the non-negative returns,
    indicating how much outlier wins contribute to overall performance.

    Args:
        series (pl.Series): Returns of a single column.
        quantile (float): Quantile for the outlier threshold. Defaults to 0.99.

    Returns:
        float: Outlier win ratio.


    Returns NaN when:
        ``float("nan")`` when the mean of non-negative returns is zero.
    """
    baseline = _mean(series.filter(series >= 0))
    if baseline == 0:
        return float("nan")  # indeterminate: zero mean of positive returns
    extreme = cast(float, series.quantile(quantile, interpolation="linear"))
    return float(extreme / baseline)

657 

@columnwise_stat
def outlier_loss_ratio(self, series: pl.Series, quantile: float = 0.01) -> float:
    """Compute the outlier losers ratio.

    Divides the low-quantile return by the mean of the negative returns,
    indicating how much outlier losses contribute to overall risk.

    Args:
        series (pl.Series): Returns of a single column.
        quantile (float): Quantile for the outlier threshold. Defaults to 0.01.

    Returns:
        float: Outlier loss ratio.


    Returns NaN when:
        ``float("nan")`` when the mean of negative returns is zero.
    """
    baseline = self._mean_negative_expr(series)
    if baseline == 0:  # pragma: no cover
        return float("nan")  # indeterminate: zero mean of negative returns
    extreme = cast(float, series.quantile(quantile, interpolation="linear"))
    return float(extreme / baseline)

681 

@columnwise_stat
def gain_to_pain_ratio(self, series: pl.Series) -> float:
    """Compute Jack Schwager's Gain-to-Pain Ratio.

    The ratio is the sum of all returns divided by the sum of the absolute
    values of the negative returns.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The gain to pain ratio value.


    Returns NaN when:
        ``float("nan")`` when there are no losses (the denominator is zero).
    """
    numerator = float(series.sum())
    denominator = float(self._negative(series).abs().sum())
    # Float division raises ZeroDivisionError exactly when the denominator is
    # zero, so an explicit guard is equivalent to catching that error.
    if denominator == 0:
        return float("nan")  # indeterminate: no losses (denominator is zero)
    return float(numerator / denominator)

704 

@columnwise_stat
def risk_return_ratio(self, series: pl.Series) -> float:
    """Compute the return/risk ratio (mean divided by standard deviation).

    Equivalent to a Sharpe ratio without a risk-free rate. When the standard
    deviation is undefined (``None``) the mean is divided by 1.0 instead.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The risk return ratio value.

    """
    # NOTE(review): a zero standard deviation is not guarded here.
    average = _mean(series)
    spread = cast(float, series.std())
    divisor = 1.0 if spread is None else spread
    return average / divisor

721 

def kelly_criterion(self) -> dict[str, float]:
    """Compute the Kelly-optimal capital allocation per column.

    Applies the Kelly Criterion formula ``f* = [(b * p) - q] / b``
    where:
        - b = payoff ratio
        - p = win rate
        - q = 1 - p.

    Returns:
        dict[str, float]: Kelly fraction per asset, keyed like the
            payoff-ratio result.

    """
    payoff = self.payoff_ratio()
    win_prob = self.win_rate()

    fractions: dict[str, float] = {}
    for name in payoff:
        b = payoff[name]
        p = win_prob[name]
        fractions[name] = ((b * p) - (1 - p)) / b
    return fractions

739 

@columnwise_stat
def best(self, series: pl.Series) -> float | None:
    """Return the largest return in the column (best period).

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float | None: The maximum return, or ``None`` when Polars reports no
            maximum.

    """
    return cast(float, series.max())

753 

@columnwise_stat
def worst(self, series: pl.Series) -> float | None:
    """Return the smallest return in the column (worst period).

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float | None: The minimum return, or ``None`` when Polars reports no
            minimum.

    """
    return cast(float, series.min())

767 

@columnwise_stat
def exposure(self, series: pl.Series) -> float:
    """Compute the market exposure time (share of periods with returns != 0).

    The non-zero count of the column is divided by the row count of
    ``self.all`` and the result is rounded up to two decimals.

    Args:
        series (pl.Series): Returns of a single column.

    Returns:
        float: The exposure value.

    """
    total_rows = self.all.height
    active_periods = series.filter(series != 0).count()
    share = active_periods / total_rows
    return math.ceil(share * 100) / 100

782 

@staticmethod
def _pearson_corr_shifted(series: pl.Series, lag: int) -> float:
    """Correlate *series* with a copy of itself shifted by *lag* positions.

    Rows where either the value or its shifted counterpart is null are
    dropped before the Pearson coefficient is computed.

    Args:
        series (pl.Series): The input series.
        lag (int): Number of positions to shift.

    Returns:
        float: Pearson correlation coefficient, or NaN if no valid pairs remain.

    """
    aligned = pl.DataFrame({"x": series, "y": series.shift(lag)}).drop_nulls()
    # A large lag or all-null overlap can leave nothing to correlate.
    if aligned.is_empty():
        return float("nan")
    current = aligned["x"].to_numpy()
    lagged = aligned["y"].to_numpy()
    return float(np.corrcoef(current, lagged)[0, 1])

801 

@columnwise_stat
def autocorr(self, series: pl.Series, lag: int = 1) -> float:
    """Compute the lag-n autocorrelation of returns.

    Args:
        series (pl.Series): Returns of a single column.
        lag (int): Number of periods to lag. Must be a positive integer.

    Returns:
        float: Pearson correlation between returns and their lagged values.

    Raises:
        TypeError: If *lag* is not an ``int``.
        ValueError: If *lag* is not a positive integer (>= 1).

    """
    if not isinstance(lag, int):
        msg = f"lag must be an int, got {type(lag).__name__}"
        raise TypeError(msg)
    if lag < 1:
        msg = f"lag must be a positive integer, got {lag}"
        raise ValueError(msg)
    correlation = self._pearson_corr_shifted(series, lag)
    return correlation

825 

def acf(self, nlags: int = 20) -> pl.DataFrame:
    """Compute the autocorrelation function for lags 0 through *nlags*.

    Lag 0 is fixed at 1.0; every other lag uses ``_pearson_corr_shifted``.

    Args:
        nlags (int): Maximum number of lags to include. Default is 20.

    Returns:
        pl.DataFrame: DataFrame with a ``lag`` column (0..nlags) and one
            column per asset containing the ACF values.

    Raises:
        TypeError: If *nlags* is not an ``int``.
        ValueError: If *nlags* is negative.

    """
    if not isinstance(nlags, int):
        msg = f"nlags must be an int, got {type(nlags).__name__}"
        raise TypeError(msg)
    if nlags < 0:
        msg = f"nlags must be non-negative, got {nlags}"
        raise ValueError(msg)

    positive_lags = range(1, nlags + 1)
    table: dict[str, list[float]] = {"lag": list(range(nlags + 1))}
    for col, series in self._data.items():
        table[col] = [1.0, *(self._pearson_corr_shifted(series, k) for k in positive_lags)]
    return pl.DataFrame(table)