Coverage for src/jquantstats/_stats/

1"""Basic statistical metrics for financial returns data."""

3from __future__ import annotations

5import math

6from collections.abc import Iterable

7from typing import TYPE_CHECKING, cast

9import numpy as np

10import polars as pl

11from scipy.stats import norm

13from ._core import _mean, columnwise_stat

14from ._internals import _annualization_factor, _comp_return

16if TYPE_CHECKING:

17 from ..data import Data

19# ── Basic statistics mixin ───────────────────────────────────────────────────

22class _BasicStatsMixin:

23 """Mixin providing basic return/risk and win/loss financial statistics.

25 Covers: basic statistics (skew, kurtosis, avg return/win/loss), volatility,

26 win/loss metrics (payoff ratio, profit factor), and risk metrics (VaR, CVaR,

27 win rate, kelly criterion, best/worst, exposure).

28 """

30 _data: Data

31 all: pl.DataFrame

33 if TYPE_CHECKING:

34 from .._protocol import DataLike

36 data: DataLike

38 @staticmethod

39 def _positive(series: pl.Series) -> pl.Series:

40 """Return only the positive values in *series*."""

41 return series.filter(series > 0)

43 @staticmethod

44 def _negative(series: pl.Series) -> pl.Series:

45 """Return only the negative values in *series*."""

46 return series.filter(series < 0)

48 @staticmethod

49 def _mean_positive_expr(series: pl.Series) -> float:

50 """Return the mean of all positive values in *series*, or NaN if none exist."""

51 return _mean(_BasicStatsMixin._positive(series))

53 @staticmethod

54 def _mean_negative_expr(series: pl.Series) -> float:

55 """Return the mean of all negative values in *series*, or NaN if none exist."""

56 return _mean(_BasicStatsMixin._negative(series))

58 @staticmethod

59 def _gaussian_quantile(alpha: float, mu: float, sigma: float) -> float:

60 """Gaussian inverse-CDF (``norm.ppf``) returning NaN for a zero-scale input.

62 ``norm.ppf(alpha, mu, 0.0)`` already returns ``nan`` for a degenerate

63 (zero-variance) distribution — but it emits an ``invalid value

64 encountered in multiply`` RuntimeWarning while doing so (``inf * 0``

65 internally). Degenerate scale arises for a single observation (undefined

66 std) or a constant series. Short-circuiting to ``float("nan")`` keeps the

67 exact same result while suppressing the spurious warning; downstream

68 masking relies on this NaN (Polars treats ``x < nan`` as ``True``).

69 """

70 return float("nan") if sigma == 0.0 else float(norm.ppf(alpha, mu, sigma))

72 # ── Basic statistics ──────────────────────────────────────────────────────

74 @columnwise_stat

75 def skew(self, series: pl.Series) -> int | float | None:

76 """Calculate skewness (asymmetry) for each numeric column.

78 Args:

79 series (pl.Series): The series to calculate skewness for.

81 Returns:

82 float: The skewness value.

84 """

85 return series.skew(bias=False)

87 @columnwise_stat

88 def kurtosis(self, series: pl.Series) -> int | float | None:

89 """Calculate the kurtosis of returns.

91 The degree to which a distribution peak compared to a normal distribution.

93 Args:

94 series (pl.Series): The series to calculate kurtosis for.

96 Returns:

97 float: The kurtosis value.

99 """

100 return series.kurtosis(bias=False)

101

102 @columnwise_stat

103 def avg_return(self, series: pl.Series) -> float:

104 """Calculate average return per non-zero value.

105

106 Args:

107 series (pl.Series): The series to calculate average return for.

108

109 Returns:

110 float: The average return value.

111

112 """

113 return _mean(series.filter(series.is_not_null() & (series != 0)))

114

115 @columnwise_stat

116 def avg_win(self, series: pl.Series) -> float:

117 """Calculate the average winning return/trade for an asset.

118

119 Args:

120 series (pl.Series): The series to calculate average win for.

121

122 Returns:

123 float: The average winning return.

124

125 """

126 return self._mean_positive_expr(series)

127

128 @columnwise_stat

129 def avg_loss(self, series: pl.Series) -> float:

130 """Calculate the average loss return/trade for a period.

131

132 Args:

133 series (pl.Series): The series to calculate average loss for.

134

135 Returns:

136 float: The average loss return.

137

138 """

139 return self._mean_negative_expr(series)

140

141 @columnwise_stat

142 def comp(self, series: pl.Series) -> float:

143 """Calculate the total compounded return over the full period.

144

145 Computed as product(1 + r) - 1.

146

147 Args:

148 series (pl.Series): The series to calculate compounded return for.

149

150 Returns:

151 float: Total compounded return.

152

153 """

154 return _comp_return(series)

155

156 @columnwise_stat

157 def geometric_mean(self, series: pl.Series, periods: int | float | None = None, annualize: bool = False) -> float:

158 """Calculate the geometric mean of returns.

159

160 Computed as the per-period geometric average: (∏(1 + rᵢ))^(1/n) - 1.

161 When annualized, raises to the power of periods_per_year instead of 1/n.

162

163 Args:

164 series (pl.Series): The series to calculate geometric mean for.

165 periods (int | float, optional): Periods per year for annualization. Defaults to periods_per_year.

166 annualize (bool): Whether to annualize the result. Defaults to False.

167

168 Returns:

169 float: The geometric mean return.

170

171

172 Returns NaN when:

173 ``float("nan")`` when the series has no non-null observations or the

174 compounded return ``product(1 + r)`` is non-positive.

175 """

176 clean = series.drop_nulls().cast(pl.Float64)

177 n = clean.len()

178 if n == 0:

179 return float("nan") # indeterminate: no observations

180 compound = float((1.0 + clean).product())

181 if compound <= 0:

182 return float("nan") # indeterminate: non-positive compound return

183 exponent = (periods or self._data._periods_per_year) / n if annualize else (1.0 / n)

184 return float(compound**exponent) - 1.0

185

186 # ── Volatility & risk ─────────────────────────────────────────────────────

187

188 @columnwise_stat

189 def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:

190 """Calculate the volatility of returns.

191

192 - Std dev of returns

193 - Annualized by sqrt(periods) if `annualize` is True.

194

195 Args:

196 series (pl.Series): The series to calculate volatility for.

197 periods (int, optional): Number of periods per year. Defaults to 252.

198 annualize (bool, optional): Whether to annualize the result. Defaults to True.

199

200 Returns:

201 float: The volatility value.

202

203 """

204 raw_periods = periods or self._data._periods_per_year

205

206 # Ensure it's numeric

207 if not isinstance(raw_periods, int | float):

208 raise TypeError(f"Expected int or float for periods, got {type(raw_periods).__name__}") # noqa: TRY003

209

210 factor = _annualization_factor(raw_periods) if annualize else 1.0

211 std_val = cast(float, series.std())

212 return (std_val if std_val is not None else 0.0) * factor

213

214 # ── Win / loss metrics ────────────────────────────────────────────────────

215

216 @columnwise_stat

217 def payoff_ratio(self, series: pl.Series) -> float:

218 """Measure the payoff ratio.

219

220 The payoff ratio is calculated as average win / abs(average loss).

221

222 Args:

223 series (pl.Series): The series to calculate payoff ratio for.

224

225 Returns:

226 float: The payoff ratio value.

227

228 """

229 avg_win = self._mean_positive_expr(series)

230 avg_loss = float(np.abs(self._mean_negative_expr(series)))

231 return avg_win / avg_loss

232

233 @columnwise_stat

234 def profit_ratio(self, series: pl.Series) -> float:

235 """Measure the profit ratio.

236

237 The profit ratio is calculated as win ratio / loss ratio.

238

239 Args:

240 series (pl.Series): The series to calculate profit ratio for.

241

242 Returns:

243 float: The profit ratio value.

244

245

246 Returns NaN when:

247 ``float("nan")`` when the series has no wins or no losses.

248 """

249 wins = series.filter(series >= 0)

250 losses = self._negative(series)

251

252 # Filtering can legitimately leave no wins or no losses for one-sided return series.

253 if wins.is_empty() or losses.is_empty():

254 return float("nan") # indeterminate: no wins or no losses

255

256 win_mean = _mean(wins)

257 loss_mean = _mean(losses)

258 win_ratio = float(np.abs(win_mean / wins.count()))

259 loss_ratio = float(np.abs(loss_mean / losses.count()))

260

261 return win_ratio / loss_ratio

262

263 @columnwise_stat

264 def profit_factor(self, series: pl.Series) -> float:

265 """Measure the profit factor.

266

267 The profit factor is calculated as wins / loss.

268

269 Args:

270 series (pl.Series): The series to calculate profit factor for.

271

272 Returns:

273 float: The profit factor value.

274

275 """

276 wins = self._positive(series)

277 losses = self._negative(series)

278 wins_sum = wins.sum()

279 losses_sum = losses.sum()

280

281 return float(np.abs(float(wins_sum) / float(losses_sum)))

282

283 # ── Risk metrics ──────────────────────────────────────────────────────────

284

285 @columnwise_stat

286 def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:

287 """Calculate the daily value-at-risk.

288

289 Uses variance-covariance calculation with confidence level.

290

291 Args:

292 series (pl.Series): The series to calculate value at risk for.

293 alpha (float, optional): Confidence level. Defaults to 0.05.

294 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.

295

296 Returns:

297 float: The value at risk.

298

299 """

300 mean_val = _mean(series)

301 std_val = cast(float, series.std())

302 mu = mean_val

303 sigma *= std_val if std_val is not None else 0.0

304

305 return self._gaussian_quantile(alpha, mu, sigma)

306

307 @columnwise_stat

308 def _conditional_value_at_risk_impl(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:

309 """Inner per-series implementation of conditional value-at-risk."""

310 mean_val = _mean(series)

311 std_val = cast(float, series.std())

312 mu = mean_val

313 sigma *= std_val if std_val is not None else 0.0

314

315 var = self._gaussian_quantile(alpha, mu, sigma)

316

317 # Compute mean of returns less than or equal to VaR

318 # Cast to Any or pl.Series to suppress Ty error

319 # Cast the mask to pl.Expr to satisfy type checker

320 mask = cast(Iterable[bool], series < var)

321 return _mean(series.filter(mask))

322

323 def conditional_value_at_risk(

324 self, sigma: float = 1.0, confidence: float = 0.95, **kwargs: float

325 ) -> dict[str, float]:

326 """Calculate the conditional value-at-risk (CVaR / Expected Shortfall).

327

328 Also known as CVaR or expected shortfall, calculated for each numeric column.

329

330 Args:

331 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.

332 confidence (float, optional): Confidence level (e.g. 0.95 for 95 %).

333 Converted internally to ``alpha = 1 - confidence``. Defaults to 0.95.

334 alpha (float, optional): Tail probability (lower tail). ``alpha`` is the

335 probability mass in the *loss* tail, so ``alpha = 1 - confidence``.

336 For example, a 95 % confidence level corresponds to ``alpha = 0.05``

337 (the default).

338 **kwargs: Legacy keyword arguments. Passing ``confidence`` (e.g.

339 ``confidence=0.95``) is accepted for backwards compatibility with

340 QuantStats but emits a `DeprecationWarning`. Use

341 ``alpha = 1 - confidence`` instead.

342

343 Returns:

344 dict[str, float]: The conditional value at risk per asset column.

345

346 Raises:

347 TypeError: If unexpected keyword arguments are passed.

348

349 """

350 return self._conditional_value_at_risk_impl(sigma=sigma, alpha=1.0 - confidence)

351

352 @staticmethod

353 def _drawdown_with_baseline(series: pl.Series) -> pl.Series:

354 """Compute drawdown series with a phantom zero-return baseline prepended.

355

356 Matches the quantstats convention: a negative first return is treated as

357 a drawdown from the initial capital of 1.0, not as the new high-water mark.

358 """

359 extended = pl.concat([pl.Series([0.0]), series.cast(pl.Float64)])

360 nav = (1.0 + extended).cum_prod()

361 hwm = nav.cum_max()

362 # The phantom baseline pins nav[0] = 1.0, so hwm >= 1.0 throughout and

363 # the 1e-10 floor is purely defensive (unreachable); a -100 % return

364 # correctly reports as a full drawdown of 1.0 here.

365 dd = ((hwm - nav) / hwm.clip(lower_bound=1e-10)).clip(lower_bound=0.0)

366 return dd[1:] # drop phantom point

367

368 @staticmethod

369 def _ulcer_index_series(series: pl.Series) -> float:

370 """Compute ulcer index for a single returns series."""

371 dd = _BasicStatsMixin._drawdown_with_baseline(series)

372 n = series.len()

373 return float(np.sqrt(float((dd**2).sum()) / (n - 1)))

374

375 @columnwise_stat

376 def ulcer_index(self, series: pl.Series) -> float:

377 """Calculate the Ulcer Index (downside risk measurement).

378

379 Measures the depth and duration of drawdowns as the root mean square

380 of squared drawdowns: sqrt(sum(dd²) / (n - 1)).

381

382 Args:

383 series (pl.Series): The series to calculate ulcer index for.

384

385 Returns:

386 float: Ulcer Index value.

387

388 """

389 return self._ulcer_index_series(series)

390

391 @columnwise_stat

392 def ulcer_performance_index(self, series: pl.Series, rf: float = 0.0) -> float:

393 """Calculate the Ulcer Performance Index (UPI).

394

395 Risk-adjusted return using Ulcer Index as the risk measure:

396 (compounded_return - rf) / ulcer_index.

397

398 Args:

399 series (pl.Series): The series to calculate UPI for.

400 rf (float): Risk-free rate. Defaults to 0.

401

402 Returns:

403 float: Ulcer Performance Index.

404

405

406 Returns NaN when:

407 ``float("nan")`` when the ulcer index is zero (no drawdowns).

408 """

409 comp = _comp_return(series)

410 ui = self._ulcer_index_series(series)

411 return float("nan") if ui == 0 else (comp - rf) / ui

412

413 @columnwise_stat

414 def serenity_index(self, series: pl.Series, rf: float = 0.0) -> float:

415 """Calculate the Serenity Index.

416

417 Combines the Ulcer Index with a CVaR-based pitfall measure:

418 (sum_returns - rf) / (ulcer_index * pitfall), where

419 pitfall = -CVaR(drawdowns) / std(returns).

420

421 Args:

422 series (pl.Series): The series to calculate serenity index for.

423 rf (float): Risk-free rate. Defaults to 0.

424

425 Returns:

426 float: Serenity Index.

427

428

429 Returns NaN when:

430 ``float("nan")`` when the returns have zero (or undefined) standard

431 deviation or the denominator ``ulcer_index * pitfall`` is zero.

432 """

433 std_val = cast(float, series.std())

434 if not std_val:

435 return float("nan") # indeterminate: zero variance

436

437 # Negate drawdowns to match quantstats sign convention (negative = below peak)

438 dd_neg = -self._drawdown_with_baseline(series)

439 mu = _mean(dd_neg)

440 sigma = cast(float, dd_neg.std())

441 var_threshold = self._gaussian_quantile(0.05, mu, sigma)

442 mask = cast(Iterable[bool], dd_neg < var_threshold)

443 cvar_val = _mean(dd_neg.filter(mask))

444

445 pitfall = -cvar_val / std_val

446 ui = self._ulcer_index_series(series)

447 denominator = ui * pitfall

448 return float("nan") if denominator == 0 else (float(series.sum()) - rf) / denominator

449

450 @columnwise_stat

451 def win_rate(self, series: pl.Series) -> float:

452 """Calculate the win ratio for a period.

453

454 Args:

455 series (pl.Series): The series to calculate win rate for.

456

457 Returns:

458 float: The win rate value.

459

460 """

461 num_pos = self._positive(series).count()

462 num_nonzero = series.filter(series != 0).count()

463 return float(num_pos / num_nonzero)

464

465 @columnwise_stat

466 def autocorr_penalty(self, series: pl.Series) -> float:

467 """Calculate the autocorrelation penalty for risk-adjusted metrics.

468

469 Computes a penalty factor that accounts for autocorrelation in returns,

470 which can inflate Sharpe and Sortino ratios.

471

472 Args:

473 series (pl.Series): The series to calculate autocorrelation penalty for.

474

475 Returns:

476 float: Autocorrelation penalty factor (>= 1).

477

478 """

479 arr = series.drop_nulls().to_numpy()

480 num = len(arr)

481 coef = float(np.abs(np.corrcoef(arr[:-1], arr[1:])[0, 1]))

482 x = np.arange(1, num)

483 corr = ((num - x) / num) * (coef**x)

484 return float(np.sqrt(1 + 2 * np.sum(corr)))

485

486 @staticmethod

487 def _max_consecutive(mask: pl.Series) -> int:

488 """Return the longest run of True values in a boolean mask.

489

490 Args:

491 mask (pl.Series): Boolean series (True = qualifying period).

492

493 Returns:

494 int: Length of the longest consecutive True run.

495

496 """

497 group_ids = mask.rle_id()

498 df = pl.DataFrame({"v": mask.cast(pl.Int32), "g": group_ids})

499 result = (

500 df.with_columns((pl.int_range(pl.len()).over("g") + 1).alias("rank"))

501 .select((pl.col("v") * pl.col("rank")).max())

502 .item()

503 )

504 return int(result) if result is not None else 0

505

506 @columnwise_stat

507 def consecutive_wins(self, series: pl.Series) -> int:

508 """Calculate the maximum number of consecutive winning periods.

509

510 Args:

511 series (pl.Series): The series to calculate consecutive wins for.

512

513 Returns:

514 int: Maximum number of consecutive winning periods.

515

516 """

517 return self._max_consecutive(series > 0)

518

519 @columnwise_stat

520 def consecutive_losses(self, series: pl.Series) -> int:

521 """Calculate the maximum number of consecutive losing periods.

522

523 Args:

524 series (pl.Series): The series to calculate consecutive losses for.

525

526 Returns:

527 int: Maximum number of consecutive losing periods.

528

529 """

530 return self._max_consecutive(series < 0)

531

532 @columnwise_stat

533 def risk_of_ruin(self, series: pl.Series) -> float:

534 """Calculate the risk of ruin (probability of losing all capital).

535

536 Uses the formula: ((1 - win_rate) / (1 + win_rate)) ^ n,

537 where n is the number of periods.

538

539 Args:

540 series (pl.Series): The series to calculate risk of ruin for.

541

542 Returns:

543 float: The risk of ruin probability.

544

545 """

546 num_pos = self._positive(series).count()

547 num_nonzero = series.filter(series != 0).count()

548 wins = float(num_pos / num_nonzero)

549 n = series.len()

550 return ((1 - wins) / (1 + wins)) ** n

551

552 @columnwise_stat

553 def tail_ratio(self, series: pl.Series, cutoff: float = 0.95) -> float:

554 """Calculate the tail ratio (right tail / left tail).

555

556 Measures the ratio between the upper and lower tails of the return

557 distribution: abs(quantile(cutoff) / quantile(1 - cutoff)).

558

559 Args:

560 series (pl.Series): The series to calculate tail ratio for.

561 cutoff (float): Percentile cutoff for tail analysis. Defaults to 0.95.

562

563 Returns:

564 float: Tail ratio.

565

566

567 Returns NaN when:

568 ``float("nan")`` when either quantile is missing or the lower quantile

569 is zero.

570 """

571 upper = cast(float, series.quantile(cutoff, interpolation="linear"))

572 lower = cast(float, series.quantile(1 - cutoff, interpolation="linear"))

573 if upper is None or lower is None or lower == 0:

574 return float("nan") # indeterminate: zero or missing quantile

575 return float(np.abs(upper / lower))

576

577 def cpc_index(self) -> dict[str, float]:

578 """Calculate the CPC Index (Profit Factor * Win Rate * Win-Loss Ratio).

579

580 Returns:

581 dict[str, float]: Dictionary mapping asset names to CPC Index values.

582

583 """

584 pf = self.profit_factor()

585 wr = self.win_rate()

586 wlr = self.payoff_ratio()

587 return {col: pf[col] * wr[col] * wlr[col] for col in pf}

588

589 def common_sense_ratio(self) -> dict[str, float]:

590 """Calculate the Common Sense Ratio (Profit Factor * Tail Ratio).

591

592 Returns:

593 dict[str, float]: Dictionary mapping asset names to Common Sense Ratio values.

594

595 """

596 pf = self.profit_factor()

597 tr = self.tail_ratio()

598 return {col: pf[col] * tr[col] for col in pf}

599

600 def outliers(self, quantile: float = 0.95) -> dict[str, pl.Series]:

601 """Return only the returns above a quantile threshold.

602

603 Args:

604 quantile (float): Upper quantile threshold. Defaults to 0.95.

605

606 Returns:

607 dict[str, pl.Series]: Filtered series per asset containing only

608 returns above the quantile.

609

610 """

611 result = {}

612 for col, series in self._data.items():

613 threshold = cast(float, series.quantile(quantile, interpolation="linear"))

614 result[col] = series.filter(series > threshold).drop_nulls()

615 return result

616

617 def remove_outliers(self, quantile: float = 0.95) -> dict[str, pl.Series]:

618 """Return returns with values above a quantile threshold removed.

619

620 Args:

621 quantile (float): Upper quantile threshold. Defaults to 0.95.

622

623 Returns:

624 dict[str, pl.Series]: Filtered series per asset containing only

625 returns below the quantile.

626

627 """

628 result = {}

629 for col, series in self._data.items():

630 threshold = cast(float, series.quantile(quantile, interpolation="linear"))

631 result[col] = series.filter(series < threshold)

632 return result

633

634 @columnwise_stat

635 def outlier_win_ratio(self, series: pl.Series, quantile: float = 0.99) -> float:

636 """Calculate the outlier winners ratio.

637

638 Ratio of the high-quantile return to the mean positive return,

639 showing how much outlier wins contribute to overall performance.

640

641 Args:

642 series (pl.Series): The series to calculate outlier win ratio for.

643 quantile (float): Quantile for the outlier threshold. Defaults to 0.99.

644

645 Returns:

646 float: Outlier win ratio.

647

648

649 Returns NaN when:

650 ``float("nan")`` when the mean of non-negative returns is zero.

651 """

652 positive_mean = _mean(series.filter(series >= 0))

653 if positive_mean == 0:

654 return float("nan") # indeterminate: zero mean of positive returns

655 quantile_val = cast(float, series.quantile(quantile, interpolation="linear"))

656 return float(quantile_val / positive_mean)

657

658 @columnwise_stat

659 def outlier_loss_ratio(self, series: pl.Series, quantile: float = 0.01) -> float:

660 """Calculate the outlier losers ratio.

661

662 Ratio of the low-quantile return to the mean negative return,

663 showing how much outlier losses contribute to overall risk.

664

665 Args:

666 series (pl.Series): The series to calculate outlier loss ratio for.

667 quantile (float): Quantile for the outlier threshold. Defaults to 0.01.

668

669 Returns:

670 float: Outlier loss ratio.

671

672

673 Returns NaN when:

674 ``float("nan")`` when the mean of negative returns is zero.

675 """

676 negative_mean = self._mean_negative_expr(series)

677 if negative_mean == 0: # pragma: no cover

678 return float("nan") # indeterminate: zero mean of negative returns

679 quantile_val = cast(float, series.quantile(quantile, interpolation="linear"))

680 return float(quantile_val / negative_mean)

681

682 @columnwise_stat

683 def gain_to_pain_ratio(self, series: pl.Series) -> float:

684 """Calculate Jack Schwager's Gain-to-Pain Ratio.

685

686 The ratio is calculated as total return / sum of losses (in absolute value).

687

688 Args:

689 series (pl.Series): The series to calculate gain to pain ratio for.

690

691 Returns:

692 float: The gain to pain ratio value.

693

694

695 Returns NaN when:

696 ``float("nan")`` when there are no losses (the denominator is zero).

697 """

698 total_gain = series.sum()

699 total_pain = self._negative(series).abs().sum()

700 try:

701 return float(float(total_gain) / float(total_pain))

702 except ZeroDivisionError:

703 return float("nan") # indeterminate: no losses (denominator is zero)

704

705 @columnwise_stat

706 def risk_return_ratio(self, series: pl.Series) -> float:

707 """Calculate the return/risk ratio.

708

709 This is equivalent to the Sharpe ratio without a risk-free rate.

710

711 Args:

712 series (pl.Series): The series to calculate risk return ratio for.

713

714 Returns:

715 float: The risk return ratio value.

716

717 """

718 mean_val = _mean(series)

719 std_val = cast(float, series.std())

720 return mean_val / (std_val if std_val is not None else 1.0)

721

722 def kelly_criterion(self) -> dict[str, float]:

723 """Calculate the optimal capital allocation per column.

724

725 Uses the Kelly Criterion formula: f* = [(b * p) - q] / b

726 where:

727 - b = payoff ratio

728 - p = win rate

729 - q = 1 - p.

730

731 Returns:

732 dict[str, float]: Dictionary mapping asset names to Kelly criterion values.

733

734 """

735 b = self.payoff_ratio()

736 p = self.win_rate()

737

738 return {col: ((b[col] * p[col]) - (1 - p[col])) / b[col] for col in b}

739

740 @columnwise_stat

741 def best(self, series: pl.Series) -> float | None:

742 """Find the maximum return per column (best period).

743

744 Args:

745 series (pl.Series): The series to find the best return for.

746

747 Returns:

748 float: The maximum return value.

749

750 """

751 val = cast(float, series.max())

752 return val if val is not None else None

753

754 @columnwise_stat

755 def worst(self, series: pl.Series) -> float | None:

756 """Find the minimum return per column (worst period).

757

758 Args:

759 series (pl.Series): The series to find the worst return for.

760

761 Returns:

762 float: The minimum return value.

763

764 """

765 val = cast(float, series.min())

766 return val if val is not None else None

767

768 @columnwise_stat

769 def exposure(self, series: pl.Series) -> float:

770 """Calculate the market exposure time (returns != 0).

771

772 Args:

773 series (pl.Series): The series to calculate exposure for.

774

775 Returns:

776 float: The exposure value.

777

778 """

779 all_data = self.all

780 ex = series.filter(series != 0).count() / all_data.height

781 return math.ceil(ex * 100) / 100

782

783 @staticmethod

784 def _pearson_corr_shifted(series: pl.Series, lag: int) -> float:

785 """Compute Pearson correlation between *series* and its lag-*lag* shift.

786

787 Args:

788 series (pl.Series): The input series.

789 lag (int): Number of positions to shift.

790

791 Returns:

792 float: Pearson correlation coefficient, or NaN if no valid pairs remain.

793

794 """

795 shifted = series.shift(lag)

796 paired = pl.DataFrame({"x": series, "y": shifted}).drop_nulls()

797 # Large lags or null-only overlap can leave no aligned observations to correlate.

798 if paired.is_empty():

799 return float("nan")

800 return float(np.corrcoef(paired["x"].to_numpy(), paired["y"].to_numpy())[0, 1])

801

802 @columnwise_stat

803 def autocorr(self, series: pl.Series, lag: int = 1) -> float:

804 """Compute lag-n autocorrelation of returns.

805

806 Args:

807 series (pl.Series): The series to calculate autocorrelation for.

808 lag (int): Number of periods to lag. Must be a positive integer.

809

810 Returns:

811 float: Pearson correlation between returns and their lagged values.

812

813 Raises:

814 TypeError: If *lag* is not an ``int``.

815 ValueError: If *lag* is not a positive integer (>= 1).

816

817 """

818 if not isinstance(lag, int):

819 msg = f"lag must be an int, got {type(lag).__name__}"

820 raise TypeError(msg)

821 if lag <= 0:

822 msg = f"lag must be a positive integer, got {lag}"

823 raise ValueError(msg)

824 return self._pearson_corr_shifted(series, lag)

825

826 def acf(self, nlags: int = 20) -> pl.DataFrame:

827 """Compute the autocorrelation function up to nlags.

828

829 Args:

830 nlags (int): Maximum number of lags to include. Default is 20.

831

832 Returns:

833 pl.DataFrame: DataFrame with a ``lag`` column (0..nlags) and one

834 column per asset containing the ACF values.

835

836 Raises:

837 TypeError: If *nlags* is not an ``int``.

838 ValueError: If *nlags* is negative.

839

840 """

841 if not isinstance(nlags, int):

842 msg = f"nlags must be an int, got {type(nlags).__name__}"

843 raise TypeError(msg)

844 if nlags < 0:

845 msg = f"nlags must be non-negative, got {nlags}"

846 raise ValueError(msg)

847 result: dict[str, list[float]] = {"lag": list(range(nlags + 1))}

848 for col, series in self._data.items():

849 acf_values: list[float] = [1.0]

850 for k in range(1, nlags + 1):

851 acf_values.append(self._pearson_corr_shifted(series, k))

852 result[col] = acf_values

853 return pl.DataFrame(result)

Coverage for src/jquantstats/_stats/_basic.py: 100%

272 statements