Coverage for src/jquantstats/_stats/_basic.py: 100%
272 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 06:13 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 06:13 +0000
1"""Basic statistical metrics for financial returns data."""
3from __future__ import annotations
5import math
6from collections.abc import Iterable
7from typing import TYPE_CHECKING, cast
9import numpy as np
10import polars as pl
11from scipy.stats import norm
13from ._core import _mean, columnwise_stat
14from ._internals import _annualization_factor, _comp_return
16if TYPE_CHECKING:
17 from ..data import Data
19# ── Basic statistics mixin ───────────────────────────────────────────────────
class _BasicStatsMixin:
    """Mixin providing basic return/risk and win/loss financial statistics.

    Covers: basic statistics (skew, kurtosis, avg return/win/loss), volatility,
    win/loss metrics (payoff ratio, profit factor), and risk metrics (VaR, CVaR,
    win rate, kelly criterion, best/worst, exposure).

    Methods decorated with ``columnwise_stat`` are written against a single
    ``pl.Series``. The decorator lives in ``._core``; within this class those
    methods are invoked without a series and their result is indexed by column
    name (see ``cpc_index``), so they are consumed as per-column mappings.

    Division-by-zero situations (no losses, no non-zero returns, zero standard
    deviation, fewer than two observations) yield ``float("nan")`` rather than
    raising ``ZeroDivisionError``.
    """

    _data: Data
    all: pl.DataFrame

    if TYPE_CHECKING:
        from .._protocol import DataLike

        data: DataLike

    @staticmethod
    def _positive(series: pl.Series) -> pl.Series:
        """Return only the positive values in *series*."""
        return series.filter(series > 0)

    @staticmethod
    def _negative(series: pl.Series) -> pl.Series:
        """Return only the negative values in *series*."""
        return series.filter(series < 0)

    @staticmethod
    def _mean_positive_expr(series: pl.Series) -> float:
        """Return the mean of all positive values in *series*, or NaN if none exist."""
        return _mean(_BasicStatsMixin._positive(series))

    @staticmethod
    def _mean_negative_expr(series: pl.Series) -> float:
        """Return the mean of all negative values in *series*, or NaN if none exist."""
        return _mean(_BasicStatsMixin._negative(series))

    @staticmethod
    def _gaussian_quantile(alpha: float, mu: float, sigma: float) -> float:
        """Gaussian inverse-CDF (``norm.ppf``) returning NaN for a zero-scale input.

        ``norm.ppf(alpha, mu, 0.0)`` already returns ``nan`` for a degenerate
        (zero-variance) distribution — but it emits an ``invalid value
        encountered in multiply`` RuntimeWarning while doing so (``inf * 0``
        internally). Degenerate scale arises for a single observation (undefined
        std) or a constant series. Short-circuiting to ``float("nan")`` keeps the
        exact same result while suppressing the spurious warning; downstream
        masking relies on this NaN (Polars treats ``x < nan`` as ``True``).

        Args:
            alpha (float): Probability at which the quantile is evaluated.
            mu (float): Location (mean) of the normal distribution.
            sigma (float): Scale (standard deviation) of the normal distribution.

        Returns:
            float: The quantile, or NaN when *sigma* is exactly zero.

        """
        return float("nan") if sigma == 0.0 else float(norm.ppf(alpha, mu, sigma))

    @staticmethod
    def _parametric_var(series: pl.Series, sigma: float, alpha: float) -> float:
        """Return the Gaussian (variance-covariance) VaR threshold of *series*.

        Shared by ``value_at_risk`` and ``_conditional_value_at_risk_impl`` so
        both use exactly the same threshold.

        Args:
            series (pl.Series): Returns to evaluate.
            sigma (float): Multiplier applied to the sample standard deviation.
            alpha (float): Lower-tail probability.

        Returns:
            float: ``norm.ppf(alpha, mean, sigma * std)``; NaN when the scaled
            standard deviation is zero or undefined.

        """
        std_val = cast(float, series.std())
        # An undefined std (e.g. a single observation) is treated as zero scale,
        # which _gaussian_quantile maps to NaN.
        scale = sigma * (std_val if std_val is not None else 0.0)
        return _BasicStatsMixin._gaussian_quantile(alpha, _mean(series), scale)

    @staticmethod
    def _win_fraction(series: pl.Series) -> float:
        """Return the share of positive values among the non-zero values of *series*.

        Returns NaN when there are no non-zero values, instead of dividing by zero.
        """
        num_nonzero = series.filter(series != 0).count()
        if num_nonzero == 0:
            return float("nan")  # indeterminate: no non-zero observations
        return float(_BasicStatsMixin._positive(series).count() / num_nonzero)

    # ── Basic statistics ──────────────────────────────────────────────────────

    @columnwise_stat
    def skew(self, series: pl.Series) -> int | float | None:
        """Calculate skewness (asymmetry) for each numeric column.

        Args:
            series (pl.Series): The series to calculate skewness for.

        Returns:
            float: The bias-corrected skewness value.

        """
        return series.skew(bias=False)

    @columnwise_stat
    def kurtosis(self, series: pl.Series) -> int | float | None:
        """Calculate the kurtosis of returns.

        Describes how peaked the distribution is compared to a normal distribution.

        Args:
            series (pl.Series): The series to calculate kurtosis for.

        Returns:
            float: The bias-corrected kurtosis value.

        """
        return series.kurtosis(bias=False)

    @columnwise_stat
    def avg_return(self, series: pl.Series) -> float:
        """Calculate average return per non-zero value.

        Nulls and exact zeros are excluded before averaging.

        Args:
            series (pl.Series): The series to calculate average return for.

        Returns:
            float: The average return value.

        """
        return _mean(series.filter(series.is_not_null() & (series != 0)))

    @columnwise_stat
    def avg_win(self, series: pl.Series) -> float:
        """Calculate the average winning return/trade for an asset.

        Args:
            series (pl.Series): The series to calculate average win for.

        Returns:
            float: The average winning return.

        """
        return self._mean_positive_expr(series)

    @columnwise_stat
    def avg_loss(self, series: pl.Series) -> float:
        """Calculate the average loss return/trade for a period.

        Args:
            series (pl.Series): The series to calculate average loss for.

        Returns:
            float: The average loss return.

        """
        return self._mean_negative_expr(series)

    @columnwise_stat
    def comp(self, series: pl.Series) -> float:
        """Calculate the total compounded return over the full period.

        Computed as product(1 + r) - 1.

        Args:
            series (pl.Series): The series to calculate compounded return for.

        Returns:
            float: Total compounded return.

        """
        return _comp_return(series)

    @columnwise_stat
    def geometric_mean(self, series: pl.Series, periods: int | float | None = None, annualize: bool = False) -> float:
        """Calculate the geometric mean of returns.

        Computed as the per-period geometric average: (∏(1 + rᵢ))^(1/n) - 1.
        When annualized, the exponent is ``periods / n`` instead of ``1 / n``.

        Args:
            series (pl.Series): The series to calculate geometric mean for.
            periods (int | float, optional): Periods per year for annualization.
                Falls back to the data's periods-per-year when omitted (or zero).
            annualize (bool): Whether to annualize the result. Defaults to False.

        Returns:
            float: The geometric mean return.

        Returns NaN when:
            ``float("nan")`` when the series has no non-null observations or the
            compounded return ``product(1 + r)`` is non-positive.
        """
        clean = series.drop_nulls().cast(pl.Float64)
        n = clean.len()
        if n == 0:
            return float("nan")  # indeterminate: no observations
        compound = float((1.0 + clean).product())
        if compound <= 0:
            return float("nan")  # indeterminate: non-positive compound return
        exponent = (periods or self._data._periods_per_year) / n if annualize else (1.0 / n)
        return float(compound**exponent) - 1.0

    # ── Volatility & risk ─────────────────────────────────────────────────────

    @columnwise_stat
    def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:
        """Calculate the volatility of returns.

        - Std dev of returns
        - Multiplied by the annualization factor for `periods` if `annualize` is True.

        Args:
            series (pl.Series): The series to calculate volatility for.
            periods (int | float, optional): Number of periods per year. Falls back
                to the data's periods-per-year when omitted (or zero).
            annualize (bool, optional): Whether to annualize the result. Defaults to True.

        Returns:
            float: The volatility value (0.0 when the standard deviation is undefined).

        Raises:
            TypeError: If the resolved number of periods is not an int or float.

        """
        raw_periods = periods or self._data._periods_per_year

        # Ensure it's numeric
        if not isinstance(raw_periods, int | float):
            raise TypeError(f"Expected int or float for periods, got {type(raw_periods).__name__}")  # noqa: TRY003

        factor = _annualization_factor(raw_periods) if annualize else 1.0
        std_val = cast(float, series.std())
        return (std_val if std_val is not None else 0.0) * factor

    # ── Win / loss metrics ────────────────────────────────────────────────────

    @columnwise_stat
    def payoff_ratio(self, series: pl.Series) -> float:
        """Measure the payoff ratio.

        The payoff ratio is calculated as average win / abs(average loss).

        Args:
            series (pl.Series): The series to calculate payoff ratio for.

        Returns:
            float: The payoff ratio value.

        """
        avg_win = self._mean_positive_expr(series)
        avg_loss = float(np.abs(self._mean_negative_expr(series)))
        return avg_win / avg_loss

    @columnwise_stat
    def profit_ratio(self, series: pl.Series) -> float:
        """Measure the profit ratio.

        The profit ratio is calculated as win ratio / loss ratio, where each
        ratio is abs(mean / count) of the non-negative and the negative returns
        respectively.

        Args:
            series (pl.Series): The series to calculate profit ratio for.

        Returns:
            float: The profit ratio value.

        Returns NaN when:
            ``float("nan")`` when the series has no wins or no losses.
        """
        wins = series.filter(series >= 0)
        losses = self._negative(series)

        # Filtering can legitimately leave no wins or no losses for one-sided return series.
        if wins.is_empty() or losses.is_empty():
            return float("nan")  # indeterminate: no wins or no losses

        win_mean = _mean(wins)
        loss_mean = _mean(losses)
        win_ratio = float(np.abs(win_mean / wins.count()))
        loss_ratio = float(np.abs(loss_mean / losses.count()))

        return win_ratio / loss_ratio

    @columnwise_stat
    def profit_factor(self, series: pl.Series) -> float:
        """Measure the profit factor.

        The profit factor is calculated as abs(sum of wins / sum of losses).

        Args:
            series (pl.Series): The series to calculate profit factor for.

        Returns:
            float: The profit factor value.

        Returns NaN when:
            ``float("nan")`` when there are no losses (the denominator is zero).
        """
        gross_profit = float(self._positive(series).sum())
        gross_loss = float(self._negative(series).sum())
        if gross_loss == 0:
            return float("nan")  # indeterminate: no losses (denominator is zero)
        return abs(gross_profit / gross_loss)

    # ── Risk metrics ──────────────────────────────────────────────────────────

    @columnwise_stat
    def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
        """Calculate the daily value-at-risk.

        Uses the variance-covariance (Gaussian) calculation:
        ``norm.ppf(alpha, mean, sigma * std)``.

        Args:
            series (pl.Series): The series to calculate value at risk for.
            sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
            alpha (float, optional): Lower-tail probability. Defaults to 0.05.

        Returns:
            float: The value at risk; NaN when the standard deviation is zero
            or undefined.

        """
        return self._parametric_var(series, sigma, alpha)

    @columnwise_stat
    def _conditional_value_at_risk_impl(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
        """Inner per-series implementation of conditional value-at-risk.

        Returns the mean of the returns strictly below the Gaussian VaR threshold.
        """
        var = self._parametric_var(series, sigma, alpha)

        # Strict comparison: only returns below the VaR threshold form the tail.
        # The cast exists purely to satisfy the type checker.
        tail_mask = cast(Iterable[bool], series < var)
        return _mean(series.filter(tail_mask))

    def conditional_value_at_risk(
        self, sigma: float = 1.0, confidence: float = 0.95, **kwargs: float
    ) -> dict[str, float]:
        """Calculate the conditional value-at-risk (CVaR / Expected Shortfall).

        Also known as CVaR or expected shortfall, calculated for each numeric column.

        Args:
            sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
            confidence (float, optional): Confidence level (e.g. 0.95 for 95 %).
                Converted internally to ``alpha = 1 - confidence``. Defaults to 0.95.
            **kwargs: Only ``alpha`` is accepted. ``alpha`` is the probability mass
                in the *loss* (lower) tail, i.e. ``alpha = 1 - confidence``; a 95 %
                confidence level corresponds to ``alpha = 0.05``. When given, it
                takes precedence over ``confidence``.

        Returns:
            dict[str, float]: The conditional value at risk per asset column.

        Raises:
            TypeError: If unexpected keyword arguments are passed.

        """
        alpha = kwargs.pop("alpha", None)
        if kwargs:
            unexpected = ", ".join(sorted(kwargs))
            msg = f"conditional_value_at_risk() got unexpected keyword argument(s): {unexpected}"
            raise TypeError(msg)
        if alpha is None:
            alpha = 1.0 - confidence
        return self._conditional_value_at_risk_impl(sigma=sigma, alpha=alpha)

    @staticmethod
    def _drawdown_with_baseline(series: pl.Series) -> pl.Series:
        """Compute drawdown series with a phantom zero-return baseline prepended.

        Matches the quantstats convention: a negative first return is treated as
        a drawdown from the initial capital of 1.0, not as the new high-water mark.
        """
        extended = pl.concat([pl.Series([0.0]), series.cast(pl.Float64)])
        nav = (1.0 + extended).cum_prod()
        hwm = nav.cum_max()
        # The phantom baseline pins nav[0] = 1.0, so hwm >= 1.0 throughout and
        # the 1e-10 floor is purely defensive (unreachable); a -100 % return
        # correctly reports as a full drawdown of 1.0 here.
        dd = ((hwm - nav) / hwm.clip(lower_bound=1e-10)).clip(lower_bound=0.0)
        return dd[1:]  # drop phantom point

    @staticmethod
    def _ulcer_index_series(series: pl.Series) -> float:
        """Compute ulcer index for a single returns series.

        Returns NaN for fewer than two observations, where the ``n - 1``
        denominator is zero or negative.
        """
        n = series.len()
        if n < 2:
            return float("nan")  # indeterminate: n - 1 denominator is not positive
        dd = _BasicStatsMixin._drawdown_with_baseline(series)
        return float(np.sqrt(float((dd**2).sum()) / (n - 1)))

    @columnwise_stat
    def ulcer_index(self, series: pl.Series) -> float:
        """Calculate the Ulcer Index (downside risk measurement).

        Measures the depth and duration of drawdowns as the root mean square
        of squared drawdowns: sqrt(sum(dd²) / (n - 1)).

        Args:
            series (pl.Series): The series to calculate ulcer index for.

        Returns:
            float: Ulcer Index value.

        Returns NaN when:
            ``float("nan")`` when the series has fewer than two observations.
        """
        return self._ulcer_index_series(series)

    @columnwise_stat
    def ulcer_performance_index(self, series: pl.Series, rf: float = 0.0) -> float:
        """Calculate the Ulcer Performance Index (UPI).

        Risk-adjusted return using Ulcer Index as the risk measure:
        (compounded_return - rf) / ulcer_index.

        Args:
            series (pl.Series): The series to calculate UPI for.
            rf (float): Risk-free rate. Defaults to 0.

        Returns:
            float: Ulcer Performance Index.

        Returns NaN when:
            ``float("nan")`` when the ulcer index is zero (no drawdowns) or
            itself NaN.
        """
        comp = _comp_return(series)
        ui = self._ulcer_index_series(series)
        return float("nan") if ui == 0 else (comp - rf) / ui

    @columnwise_stat
    def serenity_index(self, series: pl.Series, rf: float = 0.0) -> float:
        """Calculate the Serenity Index.

        Combines the Ulcer Index with a CVaR-based pitfall measure:
        (sum_returns - rf) / (ulcer_index * pitfall), where
        pitfall = -CVaR(drawdowns) / std(returns).

        Args:
            series (pl.Series): The series to calculate serenity index for.
            rf (float): Risk-free rate. Defaults to 0.

        Returns:
            float: Serenity Index.

        Returns NaN when:
            ``float("nan")`` when the returns have zero (or undefined) standard
            deviation or the denominator ``ulcer_index * pitfall`` is zero.
        """
        std_val = cast(float, series.std())
        if not std_val:
            return float("nan")  # indeterminate: zero variance

        # Negate drawdowns to match quantstats sign convention (negative = below peak)
        dd_neg = -self._drawdown_with_baseline(series)
        mu = _mean(dd_neg)
        sigma = cast(float, dd_neg.std())
        var_threshold = self._gaussian_quantile(0.05, mu, sigma)
        mask = cast(Iterable[bool], dd_neg < var_threshold)
        cvar_val = _mean(dd_neg.filter(mask))

        pitfall = -cvar_val / std_val
        ui = self._ulcer_index_series(series)
        denominator = ui * pitfall
        return float("nan") if denominator == 0 else (float(series.sum()) - rf) / denominator

    @columnwise_stat
    def win_rate(self, series: pl.Series) -> float:
        """Calculate the win ratio for a period.

        Computed as count(returns > 0) / count(returns != 0).

        Args:
            series (pl.Series): The series to calculate win rate for.

        Returns:
            float: The win rate value.

        Returns NaN when:
            ``float("nan")`` when the series has no non-zero returns.
        """
        return self._win_fraction(series)

    @columnwise_stat
    def autocorr_penalty(self, series: pl.Series) -> float:
        """Calculate the autocorrelation penalty for risk-adjusted metrics.

        Computes a penalty factor that accounts for autocorrelation in returns,
        which can inflate Sharpe and Sortino ratios.

        Args:
            series (pl.Series): The series to calculate autocorrelation penalty for.

        Returns:
            float: Autocorrelation penalty factor (>= 1).

        """
        arr = series.drop_nulls().to_numpy()
        num = len(arr)
        # Absolute lag-1 correlation between consecutive observations.
        coef = float(np.abs(np.corrcoef(arr[:-1], arr[1:])[0, 1]))
        x = np.arange(1, num)
        corr = ((num - x) / num) * (coef**x)
        return float(np.sqrt(1 + 2 * np.sum(corr)))

    @staticmethod
    def _max_consecutive(mask: pl.Series) -> int:
        """Return the longest run of True values in a boolean mask.

        Args:
            mask (pl.Series): Boolean series (True = qualifying period).

        Returns:
            int: Length of the longest consecutive True run.

        """
        group_ids = mask.rle_id()
        df = pl.DataFrame({"v": mask.cast(pl.Int32), "g": group_ids})
        # Within each run, "rank" counts 1..run_length; multiplying by the 0/1
        # value zeroes out the False runs, so the max is the longest True run.
        result = (
            df.with_columns((pl.int_range(pl.len()).over("g") + 1).alias("rank"))
            .select((pl.col("v") * pl.col("rank")).max())
            .item()
        )
        return int(result) if result is not None else 0

    @columnwise_stat
    def consecutive_wins(self, series: pl.Series) -> int:
        """Calculate the maximum number of consecutive winning periods.

        Args:
            series (pl.Series): The series to calculate consecutive wins for.

        Returns:
            int: Maximum number of consecutive winning periods.

        """
        return self._max_consecutive(series > 0)

    @columnwise_stat
    def consecutive_losses(self, series: pl.Series) -> int:
        """Calculate the maximum number of consecutive losing periods.

        Args:
            series (pl.Series): The series to calculate consecutive losses for.

        Returns:
            int: Maximum number of consecutive losing periods.

        """
        return self._max_consecutive(series < 0)

    @columnwise_stat
    def risk_of_ruin(self, series: pl.Series) -> float:
        """Calculate the risk of ruin (probability of losing all capital).

        Uses the formula: ((1 - win_rate) / (1 + win_rate)) ^ n,
        where n is the number of periods.

        Args:
            series (pl.Series): The series to calculate risk of ruin for.

        Returns:
            float: The risk of ruin probability.

        Returns NaN when:
            ``float("nan")`` when the series has no non-zero returns.
        """
        win_frac = self._win_fraction(series)
        # Explicit guard: ``nan ** 0`` evaluates to 1.0 in Python, which would
        # hide the indeterminate win rate for an empty series.
        if math.isnan(win_frac):
            return float("nan")
        return ((1 - win_frac) / (1 + win_frac)) ** series.len()

    @columnwise_stat
    def tail_ratio(self, series: pl.Series, cutoff: float = 0.95) -> float:
        """Calculate the tail ratio (right tail / left tail).

        Measures the ratio between the upper and lower tails of the return
        distribution: abs(quantile(cutoff) / quantile(1 - cutoff)).

        Args:
            series (pl.Series): The series to calculate tail ratio for.
            cutoff (float): Percentile cutoff for tail analysis. Defaults to 0.95.

        Returns:
            float: Tail ratio.

        Returns NaN when:
            ``float("nan")`` when either quantile is missing or the lower quantile
            is zero.
        """
        upper = cast(float, series.quantile(cutoff, interpolation="linear"))
        lower = cast(float, series.quantile(1 - cutoff, interpolation="linear"))
        if upper is None or lower is None or lower == 0:
            return float("nan")  # indeterminate: zero or missing quantile
        return float(np.abs(upper / lower))

    def cpc_index(self) -> dict[str, float]:
        """Calculate the CPC Index (Profit Factor * Win Rate * Win-Loss Ratio).

        Returns:
            dict[str, float]: Dictionary mapping asset names to CPC Index values.

        """
        pf = self.profit_factor()
        wr = self.win_rate()
        wlr = self.payoff_ratio()
        return {col: pf[col] * wr[col] * wlr[col] for col in pf}

    def common_sense_ratio(self) -> dict[str, float]:
        """Calculate the Common Sense Ratio (Profit Factor * Tail Ratio).

        Returns:
            dict[str, float]: Dictionary mapping asset names to Common Sense Ratio values.

        """
        pf = self.profit_factor()
        tr = self.tail_ratio()
        return {col: pf[col] * tr[col] for col in pf}

    def outliers(self, quantile: float = 0.95) -> dict[str, pl.Series]:
        """Return only the returns above a quantile threshold.

        Args:
            quantile (float): Upper quantile threshold. Defaults to 0.95.

        Returns:
            dict[str, pl.Series]: Filtered series per asset containing only
                returns strictly above the quantile.

        """
        result: dict[str, pl.Series] = {}
        for col, series in self._data.items():
            threshold = cast(float, series.quantile(quantile, interpolation="linear"))
            result[col] = series.filter(series > threshold).drop_nulls()
        return result

    def remove_outliers(self, quantile: float = 0.95) -> dict[str, pl.Series]:
        """Return returns with values above a quantile threshold removed.

        Args:
            quantile (float): Upper quantile threshold. Defaults to 0.95.

        Returns:
            dict[str, pl.Series]: Filtered series per asset containing only
                returns strictly below the quantile.

        """
        result: dict[str, pl.Series] = {}
        for col, series in self._data.items():
            threshold = cast(float, series.quantile(quantile, interpolation="linear"))
            result[col] = series.filter(series < threshold)
        return result

    @columnwise_stat
    def outlier_win_ratio(self, series: pl.Series, quantile: float = 0.99) -> float:
        """Calculate the outlier winners ratio.

        Ratio of the high-quantile return to the mean of the non-negative
        returns, showing how much outlier wins contribute to overall performance.

        Args:
            series (pl.Series): The series to calculate outlier win ratio for.
            quantile (float): Quantile for the outlier threshold. Defaults to 0.99.

        Returns:
            float: Outlier win ratio.

        Returns NaN when:
            ``float("nan")`` when the mean of non-negative returns is zero.
        """
        positive_mean = _mean(series.filter(series >= 0))
        if positive_mean == 0:
            return float("nan")  # indeterminate: zero mean of positive returns
        quantile_val = cast(float, series.quantile(quantile, interpolation="linear"))
        return float(quantile_val / positive_mean)

    @columnwise_stat
    def outlier_loss_ratio(self, series: pl.Series, quantile: float = 0.01) -> float:
        """Calculate the outlier losers ratio.

        Ratio of the low-quantile return to the mean negative return,
        showing how much outlier losses contribute to overall risk.

        Args:
            series (pl.Series): The series to calculate outlier loss ratio for.
            quantile (float): Quantile for the outlier threshold. Defaults to 0.01.

        Returns:
            float: Outlier loss ratio.

        Returns NaN when:
            ``float("nan")`` when the mean of negative returns is zero.
        """
        negative_mean = self._mean_negative_expr(series)
        if negative_mean == 0:  # pragma: no cover
            return float("nan")  # indeterminate: zero mean of negative returns
        quantile_val = cast(float, series.quantile(quantile, interpolation="linear"))
        return float(quantile_val / negative_mean)

    @columnwise_stat
    def gain_to_pain_ratio(self, series: pl.Series) -> float:
        """Calculate Jack Schwager's Gain-to-Pain Ratio.

        The ratio is calculated as total return / sum of losses (in absolute value).

        Args:
            series (pl.Series): The series to calculate gain to pain ratio for.

        Returns:
            float: The gain to pain ratio value.

        Returns NaN when:
            ``float("nan")`` when there are no losses (the denominator is zero).
        """
        total_gain = float(series.sum())
        total_pain = float(self._negative(series).abs().sum())
        if total_pain == 0:
            return float("nan")  # indeterminate: no losses (denominator is zero)
        return total_gain / total_pain

    @columnwise_stat
    def risk_return_ratio(self, series: pl.Series) -> float:
        """Calculate the return/risk ratio.

        This is equivalent to the Sharpe ratio without a risk-free rate:
        mean / std. When the standard deviation is undefined (``None``) the
        mean is returned unscaled.

        Args:
            series (pl.Series): The series to calculate risk return ratio for.

        Returns:
            float: The risk return ratio value.

        Returns NaN when:
            ``float("nan")`` when the standard deviation is exactly zero.
        """
        mean_val = _mean(series)
        std_val = cast(float, series.std())
        if std_val is None:
            return mean_val / 1.0
        if std_val == 0:
            return float("nan")  # indeterminate: zero variance
        return mean_val / std_val

    def kelly_criterion(self) -> dict[str, float]:
        """Calculate the optimal capital allocation per column.

        Uses the Kelly Criterion formula: f* = [(b * p) - q] / b
        where:
            - b = payoff ratio
            - p = win rate
            - q = 1 - p.

        Returns:
            dict[str, float]: Dictionary mapping asset names to Kelly criterion values.

        """
        b = self.payoff_ratio()
        p = self.win_rate()

        return {col: ((b[col] * p[col]) - (1 - p[col])) / b[col] for col in b}

    @columnwise_stat
    def best(self, series: pl.Series) -> float | None:
        """Find the maximum return per column (best period).

        Args:
            series (pl.Series): The series to find the best return for.

        Returns:
            float | None: The maximum return value, or None when the series
            has no maximum.

        """
        return cast("float | None", series.max())

    @columnwise_stat
    def worst(self, series: pl.Series) -> float | None:
        """Find the minimum return per column (worst period).

        Args:
            series (pl.Series): The series to find the worst return for.

        Returns:
            float | None: The minimum return value, or None when the series
            has no minimum.

        """
        return cast("float | None", series.min())

    @columnwise_stat
    def exposure(self, series: pl.Series) -> float:
        """Calculate the market exposure time (returns != 0).

        Computed as the count of non-zero returns divided by the height of
        ``self.all``, rounded up to two decimal places.

        Args:
            series (pl.Series): The series to calculate exposure for.

        Returns:
            float: The exposure value.

        """
        all_data = self.all
        ex = series.filter(series != 0).count() / all_data.height
        return math.ceil(ex * 100) / 100

    @staticmethod
    def _pearson_corr_shifted(series: pl.Series, lag: int) -> float:
        """Compute Pearson correlation between *series* and its lag-*lag* shift.

        Args:
            series (pl.Series): The input series.
            lag (int): Number of positions to shift.

        Returns:
            float: Pearson correlation coefficient, or NaN if no valid pairs remain.

        """
        shifted = series.shift(lag)
        paired = pl.DataFrame({"x": series, "y": shifted}).drop_nulls()
        # Large lags or null-only overlap can leave no aligned observations to correlate.
        if paired.is_empty():
            return float("nan")
        return float(np.corrcoef(paired["x"].to_numpy(), paired["y"].to_numpy())[0, 1])

    @columnwise_stat
    def autocorr(self, series: pl.Series, lag: int = 1) -> float:
        """Compute lag-n autocorrelation of returns.

        Args:
            series (pl.Series): The series to calculate autocorrelation for.
            lag (int): Number of periods to lag. Must be a positive integer.

        Returns:
            float: Pearson correlation between returns and their lagged values.

        Raises:
            TypeError: If *lag* is not an ``int``.
            ValueError: If *lag* is not a positive integer (>= 1).

        """
        if not isinstance(lag, int):
            msg = f"lag must be an int, got {type(lag).__name__}"
            raise TypeError(msg)
        if lag <= 0:
            msg = f"lag must be a positive integer, got {lag}"
            raise ValueError(msg)
        return self._pearson_corr_shifted(series, lag)

    def acf(self, nlags: int = 20) -> pl.DataFrame:
        """Compute the autocorrelation function up to nlags.

        Args:
            nlags (int): Maximum number of lags to include. Default is 20.

        Returns:
            pl.DataFrame: DataFrame with a ``lag`` column (0..nlags) and one
                column per asset containing the ACF values.

        Raises:
            TypeError: If *nlags* is not an ``int``.
            ValueError: If *nlags* is negative.

        """
        if not isinstance(nlags, int):
            msg = f"nlags must be an int, got {type(nlags).__name__}"
            raise TypeError(msg)
        if nlags < 0:
            msg = f"nlags must be non-negative, got {nlags}"
            raise ValueError(msg)
        result: dict[str, list[float]] = {"lag": list(range(nlags + 1))}
        for col, series in self._data.items():
            # Lag 0 is the series correlated with itself, i.e. exactly 1.0.
            result[col] = [1.0, *(self._pearson_corr_shifted(series, k) for k in range(1, nlags + 1))]
        return pl.DataFrame(result)