Coverage for src / basanos / analytics / _stats.py: 100%
279 statements
« prev ^ index » next — coverage.py v7.13.5, created at 2026-03-19 05:23 +0000
1"""Statistical metrics and ratios for financial returns.
3This module defines the Stats class which operates on a Data instance to
4compute per-asset statistics like skew, kurtosis, volatility, Sharpe,
5VaR/CVaR, and more.
6"""
8import dataclasses
9from collections.abc import Callable, Iterable
10from datetime import timedelta
11from functools import wraps
12from typing import cast
14import numpy as np
15import polars as pl
16from scipy.stats import norm
def _drawdown_series(series: pl.Series) -> pl.Series:
    """Derive the drawdown fraction series from additive returns.

    The input is interpreted as additive daily returns; a normalised NAV
    curve starting at 1.0 is built via a cumulative sum. Drawdown at each
    point is the fractional distance below the running NAV maximum (the
    high-water mark).

    Args:
        series: A Polars Series of additive returns (profit / AUM).

    Returns:
        A Polars Float64 Series with values in [0, 1]; 0 means the NAV is
        at an all-time high, 0.2 means it is 20 % below its previous peak.

    Examples:
        >>> import polars as pl
        >>> s = pl.Series([0.0, -0.1, 0.2])
        >>> [round(x, 10) for x in _drawdown_series(s).to_list()]
        [0.0, 0.1, 0.0]
    """
    nav_curve = series.cast(pl.Float64).cum_sum() + 1.0
    peak = nav_curve.cum_max()
    # A NAV of exactly 0 would make the drawdown fraction undefined. The
    # NAV starts at 1.0, so that only happens after catastrophic cumulative
    # losses; flooring at 1e-10 avoids a ZeroDivisionError without
    # affecting normal data.
    denominator = peak.clip(lower_bound=1e-10)
    return ((peak - nav_curve) / denominator).clip(lower_bound=0.0)
50def _to_float(value: object) -> float:
51 """Safely convert a Polars aggregation result to float.
53 Examples:
54 >>> _to_float(2.0)
55 2.0
56 >>> _to_float(None)
57 0.0
58 """
59 if value is None:
60 return 0.0
61 if isinstance(value, timedelta):
62 return value.total_seconds()
63 return float(cast(float, value))
66def _to_float_or_none(value: object) -> float | None:
67 """Safely convert a Polars aggregation result to float or None."""
68 if value is None:
69 return None
70 if isinstance(value, timedelta):
71 return value.total_seconds()
72 return float(cast(float, value))
@dataclasses.dataclass(frozen=True)
class Stats:
    """Statistical analysis tools for financial returns data.

    This class provides a comprehensive set of methods for calculating
    various financial metrics and statistics on returns data, including:

    - Basic statistics (mean, skew, kurtosis)
    - Risk metrics (volatility, value-at-risk, drawdown)
    - Performance ratios (Sharpe, information ratio)
    - Win/loss metrics (win rate, profit factor, payoff ratio)

    The class is designed to work with the _Data class and operates on
    Polars DataFrames for efficient computation.

    Attributes:
        data: Polars DataFrame of returns — optionally a ``date`` column
            plus one numeric column per asset.

    Examples:
        >>> import polars as pl
        >>> from datetime import date
        >>> data = pl.DataFrame({
        ...     "date": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],
        ...     "returns": [0.01, -0.02, 0.03],
        ... })
        >>> stats = Stats(data=data)
        >>> stats.assets
        ['returns']
    """

    # Returns table; validated (type and non-emptiness) in __post_init__.
    data: pl.DataFrame
107 def __post_init__(self) -> None:
108 """Validate the input data frame after initialization.
110 Ensures that `data` is a Polars DataFrame and contains at least one
111 row. Raises TypeError or ValueError otherwise.
112 """
113 if not isinstance(self.data, pl.DataFrame):
114 raise TypeError
115 if self.data.height == 0:
116 raise ValueError
118 @property
119 def assets(self) -> list[str]:
120 """List of asset column names (numeric columns excluding 'date')."""
121 return [c for c in self.data.columns if c != "date" and self.data[c].dtype.is_numeric()]
123 @staticmethod
124 def _mean_positive_expr(series: pl.Series) -> float:
125 """Return the mean of strictly positive values, or 0.0 if none exist."""
126 result = series.filter(series > 0).mean()
127 return _to_float(result)
129 @staticmethod
130 def _mean_negative_expr(series: pl.Series) -> float:
131 """Return the mean of strictly negative values, or 0.0 if none exist."""
132 result = series.filter(series < 0).mean()
133 return _to_float(result)
135 @staticmethod
136 def columnwise_stat(func: Callable[..., float | int | None]) -> Callable[..., dict[str, float | int | None]]:
137 """Apply a column-wise statistical function to all numeric columns.
139 Args:
140 func (Callable): The function to decorate.
142 Returns:
143 Callable: The decorated function.
145 """
147 @wraps(func)
148 def wrapper(self: "Stats", *args: object, **kwargs: object) -> dict[str, float | int | None]:
149 """Apply the wrapped stat function to each asset column and return results as a dict."""
150 return {asset: func(self, self.data[asset], *args, **kwargs) for asset in self.assets}
152 return wrapper
154 @columnwise_stat
155 def skew(self, series: pl.Series) -> float | None:
156 """Calculate skewness (asymmetry) for each numeric column.
158 Args:
159 series (pl.Series): The series to calculate skewness for.
161 Returns:
162 float: The skewness value.
164 """
165 return _to_float_or_none(series.skew(bias=False))
    @columnwise_stat
    def kurtosis(self, series: pl.Series) -> float | None:
        """Excess kurtosis of returns (Fisher definition, biased estimator).

        Nulls are dropped first so missing values are ignored. The biased
        (population) moment estimator is used, which — unlike the unbiased
        one — is defined for samples with fewer than 4 observations;
        Polars returns None when the result is still undefined.

        Args:
            series (pl.Series): Return series for a single asset.

        Returns:
            float | None: Excess kurtosis, or None when undefined.
        """
        # Drop nulls to match test expectations (ignore missing values)
        s = series.drop_nulls()
        # Biased estimator (Fisher=True by default in Polars); chosen so
        # short samples still yield a value.
        return _to_float_or_none(s.kurtosis(bias=True))
182 @columnwise_stat
183 def avg_return(self, series: pl.Series) -> float:
184 """Calculate average return per non-zero, non-null value.
186 Args:
187 series (pl.Series): The series to calculate average return for.
189 Returns:
190 float: The average return value.
192 """
193 result = series.filter(series.is_not_null() & (series != 0)).mean()
194 return _to_float(result)
196 @columnwise_stat
197 def avg_win(self, series: pl.Series) -> float:
198 """Calculate the average winning return/trade for an asset.
200 Args:
201 series (pl.Series): The series to calculate average win for.
203 Returns:
204 float: The average winning return.
206 """
207 return self._mean_positive_expr(series)
209 @columnwise_stat
210 def avg_loss(self, series: pl.Series) -> float:
211 """Calculate the average loss return/trade for a period.
213 Args:
214 series (pl.Series): The series to calculate average loss for.
216 Returns:
217 float: The average loss return.
219 """
220 return self._mean_negative_expr(series)
    @columnwise_stat
    def volatility(self, series: pl.Series, periods: int | float | None = None, annualize: bool = True) -> float:
        """Calculate the volatility (standard deviation) of returns.

        Annualised by multiplying by sqrt(periods) when ``annualize`` is
        True.

        Args:
            series (pl.Series): The series to calculate volatility for.
            periods (int | float | None): Number of periods per year.
                Defaults to ``periods_per_year``, estimated from the data's
                timestamp spacing.
            annualize (bool, optional): Whether to annualize the result.
                Defaults to True.

        Returns:
            float: The volatility value.

        Raises:
            TypeError: If the resolved ``periods`` value is not numeric.
        """
        raw_periods = periods or self.periods_per_year
        # Ensure it's numeric
        if not isinstance(raw_periods, int | float):
            raise TypeError
        factor = np.sqrt(raw_periods) if annualize else 1.0
        return _to_float(series.std()) * factor
247 @columnwise_stat
248 def value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
249 """Calculate the daily value-at-risk.
251 Uses variance-covariance calculation with confidence level.
253 Args:
254 series (pl.Series): The series to calculate value at risk for.
255 alpha (float, optional): Confidence level. Defaults to 0.05.
256 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
258 Returns:
259 float: The value at risk.
261 """
262 mu = _to_float(series.mean())
263 sigma *= _to_float(series.std())
265 return float(norm.ppf(alpha, mu, sigma))
267 @columnwise_stat
268 def conditional_value_at_risk(self, series: pl.Series, sigma: float = 1.0, alpha: float = 0.05) -> float:
269 """Calculate the conditional value-at-risk.
271 Also known as CVaR or expected shortfall, calculated for each numeric column.
273 Args:
274 series (pl.Series): The series to calculate conditional value at risk for.
275 alpha (float, optional): Confidence level. Defaults to 0.05.
276 sigma (float, optional): Standard deviation multiplier. Defaults to 1.0.
278 Returns:
279 float: The conditional value at risk.
281 """
282 mu = _to_float(series.mean())
283 sigma *= _to_float(series.std())
285 var = norm.ppf(alpha, mu, sigma)
287 # Compute mean of returns less than or equal to VaR.
288 # Return NaN when no empirical observations fall below the parametric
289 # VaR threshold (empty filter), rather than the misleading 0.0 that
290 # _to_float(None) would otherwise produce.
291 mask = cast(Iterable[bool], series < var)
292 filtered = series.filter(mask)
293 if filtered.is_empty():
294 return float("nan")
295 return _to_float(filtered.mean())
297 @columnwise_stat
298 def best(self, series: pl.Series) -> float | None:
299 """Find the maximum return per column (best period).
301 Args:
302 series (pl.Series): The series to find the best return for.
304 Returns:
305 float: The maximum return value.
307 """
308 return _to_float_or_none(series.max())
310 @columnwise_stat
311 def worst(self, series: pl.Series) -> float | None:
312 """Find the minimum return per column (worst period).
314 Args:
315 series (pl.Series): The series to find the worst return for.
317 Returns:
318 float: The minimum return value.
320 """
321 return _to_float_or_none(series.min())
323 @columnwise_stat
324 def win_rate(self, series: pl.Series) -> float:
325 """Calculate the win rate (fraction of profitable periods).
327 Counts the proportion of non-null periods where the return is strictly
328 positive.
330 Args:
331 series (pl.Series): The series to calculate win rate for.
333 Returns:
334 float: Win rate in [0, 1], or NaN when the series contains no
335 non-null observations.
337 """
338 non_null = series.drop_nulls()
339 if non_null.is_empty():
340 return float("nan")
341 n_positive = int((non_null > 0).sum())
342 return n_positive / len(non_null)
344 @columnwise_stat
345 def profit_factor(self, series: pl.Series) -> float:
346 """Calculate the profit factor (gross wins / absolute gross losses).
348 A profit factor greater than 1.0 indicates the strategy produces more
349 gross profit than gross loss. Returns ``inf`` when there are no losing
350 periods, ``0.0`` when there are no winning periods, and ``nan`` when
351 there are neither wins nor losses (and no losses).
353 Args:
354 series (pl.Series): The series to calculate profit factor for.
356 Returns:
357 float: The profit factor.
359 """
360 gross_wins = _to_float(series.filter(series > 0).sum())
361 gross_losses = abs(_to_float(series.filter(series < 0).sum()))
362 if gross_losses == 0.0:
363 return float("inf") if gross_wins > 0 else float("nan")
364 return gross_wins / gross_losses
366 @columnwise_stat
367 def payoff_ratio(self, series: pl.Series) -> float:
368 """Calculate the payoff ratio (average win / absolute average loss).
370 Separates edge type — a high payoff ratio implies the strategy wins
371 infrequently but with large magnitude; a low payoff ratio implies
372 frequent small wins. Returns ``nan`` when either the average win or
373 the average loss is zero (no profitable / no losing periods).
375 Args:
376 series (pl.Series): The series to calculate payoff ratio for.
378 Returns:
379 float: The payoff ratio.
381 """
382 avg_w = self._mean_positive_expr(series)
383 avg_l = self._mean_negative_expr(series)
384 if avg_l == 0.0:
385 return float("nan")
386 return avg_w / abs(avg_l)
388 def monthly_win_rate(self) -> dict[str, float]:
389 """Calculate the monthly win rate (fraction of profitable months).
391 Groups the daily returns data by calendar month, computes the
392 compounded return for each month, then returns the fraction of months
393 that had a positive compounded return.
395 Requires a ``date`` column in ``self.data``. When no ``date`` column
396 is present, each asset entry is ``nan``.
398 Returns:
399 dict[str, float]: Monthly win rate in [0, 1] per asset.
401 """
402 if "date" not in self.data.columns:
403 return {asset: float("nan") for asset in self.assets}
405 result: dict[str, float] = {}
406 for asset in self.assets:
407 df = (
408 self.data.select(["date", asset])
409 .drop_nulls()
410 .with_columns(
411 [
412 pl.col("date").dt.year().alias("_year"),
413 pl.col("date").dt.month().alias("_month"),
414 ]
415 )
416 )
417 monthly = (
418 df.group_by(["_year", "_month"])
419 .agg((pl.col(asset) + 1.0).product().alias("gross"))
420 .with_columns((pl.col("gross") - 1.0).alias("monthly_return"))
421 )
422 n_total = len(monthly)
423 if n_total == 0:
424 result[asset] = float("nan")
425 else:
426 n_positive = int((monthly["monthly_return"] > 0).sum())
427 result[asset] = n_positive / n_total
428 return result
430 def worst_n_periods(self, n: int = 5) -> dict[str, list[float | None]]:
431 """Return the N worst return periods per asset.
433 Sorts each asset's returns in ascending order and returns the first
434 ``n`` values. If the series has fewer than ``n`` non-null
435 observations the list is padded with ``None`` on the right.
437 Args:
438 n (int, optional): Number of worst periods to return. Defaults to 5.
440 Returns:
441 dict[str, list[float | None]]: Sorted worst returns per asset.
443 """
444 result: dict[str, list[float | None]] = {}
445 for asset in self.assets:
446 series = self.data[asset].drop_nulls()
447 worst: list[float | None] = series.sort(descending=False).head(n).to_list()
448 while len(worst) < n:
449 worst.append(None)
450 result[asset] = worst
451 return result
453 def up_capture(self, benchmark: pl.Series) -> dict[str, float]:
454 """Calculate the up-market capture ratio relative to a benchmark.
456 Measures the fraction of the benchmark's upside that the strategy
457 captures. Uses geometric means over benchmark up-periods
458 (benchmark > 0). A value greater than 1.0 means the strategy
459 outperformed the benchmark in rising markets.
461 Args:
462 benchmark (pl.Series): Benchmark return series aligned row-by-row
463 with ``self.data``.
465 Returns:
466 dict[str, float]: Up capture ratio per asset.
468 """
469 result: dict[str, float] = {}
470 up_mask = benchmark > 0
471 bench_up = benchmark.filter(up_mask).drop_nulls()
472 if bench_up.is_empty():
473 return {asset: float("nan") for asset in self.assets}
475 bench_geom = float((bench_up + 1.0).product()) ** (1.0 / len(bench_up)) - 1.0
476 if bench_geom == 0.0:
477 return {asset: float("nan") for asset in self.assets}
479 for asset in self.assets:
480 strat_up = self.data[asset].filter(up_mask).drop_nulls()
481 if strat_up.is_empty():
482 result[asset] = float("nan")
483 else:
484 strat_geom = float((strat_up + 1.0).product()) ** (1.0 / len(strat_up)) - 1.0
485 result[asset] = strat_geom / bench_geom
486 return result
488 def down_capture(self, benchmark: pl.Series) -> dict[str, float]:
489 """Calculate the down-market capture ratio relative to a benchmark.
491 Measures the fraction of the benchmark's downside that the strategy
492 captures. Uses geometric means over benchmark down-periods
493 (benchmark < 0). A value less than 1.0 means the strategy lost less
494 than the benchmark in falling markets (a desirable property).
496 Args:
497 benchmark (pl.Series): Benchmark return series aligned row-by-row
498 with ``self.data``.
500 Returns:
501 dict[str, float]: Down capture ratio per asset.
503 """
504 result: dict[str, float] = {}
505 down_mask = benchmark < 0
506 bench_down = benchmark.filter(down_mask).drop_nulls()
507 if bench_down.is_empty():
508 return {asset: float("nan") for asset in self.assets}
510 bench_geom = float((bench_down + 1.0).product()) ** (1.0 / len(bench_down)) - 1.0
511 if bench_geom == 0.0:
512 return {asset: float("nan") for asset in self.assets}
514 for asset in self.assets:
515 strat_down = self.data[asset].filter(down_mask).drop_nulls()
516 if strat_down.is_empty():
517 result[asset] = float("nan")
518 else:
519 strat_geom = float((strat_down + 1.0).product()) ** (1.0 / len(strat_down)) - 1.0
520 result[asset] = strat_geom / bench_geom
521 return result
523 @columnwise_stat
524 def sharpe(self, series: pl.Series, periods: int | float | None = None) -> float:
525 """Calculate the Sharpe ratio of asset returns.
527 Args:
528 series (pl.Series): The series to calculate Sharpe ratio for.
529 periods (int, optional): Number of periods per year. Defaults to 252.
531 Returns:
532 float: The Sharpe ratio value.
534 """
535 periods = periods or self.periods_per_year
537 mean_val = _to_float(series.mean())
538 divisor = _to_float(series.std(ddof=1))
540 # Treat as zero-variance if divisor is zero or indistinguishable from
541 # floating-point noise (i.e. smaller than 10x machine epsilon x |mean|).
542 _eps = np.finfo(np.float64).eps
543 if divisor <= _eps * max(abs(mean_val), _eps) * 10:
544 return float("nan")
546 res = mean_val / divisor
547 factor = periods or 1
548 return float(res * np.sqrt(factor))
550 @columnwise_stat
551 def max_drawdown(self, series: pl.Series) -> float:
552 """Maximum drawdown as a fraction of the high-water mark.
554 Computes the largest peak-to-trough decline in the cumulative additive
555 NAV (starting at 1.0) expressed as a percentage of the peak.
557 Args:
558 series (pl.Series): Series of additive daily returns.
560 Returns:
561 float: Maximum drawdown in the range [0, 1].
563 """
564 return _to_float(_drawdown_series(series).max())
566 @columnwise_stat
567 def avg_drawdown(self, series: pl.Series) -> float:
568 """Average drawdown across all underwater periods.
570 Computes the mean drawdown percentage for every observation where the
571 portfolio is below its previous peak. Returns 0.0 if there are no
572 underwater periods.
574 Args:
575 series (pl.Series): Series of additive daily returns.
577 Returns:
578 float: Mean drawdown in the range [0, 1].
580 """
581 dd = _drawdown_series(series)
582 in_dd = dd.filter(dd > 0)
583 if in_dd.is_empty():
584 return 0.0
585 return _to_float(in_dd.mean())
    def max_drawdown_duration(self) -> dict[str, float | int | None]:
        """Maximum drawdown duration in calendar days (or periods) per asset.

        Identifies consecutive runs of observations where the portfolio NAV is
        below its high-water mark and returns the length of the longest such
        run.

        When a ``date`` column is present the duration is expressed as the
        number of calendar days spanned by the run (inclusive of both
        endpoints). When no ``date`` column exists each row counts as one
        period, so the result is a count of consecutive underwater periods.

        Returns:
            dict[str, float | int | None]: Mapping from asset name to maximum
                drawdown duration. Returns 0 when there are no underwater
                periods.
        """
        has_date = "date" in self.data.columns
        result: dict[str, float | int | None] = {}
        for asset in self.assets:
            series = self.data[asset]
            # Additive NAV starting at 1.0; "underwater" means strictly
            # below the running maximum (high-water mark).
            nav = 1.0 + series.cast(pl.Float64).cum_sum()
            hwm = nav.cum_max()
            in_dd = nav < hwm
            if not in_dd.any():
                result[asset] = 0
                continue
            if has_date:
                frame = pl.DataFrame({"date": self.data["date"], "in_dd": in_dd})
            else:
                # Synthetic integer index so the same run-length logic works
                # without real dates.
                frame = pl.DataFrame({"date": pl.Series(list(range(len(series))), dtype=pl.Int64), "in_dd": in_dd})
            # rle_id assigns one id per consecutive run of equal values,
            # letting us group each underwater stretch separately.
            frame = frame.with_columns(pl.col("in_dd").rle_id().alias("run_id"))
            dd_runs = (
                frame.filter(pl.col("in_dd"))
                .group_by("run_id")
                .agg(
                    [
                        pl.col("date").min().alias("start"),
                        pl.col("date").max().alias("end"),
                    ]
                )
            )
            if has_date:
                # Inclusive calendar-day span of each run.
                dd_runs = dd_runs.with_columns(
                    ((pl.col("end") - pl.col("start")).dt.total_days() + 1).alias("duration")
                )
            else:
                # Inclusive row-count span of each run.
                dd_runs = dd_runs.with_columns((pl.col("end") - pl.col("start") + 1).alias("duration"))
            result[asset] = int(_to_float(dd_runs["duration"].max()))
        return result
646 @columnwise_stat
647 def calmar(self, series: pl.Series, periods: int | float | None = None) -> float:
648 """Calmar ratio (annualized return divided by maximum drawdown).
650 A standard complement to the Sharpe ratio for trend-following and
651 momentum strategies. Returns ``nan`` when the maximum drawdown is
652 zero (no drawdown observed).
654 Args:
655 series (pl.Series): Series of additive daily returns.
656 periods (int | float | None): Annualisation factor (observations
657 per year). Defaults to ``periods_per_year``.
659 Returns:
660 float: Calmar ratio, or ``nan`` if max drawdown is zero.
662 """
663 raw_periods = periods or self.periods_per_year
664 max_dd = _to_float(_drawdown_series(series).max())
665 if max_dd <= 0:
666 return float("nan")
667 ann_return = _to_float(series.mean()) * raw_periods
668 return ann_return / max_dd
670 @columnwise_stat
671 def recovery_factor(self, series: pl.Series) -> float:
672 """Recovery factor (total return divided by maximum drawdown).
674 A robustness signal for systematic strategies: values well above 1
675 indicate that cumulative profits are large relative to the worst
676 historical loss. Returns ``nan`` when the maximum drawdown is zero.
678 Args:
679 series (pl.Series): Series of additive daily returns.
681 Returns:
682 float: Recovery factor, or ``nan`` if max drawdown is zero.
684 """
685 max_dd = _to_float(_drawdown_series(series).max())
686 if max_dd <= 0:
687 return float("nan")
688 total_return = _to_float(series.sum())
689 return total_return / max_dd
691 def rolling_sharpe(self, window: int = 63, periods: int | float | None = None) -> pl.DataFrame:
692 """Compute rolling annualised Sharpe ratio over a sliding window.
694 Args:
695 window: Number of periods in the rolling window. Defaults to 63.
696 periods: Number of periods per year for annualisation. Defaults to
697 ``periods_per_year``.
699 Returns:
700 pl.DataFrame: A DataFrame with the date column (when present) and
701 one column per asset. The first ``window - 1`` rows will be
702 null.
704 Raises:
705 ValueError: If ``window`` is not a positive integer.
707 """
708 if not isinstance(window, int) or window <= 0:
709 raise ValueError
711 scale = np.sqrt(periods or self.periods_per_year)
713 exprs = [
714 (
715 pl.col(asset).rolling_mean(window_size=window) / pl.col(asset).rolling_std(window_size=window) * scale
716 ).alias(asset)
717 for asset in self.assets
718 ]
720 cols: list[str | pl.Expr] = (["date"] if "date" in self.data.columns else []) + exprs
721 return self.data.select(cols)
723 def rolling_volatility(
724 self, window: int = 63, periods: int | float | None = None, annualize: bool = True
725 ) -> pl.DataFrame:
726 """Compute rolling volatility over a sliding window.
728 Args:
729 window: Number of periods in the rolling window. Defaults to 63.
730 periods: Number of periods per year for annualisation. Defaults to
731 ``periods_per_year``.
732 annualize: Whether to annualise the result by multiplying by
733 ``sqrt(periods)``. Defaults to True.
735 Returns:
736 pl.DataFrame: A DataFrame with the date column (when present) and
737 one column per asset. The first ``window - 1`` rows will be
738 null.
740 Raises:
741 ValueError: If ``window`` is not a positive integer.
742 TypeError: If ``periods`` is not numeric.
744 """
745 if not isinstance(window, int) or window <= 0:
746 raise ValueError
748 raw_periods = periods or self.periods_per_year
749 if not isinstance(raw_periods, int | float):
750 raise TypeError
752 factor = np.sqrt(raw_periods) if annualize else 1.0
754 exprs = [(pl.col(asset).rolling_std(window_size=window) * factor).alias(asset) for asset in self.assets]
756 cols: list[str | pl.Expr] = (["date"] if "date" in self.data.columns else []) + exprs
757 return self.data.select(cols)
759 def annual_breakdown(self) -> pl.DataFrame:
760 """Return summary statistics broken down by calendar year.
762 Groups the data by calendar year using the ``date`` column, computes
763 a full :py:meth:`summary` for each year, and stacks the results into
764 a single DataFrame with an additional ``year`` column.
766 Returns:
767 pl.DataFrame: A DataFrame with columns ``year``, ``metric``, and
768 one column per asset, sorted by ``year``.
770 Raises:
771 ValueError: If the DataFrame has no ``date`` column.
773 """
774 if "date" not in self.data.columns:
775 raise ValueError
777 years = self.data["date"].dt.year().unique().sort().to_list()
779 frames: list[pl.DataFrame] = []
780 for year in years:
781 year_data = self.data.filter(self.data["date"].dt.year() == year)
782 if year_data.height < 2:
783 continue
784 year_summary = Stats(year_data).summary()
785 year_summary = year_summary.with_columns(pl.lit(year).alias("year"))
786 frames.append(year_summary)
788 if not frames:
789 # Build empty DataFrame with expected schema
790 schema = {"year": pl.Int32, "metric": pl.String, **dict.fromkeys(self.assets, pl.Float64)}
791 return pl.DataFrame(schema=schema)
793 result = pl.concat(frames)
794 # Move 'year' to front
795 ordered = ["year", "metric", *[c for c in result.columns if c not in ("year", "metric")]]
796 return result.select(ordered)
798 def summary(self) -> pl.DataFrame:
799 """Return a DataFrame summarising all statistics for each asset.
801 Each row corresponds to one statistical metric; each column (beyond
802 the ``metric`` column) corresponds to one asset in the portfolio.
804 Returns:
805 pl.DataFrame: A DataFrame with a ``metric`` column followed by one
806 column per asset, containing the computed statistic values.
808 """
809 metrics: dict[str, dict[str, float | int | None] | dict[str, float | int]] = {
810 "avg_return": self.avg_return(),
811 "avg_win": self.avg_win(),
812 "avg_loss": self.avg_loss(),
813 "win_rate": self.win_rate(),
814 "profit_factor": self.profit_factor(),
815 "payoff_ratio": self.payoff_ratio(),
816 "monthly_win_rate": self.monthly_win_rate(),
817 "best": self.best(),
818 "worst": self.worst(),
819 "volatility": self.volatility(),
820 "sharpe": self.sharpe(),
821 "skew": self.skew(),
822 "kurtosis": self.kurtosis(),
823 "value_at_risk": self.value_at_risk(),
824 "conditional_value_at_risk": self.conditional_value_at_risk(),
825 "max_drawdown": self.max_drawdown(),
826 "avg_drawdown": self.avg_drawdown(),
827 "max_drawdown_duration": self.max_drawdown_duration(),
828 "calmar": self.calmar(),
829 "recovery_factor": self.recovery_factor(),
830 }
832 rows: list[dict[str, object]] = [
833 {"metric": name, **{asset: values[asset] for asset in self.assets}} for name, values in metrics.items()
834 ]
836 return pl.DataFrame(rows)
838 @property
839 def periods_per_year(self) -> float:
840 """Estimate the number of periods per year from timestamp spacing.
842 Computes the average spacing (in seconds) between consecutive timestamps using
843 plain Python datetimes to avoid ambiguity around Polars Duration arithmetic,
844 then returns 365 * 24 * 3600 divided by that spacing.
846 Returns:
847 float: Estimated number of observations per calendar year.
848 """
849 # Extract datetime values as Python objects (assuming a single datetime column)
850 col_name = self.data.columns[0]
851 dates = self.data[col_name]
853 # Index is guaranteed to have at least two rows by __post_init__,
854 # so we can compute gaps directly after sorting.
855 dates = dates.sort()
856 # Compute successive differences in seconds
857 gaps = dates.diff().drop_nulls()
859 mean_diff = gaps.mean()
861 # Convert Duration (timedelta) to seconds
862 if isinstance(mean_diff, timedelta):
863 seconds = mean_diff.total_seconds()
864 elif mean_diff is not None:
865 seconds = _to_float(mean_diff)
866 else:
867 # Fallback to daily if mean_diff is None
868 seconds = 86400.0
870 return (365.0 * 24.0 * 60.0 * 60.0) / seconds