Coverage for src/jquantstats/portfolio.py: 100%

191 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 06:13 +0000

1"""Portfolio analytics class for quant finance. 

2 

3This module provides `Portfolio`, a frozen dataclass that stores the 

4raw portfolio inputs (prices, cash positions, AUM) and exposes both the 

5derived data series and the full analytics / visualisation suite. 

6 

7The class is composed from four focused mixin modules: 

8 

9- `PortfolioNavMixin` — NAV & returns chain 

10- `PortfolioAttributionMixin` — tilt/timing attribution 

11- `PortfolioTurnoverMixin` — turnover analytics 

12- `PortfolioCostMixin` — cost analysis 

13 

14Public API is unchanged: 

15 

16- Derived data series — `profits`, `profit`, `nav_accumulated`, 

17 `returns`, `monthly`, `nav_compounded`, `highwater`, 

18 `drawdown`, `all` 

19- Lazy composition accessors — `stats`, `plots`, `report` 

20- Portfolio transforms — `truncate`, `lag`, `smoothed_holding` 

21- Attribution — `tilt`, `timing`, `tilt_timing_decomp` 

22- Turnover analysis — `turnover`, `turnover_weekly`, `turnover_summary` 

23- Cost analysis — `cost_adjusted_returns`, `trading_cost_impact` 

24- Utility — `correlation` 

25""" 

26 

27import dataclasses 

28from datetime import date, datetime 

29from typing import TYPE_CHECKING, Self, cast 

30 

31if TYPE_CHECKING: 

32 from ._stats import Stats as Stats 

33 from ._utils import PortfolioUtils as PortfolioUtils 

34 from .data import Data as Data 

35 

36import polars as pl 

37import polars.selectors as cs 

38 

39from ._cache import cached_in_slot 

40from ._cost_model import CostModel 

41from ._plots import PortfolioPlots 

42from ._portfolio_attribution import PortfolioAttributionMixin 

43from ._portfolio_cost import PortfolioCostMixin 

44from ._portfolio_nav import PortfolioNavMixin 

45from ._portfolio_turnover import PortfolioTurnoverMixin 

46from ._reports import Report 

47from .exceptions import ( 

48 IntegerIndexBoundError, 

49 InvalidCashPositionTypeError, 

50 InvalidPricesTypeError, 

51 NonPositiveAumError, 

52 PositionExprColumnError, 

53 RowCountMismatchError, 

54 UncleanSeriesError, 

55) 

56 

57 

def _evaluate_position_expr(prices: pl.DataFrame, expr: pl.Expr, param: str) -> pl.DataFrame:
    """Apply a position expression to *prices* and reject newly created columns.

    The expression is evaluated with ``prices.with_columns(expr)``. Any column
    in the result that is not already a column of *prices* means the
    expression wrote somewhere else (for example through ``.alias``), which
    would leave the asset columns holding raw prices.

    Args:
        prices: Price levels per asset over time.
        expr: Polars expression producing positions.
        param: Name of the parameter the expression was passed as; forwarded
            to the raised error.

    Returns:
        The evaluated frame, whose columns are all columns of *prices*.

    Raises:
        PositionExprColumnError: If the evaluated frame contains columns that
            do not exist in *prices*.
    """
    known = set(prices.columns)
    result = prices.with_columns(expr)
    # Keep the frame's column order so the error lists offenders predictably.
    unexpected = [column for column in result.columns if column not in known]
    if not unexpected:
        return result
    raise PositionExprColumnError(param, unexpected)

82 

83 

# Names of the slot fields that ``Portfolio`` uses as lazy caches.
# ``Portfolio.__post_init__`` iterates this tuple and sets every listed slot to
# ``None``; `cached_in_slot` then fills a slot on first property access.
# Each name matches an ``init=False`` dataclass field declared on ``Portfolio``,
# so the two lists have to be kept in sync when a cache is added or removed.
_CACHE_SLOTS = (
    "_data_bridge",
    "_stats_cache",
    "_plots_cache",
    "_report_cache",
    "_utils_cache",
    "_profits_cache",
    "_returns_cache",
    "_tilt_cache",
    "_turnover_cache",
)

97 

98 

@dataclasses.dataclass(frozen=True, slots=True)
class Portfolio(
    PortfolioNavMixin,
    PortfolioAttributionMixin,
    PortfolioTurnoverMixin,
    PortfolioCostMixin,
):
    """Portfolio analytics class for quant finance.

    Stores the three raw inputs — cash positions, prices, and AUM — and
    exposes the standard derived data series, analytics facades, transforms,
    and attribution tools.

    Derived data series:

    - `profits` — per-asset daily cash P&L
    - `profit` — aggregate daily portfolio profit
    - `nav_accumulated` — cumulative additive NAV
    - `nav_compounded` — compounded NAV
    - `returns` — daily returns (profit / AUM)
    - `monthly` — monthly compounded returns
    - `highwater` — running high-water mark
    - `drawdown` — drawdown from high-water mark
    - `all` — merged view of all derived series

    Other public API:

    - Lazy composition accessors: `stats`, `plots`, `report`
    - Portfolio transforms: `truncate`, `lag`,
      `smoothed_holding`
    - Attribution: `tilt`, `timing`, `tilt_timing_decomp`
    - Turnover: `turnover`, `turnover_weekly`,
      `turnover_summary`
    - Cost analysis: `cost_adjusted_returns`,
      `trading_cost_impact`
    - Utility: `correlation`

    Attributes:
        cashposition: Polars DataFrame of positions per asset over time
            (includes date column if present). Must have the same number of
            rows as ``prices``.
        prices: Polars DataFrame of prices per asset over time (includes date
            column if present).
        aum: Assets under management used as base NAV offset. Must be
            strictly positive.
        cost_per_unit: One-way trading cost per unit of position change.
            Defaults to ``0.0``.
        cost_bps: One-way trading cost in basis points of AUM turnover.
            Defaults to ``0.0``.

    Analytics facades
    -----------------
    - ``.stats``   : delegates to the legacy ``Stats`` pipeline via ``.data``; all 50+ metrics available.
    - ``.plots``   : portfolio-specific ``Plots``; NAV overlays, lead-lag IR, rolling Sharpe/vol, heatmaps.
    - ``.report``  : HTML ``Report``; self-contained portfolio performance report.
    - ``.data``    : bridge to the legacy ``Data`` / ``Stats`` / ``DataPlots`` pipeline.

    ``.plots`` and ``.report`` are intentionally *not* delegated to the legacy path: the legacy
    path operates on a bare returns series, while the analytics path has access to raw prices,
    positions, and AUM for richer portfolio-specific visualisations.

    Cost models
    -----------
    Two independent cost models are provided. They are not interchangeable:

    **Model A — position-delta (stateful, set at construction):**
        ``cost_per_unit: float`` — one-way cost per unit of position change (e.g. 0.01 per share).
        Used by ``.position_delta_costs`` and ``.net_cost_nav``.
        Best for: equity portfolios where cost scales with shares traded.

    **Model B — turnover-bps (stateless, passed at call time):**
        ``cost_bps: float`` — one-way cost in basis points of AUM turnover (e.g. 5 bps).
        Used by ``.cost_adjusted_returns(cost_bps)`` and ``.trading_cost_impact(max_bps)``.
        Best for: macro / fund-of-funds portfolios where cost scales with notional traded.

    To sweep a range of cost assumptions use ``trading_cost_impact(max_bps=20)`` (Model B).
    To compute a net-NAV curve set ``cost_per_unit`` at construction and read ``.net_cost_nav`` (Model A).

    Date column requirement
    -----------------------
    Most analytics work with or without a ``date`` column.  The following features require a
    temporal ``date`` column (``pl.Date`` or ``pl.Datetime``):

    - ``portfolio.plots.correlation_heatmap()``
    - ``portfolio.plots.lead_lag_ir_plot()``
    - ``stats.monthly_win_rate()`` — returns NaN per column when no date is present
    - ``stats.annual_breakdown()`` — raises ``ValueError`` when no date is present
    - ``stats.max_drawdown_duration()`` — returns period count (int) instead of days

    Portfolios without a ``date`` column (integer-indexed) are fully supported for
    NAV, returns, Sharpe, drawdown, cost analytics, and most rolling metrics.

    Examples:
        >>> import polars as pl
        >>> from datetime import date
        >>> prices = pl.DataFrame({"date": [date(2020, 1, 1), date(2020, 1, 2)], "A": [100.0, 110.0]})
        >>> pos = pl.DataFrame({"date": [date(2020, 1, 1), date(2020, 1, 2)], "A": [1000.0, 1000.0]})
        >>> pf = Portfolio(prices=prices, cashposition=pos, aum=1e6)
        >>> pf.assets
        ['A']
    """

    # ── Raw inputs (constructor arguments) ────────────────────────────────────
    cashposition: pl.DataFrame
    prices: pl.DataFrame
    aum: float
    cost_per_unit: float = 0.0
    # NOTE(review): the class docstring describes the bps model as "passed at
    # call time", yet a value is also stored here — confirm in the cost mixin
    # how the stored value and the call-time argument interact.
    cost_bps: float = 0.0

    # ── Internal cache fields ─────────────────────────────────────────────────
    # None of these is a constructor argument (``init=False``), and all are
    # excluded from ``repr``, comparison and hashing.
    #
    # All cache fields are initialised to ``None`` in ``__post_init__`` via
    # ``object.__setattr__`` (required for frozen dataclasses) and populated
    # lazily on first property access.
    #
    # Lifecycle:
    # - Initialised: ``__post_init__`` sets every field to ``None``.
    # - Populated: each property computes its value on the first call and
    #   writes it back via ``object.__setattr__``.
    # - Invalidation: not required — ``Portfolio`` is a *frozen* dataclass,
    #   so its inputs never change and all derived values remain valid for the
    #   lifetime of the instance.
    _data_bridge: "Data | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _stats_cache: "Stats | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _plots_cache: "PortfolioPlots | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _report_cache: "Report | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _utils_cache: "PortfolioUtils | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _profits_cache: "pl.DataFrame | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _returns_cache: "pl.DataFrame | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _tilt_cache: "Portfolio | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)
    _turnover_cache: "pl.DataFrame | None" = dataclasses.field(init=False, repr=False, compare=False, hash=False)

220 

@staticmethod
def _build_data_bridge(ret: pl.DataFrame) -> "Data":
    """Wrap a returns frame in a legacy `Data` object.

    Only the ``'returns'`` column of *ret* is forwarded as the returns
    payload. The index is the ``'date'`` column when *ret* has one;
    otherwise it is a synthetic single-column frame named ``'index'``
    holding ``0 .. ret.height - 1``.

    Args:
        ret: Returns DataFrame with a ``'returns'`` column and, optionally,
            a ``'date'`` column.

    Returns:
        A `Data` instance built from *ret*.
    """
    # Imported here rather than at module level (the module only imports
    # ``Data`` under ``TYPE_CHECKING``).
    from .data import Data

    payload = ret.select("returns")
    if "date" not in ret.columns:
        synthetic_index = pl.DataFrame({"index": list(range(ret.height))})
        return Data(returns=payload, index=synthetic_index)
    return Data(returns=payload, index=ret.select("date"))

241 

def __post_init__(self) -> None:
    """Validate the constructor inputs and reset every lazy cache slot.

    Checks, in order: ``prices`` is a Polars DataFrame, ``cashposition`` is
    a Polars DataFrame, both have the same number of rows, and ``aum`` is
    strictly positive. Afterwards every slot named in ``_CACHE_SLOTS`` is
    set to ``None``.

    Raises:
        InvalidPricesTypeError: If ``prices`` is not a ``pl.DataFrame``.
        InvalidCashPositionTypeError: If ``cashposition`` is not a
            ``pl.DataFrame``.
        RowCountMismatchError: If the two frames differ in row count.
        NonPositiveAumError: If ``aum`` is zero, negative, or NaN.
    """
    if not isinstance(self.prices, pl.DataFrame):
        raise InvalidPricesTypeError(type(self.prices).__name__)
    if not isinstance(self.cashposition, pl.DataFrame):
        raise InvalidCashPositionTypeError(type(self.cashposition).__name__)
    if self.cashposition.shape[0] != self.prices.shape[0]:
        raise RowCountMismatchError(self.prices.shape[0], self.cashposition.shape[0])
    # Written as ``not (aum > 0)`` instead of ``aum <= 0`` so that NaN, for
    # which every comparison is False, is rejected too instead of silently
    # propagating into every AUM-scaled series.
    if not self.aum > 0.0:
        raise NonPositiveAumError(self.aum)
    # The dataclass is frozen, so plain attribute assignment is blocked;
    # ``object.__setattr__`` is the sanctioned way to initialise the slots.
    for slot in _CACHE_SLOTS:
        object.__setattr__(self, slot, None)

254 

def _date_range(self) -> tuple[int, date | datetime | None, date | datetime | None]:
    """Return ``(rows, start, end)`` for the portfolio's returns series.

    ``rows`` is the height of ``self.returns``. ``start`` and ``end`` are the
    minimum and maximum of its ``'date'`` column, or ``None`` for both when
    the returns frame has no ``'date'`` column.
    """
    frame = self.returns
    if "date" not in frame.columns:
        return frame.height, None, None
    dates = frame["date"]
    first = cast(date | None, dates.min())
    last = cast(date | None, dates.max())
    return frame.height, first, last

265 

@property
def cost_model(self) -> CostModel:
    """Bundle this portfolio's cost parameters into a `CostModel`.

    Returns:
        A new `CostModel` carrying the ``cost_per_unit`` and ``cost_bps``
        values stored on this portfolio.
    """
    per_unit, bps = self.cost_per_unit, self.cost_bps
    return CostModel(cost_per_unit=per_unit, cost_bps=bps)

275 

def __repr__(self) -> str:
    """Return a short summary: assets, row count and, if dated, the date span."""
    n_rows, first, last = self._date_range()
    # Without a start date there is no date span to report.
    if first is None:
        return f"Portfolio(assets={self.assets}, rows={n_rows})"
    return f"Portfolio(assets={self.assets}, rows={n_rows}, start={first}, end={last})"

282 

def describe(self) -> pl.DataFrame:
    """Summarise the portfolio as one row per asset.

    Every row repeats the same portfolio-wide values taken from
    ``_date_range()``: the date range (``None`` when there is no ``'date'``
    column) and the row count.

    Returns:
        A ``pl.DataFrame`` with the columns ``asset``, ``start``, ``end``
        and ``rows``.

    Examples:
        >>> import polars as pl
        >>> from datetime import date
        >>> prices = pl.DataFrame({"date": [date(2020, 1, 1), date(2020, 1, 2)], "A": [100.0, 110.0]})
        >>> pos = pl.DataFrame({"date": [date(2020, 1, 1), date(2020, 1, 2)], "A": [1000.0, 1000.0]})
        >>> pf = Portfolio(prices=prices, cashposition=pos, aum=1e6)
        >>> df = pf.describe()
        >>> list(df.columns)
        ['asset', 'start', 'end', 'rows']
    """
    n_rows, first, last = self._date_range()
    names = self.assets
    n_assets = len(names)
    summary = {
        "asset": names,
        "start": [first] * n_assets,
        "end": [last] * n_assets,
        "rows": [n_rows] * n_assets,
    }
    return pl.DataFrame(summary)

310 

311 # ── Factory classmethods ────────────────────────────────────────────────── 

312 

@classmethod
def from_risk_position(
    cls,
    prices: pl.DataFrame,
    risk_position: pl.DataFrame | pl.Expr,
    aum: float,
    vola: int | dict[str, int] = 32,
    vol_cap: float | None = None,
    cost_per_unit: float = 0.0,
    cost_bps: float = 0.0,
    cost_model: CostModel | None = None,
) -> Self:
    """Create a Portfolio from per-asset risk positions.

    De-volatizes each risk position by dividing it by an EWMA volatility
    estimate of the corresponding price series' percentage changes.

    Args:
        prices: Price levels per asset over time (may include a date column).
            Its numeric columns define the assets.
        risk_position: Risk units per asset aligned with prices, either as a
            DataFrame or as a Polars expression evaluated against *prices*.
        aum: Assets under management used as the base NAV offset.
        vola: EWMA lookback per asset. The value ``s`` is passed to
            ``ewm_std`` as ``com = s - 1`` and also as ``min_samples``.
            Pass an ``int`` to apply the same value to every asset, or a
            ``dict[str, int]`` to set it per asset (assets absent from the
            dict default to ``32``). Every value must be positive after
            ``int()`` conversion, and dict keys must name numeric columns
            of *prices*.
        vol_cap: Optional lower bound (a floor, despite the name) for the
            volatility estimate. When provided, the vol series is clipped
            from below at this value before dividing the risk position,
            preventing position blow-up in calm, low-volatility regimes.
            The bound is compared against the raw per-period estimate; no
            annualisation is applied in this method. Must be positive when
            not ``None``.
        cost_per_unit: One-way trading cost per unit of position change.
            Defaults to 0.0 (no cost). Ignored when *cost_model* is given.
        cost_bps: One-way trading cost in basis points of AUM turnover.
            Defaults to 0.0 (no cost). Ignored when *cost_model* is given.
        cost_model: Optional `CostModel` instance. When supplied, its
            ``cost_per_unit`` and ``cost_bps`` values take precedence over
            the individual parameters above.

    Returns:
        A Portfolio instance whose cash positions are risk_position
        divided by EWMA volatility.

    Raises:
        ValueError: If any value in *vola* is ≤ 0, or if a key in a
            *vola* dict does not match any numeric column in *prices*, or
            if *vol_cap* is provided but is not positive.
        PositionExprColumnError: If *risk_position* is an expression that
            creates columns not present in *prices*.
    """
    default_span = 32

    if isinstance(risk_position, pl.Expr):
        risk_position = _evaluate_position_expr(prices, risk_position, "risk_position")
    assets = [col for col, dtype in prices.schema.items() if dtype.is_numeric()]

    # ── Validate vol_cap ──────────────────────────────────────────────────
    if vol_cap is not None and vol_cap <= 0:
        raise ValueError(f"vol_cap must be a positive number when provided, got {vol_cap!r}")  # noqa: TRY003

    # ── Validate vola and resolve one span per asset, once ────────────────
    if isinstance(vola, dict):
        unknown = set(vola) - set(assets)
        if unknown:
            raise ValueError(  # noqa: TRY003
                f"vola dict contains keys that do not match any numeric column in prices: {sorted(unknown)}"
            )
        for asset, span in vola.items():
            if int(span) <= 0:
                raise ValueError(f"vola span for '{asset}' must be a positive integer, got {span!r}")  # noqa: TRY003
        spans = {asset: int(vola.get(asset, default_span)) for asset in assets}
    else:
        if int(vola) <= 0:
            raise ValueError(f"vola span must be a positive integer, got {vola!r}")  # noqa: TRY003
        spans = dict.fromkeys(assets, int(vola))

    def _vol(asset: str) -> pl.Series:
        """Return the EWMA volatility series for *asset*, optionally clipped from below."""
        span = spans[asset]
        vol = prices[asset].pct_change().ewm_std(com=span - 1, adjust=True, min_samples=span)
        if vol_cap is not None:
            vol = vol.clip(lower_bound=vol_cap)
        return vol

    cash_position = risk_position.with_columns((pl.col(asset) / _vol(asset)).alias(asset) for asset in assets)
    # Delegate construction (including cost-model precedence) to the same
    # factory that ``from_position`` uses, so the rule lives in one place.
    return cls.from_cash_position(
        prices=prices,
        cash_position=cash_position,
        aum=aum,
        cost_per_unit=cost_per_unit,
        cost_bps=cost_bps,
        cost_model=cost_model,
    )

407 

@classmethod
def from_position(
    cls,
    prices: pl.DataFrame,
    position: pl.DataFrame | pl.Expr,
    aum: float,
    cost_per_unit: float = 0.0,
    cost_bps: float = 0.0,
    cost_model: CostModel | None = None,
) -> Self:
    """Create a Portfolio from share/unit positions.

    Turns *position* (units held per asset) into cash exposure by
    multiplying each numeric column of *prices* element-wise with the
    column of the same name in *position*, then hands the result to
    `from_cash_position`.

    Args:
        prices: Price levels per asset over time (may include a date column).
        position: Number of units held per asset over time, aligned with
            *prices*, either as a DataFrame or as a Polars expression
            evaluated against *prices*. Columns that are not numeric
            columns of *prices* (e.g. ``'date'``) are passed through
            unchanged.
        aum: Assets under management used as the base NAV offset.
        cost_per_unit: One-way trading cost per unit of position change.
            Defaults to 0.0 (no cost). Ignored when *cost_model* is given.
        cost_bps: One-way trading cost in basis points of AUM turnover.
            Defaults to 0.0 (no cost). Ignored when *cost_model* is given.
        cost_model: Optional `CostModel` instance. When supplied, its
            ``cost_per_unit`` and ``cost_bps`` values take precedence over
            the individual parameters above.

    Returns:
        A Portfolio instance whose cash positions equal *position* x *prices*.

    Raises:
        PositionExprColumnError: If *position* is an expression that
            creates columns not present in *prices*.

    Examples:
        >>> import polars as pl
        >>> prices = pl.DataFrame({"A": [100.0, 110.0, 105.0]})
        >>> pos = pl.DataFrame({"A": [10.0, 10.0, 10.0]})
        >>> pf = Portfolio.from_position(prices=prices, position=pos, aum=1e6)
        >>> pf.cashposition["A"].to_list()
        [1000.0, 1100.0, 1050.0]
    """
    units = position
    if isinstance(units, pl.Expr):
        units = _evaluate_position_expr(prices, units, "position")
    numeric_columns = [name for name, dtype in prices.schema.items() if dtype.is_numeric()]
    exposures = [(pl.col(name) * prices[name]).alias(name) for name in numeric_columns]
    return cls.from_cash_position(
        prices=prices,
        cash_position=units.with_columns(exposures),
        aum=aum,
        cost_per_unit=cost_per_unit,
        cost_bps=cost_bps,
        cost_model=cost_model,
    )

465 

@classmethod
def from_cash_position(
    cls,
    prices: pl.DataFrame,
    cash_position: pl.DataFrame | pl.Expr,
    aum: float,
    cost_per_unit: float = 0.0,
    cost_bps: float = 0.0,
    cost_model: CostModel | None = None,
) -> Self:
    """Create a Portfolio directly from cash positions aligned with prices.

    Args:
        prices: Price levels per asset over time (may include a date column).
        cash_position: Cash exposure per asset over time, either as a
            DataFrame or as a Polars expression evaluated against *prices*.
        aum: Assets under management used as the base NAV offset.
        cost_per_unit: One-way trading cost per unit of position change.
            Defaults to 0.0 (no cost). Ignored when *cost_model* is given.
        cost_bps: One-way trading cost in basis points of AUM turnover.
            Defaults to 0.0 (no cost). Ignored when *cost_model* is given.
        cost_model: Optional `CostModel` instance. When supplied, its
            ``cost_per_unit`` and ``cost_bps`` values take precedence over
            the individual parameters above.

    Returns:
        A Portfolio instance with the provided cash positions.

    Raises:
        PositionExprColumnError: If *cash_position* is an expression that
            creates columns not present in *prices* (e.g. via ``.alias``);
            such expressions leave the original asset columns untouched,
            silently treating raw prices as positions.
    """
    positions = (
        _evaluate_position_expr(prices, cash_position, "cash_position")
        if isinstance(cash_position, pl.Expr)
        else cash_position
    )
    # An explicit cost model overrides both scalar cost arguments.
    if cost_model is not None:
        cost_per_unit, cost_bps = cost_model.cost_per_unit, cost_model.cost_bps
    return cls(
        prices=prices,
        cashposition=positions,
        aum=aum,
        cost_per_unit=cost_per_unit,
        cost_bps=cost_bps,
    )

507 

508 # ── Internal helpers ─────────────────────────────────────────────────────── 

509 

@staticmethod
def _assert_clean_series(series: pl.Series, name: str = "") -> None:
    """Reject a series that contains nulls or non-finite values.

    Nulls are checked first, so a series with both problems is reported as
    ``"null"``.

    Args:
        series: The series to validate.
        name: Optional series name forwarded to the raised error.

    Raises:
        UncleanSeriesError: With reason ``"null"`` if the series holds any
            null, otherwise with reason ``"non-finite"`` if any value is
            not finite.
    """
    if series.null_count() > 0:
        raise UncleanSeriesError(name, "null")
    every_value_finite = series.is_finite().all()
    if every_value_finite:
        return
    raise UncleanSeriesError(name, "non-finite")

525 

526 # ── Core data properties ─────────────────────────────────────────────────── 

527 

@property
def assets(self) -> list[str]:
    """Names of the numeric columns of ``prices``, in frame order.

    Returns:
        list[str]: One entry per numeric column; a temporal ``'date'``
        column is not numeric and is therefore left out.
    """
    return [name for name, dtype in self.prices.schema.items() if dtype.is_numeric()]

537 

538 # ── Lazy composition accessors ───────────────────────────────────────────── 

539 

@property
@cached_in_slot("_data_bridge")
def data(self) -> "Data":
    """Build a legacy `Data` object from this portfolio's returns.

    This bridges the two entry points: ``Portfolio`` compiles the NAV curve
    from prices and positions; the returned `Data` object gives access to
    the full legacy analytics pipeline (``data.stats``, ``data.plots``,
    ``data.reports``).

    Returns:
        `Data`: A Data object whose ``returns`` column is the portfolio's
        daily return series and whose ``index`` holds the date column (or
        a synthetic integer index for date-free portfolios).

    Examples:
        >>> import polars as pl
        >>> from datetime import date
        >>> prices = pl.DataFrame({"date": [date(2020, 1, 1), date(2020, 1, 2)], "A": [100.0, 110.0]})
        >>> pos = pl.DataFrame({"date": [date(2020, 1, 1), date(2020, 1, 2)], "A": [1000.0, 1000.0]})
        >>> pf = Portfolio(prices=prices, cashposition=pos, aum=1e6)
        >>> d = pf.data
        >>> "returns" in d.returns.columns
        True
    """
    returns_frame = self.returns
    return Portfolio._build_data_bridge(returns_frame)

566 

@property
@cached_in_slot("_stats_cache")
def stats(self) -> "Stats":
    """Return the `Stats` object of this portfolio's `data` bridge.

    All analytics (Sharpe, drawdown, summary, etc.) therefore come from the
    shared legacy `Stats` implementation.

    The result is cached after first access so repeated calls are O(1).

    Returns:
        `Stats`: The ``stats`` attribute of ``self.data``.
    """
    bridge = self.data
    return bridge.stats

579 

@property
@cached_in_slot("_plots_cache")
def plots(self) -> PortfolioPlots:
    """Return a `PortfolioPlots` facade bound to this portfolio.

    Use this to create Plotly visualizations such as snapshots, lagged
    performance curves, and lead/lag IR charts.

    The result is cached after first access so repeated calls are O(1).

    Returns:
        `PortfolioPlots`: Helper object with plotting methods.
    """
    facade = PortfolioPlots(self)
    return facade

595 

@property
@cached_in_slot("_report_cache")
def report(self) -> Report:
    """Return a `Report` facade bound to this portfolio.

    Use this to generate a self-contained HTML performance report
    containing statistics tables and interactive charts.

    The result is cached after first access so repeated calls are O(1).

    Returns:
        `Report`: Helper object with report methods.
    """
    facade = Report(self)
    return facade

611 

@property
@cached_in_slot("_utils_cache")
def utils(self) -> "PortfolioUtils":
    """Return a `PortfolioUtils` facade bound to this portfolio.

    Use this for common data transformations such as converting returns to
    prices, computing log returns, rebasing, aggregating by period, and
    computing exponential standard deviation.

    The result is cached after first access so repeated calls are O(1).

    Returns:
        `PortfolioUtils`: Helper object with utility transform methods.
    """
    # Imported on use; at module level the name exists only for type checking.
    from ._utils import PortfolioUtils

    facade = PortfolioUtils(self)
    return facade

630 

631 # ── Portfolio transforms ─────────────────────────────────────────────────── 

632 

def truncate(
    self,
    start: date | datetime | str | int | None = None,
    end: date | datetime | str | int | None = None,
) -> "Portfolio":
    """Return a new Portfolio truncated to the inclusive [start, end] range.

    The mode is chosen by whether ``prices`` has a ``'date'`` column:

    - With a ``'date'`` column, rows of both ``prices`` and
      ``cashposition`` are kept when their ``'date'`` lies within the
      bounds, so ``cashposition`` needs a ``'date'`` column as well.
    - Without one, ``start`` and ``end`` are 0-based row indices and the
      frames are sliced by position. The bounds are expected to be
      non-negative; negative values are not rejected here.

    A bound left as ``None`` is open. ``aum``, ``cost_per_unit`` and
    ``cost_bps`` are carried over unchanged.

    Args:
        start: Optional lower bound (inclusive). A date/datetime value (or
            a string Polars can compare against the column) in date mode;
            an int row index otherwise.
        end: Optional upper bound (inclusive). Same type rules as
            ``start``.

    Returns:
        A new Portfolio instance with prices and cash positions restricted
        to the specified range.

    Raises:
        IntegerIndexBoundError: When the portfolio has no ``'date'`` column
            and a non-integer bound is supplied. (Earlier documentation
            called this a ``TypeError``; presumably the class derives from
            it — confirm in ``exceptions``.)
    """
    if "date" in self.prices.columns:
        date_col = pl.col("date")
        keep = pl.lit(True)
        if start is not None:
            keep = keep & (date_col >= pl.lit(start))
        if end is not None:
            keep = keep & (date_col <= pl.lit(end))
        new_prices = self.prices.filter(keep)
        new_positions = self.cashposition.filter(keep)
    else:
        for label, bound in (("start", start), ("end", end)):
            if bound is not None and not isinstance(bound, int):
                raise IntegerIndexBoundError(label, type(bound).__name__)
        first_row = 0 if start is None else int(start)
        # ``end`` is inclusive, hence the +1 to get an exclusive stop.
        stop_row = self.prices.height if end is None else int(end) + 1
        n_rows = max(0, stop_row - first_row)
        new_prices = self.prices.slice(first_row, n_rows)
        new_positions = self.cashposition.slice(first_row, n_rows)
    return Portfolio(
        prices=new_prices,
        cashposition=new_positions,
        aum=self.aum,
        cost_per_unit=self.cost_per_unit,
        cost_bps=self.cost_bps,
    )

694 

def lag(self, n: int) -> "Portfolio":
    """Return a new Portfolio with cash positions lagged by ``n`` steps.

    Shifts the numeric asset columns of the cashposition DataFrame by ``n``
    rows, leaving the ``'date'`` column and any non-numeric columns
    unchanged. Positive ``n`` delays weights (moves them down); negative
    ``n`` leads them (moves them up); ``n == 0`` returns this very
    portfolio instance.

    Notes:
        Missing values introduced by the shift are left as nulls;
        downstream profit computation already guards and treats nulls as
        zero when multiplying by returns.

    Args:
        n: Number of rows to shift (can be negative, zero, or positive).

    Returns:
        A new Portfolio instance with lagged cash positions and the same
        prices, AUM and cost parameters as the original.

    Raises:
        TypeError: If ``n`` is not an integer.
    """
    if not isinstance(n, int):
        # Same wording as ``smoothed_holding`` so both transforms report a
        # bad ``n`` identically.
        raise TypeError(f"n must be an integer, got {type(n).__name__}")  # noqa: TRY003
    if n == 0:
        return self

    assets = [c for c in self.cashposition.columns if c != "date" and self.cashposition[c].dtype.is_numeric()]
    cp_lagged = self.cashposition.with_columns(pl.col(c).shift(n) for c in assets)
    return Portfolio(
        prices=self.prices,
        cashposition=cp_lagged,
        aum=self.aum,
        cost_per_unit=self.cost_per_unit,
        cost_bps=self.cost_bps,
    )

730 

def smoothed_holding(self, n: int) -> "Portfolio":
    """Return a new Portfolio with cash positions smoothed by a rolling mean.

    Each numeric cashposition column other than ``'date'`` is replaced by
    its trailing mean over a window of ``n + 1`` rows, so that:

    - n=0 returns this very portfolio instance (no smoothing),
    - n=1 averages the current and previous weights,
    - n=k averages the current and last k weights.

    Windows near the start of the series are averaged over the rows that
    exist (``min_samples=1``).

    Args:
        n: Non-negative integer specifying how many previous steps to
            include.

    Returns:
        A new Portfolio with smoothed cash positions and the same prices,
        AUM and cost parameters.

    Raises:
        TypeError: If ``n`` is not an integer.
        ValueError: If ``n`` is negative.
    """
    if not isinstance(n, int):
        raise TypeError(f"n must be an integer, got {type(n).__name__}")  # noqa: TRY003
    if n < 0:
        raise ValueError(f"n must be a non-negative integer, got {n}")  # noqa: TRY003
    if n == 0:
        return self

    numeric_columns = [
        column
        for column, dtype in self.cashposition.schema.items()
        if column != "date" and dtype.is_numeric()
    ]
    averaged = [
        pl.col(column).rolling_mean(window_size=n + 1, min_samples=1).alias(column)
        for column in numeric_columns
    ]
    return Portfolio(
        prices=self.prices,
        cashposition=self.cashposition.with_columns(averaged),
        aum=self.aum,
        cost_per_unit=self.cost_per_unit,
        cost_bps=self.cost_bps,
    )

769 

770 # ── Utility ──────────────────────────────────────────────────────────────── 

771 

def correlation(self, frame: pl.DataFrame, name: str = "portfolio") -> pl.DataFrame:
    """Compute a correlation matrix of asset returns plus the portfolio.

    Computes percentage changes for all numeric columns in ``frame``
    (integer-typed columns included, so they are not compared as raw
    levels), adds the portfolio profit series under ``name``, and returns
    the correlation matrix across all numeric columns. Nulls — such as the
    first row produced by the percentage change — are replaced by ``0.0``
    before correlating.

    Args:
        frame: A Polars DataFrame containing at least the asset price
            columns (and a date column which will be ignored if
            non-numeric). It must have as many rows as the portfolio's
            profit series.
        name: The column name to use when adding the portfolio profit
            series to the input frame. An existing column of the same name
            is replaced.

    Returns:
        A square Polars DataFrame where each cell is the correlation
        between a pair of series (values in [-1, 1]).
    """
    # Select every numeric dtype, matching the ``cs.numeric()`` selection
    # used for the final matrix below.
    asset_returns = frame.with_columns(cs.numeric().pct_change())
    combined = asset_returns.with_columns(pl.Series(name, self.profit["profit"]))
    return combined.select(cs.numeric()).fill_null(0.0).corr()