Coverage for src / jquantstats / data.py: 99%

183 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-07 14:28 +0000

1"""Financial returns data container and manipulation utilities.""" 

2 

3from __future__ import annotations 

4 

5import dataclasses 

6import warnings 

7from collections.abc import Iterator 

8from datetime import date, datetime, timedelta 

9from typing import TYPE_CHECKING, Literal, cast 

10 

11import narwhals as nw 

12import polars as pl 

13 

14from ._types import NativeFrame, NativeFrameOrScalar 

15from .exceptions import NullsInReturnsError 

16 

17if TYPE_CHECKING: 

18 from ._plots import DataPlots 

19 from ._reports import Reports 

20 from ._stats import Stats 

21 from ._utils import DataUtils 

22 

23 

def _to_polars(df: NativeFrame) -> pl.DataFrame:
    """Return *df* as an eager polars DataFrame.

    A polars DataFrame is passed through untouched; any other
    narwhals-compatible frame is routed through narwhals and
    materialised as polars.
    """
    if not isinstance(df, pl.DataFrame):
        return nw.from_native(df, eager_only=True).to_polars()
    return df

29 

30 

def _apply_null_strategy(
    dframe: pl.DataFrame,
    date_col: str,
    frame_name: str,
    null_strategy: Literal["raise", "drop", "forward_fill"] | None,
) -> pl.DataFrame:
    """Scan *dframe* for nulls and resolve them according to *null_strategy*.

    Parameters
    ----------
    dframe : pl.DataFrame
        DataFrame to inspect. The date column is never part of the null scan.
    date_col : str
        Name of the date-index column (excluded from the null check).
    frame_name : str
        Human-readable name used when raising (e.g. ``"returns"``).
    null_strategy : {"raise", "drop", "forward_fill"} | None
        How nulls are handled:

        - ``None`` — leave nulls untouched so they propagate downstream
          (the default Polars behaviour).
        - ``"raise"`` — raise :exc:`~jquantstats.exceptions.NullsInReturnsError`
          if any null is present.
        - ``"drop"`` — remove every row containing at least one null.
        - ``"forward_fill"`` — replace each null with the most recent
          non-null value in its column.

    Returns:
    -------
    pl.DataFrame
        The input unchanged (``None`` / ``"raise"`` with clean data), a
        row-filtered frame (``"drop"``), or a filled frame (``"forward_fill"``).

    Raises:
    ------
    NullsInReturnsError
        When *null_strategy* is ``"raise"`` and nulls exist.

    """
    # No strategy requested: nulls are intentionally left in place.
    if null_strategy is None:
        return dframe

    data_cols = [name for name in dframe.columns if name != date_col]
    counts = dframe.select(data_cols).null_count().row(0)
    affected = [name for name, n_nulls in zip(data_cols, counts, strict=False) if n_nulls > 0]

    # Clean data short-circuits every strategy.
    if not affected:
        return dframe

    if null_strategy == "raise":
        raise NullsInReturnsError(frame_name, affected)
    if null_strategy == "drop":
        return dframe.drop_nulls(subset=data_cols)
    # Only "forward_fill" remains: fill each value column independently.
    return dframe.with_columns([pl.col(name).forward_fill() for name in data_cols])

86 

87 

def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Return *dframe* with the risk-free rate removed from every asset column.

    Parameters
    ----------
    dframe : pl.DataFrame
        Returns data with a date column and one or more numeric asset columns.

    rf : float | pl.DataFrame
        Risk-free rate to subtract.

        - float: a constant rate applied on every date.
        - pl.DataFrame: a date column plus a second column of
          time-varying rates (renamed internally to ``rf`` if needed).

    date_col : str
        Name of the date column used to join *dframe* and *rf*.

    Returns:
    -------
    pl.DataFrame
        Excess-return frame with the original asset column names preserved.

    Raises:
    ------
    TypeError
        When *rf* is neither a float nor a polars DataFrame.

    """
    if isinstance(rf, float):
        # Constant rate: broadcast it as a literal "rf" column over the dates.
        rf_dframe = dframe.select([pl.col(date_col), pl.lit(rf).alias("rf")])
    elif isinstance(rf, pl.DataFrame):
        rate_col = rf.columns[1]
        if rate_col == "rf":
            rf_dframe = rf
        else:
            # Normalise the rate column name so the join/subtract below can rely on it.
            warnings.warn(
                f"Risk-free rate column '{rate_col}' has been renamed to 'rf' for internal alignment.",
                stacklevel=3,
            )
            rf_dframe = rf.rename({rate_col: "rf"})
    else:
        raise TypeError("rf must be a float or DataFrame")  # noqa: TRY003

    # Inner join: only dates present in both frames survive.
    joined = dframe.join(rf_dframe, on=date_col, how="inner")
    excess_cols = [
        (pl.col(name) - pl.col("rf")).alias(name) for name in joined.columns if name not in {date_col, "rf"}
    ]
    return joined.select([pl.col(date_col)] + excess_cols)

132 

133 

@dataclasses.dataclass(frozen=True, slots=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    This class provides methods for analyzing and manipulating financial returns data,
    including converting returns to prices, calculating drawdowns, and resampling data
    to different time periods. It also provides access to statistical metrics through
    the stats property and visualization through the plots property.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets as columns.
        benchmark (pl.DataFrame, optional): DataFrame containing benchmark returns data.
            Defaults to None.
        index (pl.DataFrame): DataFrame containing the date index for the returns data.

    """

    # returns/benchmark hold only value columns; the date column lives in `index`
    # and all three are kept row-aligned (validated in __post_init__).
    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate the Data object after initialization."""
        # You need at least two points
        if self.index.shape[0] < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Check index is monotonically increasing
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != self.index.shape[0]:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @classmethod
    def from_returns(
        cls,
        returns: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from returns and optional benchmark.

        Parameters
        ----------
        returns : NativeFrame
            Financial returns data. First column should be the date column,
            remaining columns are asset returns.

        rf : float | NativeFrame, optional
            Risk-free rate. Default is 0.0 (no risk-free rate adjustment).

            - If float: Constant risk-free rate applied to all dates.
            - If NativeFrame: Time-varying risk-free rate with dates matching returns.

        benchmark : NativeFrame | None, optional
            Benchmark returns. Default is None (no benchmark).
            First column should be the date column, remaining columns are benchmark returns.

        date_col : str, optional
            Name of the date column in the DataFrames. Default is "Date".

        null_strategy : {"raise", "drop", "forward_fill"} | None, optional
            How to handle ``null`` (missing) values in *returns* and *benchmark*.
            Default is ``None`` (nulls are left as-is and will propagate through
            calculations, matching the current Polars behaviour).

            - ``None`` — no null checking; nulls propagate through all
              downstream calculations. This matches Polars' default semantics.
            - ``"raise"`` — raise :exc:`~jquantstats.exceptions.NullsInReturnsError`
              if any null is found. Use this to be notified of missing data
              and clean it yourself before construction.
            - ``"drop"`` — silently drop every row that contains at least one null.
              Mirrors the pandas/QuantStats silent-drop behaviour.
            - ``"forward_fill"`` — fill each null with the most recent non-null value
              in the same column.

            .. note::
                This parameter affects only Polars ``null`` values (i.e. ``None`` /
                missing entries). IEEE-754 ``NaN`` values (``float("nan")``) are not
                nulls in Polars and are **not** affected — they continue to propagate
                through calculations as per IEEE-754 semantics.

        Returns:
        -------
        Data
            Object containing excess returns and benchmark (if any), with methods for
            analysis and visualization through the ``stats`` and ``plots`` properties.

        Raises:
        ------
        NullsInReturnsError
            If *null_strategy* is ``"raise"`` and the data contains null values.
        ValueError
            If there are no overlapping dates between returns and benchmark.

        Examples:
        --------
        Basic usage:

        ```python
        from jquantstats import Data
        import polars as pl

        returns = pl.DataFrame({
            "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
            "Asset1": [0.01, -0.02, 0.03]
        }).with_columns(pl.col("Date").str.to_date())

        data = Data.from_returns(returns=returns)
        ```

        With benchmark and risk-free rate:

        ```python
        benchmark = pl.DataFrame({
            "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
            "Market": [0.005, -0.01, 0.02]
        }).with_columns(pl.col("Date").str.to_date())

        data = Data.from_returns(returns=returns, benchmark=benchmark, rf=0.0002)
        ```

        Handling nulls automatically:

        ```python
        returns_with_nulls = pl.DataFrame({
            "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
            "Asset1": [0.01, None, 0.03]
        }).with_columns(pl.col("Date").str.to_date())

        # Drop rows with nulls (mirrors pandas/QuantStats behaviour)
        data = Data.from_returns(returns=returns_with_nulls, null_strategy="drop")

        # Or forward-fill nulls
        data = Data.from_returns(returns=returns_with_nulls, null_strategy="forward_fill")
        ```

        """
        returns_pl = _to_polars(returns)
        benchmark_pl = _to_polars(benchmark) if benchmark is not None else None
        rf_converted: float | pl.DataFrame
        # Anything that is not a float/int (e.g. a native frame) is converted to polars.
        if isinstance(rf, pl.DataFrame) or (not isinstance(rf, float) and not isinstance(rf, int)):
            rf_converted = _to_polars(rf)
        else:
            rf_converted = rf  # int is not float/DataFrame: _subtract_risk_free raises TypeError

        # Null handling happens BEFORE date alignment, so "drop" can change the overlap.
        returns_pl = _apply_null_strategy(returns_pl, date_col, "returns", null_strategy)
        if benchmark_pl is not None:
            benchmark_pl = _apply_null_strategy(benchmark_pl, date_col, "benchmark", null_strategy)

        if benchmark_pl is not None:
            # Restrict both frames to their common dates (inner join on the date column).
            joined_dates = returns_pl.join(benchmark_pl, on=date_col, how="inner").select(date_col)
            if joined_dates.is_empty():
                raise ValueError("No overlapping dates between returns and benchmark.")  # noqa: TRY003
            returns_pl = returns_pl.join(joined_dates, on=date_col, how="inner")
            benchmark_pl = benchmark_pl.join(joined_dates, on=date_col, how="inner")

        index = returns_pl.select(date_col)
        excess_returns = _subtract_risk_free(returns_pl, rf_converted, date_col).drop(date_col)
        excess_benchmark = (
            _subtract_risk_free(benchmark_pl, rf_converted, date_col).drop(date_col)
            if benchmark_pl is not None
            else None
        )

        return cls(returns=excess_returns, benchmark=excess_benchmark, index=index)

    @classmethod
    def from_prices(
        cls,
        prices: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from prices and optional benchmark.

        Converts price levels to returns via percentage change and delegates
        to :meth:`from_returns`. The first row of each asset is dropped
        because no prior price is available to compute a return.

        Parameters
        ----------
        prices : NativeFrame
            Price-level data. First column should be the date column;
            remaining columns are asset prices.

        rf : float | NativeFrame, optional
            Risk-free rate. Forwarded unchanged to :meth:`from_returns`.
            Default is 0.0 (no risk-free rate adjustment).

        benchmark : NativeFrame | None, optional
            Benchmark prices. Converted to returns in the same way as
            ``prices`` before being forwarded to :meth:`from_returns`.
            Default is None (no benchmark).

        date_col : str, optional
            Name of the date column in the DataFrames. Default is ``"Date"``.

        null_strategy : {"raise", "drop", "forward_fill"} | None, optional
            How to handle ``null`` (missing) values after converting prices to
            returns. Forwarded unchanged to :meth:`from_returns`.
            Default is ``None`` (nulls propagate through calculations).

            - ``None`` — no null checking; nulls propagate.
            - ``"raise"`` — raise :exc:`~jquantstats.exceptions.NullsInReturnsError`
              if any null is found in the derived returns.
            - ``"drop"`` — silently drop every row that contains at least one null.
            - ``"forward_fill"`` — fill each null with the most recent non-null value.

            .. note::
                Prices that contain nulls will produce null returns via
                ``pct_change()``. If you expect missing price entries, pass
                ``null_strategy="drop"`` or ``null_strategy="forward_fill"``.

        Returns:
        -------
        Data
            Object containing excess returns derived from the supplied prices,
            with methods for analysis and visualization through the ``stats``
            and ``plots`` properties.

        Examples:
        --------
        ```python
        from jquantstats import Data
        import polars as pl

        prices = pl.DataFrame({
            "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
            "Asset1": [100.0, 101.0, 99.0]
        }).with_columns(pl.col("Date").str.to_date())

        data = Data.from_prices(prices=prices)
        ```

        """
        prices_pl = _to_polars(prices)
        asset_cols = [c for c in prices_pl.columns if c != date_col]
        # slice(1) drops the first row, whose pct_change is always null.
        returns_pl = prices_pl.with_columns([pl.col(c).pct_change().alias(c) for c in asset_cols]).slice(1)

        benchmark_returns: NativeFrame | None = None
        if benchmark is not None:
            benchmark_pl = _to_polars(benchmark)
            bench_cols = [c for c in benchmark_pl.columns if c != date_col]
            benchmark_returns = benchmark_pl.with_columns([pl.col(c).pct_change().alias(c) for c in bench_cols]).slice(
                1
            )

        return cls.from_returns(
            returns=returns_pl,
            rf=rf,
            benchmark=benchmark_returns,
            date_col=date_col,
            null_strategy=null_strategy,
        )

    def __repr__(self) -> str:
        """Return a string representation of the Data object."""
        rows = len(self.index)
        date_cols = self.date_col
        if date_cols:
            date_column = date_cols[0]
            start = self.index[date_column].min()
            end = self.index[date_column].max()
            return f"Data(assets={self.assets}, rows={rows}, start={start}, end={end})"
        return f"Data(assets={self.assets}, rows={rows})"  # pragma: no cover # __post_init__ requires ≥1 index column

    @property
    def plots(self) -> DataPlots:
        """Provides access to visualization methods for the financial data.

        Returns:
            DataPlots: An instance of the DataPlots class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._plots import DataPlots

        return DataPlots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._reports import Reports

        return Reports(self)

    @property
    def utils(self) -> DataUtils:
        """Provides access to utility transforms and conversions for the financial data.

        Returns:
            DataUtils: An instance of the DataUtils class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._utils import DataUtils

        return DataUtils(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: List of all asset column names from both returns and benchmark
                (if available).

        """
        if self.benchmark is not None:
            return list(self.returns.columns) + list(self.benchmark.columns)
        return list(self.returns.columns)

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
                (if available) combined horizontally.

        """
        # Horizontal concat is safe: __post_init__ guarantees equal row counts.
        if self.benchmark is None:
            return pl.concat([self.index, self.returns], how="horizontal")
        else:
            return pl.concat([self.index, self.returns, self.benchmark], how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resamples returns and benchmark to a different frequency using Polars.

        Args:
            every (str, optional): Resampling frequency (e.g., '1mo', '1y'). Defaults to '1mo'.

        Returns:
            Data: Resampled data.

        """

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            """Resample a single DataFrame to the target frequency using compound returns."""
            dframe = self.index.hstack(dframe)  # Add the date column for resampling

            # Right-closed/right-labelled windows; per-window returns are compounded
            # as prod(1 + r) - 1 rather than summed.
            return dframe.group_by_dynamic(
                index_column=self.index.columns[0], every=every, period=every, closed="right", label="right"
            ).agg(
                [
                    ((pl.col(col) + 1.0).product() - 1.0).alias(col)
                    for col in dframe.columns
                    if col != self.index.columns[0]
                ]
            )

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        resampled_index = resampled_returns.select(self.index.columns[0])

        return Data(
            returns=resampled_returns.drop(self.index.columns[0]),
            benchmark=resampled_benchmark.drop(self.index.columns[0]) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def describe(self) -> pl.DataFrame:
        """Return a tidy summary of shape, date range and asset names.

        Returns:
        -------
        pl.DataFrame
            One row per asset with columns: asset, start, end, rows, has_benchmark.

        """
        date_column = self.date_col[0]
        start = self.index[date_column].min()
        end = self.index[date_column].max()
        rows = len(self.index)
        return pl.DataFrame(
            {
                "asset": self.returns.columns,
                "start": [start] * len(self.returns.columns),
                "end": [end] * len(self.returns.columns),
                "rows": [rows] * len(self.returns.columns),
                "has_benchmark": [self.benchmark is not None] * len(self.returns.columns),
            }
        )

    def copy(self) -> Data:
        """Create a deep copy of the Data object.

        Returns:
            Data: A new Data object with copies of the returns and benchmark.

        """
        # NOTE(review): pl.DataFrame.clone() yields an independent frame object;
        # whether underlying buffers are shared is a Polars implementation detail.
        if self.benchmark is not None:
            return Data(returns=self.returns.clone(), benchmark=self.benchmark.clone(), index=self.index.clone())
        return Data(returns=self.returns.clone(), index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the combined data.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the combined data.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    def truncate(
        self,
        start: date | datetime | str | int | None = None,
        end: date | datetime | str | int | None = None,
    ) -> Data:
        """Return a new Data object truncated to the inclusive [start, end] range.

        When the index is temporal (Date/Datetime), truncation is performed by
        comparing the date column against ``start`` and ``end`` values.

        When the index is integer-based, row slicing is used instead, and
        ``start`` and ``end`` must be non-negative integers. Passing
        non-integer bounds to an integer-indexed Data raises :exc:`TypeError`.

        Args:
            start: Optional lower bound (inclusive). A date/datetime value
                when the index is temporal; a non-negative :class:`int` row
                index when the data has no temporal index.
            end: Optional upper bound (inclusive). Same type rules as
                ``start``.

        Returns:
            Data: A new Data object filtered to the specified range.

        Raises:
            TypeError: When the index is not temporal and a non-integer bound
                is supplied.

        """
        date_column = self.index.columns[0]
        is_temporal = self.index[date_column].dtype.is_temporal()

        if is_temporal:
            # Build one boolean mask over the index, then apply it to every frame
            # so all three stay row-aligned.
            cond = pl.lit(True)
            if start is not None:
                cond = cond & (pl.col(date_column) >= pl.lit(start))
            if end is not None:
                cond = cond & (pl.col(date_column) <= pl.lit(end))
            mask = self.index.select(cond.alias("mask"))["mask"]
            new_index = self.index.filter(mask)
            new_returns = self.returns.filter(mask)
            new_benchmark = self.benchmark.filter(mask) if self.benchmark is not None else None
        else:
            if start is not None and not isinstance(start, int):
                raise TypeError(f"start must be an integer, got {type(start).__name__}.")  # noqa: TRY003
            if end is not None and not isinstance(end, int):
                raise TypeError(f"end must be an integer, got {type(end).__name__}.")  # noqa: TRY003
            row_start = start if start is not None else 0
            # +1 makes the integer `end` bound inclusive, mirroring the temporal path.
            row_end = end + 1 if end is not None else self.index.height
            length = max(0, row_end - row_start)
            new_index = self.index.slice(row_start, length)
            new_returns = self.returns.slice(row_start, length)
            new_benchmark = self.benchmark.slice(row_start, length) if self.benchmark is not None else None

        return Data(returns=new_returns, benchmark=new_benchmark, index=new_index)

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        For temporal (Date/Datetime) indices, computes the mean gap between observations
        and converts to an annualised period count (e.g. ~252 for daily, ~52 for weekly).

        For integer indices (date-free portfolios), falls back to 252 trading days per year
        because integer diffs have no time meaning.
        """
        datetime_col = self.index[self.index.columns[0]]

        if not datetime_col.dtype.is_temporal():
            return 252.0

        sorted_dt = datetime_col.sort()
        diffs = sorted_dt.diff().drop_nulls()
        mean_diff = diffs.mean()

        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        else:  # pragma: no cover # Polars always returns timedelta for temporal diff
            seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

        # Calendar-year seconds divided by mean gap seconds = periods per year.
        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)