Coverage for src/jquantstats/data.py: 100%

216 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 06:13 +0000

1"""Financial returns data container and manipulation utilities.""" 

2 

3from __future__ import annotations 

4 

5import dataclasses 

6import warnings 

7from collections.abc import Iterator 

8from datetime import date, datetime, timedelta 

9from typing import TYPE_CHECKING, Literal, cast 

10 

11import narwhals as nw 

12import polars as pl 

13 

14from ._types import NativeFrame, NativeFrameOrScalar 

15from .exceptions import BenchmarkAlignmentWarning, MissingDateColumnError, NullsInReturnsError 

16 

17if TYPE_CHECKING: 

18 from ._plots import DataPlots 

19 from ._reports import Reports 

20 from ._stats import Stats 

21 from ._utils import DataUtils 

22 

23 

def _to_polars(df: NativeFrame) -> pl.DataFrame:
    """Return *df* as a polars DataFrame.

    A frame that already is a ``pl.DataFrame`` is handed back untouched; any
    other input is wrapped with narwhals (eager frames only) and converted.
    """
    if not isinstance(df, pl.DataFrame):
        return nw.from_native(df, eager_only=True).to_polars()
    return df

29 

30 

31def _apply_null_strategy( 

32 dframe: pl.DataFrame, 

33 date_col: str, 

34 frame_name: str, 

35 null_strategy: Literal["raise", "drop", "forward_fill"] | None, 

36) -> pl.DataFrame: 

37 """Check for nulls in *dframe* and apply *null_strategy*. 

38 

39 Args: 

40 dframe (pl.DataFrame): DataFrame to inspect. The date column is 

41 excluded from the null scan. 

42 date_col (str): Name of the column to treat as the date index 

43 (excluded from null check). 

44 frame_name (str): Descriptive name used in the error message 

45 (e.g. ``"returns"``). 

46 null_strategy ({"raise", "drop", "forward_fill"} | None): How to 

47 handle null values: 

48 

49 - ``None`` — leave nulls as-is (nulls will propagate through 

50 calculations). 

51 - ``"raise"`` — raise `NullsInReturnsError` if any null is found. 

52 - ``"drop"`` — drop every row that contains at least one null. 

53 - ``"forward_fill"`` — fill each null with the most recent 

54 non-null value in the same column. 

55 

56 Returns: 

57 pl.DataFrame: The original DataFrame (``None`` / ``"raise"``), a 

58 filtered DataFrame (``"drop"``), or a filled DataFrame 

59 (``"forward_fill"``). 

60 

61 Raises: 

62 NullsInReturnsError: When *null_strategy* is ``"raise"`` and nulls 

63 are present. 

64 

65 """ 

66 if null_strategy is None: 

67 return dframe 

68 

69 value_cols = [c for c in dframe.columns if c != date_col] 

70 null_counts = dframe.select(value_cols).null_count().row(0) 

71 cols_with_nulls = [col for col, count in zip(value_cols, null_counts, strict=False) if count > 0] 

72 

73 if not cols_with_nulls: 

74 return dframe 

75 

76 if null_strategy == "raise": 

77 raise NullsInReturnsError(frame_name, cols_with_nulls) 

78 if null_strategy == "drop": 

79 return dframe.drop_nulls(subset=value_cols) 

80 # forward_fill 

81 return dframe.with_columns([pl.col(c).forward_fill() for c in value_cols]) 

82 

83 

def interpolate(df: pl.DataFrame) -> pl.DataFrame:
    """Forward-fill the interior gaps of every numeric column.

    A numeric column is only filled between its first and its last non-null
    entry. Leading and trailing nulls stay null, and columns that are
    non-numeric or entirely null pass through untouched.

    Args:
        df: Input frame that may contain nulls.

    Returns:
        pl.DataFrame: Frame with the same columns in the same order, where
        numeric columns have had their interior nulls forward-filled.

    Examples:
        ```python
        import polars as pl
        from jquantstats import interpolate

        df = pl.DataFrame({"a": [None, 1.0, None, 3.0, None], "b": ["x", "y", "z", "w", "v"]})
        result = interpolate(df)
        # a: [None, 1.0, 1.0, 3.0, None]   (leading/trailing nulls untouched)
        # b: ["x", "y", "z", "w", "v"]      (non-numeric unchanged)
        ```

    """
    # Helper row-number column; wrap the name in underscores until it is unused.
    idx_name = "__row_idx__"
    while idx_name in df.columns:
        idx_name = f"_{idx_name}_"
    row_idx = pl.col(idx_name)

    exprs = []
    for name in df.columns:
        series = df[name]
        passthrough = pl.col(name)

        if not series.dtype.is_numeric():
            exprs.append(passthrough)
            continue

        present = series.is_not_null()
        if not present.any():
            # Entirely null column: there is no span to fill.
            exprs.append(passthrough)
            continue

        lead = present.arg_max()
        trail = present.reverse().arg_max()
        if lead is None or trail is None:  # pragma: no cover
            exprs.append(passthrough)
            continue

        # Positions of the first and last non-null sample.
        first_valid_idx = lead
        last_valid_idx = len(series) - 1 - trail

        inside_span = (row_idx >= pl.lit(first_valid_idx)) & (row_idx <= pl.lit(last_valid_idx))
        exprs.append(
            pl.when(inside_span).then(passthrough.fill_null(strategy="forward")).otherwise(passthrough).alias(name)
        )

    # The helper column is added for the mask and dropped again by the select.
    return df.with_columns(pl.int_range(0, df.height).alias(idx_name)).select(exprs)

141 

142 

def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Subtract the risk-free rate from every non-date column of *dframe*.

    Args:
        dframe (pl.DataFrame): Returns data with a date column and one or
            more asset-return columns.
        rf (float | pl.DataFrame): Risk-free rate to subtract.

            - Scalar (``float`` or ``int``): constant rate subtracted from
              every row. No join is performed, so the rows of *dframe* are
              kept as they are (same count, same order).
            - ``pl.DataFrame``: must contain *date_col*; the column at
              position 1 holds the time-varying rate. The frames are
              inner-joined on *date_col*, so only dates present in both
              survive. Any further columns of *rf* are ignored.

        date_col (str): Name of the date column, used as the join key when
            *rf* is a DataFrame.

    Returns:
        pl.DataFrame: *date_col* followed by the value columns of *dframe*
        (original names and order), each reduced by the risk-free rate.

    Raises:
        TypeError: If *rf* is neither a scalar nor a ``pl.DataFrame``.

    Warns:
        UserWarning: If the rate column of a DataFrame *rf* is not already
            named ``"rf"``.

    """
    # Fix the output columns up front so nothing coming from *rf* can leak in.
    value_cols = [col for col in dframe.columns if col != date_col]

    if isinstance(rf, int | float):
        # Constant rate: subtract directly. A self-join on the date column
        # would multiply rows on duplicate dates and drop rows with null dates.
        # The explicit Float64 literal mirrors the Float64 column that the
        # join-based implementation used to subtract.
        rate = pl.lit(float(rf), dtype=pl.Float64)
        return dframe.select([pl.col(date_col)] + [(pl.col(col) - rate).alias(col) for col in value_cols])

    if not isinstance(rf, pl.DataFrame):
        raise TypeError("rf must be a float or DataFrame")  # noqa: TRY003

    rate_col = rf.columns[1]
    if rate_col != "rf":
        warnings.warn(
            f"Risk-free rate column '{rate_col}' has been renamed to 'rf' for internal alignment.",
            stacklevel=3,
        )

    # Internal name of the joined rate column. It is normally "rf"; wrap it in
    # underscores until it no longer clashes with an asset column.
    joined_rate = "rf"
    while joined_rate in dframe.columns:
        joined_rate = f"_{joined_rate}_"

    # Join only the date and rate columns so extra columns of *rf* are ignored.
    rf_dframe = rf.select([pl.col(date_col), pl.col(rate_col).alias(joined_rate)])
    joined = dframe.join(rf_dframe, on=date_col, how="inner")
    return joined.select(
        [pl.col(date_col)] + [(pl.col(col) - pl.col(joined_rate)).alias(col) for col in value_cols]
    )

180 

181 

@dataclasses.dataclass(frozen=True, slots=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    Provides methods for analyzing and manipulating financial returns data,
    including resampling, truncation, and access to statistical metrics and
    visualizations via the ``stats`` and ``plots`` properties.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets
            as columns.
        benchmark (pl.DataFrame | None): Optional benchmark returns DataFrame.
            Defaults to None.
        index (pl.DataFrame): DataFrame containing the date index for the
            returns data.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate the Data object after initialization.

        Raises:
            ValueError: If the index has fewer than two rows, if its first
                column is not sorted in ascending order, or if the row count
                of ``returns`` or ``benchmark`` differs from the index.

        """
        # You need at least two points
        if self.index.shape[0] < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Check index is monotonically increasing
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != self.index.shape[0]:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @classmethod
    def from_returns(
        cls,
        returns: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from returns and optional benchmark.

        Args:
            returns (NativeFrame): Financial returns data. First column should
                be the date column, remaining columns are asset returns.
            rf (float | NativeFrame): Risk-free rate. Defaults to 0.0 (no
                risk-free rate adjustment).

                - If float: Constant risk-free rate applied to all dates.
                - If NativeFrame: Time-varying risk-free rate with dates
                  matching returns.

            benchmark (NativeFrame | None): Benchmark returns. Defaults to
                None (no benchmark). First column should be the date column,
                remaining columns are benchmark returns. Returns and
                benchmark are aligned on their common dates; if either frame
                contains dates the other lacks, those rows are dropped and a
                `BenchmarkAlignmentWarning` is emitted.
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values in *returns* and *benchmark*.
                Defaults to ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate through all
                  downstream calculations.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value in the same column.

                Note: Affects only Polars ``null`` values (i.e. ``None`` /
                missing entries). IEEE-754 ``NaN`` values are **not** affected
                and continue to propagate as per IEEE-754 semantics.

        Returns:
            Data: Object containing excess returns and benchmark (if any),
            with methods for analysis and visualization through the ``stats``
            and ``plots`` properties.

        Raises:
            MissingDateColumnError: If *date_col* is not a column of
                *returns*, *benchmark*, or a DataFrame-valued *rf*. Raised
                before any joins so the offending frame is named explicitly.
            NullsInReturnsError: If *null_strategy* is ``"raise"`` and the
                data contains null values.
            ValueError: If there are no overlapping dates between returns and
                benchmark, or if the resulting frames fail the validation in
                ``__post_init__``. The latter includes a DataFrame-valued
                *rf* that lacks some of the return dates: the index is taken
                before the risk-free rate is joined in, so the row counts no
                longer match.

        Warns:
            BenchmarkAlignmentWarning: If aligning returns and benchmark on
                their common dates drops rows from either frame.

        Examples:
            Basic usage:

            ```python
            from jquantstats import Data
            import polars as pl

            returns = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, -0.02, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns)
            ```

            With benchmark and risk-free rate:

            ```python
            benchmark = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Market": [0.005, -0.01, 0.02]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns, benchmark=benchmark, rf=0.0002)
            ```

            Handling nulls automatically:

            ```python
            returns_with_nulls = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, None, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            # Drop rows with nulls (mirrors pandas/QuantStats behaviour)
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="drop")

            # Or forward-fill nulls
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="forward_fill")
            ```

        """
        returns_pl = _to_polars(returns)
        benchmark_pl = _to_polars(benchmark) if benchmark is not None else None
        # accept ints (e.g. rf=0) by coercing to float
        rf_converted: float | pl.DataFrame = float(rf) if isinstance(rf, int | float) else _to_polars(rf)

        # Validate the date column on every supplied frame before any join,
        # so the error can name the frame that is at fault.
        frames: list[tuple[str, pl.DataFrame | None]] = [("returns", returns_pl), ("benchmark", benchmark_pl)]
        if isinstance(rf_converted, pl.DataFrame):
            frames.append(("rf", rf_converted))
        for frame_name, frame in frames:
            if frame is not None and date_col not in frame.columns:
                raise MissingDateColumnError(frame_name, column=date_col, available=list(frame.columns))

        returns_pl = _apply_null_strategy(returns_pl, date_col, "returns", null_strategy)

        if benchmark_pl is not None:
            benchmark_pl = _apply_null_strategy(benchmark_pl, date_col, "benchmark", null_strategy)

            # Keep only the dates that returns and benchmark have in common.
            joined_dates = returns_pl.join(benchmark_pl, on=date_col, how="inner").select(date_col)
            if joined_dates.is_empty():
                raise ValueError("No overlapping dates between returns and benchmark.")  # noqa: TRY003
            dropped_returns = returns_pl.height - joined_dates.height
            dropped_benchmark = benchmark_pl.height - joined_dates.height
            if dropped_returns > 0 or dropped_benchmark > 0:
                warnings.warn(
                    f"Aligning returns and benchmark on common dates dropped "
                    f"{dropped_returns} of {returns_pl.height} returns row(s) and "
                    f"{dropped_benchmark} of {benchmark_pl.height} benchmark row(s); "
                    f"{joined_dates.height} row(s) remain. Pass a benchmark covering "
                    f"the same dates as the returns to avoid this.",
                    BenchmarkAlignmentWarning,
                    stacklevel=2,
                )
            returns_pl = returns_pl.join(joined_dates, on=date_col, how="inner")
            benchmark_pl = benchmark_pl.join(joined_dates, on=date_col, how="inner")

        index = returns_pl.select(date_col)
        excess_returns = _subtract_risk_free(returns_pl, rf_converted, date_col).drop(date_col)
        excess_benchmark = (
            _subtract_risk_free(benchmark_pl, rf_converted, date_col).drop(date_col)
            if benchmark_pl is not None
            else None
        )

        return cls(returns=excess_returns, benchmark=excess_benchmark, index=index)

    @classmethod
    def from_prices(
        cls,
        prices: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from prices and optional benchmark.

        Converts price levels to returns via percentage change and delegates
        to `from_returns`. The first row of each asset is dropped because no
        prior price is available to compute a return.

        Args:
            prices (NativeFrame): Price-level data. First column should be
                the date column; remaining columns are asset prices.
            rf (float | NativeFrame): Risk-free rate. Forwarded unchanged to
                `from_returns`. Defaults to 0.0 (no risk-free rate
                adjustment).
            benchmark (NativeFrame | None): Benchmark prices. Converted to
                returns in the same way as ``prices`` before being forwarded
                to `from_returns`. Defaults to None (no benchmark).
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values after converting prices to
                returns. Forwarded unchanged to `from_returns`. Defaults to
                ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found in the derived returns.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value.

                Note: Prices that contain nulls will produce null returns via
                ``pct_change()``. If you expect missing price entries, pass
                ``null_strategy="drop"`` or ``null_strategy="forward_fill"``.

        Returns:
            Data: Object containing excess returns derived from the supplied
            prices, with methods for analysis and visualization through the
            ``stats`` and ``plots`` properties.

        Raises:
            MissingDateColumnError: If *date_col* is not a column of *prices*
                or *benchmark*. Raised before returns are derived so the
                offending frame is named explicitly.

        Examples:
            ```python
            from jquantstats import Data
            import polars as pl

            prices = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [100.0, 101.0, 99.0]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_prices(prices=prices)
            ```

        """

        def prices_to_returns(frame: NativeFrame, frame_name: str) -> pl.DataFrame:
            """Convert one price frame to simple returns, dropping the first row."""
            frame_pl = _to_polars(frame)
            if date_col not in frame_pl.columns:
                raise MissingDateColumnError(frame_name, column=date_col, available=list(frame_pl.columns))
            value_cols = [c for c in frame_pl.columns if c != date_col]
            # Row 0 has no previous price, so its pct_change is null; slice it off.
            return frame_pl.with_columns([pl.col(c).pct_change().alias(c) for c in value_cols]).slice(1)

        returns_pl = prices_to_returns(prices, "prices")
        benchmark_returns: NativeFrame | None = (
            prices_to_returns(benchmark, "benchmark") if benchmark is not None else None
        )

        return cls.from_returns(
            returns=returns_pl,
            rf=rf,
            benchmark=benchmark_returns,
            date_col=date_col,
            null_strategy=null_strategy,
        )

    def __repr__(self) -> str:
        """Return a string representation of the Data object."""
        rows = len(self.index)
        date_cols = self.date_col
        if date_cols:
            date_column = date_cols[0]
            start = self.index[date_column].min()
            end = self.index[date_column].max()
            return f"Data(assets={self.assets}, rows={rows}, start={start!s}, end={end!s})"
        return f"Data(assets={self.assets}, rows={rows})"  # pragma: no cover  # __post_init__ requires ≥1 index column

    @property
    def plots(self) -> DataPlots:
        """Provides access to visualization methods for the financial data.

        Returns:
            DataPlots: An instance of the DataPlots class initialized with this data.

        """
        from ._plots import DataPlots

        return DataPlots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        from ._reports import Reports

        return Reports(self)

    @property
    def utils(self) -> DataUtils:
        """Provides access to utility transforms and conversions for the financial data.

        Returns:
            DataUtils: An instance of the DataUtils class initialized with this data.

        """
        from ._utils import DataUtils

        return DataUtils(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
            the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: List of all asset column names from both returns and benchmark
            (if available).

        """
        if self.benchmark is not None:
            return list(self.returns.columns) + list(self.benchmark.columns)
        return list(self.returns.columns)

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
            (if available) combined horizontally.

        """
        frames = [self.index, self.returns]
        if self.benchmark is not None:
            frames.append(self.benchmark)
        return pl.concat(frames, how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resample returns and benchmark to a different frequency.

        Returns within each window are compounded, i.e. aggregated as
        ``prod(1 + r) - 1``.

        Args:
            every (str): Resampling frequency (e.g., ``'1mo'``, ``'1y'``).
                Defaults to ``'1mo'``.

        Returns:
            Data: Resampled data at the requested frequency.

        """
        date_column = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            """Resample a single DataFrame to the target frequency using compound returns."""
            dframe = self.index.hstack(dframe)  # Add the date column for resampling

            return dframe.group_by_dynamic(
                index_column=date_column, every=every, period=every, closed="right", label="right"
            ).agg([((pl.col(col) + 1.0).product() - 1.0).alias(col) for col in dframe.columns if col != date_column])

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        resampled_index = resampled_returns.select(date_column)

        return Data(
            returns=resampled_returns.drop(date_column),
            benchmark=resampled_benchmark.drop(date_column) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def describe(self) -> pl.DataFrame:
        """Return a tidy summary of shape, date range and asset names.

        Returns:
            pl.DataFrame: One row per column of ``returns`` with columns:
            asset, start, end, rows, has_benchmark.

        """
        date_column = self.date_col[0]
        start = self.index[date_column].min()
        end = self.index[date_column].max()
        rows = len(self.index)
        asset_names = self.returns.columns
        n_assets = len(asset_names)
        return pl.DataFrame(
            {
                "asset": asset_names,
                "start": [start] * n_assets,
                "end": [end] * n_assets,
                "rows": [rows] * n_assets,
                "has_benchmark": [self.benchmark is not None] * n_assets,
            }
        )

    def copy(self) -> Data:
        """Create a copy of the Data object.

        Returns:
            Data: A new Data object built from clones of the returns, index
            and (if present) benchmark frames.

        """
        benchmark_clone = self.benchmark.clone() if self.benchmark is not None else None
        return Data(returns=self.returns.clone(), benchmark=benchmark_clone, index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the combined data.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the combined data.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    def truncate(
        self,
        start: date | datetime | str | int | None = None,
        end: date | datetime | str | int | None = None,
    ) -> Data:
        """Return a new Data object truncated to the inclusive [start, end] range.

        When the index is temporal (Date/Datetime), truncation is performed by
        comparing the date column against ``start`` and ``end`` values.
        String bounds are parsed as ISO-8601 (e.g. ``"2023-01-31"`` or
        ``"2023-01-31T12:00:00"``) before the comparison; for a Date index
        only the date part of the parsed value is used.

        When the index is integer-based, row slicing is used instead, and
        ``start`` and ``end`` must be non-negative integers. Passing
        non-integer bounds to an integer-indexed Data raises `TypeError`.

        Args:
            start: Optional lower bound (inclusive). A date/datetime value
                or ISO-8601 string when the index is temporal; a
                non-negative `int` row index when the data has no temporal
                index.
            end: Optional upper bound (inclusive). Same type rules as
                ``start``.

        Returns:
            Data: A new Data object filtered to the specified range.

        Raises:
            TypeError: When the index is not temporal and a non-integer bound
                is supplied.
            ValueError: When a string bound for a Date/Datetime index is not
                valid ISO-8601, or when the truncated data no longer passes
                the validation in ``__post_init__``.

        """
        date_column = self.index.columns[0]
        dtype = self.index[date_column].dtype

        if dtype.is_temporal():

            def as_bound(value: date | datetime | str | int) -> pl.Expr:
                """Turn a user-supplied bound into a literal comparable with the index."""
                # A raw string literal is not comparable with a Date/Datetime
                # column, so parse ISO strings into native Python values first.
                if isinstance(value, str) and dtype in (pl.Date, pl.Datetime):
                    parsed = datetime.fromisoformat(value)
                    return pl.lit(parsed.date() if dtype == pl.Date else parsed)
                return pl.lit(value)

            cond = pl.lit(True)
            if start is not None:
                cond = cond & (pl.col(date_column) >= as_bound(start))
            if end is not None:
                cond = cond & (pl.col(date_column) <= as_bound(end))
            mask = self.index.select(cond.alias("mask"))["mask"]
            new_index = self.index.filter(mask)
            new_returns = self.returns.filter(mask)
            new_benchmark = self.benchmark.filter(mask) if self.benchmark is not None else None
        else:
            if start is not None and not isinstance(start, int):
                raise TypeError(f"start must be an integer, got {type(start).__name__}.")  # noqa: TRY003
            if end is not None and not isinstance(end, int):
                raise TypeError(f"end must be an integer, got {type(end).__name__}.")  # noqa: TRY003
            row_start = start if start is not None else 0
            # ``end`` is inclusive, hence the +1 when converting to a slice length.
            row_end = end + 1 if end is not None else self.index.height
            length = max(0, row_end - row_start)
            new_index = self.index.slice(row_start, length)
            new_returns = self.returns.slice(row_start, length)
            new_benchmark = self.benchmark.slice(row_start, length) if self.benchmark is not None else None

        return Data(returns=new_returns, benchmark=new_benchmark, index=new_index)

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        For temporal (Date/Datetime) indices, computes the mean gap between
        consecutive observations and divides the number of seconds in a
        365-day year by that gap.

        For integer indices (date-free portfolios), falls back to 252 trading days per year
        because integer diffs have no time meaning.
        """
        datetime_col = self.index[self.index.columns[0]]

        if not datetime_col.dtype.is_temporal():
            return 252.0

        sorted_dt = datetime_col.sort()
        diffs = sorted_dt.diff().drop_nulls()
        mean_diff = diffs.mean()

        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        else:  # pragma: no cover  # Polars always returns timedelta for temporal diff
            seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)