Coverage for src / jquantstats / data.py: 100%

204 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-07 15:52 +0000

1"""Financial returns data container and manipulation utilities.""" 

2 

3from __future__ import annotations 

4 

5import dataclasses 

6import warnings 

7from collections.abc import Iterator 

8from datetime import date, datetime, timedelta 

9from typing import TYPE_CHECKING, Literal, cast 

10 

11import narwhals as nw 

12import polars as pl 

13 

14from ._types import NativeFrame, NativeFrameOrScalar 

15from .exceptions import NullsInReturnsError 

16 

17if TYPE_CHECKING: 

18 from ._plots import DataPlots 

19 from ._reports import Reports 

20 from ._stats import Stats 

21 from ._utils import DataUtils 

22 

23 

def _to_polars(df: NativeFrame) -> pl.DataFrame:
    """Convert any narwhals-compatible DataFrame into a polars DataFrame.

    Frames that are already polars are returned untouched; every other
    supported backend is routed through narwhals' eager conversion path.
    """
    if not isinstance(df, pl.DataFrame):
        return nw.from_native(df, eager_only=True).to_polars()
    return df

29 

30 

def _apply_null_strategy(
    dframe: pl.DataFrame,
    date_col: str,
    frame_name: str,
    null_strategy: Literal["raise", "drop", "forward_fill"] | None,
) -> pl.DataFrame:
    """Scan *dframe* for nulls (date column excluded) and apply *null_strategy*.

    Args:
        dframe (pl.DataFrame): DataFrame to inspect. The date column is
            excluded from the null scan.
        date_col (str): Name of the column to treat as the date index
            (excluded from null check).
        frame_name (str): Descriptive name used in the error message
            (e.g. ``"returns"``).
        null_strategy ({"raise", "drop", "forward_fill"} | None): How to
            handle null values:

            - ``None`` — leave nulls as-is (nulls will propagate through
              calculations).
            - ``"raise"`` — raise `NullsInReturnsError` if any null is found.
            - ``"drop"`` — drop every row that contains at least one null.
            - ``"forward_fill"`` — fill each null with the most recent
              non-null value in the same column.

    Returns:
        pl.DataFrame: The original DataFrame (``None`` / ``"raise"``), a
            filtered DataFrame (``"drop"``), or a filled DataFrame
            (``"forward_fill"``).

    Raises:
        NullsInReturnsError: When *null_strategy* is ``"raise"`` and nulls
            are present.

    """
    if null_strategy is None:
        return dframe

    data_cols = [name for name in dframe.columns if name != date_col]
    # null_count() yields one row of per-column counts; row(0) unpacks it.
    counts = dframe.select(data_cols).null_count().row(0)
    affected = [name for name, n in zip(data_cols, counts, strict=False) if n > 0]

    if not affected:
        return dframe
    if null_strategy == "raise":
        raise NullsInReturnsError(frame_name, affected)
    if null_strategy == "drop":
        return dframe.drop_nulls(subset=data_cols)
    # Remaining strategy: forward_fill every data column.
    return dframe.with_columns([pl.col(name).forward_fill() for name in data_cols])

82 

83 

def interpolate(df: pl.DataFrame) -> pl.DataFrame:
    """Forward-fill numeric columns only between first and last non-null values.

    For each numeric column, forward-fill is applied strictly within the span
    bounded by its first and last non-null samples. Values outside this span
    are left as-is (including leading/trailing nulls). Non-numeric columns are
    returned unchanged.

    Args:
        df: Input frame possibly containing nulls.

    Returns:
        pl.DataFrame: Frame where numeric columns have been interior-forward-
            filled; schema and dtypes of the original columns are preserved.

    Examples:
        ```python
        import polars as pl
        from jquantstats import interpolate

        df = pl.DataFrame({"a": [None, 1.0, None, 3.0, None], "b": ["x", "y", "z", "w", "v"]})
        result = interpolate(df)
        # a: [None, 1.0, 1.0, 3.0, None] (leading/trailing nulls untouched)
        # b: ["x", "y", "z", "w", "v"] (non-numeric unchanged)
        ```

    """
    # Pick a row-index column name guaranteed not to collide with user columns.
    idx_col = "__row_idx__"
    while idx_col in df.columns:
        idx_col = f"_{idx_col}_"

    exprs = []

    for name in df.columns:
        series = df[name]

        # Non-numeric columns pass through untouched.
        if not series.dtype.is_numeric():
            exprs.append(pl.col(name))
            continue

        valid = series.is_not_null()
        if not valid.any():
            # All-null column: nothing to fill.
            exprs.append(pl.col(name))
            continue

        first_pos = valid.arg_max()
        rev_offset = valid.reverse().arg_max()
        if first_pos is None or rev_offset is None:  # pragma: no cover
            exprs.append(pl.col(name))
            continue
        last_pos = len(series) - 1 - rev_offset

        # Forward-fill only where the row index lies inside [first_pos, last_pos];
        # rows outside the span keep their original (possibly null) value.
        in_span = (pl.col(idx_col) >= pl.lit(first_pos)) & (pl.col(idx_col) <= pl.lit(last_pos))
        exprs.append(
            pl.when(in_span).then(pl.col(name).fill_null(strategy="forward")).otherwise(pl.col(name)).alias(name)
        )

    # Attach the temp row index for the mask, then select only original columns.
    return df.with_columns(pl.int_range(0, df.height).alias(idx_col)).select(exprs)

141 

142 

def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Subtract the risk-free rate from all numeric columns in the DataFrame.

    Args:
        dframe (pl.DataFrame): DataFrame containing returns data with a date
            column and one or more numeric columns representing asset returns.
        rf (float | pl.DataFrame): Risk-free rate to subtract from returns.

            - If float: A constant risk-free rate applied to all dates.
            - If pl.DataFrame: A DataFrame with a date column and a second
              column containing time-varying risk-free rates.

        date_col (str): Name of the date column in both DataFrames for
            joining when rf is a DataFrame.

    Returns:
        pl.DataFrame: DataFrame with the risk-free rate subtracted from all
            numeric columns, preserving the original column names.

    """
    if isinstance(rf, float):
        # Constant rate: broadcast it as an 'rf' column alongside the dates.
        rf_frame = dframe.select([pl.col(date_col), pl.lit(rf).alias("rf")])
    else:
        if not isinstance(rf, pl.DataFrame):
            raise TypeError("rf must be a float or DataFrame")  # noqa: TRY003
        rf_frame = rf
        rate_col = rf.columns[1]
        if rate_col != "rf":
            warnings.warn(
                f"Risk-free rate column '{rate_col}' has been renamed to 'rf' for internal alignment.",
                stacklevel=3,
            )
            rf_frame = rf.rename({rate_col: "rf"})

    # Inner join restricts the result to dates present in both frames.
    joined = dframe.join(rf_frame, on=date_col, how="inner")
    value_cols = [name for name in joined.columns if name not in {date_col, "rf"}]
    return joined.select([pl.col(date_col)] + [(pl.col(name) - pl.col("rf")).alias(name) for name in value_cols])

180 

181 

@dataclasses.dataclass(frozen=True, slots=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    Provides methods for analyzing and manipulating financial returns data,
    including resampling, truncation, and access to statistical metrics and
    visualizations via the ``stats`` and ``plots`` properties.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets
            as columns.
        benchmark (pl.DataFrame | None): Optional benchmark returns DataFrame.
            Defaults to None.
        index (pl.DataFrame): DataFrame containing the date index for the
            returns data.

    """

    # All three frames are row-aligned: row i of `returns` (and `benchmark`,
    # if present) corresponds to row i of `index`. Validated in __post_init__.
    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate the Data object after initialization.

        Raises:
            ValueError: If the index has fewer than two rows, is not sorted,
                or the row counts of returns/benchmark disagree with it.
        """
        # You need at least two points
        if self.index.shape[0] < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Check index is monotonically increasing
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != self.index.shape[0]:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @classmethod
    def from_returns(
        cls,
        returns: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from returns and optional benchmark.

        Args:
            returns (NativeFrame): Financial returns data. First column should
                be the date column, remaining columns are asset returns.
            rf (float | NativeFrame): Risk-free rate. Defaults to 0.0 (no
                risk-free rate adjustment).

                - If float: Constant risk-free rate applied to all dates.
                - If NativeFrame: Time-varying risk-free rate with dates
                  matching returns.

            benchmark (NativeFrame | None): Benchmark returns. Defaults to
                None (no benchmark). First column should be the date column,
                remaining columns are benchmark returns.
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values in *returns* and *benchmark*.
                Defaults to ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate through all
                  downstream calculations.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value in the same column.

                Note: Affects only Polars ``null`` values (i.e. ``None`` /
                missing entries). IEEE-754 ``NaN`` values are **not** affected
                and continue to propagate as per IEEE-754 semantics.

        Returns:
            Data: Object containing excess returns and benchmark (if any),
                with methods for analysis and visualization through the ``stats``
                and ``plots`` properties.

        Raises:
            NullsInReturnsError: If *null_strategy* is ``"raise"`` and the
                data contains null values.
            ValueError: If there are no overlapping dates between returns and
                benchmark.

        Examples:
            Basic usage:

            ```python
            from jquantstats import Data
            import polars as pl

            returns = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, -0.02, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns)
            ```

            With benchmark and risk-free rate:

            ```python
            benchmark = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Market": [0.005, -0.01, 0.02]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns, benchmark=benchmark, rf=0.0002)
            ```

            Handling nulls automatically:

            ```python
            returns_with_nulls = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, None, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            # Drop rows with nulls (mirrors pandas/QuantStats behaviour)
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="drop")

            # Or forward-fill nulls
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="forward_fill")
            ```

        """
        returns_pl = _to_polars(returns)
        benchmark_pl = _to_polars(benchmark) if benchmark is not None else None
        rf_converted: float | pl.DataFrame
        # Frame-like rf values are converted to polars; plain float/int rf is
        # passed through unchanged (an int rf deliberately triggers the
        # TypeError inside _subtract_risk_free, see comment below).
        if isinstance(rf, pl.DataFrame) or (not isinstance(rf, float) and not isinstance(rf, int)):
            rf_converted = _to_polars(rf)
        else:
            rf_converted = rf  # int is not float/DataFrame: _subtract_risk_free raises TypeError

        returns_pl = _apply_null_strategy(returns_pl, date_col, "returns", null_strategy)
        if benchmark_pl is not None:
            benchmark_pl = _apply_null_strategy(benchmark_pl, date_col, "benchmark", null_strategy)

        if benchmark_pl is not None:
            # Align returns and benchmark on their common dates only.
            joined_dates = returns_pl.join(benchmark_pl, on=date_col, how="inner").select(date_col)
            if joined_dates.is_empty():
                raise ValueError("No overlapping dates between returns and benchmark.")  # noqa: TRY003
            returns_pl = returns_pl.join(joined_dates, on=date_col, how="inner")
            benchmark_pl = benchmark_pl.join(joined_dates, on=date_col, how="inner")

        index = returns_pl.select(date_col)
        # Stored returns are excess returns (risk-free rate already subtracted).
        excess_returns = _subtract_risk_free(returns_pl, rf_converted, date_col).drop(date_col)
        excess_benchmark = (
            _subtract_risk_free(benchmark_pl, rf_converted, date_col).drop(date_col)
            if benchmark_pl is not None
            else None
        )

        return cls(returns=excess_returns, benchmark=excess_benchmark, index=index)

    @classmethod
    def from_prices(
        cls,
        prices: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from prices and optional benchmark.

        Converts price levels to returns via percentage change and delegates
        to `from_returns`. The first row of each asset is dropped because no
        prior price is available to compute a return.

        Args:
            prices (NativeFrame): Price-level data. First column should be
                the date column; remaining columns are asset prices.
            rf (float | NativeFrame): Risk-free rate. Forwarded unchanged to
                `from_returns`. Defaults to 0.0 (no risk-free rate
                adjustment).
            benchmark (NativeFrame | None): Benchmark prices. Converted to
                returns in the same way as ``prices`` before being forwarded
                to `from_returns`. Defaults to None (no benchmark).
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values after converting prices to
                returns. Forwarded unchanged to `from_returns`. Defaults to
                ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found in the derived returns.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value.

                Note: Prices that contain nulls will produce null returns via
                ``pct_change()``. If you expect missing price entries, pass
                ``null_strategy="drop"`` or ``null_strategy="forward_fill"``.

        Returns:
            Data: Object containing excess returns derived from the supplied
                prices, with methods for analysis and visualization through the
                ``stats`` and ``plots`` properties.

        Examples:
            ```python
            from jquantstats import Data
            import polars as pl

            prices = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [100.0, 101.0, 99.0]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_prices(prices=prices)
            ```

        """
        prices_pl = _to_polars(prices)
        asset_cols = [c for c in prices_pl.columns if c != date_col]
        # pct_change() leaves the first row null; slice(1) drops it.
        returns_pl = prices_pl.with_columns([pl.col(c).pct_change().alias(c) for c in asset_cols]).slice(1)

        benchmark_returns: NativeFrame | None = None
        if benchmark is not None:
            benchmark_pl = _to_polars(benchmark)
            bench_cols = [c for c in benchmark_pl.columns if c != date_col]
            benchmark_returns = benchmark_pl.with_columns([pl.col(c).pct_change().alias(c) for c in bench_cols]).slice(
                1
            )

        return cls.from_returns(
            returns=returns_pl,
            rf=rf,
            benchmark=benchmark_returns,
            date_col=date_col,
            null_strategy=null_strategy,
        )

    def __repr__(self) -> str:
        """Return a string representation of the Data object."""
        rows = len(self.index)
        date_cols = self.date_col
        if date_cols:
            date_column = date_cols[0]
            start = self.index[date_column].min()
            end = self.index[date_column].max()
            return f"Data(assets={self.assets}, rows={rows}, start={start}, end={end})"
        return f"Data(assets={self.assets}, rows={rows})"  # pragma: no cover # __post_init__ requires ≥1 index column

    @property
    def plots(self) -> DataPlots:
        """Provides access to visualization methods for the financial data.

        Returns:
            DataPlots: An instance of the DataPlots class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._plots import DataPlots

        return DataPlots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._reports import Reports

        return Reports(self)

    @property
    def utils(self) -> DataUtils:
        """Provides access to utility transforms and conversions for the financial data.

        Returns:
            DataUtils: An instance of the DataUtils class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._utils import DataUtils

        return DataUtils(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: List of all asset column names from both returns and benchmark
                (if available).

        """
        if self.benchmark is not None:
            return list(self.returns.columns) + list(self.benchmark.columns)
        return list(self.returns.columns)

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
                (if available) combined horizontally.

        """
        # Horizontal concat is safe because __post_init__ guarantees equal row counts.
        if self.benchmark is None:
            return pl.concat([self.index, self.returns], how="horizontal")
        else:
            return pl.concat([self.index, self.returns, self.benchmark], how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resample returns and benchmark to a different frequency.

        Args:
            every (str): Resampling frequency (e.g., ``'1mo'``, ``'1y'``).
                Defaults to ``'1mo'``.

        Returns:
            Data: Resampled data at the requested frequency.

        """

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            """Resample a single DataFrame to the target frequency using compound returns."""
            dframe = self.index.hstack(dframe)  # Add the date column for resampling

            return dframe.group_by_dynamic(
                index_column=self.index.columns[0], every=every, period=every, closed="right", label="right"
            ).agg(
                [
                    # Compound simple returns within each bucket: prod(1 + r) - 1.
                    ((pl.col(col) + 1.0).product() - 1.0).alias(col)
                    for col in dframe.columns
                    if col != self.index.columns[0]
                ]
            )

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        # New index is derived from the resampled returns' date column.
        resampled_index = resampled_returns.select(self.index.columns[0])

        return Data(
            returns=resampled_returns.drop(self.index.columns[0]),
            benchmark=resampled_benchmark.drop(self.index.columns[0]) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def describe(self) -> pl.DataFrame:
        """Return a tidy summary of shape, date range and asset names.

        Returns:
            pl.DataFrame: One row per asset with columns: asset, start, end,
                rows, has_benchmark.

        """
        date_column = self.date_col[0]
        start = self.index[date_column].min()
        end = self.index[date_column].max()
        rows = len(self.index)
        return pl.DataFrame(
            {
                "asset": self.returns.columns,
                "start": [start] * len(self.returns.columns),
                "end": [end] * len(self.returns.columns),
                "rows": [rows] * len(self.returns.columns),
                "has_benchmark": [self.benchmark is not None] * len(self.returns.columns),
            }
        )

    def copy(self) -> Data:
        """Create a deep copy of the Data object.

        Returns:
            Data: A new Data object with copies of the returns and benchmark.

        """
        if self.benchmark is not None:
            return Data(returns=self.returns.clone(), benchmark=self.benchmark.clone(), index=self.index.clone())
        return Data(returns=self.returns.clone(), index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the combined data.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the combined data.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    def truncate(
        self,
        start: date | datetime | str | int | None = None,
        end: date | datetime | str | int | None = None,
    ) -> Data:
        """Return a new Data object truncated to the inclusive [start, end] range.

        When the index is temporal (Date/Datetime), truncation is performed by
        comparing the date column against ``start`` and ``end`` values.

        When the index is integer-based, row slicing is used instead, and
        ``start`` and ``end`` must be non-negative integers. Passing
        non-integer bounds to an integer-indexed Data raises `TypeError`.

        Args:
            start: Optional lower bound (inclusive). A date/datetime value
                when the index is temporal; a non-negative `int` row
                index when the data has no temporal index.
            end: Optional upper bound (inclusive). Same type rules as
                ``start``.

        Returns:
            Data: A new Data object filtered to the specified range.

        Raises:
            TypeError: When the index is not temporal and a non-integer bound
                is supplied.

        """
        date_column = self.index.columns[0]
        is_temporal = self.index[date_column].dtype.is_temporal()

        if is_temporal:
            # Build a boolean mask from the (optional) bounds, then apply the
            # same mask to index, returns, and benchmark to keep rows aligned.
            cond = pl.lit(True)
            if start is not None:
                cond = cond & (pl.col(date_column) >= pl.lit(start))
            if end is not None:
                cond = cond & (pl.col(date_column) <= pl.lit(end))
            mask = self.index.select(cond.alias("mask"))["mask"]
            new_index = self.index.filter(mask)
            new_returns = self.returns.filter(mask)
            new_benchmark = self.benchmark.filter(mask) if self.benchmark is not None else None
        else:
            if start is not None and not isinstance(start, int):
                raise TypeError(f"start must be an integer, got {type(start).__name__}.")  # noqa: TRY003
            if end is not None and not isinstance(end, int):
                raise TypeError(f"end must be an integer, got {type(end).__name__}.")  # noqa: TRY003
            row_start = start if start is not None else 0
            # `end` is inclusive, so slice length extends one past it.
            row_end = end + 1 if end is not None else self.index.height
            length = max(0, row_end - row_start)
            new_index = self.index.slice(row_start, length)
            new_returns = self.returns.slice(row_start, length)
            new_benchmark = self.benchmark.slice(row_start, length) if self.benchmark is not None else None

        return Data(returns=new_returns, benchmark=new_benchmark, index=new_index)

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        For temporal (Date/Datetime) indices, computes the mean gap between observations
        and converts to an annualised period count (e.g. ~252 for daily, ~52 for weekly).

        For integer indices (date-free portfolios), falls back to 252 trading days per year
        because integer diffs have no time meaning.
        """
        datetime_col = self.index[self.index.columns[0]]

        if not datetime_col.dtype.is_temporal():
            return 252.0

        sorted_dt = datetime_col.sort()
        diffs = sorted_dt.diff().drop_nulls()
        mean_diff = diffs.mean()

        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        else:  # pragma: no cover # Polars always returns timedelta for temporal diff
            seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

        # Calendar-year seconds divided by the mean observation gap; weekend
        # gaps in daily trading data inflate the mean, yielding ~252.
        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        # Materialize the combined frame once, then yield per-asset columns.
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)