Coverage for src/jquantstats/data.py: 100%
216 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 06:13 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-23 06:13 +0000
1"""Financial returns data container and manipulation utilities."""
3from __future__ import annotations
5import dataclasses
6import warnings
7from collections.abc import Iterator
8from datetime import date, datetime, timedelta
9from typing import TYPE_CHECKING, Literal, cast
11import narwhals as nw
12import polars as pl
14from ._types import NativeFrame, NativeFrameOrScalar
15from .exceptions import BenchmarkAlignmentWarning, MissingDateColumnError, NullsInReturnsError
17if TYPE_CHECKING:
18 from ._plots import DataPlots
19 from ._reports import Reports
20 from ._stats import Stats
21 from ._utils import DataUtils
def _to_polars(df: NativeFrame) -> pl.DataFrame:
    """Return *df* as a polars DataFrame.

    A frame that is already a ``pl.DataFrame`` is handed back untouched (no
    copy). Anything else is wrapped with narwhals in eager-only mode and
    converted to polars.

    Args:
        df (NativeFrame): A polars DataFrame or any other eager frame that
            narwhals can wrap.

    Returns:
        pl.DataFrame: The same object when *df* is already polars, otherwise
        the converted frame.

    """
    return df if isinstance(df, pl.DataFrame) else nw.from_native(df, eager_only=True).to_polars()
31def _apply_null_strategy(
32 dframe: pl.DataFrame,
33 date_col: str,
34 frame_name: str,
35 null_strategy: Literal["raise", "drop", "forward_fill"] | None,
36) -> pl.DataFrame:
37 """Check for nulls in *dframe* and apply *null_strategy*.
39 Args:
40 dframe (pl.DataFrame): DataFrame to inspect. The date column is
41 excluded from the null scan.
42 date_col (str): Name of the column to treat as the date index
43 (excluded from null check).
44 frame_name (str): Descriptive name used in the error message
45 (e.g. ``"returns"``).
46 null_strategy ({"raise", "drop", "forward_fill"} | None): How to
47 handle null values:
49 - ``None`` — leave nulls as-is (nulls will propagate through
50 calculations).
51 - ``"raise"`` — raise `NullsInReturnsError` if any null is found.
52 - ``"drop"`` — drop every row that contains at least one null.
53 - ``"forward_fill"`` — fill each null with the most recent
54 non-null value in the same column.
56 Returns:
57 pl.DataFrame: The original DataFrame (``None`` / ``"raise"``), a
58 filtered DataFrame (``"drop"``), or a filled DataFrame
59 (``"forward_fill"``).
61 Raises:
62 NullsInReturnsError: When *null_strategy* is ``"raise"`` and nulls
63 are present.
65 """
66 if null_strategy is None:
67 return dframe
69 value_cols = [c for c in dframe.columns if c != date_col]
70 null_counts = dframe.select(value_cols).null_count().row(0)
71 cols_with_nulls = [col for col, count in zip(value_cols, null_counts, strict=False) if count > 0]
73 if not cols_with_nulls:
74 return dframe
76 if null_strategy == "raise":
77 raise NullsInReturnsError(frame_name, cols_with_nulls)
78 if null_strategy == "drop":
79 return dframe.drop_nulls(subset=value_cols)
80 # forward_fill
81 return dframe.with_columns([pl.col(c).forward_fill() for c in value_cols])
def interpolate(df: pl.DataFrame) -> pl.DataFrame:
    """Forward-fill interior gaps of every numeric column.

    A numeric column is filled only inside the window that runs from its
    first non-null sample to its last non-null sample. Leading and trailing
    nulls stay null, fully-null numeric columns are passed through, and
    non-numeric columns are never modified.

    Args:
        df: Input frame possibly containing nulls.

    Returns:
        pl.DataFrame: Frame with the same columns in the same order, where
        numeric columns have been forward-filled between their first and last
        valid observations.

    Examples:
        ```python
        import polars as pl
        from jquantstats import interpolate

        df = pl.DataFrame({"a": [None, 1.0, None, 3.0, None], "b": ["x", "y", "z", "w", "v"]})
        result = interpolate(df)
        # a: [None, 1.0, 1.0, 3.0, None]   (leading/trailing nulls untouched)
        # b: ["x", "y", "z", "w", "v"]     (non-numeric unchanged)
        ```

    """
    # Helper row-number column; keep wrapping the name until it is unused.
    row_idx = "__row_idx__"
    while row_idx in df.columns:
        row_idx = f"_{row_idx}_"

    expressions = []
    for name in df.columns:
        series = df[name]
        present = series.is_not_null() if series.dtype.is_numeric() else None

        # Non-numeric columns and numeric columns without any data pass through.
        if present is None or not present.any():
            expressions.append(pl.col(name))
            continue

        # arg_max on a boolean series gives the position of the first True;
        # applied to the reversed series it gives the distance from the end.
        from_start = present.arg_max()
        from_end = present.reverse().arg_max()
        if from_start is None or from_end is None:  # pragma: no cover
            expressions.append(pl.col(name))
            continue

        lower = from_start
        upper = len(series) - 1 - from_end

        position = pl.col(row_idx)
        inside_span = (position >= pl.lit(lower)) & (position <= pl.lit(upper))
        expressions.append(
            pl.when(inside_span)
            .then(pl.col(name).fill_null(strategy="forward"))
            .otherwise(pl.col(name))
            .alias(name)
        )

    # The helper column is not part of `expressions`, so select() drops it again.
    numbered = df.with_columns(pl.int_range(0, df.height).alias(row_idx))
    return numbered.select(expressions)
def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Subtract the risk-free rate from every non-date column of *dframe*.

    Args:
        dframe (pl.DataFrame): Returns data with a date column and one or
            more numeric asset columns.
        rf (float | pl.DataFrame): Risk-free rate to subtract.

            - If a number (``int`` or ``float``): a constant rate subtracted
              from every row. No join is performed, so the rows of *dframe*
              are returned one-to-one and in their original order.
            - If pl.DataFrame: a frame holding *date_col* plus a rate column.
              The first column that is not *date_col* is used as the rate;
              any further columns are ignored. The frames are inner-joined
              on *date_col*, so rows of *dframe* whose date is absent from
              *rf* are dropped.

        date_col (str): Name of the date column in *dframe* (and in *rf*
            when it is a DataFrame).

    Returns:
        pl.DataFrame: *date_col* followed by the asset columns of *dframe*,
        each reduced by the risk-free rate and keeping its original name.

    Raises:
        TypeError: If *rf* is neither a number nor a polars DataFrame.
        IndexError: If a DataFrame-valued *rf* has no column besides
            *date_col*.

    Warns:
        UserWarning: If the rate column of a DataFrame-valued *rf* is not
            already named ``"rf"``.

    """
    # Captured before any join so helper columns can never be mistaken for assets.
    asset_cols = [col for col in dframe.columns if col != date_col]

    if isinstance(rf, int | float):
        # Constant rate: subtract a literal directly. Joining the frame with
        # itself would multiply rows on duplicate dates and drop null dates.
        rate = pl.lit(float(rf))
        return dframe.select([pl.col(date_col)] + [(pl.col(col) - rate).alias(col) for col in asset_cols])

    if not isinstance(rf, pl.DataFrame):
        raise TypeError("rf must be a float or DataFrame")  # noqa: TRY003

    # First non-date column is the rate. With the documented layout (date
    # first, rate second) this is exactly rf.columns[1].
    rate_col = [col for col in rf.columns if col != date_col][0]
    if rate_col != "rf":
        warnings.warn(
            f"Risk-free rate column '{rate_col}' has been renamed to 'rf' for internal alignment.",
            stacklevel=3,
        )

    # Internal name for the rate column; wrapped until it cannot collide with
    # an asset column (e.g. an asset literally called "rf").
    rate_alias = "rf"
    while rate_alias in dframe.columns:
        rate_alias = f"_{rate_alias}_"

    # Keep only the date and rate columns so extra columns in *rf* cannot leak
    # into the result as spurious "assets".
    rf_dframe = rf.select([pl.col(date_col), pl.col(rate_col).alias(rate_alias)])

    # NOTE(review): callers rely on the row order of *dframe* surviving this
    # inner join — confirm against the installed polars version.
    joined = dframe.join(rf_dframe, on=date_col, how="inner")
    return joined.select(
        [pl.col(date_col)] + [(pl.col(col) - pl.col(rate_alias)).alias(col) for col in asset_cols]
    )
@dataclasses.dataclass(frozen=True, slots=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    Provides methods for analyzing and manipulating financial returns data,
    including resampling, truncation, and access to statistical metrics and
    visualizations via the ``stats`` and ``plots`` properties.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets
            as columns.
        benchmark (pl.DataFrame | None): Optional benchmark returns DataFrame.
            Defaults to None.
        index (pl.DataFrame): DataFrame containing the date index for the
            returns data.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate the Data object after initialization.

        Raises:
            ValueError: If the index has fewer than two rows, is not sorted
                in ascending order, or its row count differs from that of
                ``returns`` or (when present) ``benchmark``.

        """
        # You need at least two points
        if self.index.shape[0] < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Check index is monotonically increasing
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != self.index.shape[0]:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @classmethod
    def from_returns(
        cls,
        returns: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from returns and optional benchmark.

        Args:
            returns (NativeFrame): Financial returns data. First column should
                be the date column, remaining columns are asset returns.
            rf (float | NativeFrame): Risk-free rate. Defaults to 0.0 (no
                risk-free rate adjustment).

                - If float: Constant risk-free rate applied to all dates.
                - If NativeFrame: Time-varying risk-free rate with dates
                  matching returns. Rows whose date is missing from the
                  risk-free frame are dropped and a warning is emitted.

            benchmark (NativeFrame | None): Benchmark returns. Defaults to
                None (no benchmark). First column should be the date column,
                remaining columns are benchmark returns. Returns and
                benchmark are aligned on their common dates; if either frame
                contains dates the other lacks, those rows are dropped and a
                `BenchmarkAlignmentWarning` is emitted.
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values in *returns* and *benchmark*.
                Defaults to ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate through all
                  downstream calculations.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value in the same column.

                Note: Affects only Polars ``null`` values (i.e. ``None`` /
                missing entries). IEEE-754 ``NaN`` values are **not** affected
                and continue to propagate as per IEEE-754 semantics.

        Returns:
            Data: Object containing excess returns and benchmark (if any),
            with methods for analysis and visualization through the ``stats``
            and ``plots`` properties.

        Raises:
            MissingDateColumnError: If *date_col* is not a column of
                *returns*, *benchmark*, or a DataFrame-valued *rf*. Raised
                before any joins so the offending frame is named explicitly.
            NullsInReturnsError: If *null_strategy* is ``"raise"`` and the
                data contains null values.
            ValueError: If there are no overlapping dates between returns and
                benchmark.

        Warns:
            BenchmarkAlignmentWarning: If aligning returns and benchmark on
                their common dates drops rows from either frame.
            UserWarning: If aligning returns with a DataFrame-valued *rf*
                drops rows.

        Examples:
            Basic usage:

            ```python
            from jquantstats import Data
            import polars as pl

            returns = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, -0.02, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns)
            ```

            With benchmark and risk-free rate:

            ```python
            benchmark = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Market": [0.005, -0.01, 0.02]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns, benchmark=benchmark, rf=0.0002)
            ```

            Handling nulls automatically:

            ```python
            returns_with_nulls = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, None, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            # Drop rows with nulls (mirrors pandas/QuantStats behaviour)
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="drop")

            # Or forward-fill nulls
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="forward_fill")
            ```

        """
        returns_pl = _to_polars(returns)
        benchmark_pl = _to_polars(benchmark) if benchmark is not None else None
        # accept ints (e.g. rf=0) by coercing to float
        rf_converted: float | pl.DataFrame = float(rf) if isinstance(rf, int | float) else _to_polars(rf)

        # Validate the date column up front so the error names the offending frame.
        frames: list[tuple[str, pl.DataFrame | None]] = [("returns", returns_pl), ("benchmark", benchmark_pl)]
        if isinstance(rf_converted, pl.DataFrame):
            frames.append(("rf", rf_converted))
        for frame_name, frame in frames:
            if frame is not None and date_col not in frame.columns:
                raise MissingDateColumnError(frame_name, column=date_col, available=list(frame.columns))

        returns_pl = _apply_null_strategy(returns_pl, date_col, "returns", null_strategy)
        if benchmark_pl is not None:
            benchmark_pl = _apply_null_strategy(benchmark_pl, date_col, "benchmark", null_strategy)

        if benchmark_pl is not None:
            # Restrict both frames to the dates they have in common.
            joined_dates = returns_pl.join(benchmark_pl, on=date_col, how="inner").select(date_col)
            if joined_dates.is_empty():
                raise ValueError("No overlapping dates between returns and benchmark.")  # noqa: TRY003
            dropped_returns = returns_pl.height - joined_dates.height
            dropped_benchmark = benchmark_pl.height - joined_dates.height
            if dropped_returns > 0 or dropped_benchmark > 0:
                warnings.warn(
                    f"Aligning returns and benchmark on common dates dropped "
                    f"{dropped_returns} of {returns_pl.height} returns row(s) and "
                    f"{dropped_benchmark} of {benchmark_pl.height} benchmark row(s); "
                    f"{joined_dates.height} row(s) remain. Pass a benchmark covering "
                    f"the same dates as the returns to avoid this.",
                    BenchmarkAlignmentWarning,
                    stacklevel=2,
                )
            returns_pl = returns_pl.join(joined_dates, on=date_col, how="inner")
            benchmark_pl = benchmark_pl.join(joined_dates, on=date_col, how="inner")

        # Subtracting a DataFrame-valued rf inner-joins on the date column and
        # can therefore drop rows. The index is taken from the aligned frame
        # (not from returns_pl) so index and returns always have equal height.
        excess_returns_frame = _subtract_risk_free(returns_pl, rf_converted, date_col)
        dropped_by_rf = returns_pl.height - excess_returns_frame.height
        if dropped_by_rf > 0:
            warnings.warn(
                f"Aligning returns with the risk-free rate dropped {dropped_by_rf} of "
                f"{returns_pl.height} returns row(s). Pass a risk-free rate covering "
                f"the same dates as the returns to avoid this.",
                stacklevel=2,
            )
        index = excess_returns_frame.select(date_col)
        excess_returns = excess_returns_frame.drop(date_col)
        excess_benchmark = (
            _subtract_risk_free(benchmark_pl, rf_converted, date_col).drop(date_col)
            if benchmark_pl is not None
            else None
        )

        return cls(returns=excess_returns, benchmark=excess_benchmark, index=index)

    @classmethod
    def from_prices(
        cls,
        prices: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from prices and optional benchmark.

        Converts price levels to returns via percentage change and delegates
        to `from_returns`. The first row of each asset is dropped because no
        prior price is available to compute a return.

        Args:
            prices (NativeFrame): Price-level data. First column should be
                the date column; remaining columns are asset prices.
            rf (float | NativeFrame): Risk-free rate. Forwarded unchanged to
                `from_returns`. Defaults to 0.0 (no risk-free rate
                adjustment).
            benchmark (NativeFrame | None): Benchmark prices. Converted to
                returns in the same way as ``prices`` before being forwarded
                to `from_returns`. Defaults to None (no benchmark).
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values after converting prices to
                returns. Forwarded unchanged to `from_returns`. Defaults to
                ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found in the derived returns.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value.

                Note: Prices that contain nulls will produce null returns via
                ``pct_change()``. If you expect missing price entries, pass
                ``null_strategy="drop"`` or ``null_strategy="forward_fill"``.

        Returns:
            Data: Object containing excess returns derived from the supplied
            prices, with methods for analysis and visualization through the
            ``stats`` and ``plots`` properties.

        Raises:
            MissingDateColumnError: If *date_col* is not a column of *prices*
                or *benchmark*. Raised before returns are derived so the
                offending frame is named explicitly.

        Examples:
            ```python
            from jquantstats import Data
            import polars as pl

            prices = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [100.0, 101.0, 99.0]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_prices(prices=prices)
            ```

        """
        prices_pl = _to_polars(prices)
        if date_col not in prices_pl.columns:
            raise MissingDateColumnError("prices", column=date_col, available=list(prices_pl.columns))
        asset_cols = [c for c in prices_pl.columns if c != date_col]
        # slice(1) discards the first row, whose pct_change has no predecessor.
        returns_pl = prices_pl.with_columns([pl.col(c).pct_change().alias(c) for c in asset_cols]).slice(1)

        benchmark_returns: NativeFrame | None = None
        if benchmark is not None:
            benchmark_pl = _to_polars(benchmark)
            if date_col not in benchmark_pl.columns:
                raise MissingDateColumnError("benchmark", column=date_col, available=list(benchmark_pl.columns))
            bench_cols = [c for c in benchmark_pl.columns if c != date_col]
            benchmark_returns = benchmark_pl.with_columns([pl.col(c).pct_change().alias(c) for c in bench_cols]).slice(
                1
            )

        return cls.from_returns(
            returns=returns_pl,
            rf=rf,
            benchmark=benchmark_returns,
            date_col=date_col,
            null_strategy=null_strategy,
        )

    def __repr__(self) -> str:
        """Return a string representation of the Data object."""
        rows = len(self.index)
        date_cols = self.date_col
        if date_cols:
            date_column = date_cols[0]
            start = self.index[date_column].min()
            end = self.index[date_column].max()
            return f"Data(assets={self.assets}, rows={rows}, start={start!s}, end={end!s})"
        return f"Data(assets={self.assets}, rows={rows})"  # pragma: no cover  # __post_init__ requires ≥1 index column

    @property
    def plots(self) -> DataPlots:
        """Provides access to visualization methods for the financial data.

        Returns:
            DataPlots: An instance of the DataPlots class initialized with this data.

        """
        # Imported here rather than at module level (only a TYPE_CHECKING import exists there).
        from ._plots import DataPlots

        return DataPlots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        from ._reports import Reports

        return Reports(self)

    @property
    def utils(self) -> DataUtils:
        """Provides access to utility transforms and conversions for the financial data.

        Returns:
            DataUtils: An instance of the DataUtils class initialized with this data.

        """
        from ._utils import DataUtils

        return DataUtils(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
            the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: List of all asset column names from both returns and benchmark
            (if available).

        """
        if self.benchmark is not None:
            return list(self.returns.columns) + list(self.benchmark.columns)
        return list(self.returns.columns)

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
            (if available) combined horizontally.

        """
        if self.benchmark is None:
            return pl.concat([self.index, self.returns], how="horizontal")
        return pl.concat([self.index, self.returns, self.benchmark], how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resample returns and benchmark to a different frequency.

        Returns within each window are compounded, i.e. combined as
        ``prod(1 + r) - 1``.

        Args:
            every (str): Resampling frequency (e.g., ``'1mo'``, ``'1y'``).
                Defaults to ``'1mo'``.

        Returns:
            Data: Resampled data at the requested frequency.

        """
        date_column = self.index.columns[0]

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            """Resample a single DataFrame to the target frequency using compound returns."""
            dframe = self.index.hstack(dframe)  # Add the date column for resampling

            return dframe.group_by_dynamic(
                index_column=date_column, every=every, period=every, closed="right", label="right"
            ).agg([((pl.col(col) + 1.0).product() - 1.0).alias(col) for col in dframe.columns if col != date_column])

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        resampled_index = resampled_returns.select(date_column)

        return Data(
            returns=resampled_returns.drop(date_column),
            benchmark=resampled_benchmark.drop(date_column) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def describe(self) -> pl.DataFrame:
        """Return a tidy summary of shape, date range and asset names.

        Returns:
            pl.DataFrame: One row per asset with columns: asset, start, end,
            rows, has_benchmark.

        """
        date_column = self.date_col[0]
        start = self.index[date_column].min()
        end = self.index[date_column].max()
        rows = len(self.index)
        n_assets = len(self.returns.columns)
        return pl.DataFrame(
            {
                "asset": self.returns.columns,
                "start": [start] * n_assets,
                "end": [end] * n_assets,
                "rows": [rows] * n_assets,
                "has_benchmark": [self.benchmark is not None] * n_assets,
            }
        )

    def copy(self) -> Data:
        """Create a deep copy of the Data object.

        Returns:
            Data: A new Data object with copies of the returns and benchmark.

        """
        if self.benchmark is not None:
            return Data(returns=self.returns.clone(), benchmark=self.benchmark.clone(), index=self.index.clone())
        return Data(returns=self.returns.clone(), index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the combined data.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the combined data.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    def truncate(
        self,
        start: date | datetime | str | int | None = None,
        end: date | datetime | str | int | None = None,
    ) -> Data:
        """Return a new Data object truncated to the inclusive [start, end] range.

        When the index is temporal (Date/Datetime), truncation is performed by
        comparing the date column against ``start`` and ``end`` values.

        When the index is integer-based, row slicing is used instead, and
        ``start`` and ``end`` must be non-negative integers. Passing
        non-integer bounds to an integer-indexed Data raises `TypeError`.

        Args:
            start: Optional lower bound (inclusive). A date/datetime value
                when the index is temporal; a non-negative `int` row
                index when the data has no temporal index.
            end: Optional upper bound (inclusive). Same type rules as
                ``start``.

        Returns:
            Data: A new Data object filtered to the specified range.

        Raises:
            TypeError: When the index is not temporal and a non-integer bound
                is supplied.

        """
        date_column = self.index.columns[0]
        is_temporal = self.index[date_column].dtype.is_temporal()

        if is_temporal:
            cond = pl.lit(True)
            if start is not None:
                cond = cond & (pl.col(date_column) >= pl.lit(start))
            if end is not None:
                cond = cond & (pl.col(date_column) <= pl.lit(end))
            # with_columns (not select) so the mask has one entry per row even
            # when no bound is given and `cond` is a bare literal.
            mask = self.index.with_columns(cond.alias("mask"))["mask"]
            new_index = self.index.filter(mask)
            new_returns = self.returns.filter(mask)
            new_benchmark = self.benchmark.filter(mask) if self.benchmark is not None else None
        else:
            if start is not None and not isinstance(start, int):
                raise TypeError(f"start must be an integer, got {type(start).__name__}.")  # noqa: TRY003
            if end is not None and not isinstance(end, int):
                raise TypeError(f"end must be an integer, got {type(end).__name__}.")  # noqa: TRY003
            row_start = start if start is not None else 0
            # `end` is inclusive, slice lengths are exclusive: hence the +1.
            row_end = end + 1 if end is not None else self.index.height
            length = max(0, row_end - row_start)
            new_index = self.index.slice(row_start, length)
            new_returns = self.returns.slice(row_start, length)
            new_benchmark = self.benchmark.slice(row_start, length) if self.benchmark is not None else None

        return Data(returns=new_returns, benchmark=new_benchmark, index=new_index)

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        For temporal (Date/Datetime) indices, computes the mean gap between observations
        and divides the number of seconds in a 365-day year by that gap.

        For integer indices (date-free portfolios), falls back to 252 trading days per year
        because integer diffs have no time meaning.
        """
        datetime_col = self.index[self.index.columns[0]]

        if not datetime_col.dtype.is_temporal():
            return 252.0

        sorted_dt = datetime_col.sort()
        diffs = sorted_dt.diff().drop_nulls()
        mean_diff = diffs.mean()

        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        else:  # pragma: no cover  # Polars always returns timedelta for temporal diff
            seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)