Coverage for src / jquantstats / data.py: 100%
204 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-07 15:52 +0000
1"""Financial returns data container and manipulation utilities."""
3from __future__ import annotations
5import dataclasses
6import warnings
7from collections.abc import Iterator
8from datetime import date, datetime, timedelta
9from typing import TYPE_CHECKING, Literal, cast
11import narwhals as nw
12import polars as pl
14from ._types import NativeFrame, NativeFrameOrScalar
15from .exceptions import NullsInReturnsError
17if TYPE_CHECKING:
18 from ._plots import DataPlots
19 from ._reports import Reports
20 from ._stats import Stats
21 from ._utils import DataUtils
def _to_polars(df: NativeFrame) -> pl.DataFrame:
    """Coerce any narwhals-compatible DataFrame into a ``pl.DataFrame``.

    Frames that are already polars are returned unchanged; everything else is
    routed through narwhals' eager conversion layer.
    """
    # Fast path: native polars frames need no conversion.
    if not isinstance(df, pl.DataFrame):
        df = nw.from_native(df, eager_only=True).to_polars()
    return df
def _apply_null_strategy(
    dframe: pl.DataFrame,
    date_col: str,
    frame_name: str,
    null_strategy: Literal["raise", "drop", "forward_fill"] | None,
) -> pl.DataFrame:
    """Scan *dframe* for nulls and apply the requested *null_strategy*.

    Args:
        dframe (pl.DataFrame): DataFrame to inspect. The date column is
            excluded from the null scan.
        date_col (str): Name of the column to treat as the date index
            (excluded from null check).
        frame_name (str): Descriptive name used in the error message
            (e.g. ``"returns"``).
        null_strategy ({"raise", "drop", "forward_fill"} | None): How to
            handle null values:

            - ``None`` — leave nulls as-is (nulls will propagate through
              calculations).
            - ``"raise"`` — raise `NullsInReturnsError` if any null is found.
            - ``"drop"`` — drop every row that contains at least one null.
            - ``"forward_fill"`` — fill each null with the most recent
              non-null value in the same column.

    Returns:
        pl.DataFrame: The original DataFrame (``None`` / ``"raise"``), a
            filtered DataFrame (``"drop"``), or a filled DataFrame
            (``"forward_fill"``).

    Raises:
        NullsInReturnsError: When *null_strategy* is ``"raise"`` and nulls
            are present.

    """
    # No strategy requested: hand the frame back untouched.
    if null_strategy is None:
        return dframe

    value_cols = [name for name in dframe.columns if name != date_col]
    counts = dframe.select(value_cols).null_count().row(0)
    affected = [name for name, n_nulls in zip(value_cols, counts, strict=False) if n_nulls > 0]

    # A clean frame needs no treatment regardless of the strategy.
    if not affected:
        return dframe

    if null_strategy == "raise":
        raise NullsInReturnsError(frame_name, affected)

    if null_strategy == "drop":
        return dframe.drop_nulls(subset=value_cols)

    # Remaining case: "forward_fill" — fill column-wise from prior values.
    return dframe.with_columns([pl.col(name).forward_fill() for name in value_cols])
def interpolate(df: pl.DataFrame) -> pl.DataFrame:
    """Forward-fill numeric columns only between first and last non-null values.

    For each numeric column, forward-fill is applied strictly within the span
    bounded by its first and last non-null samples. Values outside this span
    are left as-is (including leading/trailing nulls). Non-numeric columns are
    returned unchanged.

    Args:
        df: Input frame possibly containing nulls.

    Returns:
        pl.DataFrame: Frame where numeric columns have been interior-forward-
            filled; schema and dtypes of the original columns are preserved.

    Examples:
        ```python
        import polars as pl
        from jquantstats import interpolate

        df = pl.DataFrame({"a": [None, 1.0, None, 3.0, None], "b": ["x", "y", "z", "w", "v"]})
        result = interpolate(df)
        # a: [None, 1.0, 1.0, 3.0, None] (leading/trailing nulls untouched)
        # b: ["x", "y", "z", "w", "v"] (non-numeric unchanged)
        ```

    """
    # Choose a temp column name guaranteed not to collide with any user column.
    tmp_col = "__row_idx__"
    while tmp_col in df.columns:
        tmp_col = f"_{tmp_col}_"

    # One expression per output column; built eagerly from per-column stats.
    out = []

    for col in df.columns:
        s = df[col]
        if s.dtype.is_numeric():
            non_null_mask = s.is_not_null()
            if non_null_mask.any():
                # arg_max on a boolean mask yields the index of the first True,
                # so scanning forward/reversed locates the first/last non-null.
                _fwd = non_null_mask.arg_max()
                _rev = non_null_mask.reverse().arg_max()
                if _fwd is None or _rev is None:  # pragma: no cover
                    out.append(pl.col(col))
                    continue
                first_valid_idx = _fwd
                last_valid_idx = len(s) - 1 - _rev
            else:
                # Column is entirely null: nothing to fill, pass through as-is.
                out.append(pl.col(col))
                continue

            # Restrict the forward-fill to rows inside [first, last] non-null;
            # outside the span the original (null) value is kept.
            mask = (pl.col(tmp_col) >= pl.lit(first_valid_idx)) & (pl.col(tmp_col) <= pl.lit(last_valid_idx))
            filled_col = pl.when(mask).then(pl.col(col).fill_null(strategy="forward")).otherwise(pl.col(col)).alias(col)
            out.append(filled_col)
        else:
            # Non-numeric columns are returned unchanged.
            out.append(pl.col(col))

    # Attach the row-index helper column, evaluate the expressions, and select
    # only the original columns (tmp_col is absent from `out`, so it is dropped).
    return df.with_columns(pl.int_range(0, df.height).alias(tmp_col)).select(out)
def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Subtract the risk-free rate from all numeric columns in the DataFrame.

    Args:
        dframe (pl.DataFrame): DataFrame containing returns data with a date
            column and one or more numeric columns representing asset returns.
        rf (float | pl.DataFrame): Risk-free rate to subtract from returns.

            - If float: A constant risk-free rate applied to all dates.
            - If pl.DataFrame: A DataFrame with a date column and a second
              column containing time-varying risk-free rates.

        date_col (str): Name of the date column in both DataFrames for
            joining when rf is a DataFrame.

    Returns:
        pl.DataFrame: DataFrame with the risk-free rate subtracted from all
            numeric columns, preserving the original column names.

    """
    if isinstance(rf, float):
        # Constant rate: broadcast it as a literal "rf" column over all dates.
        rf_frame = dframe.select([pl.col(date_col), pl.lit(rf).alias("rf")])
    elif isinstance(rf, pl.DataFrame):
        # Time-varying rate: normalise the value column name to "rf",
        # warning the caller when a rename is actually performed.
        needs_rename = rf.columns[1] != "rf"
        if needs_rename:
            warnings.warn(
                f"Risk-free rate column '{rf.columns[1]}' has been renamed to 'rf' for internal alignment.",
                stacklevel=3,
            )
            rf_frame = rf.rename({rf.columns[1]: "rf"})
        else:
            rf_frame = rf
    else:
        raise TypeError("rf must be a float or DataFrame")  # noqa: TRY003

    # Inner join keeps only dates for which a risk-free rate is available.
    joined = dframe.join(rf_frame, on=date_col, how="inner")
    excess_exprs = [
        (pl.col(name) - pl.col("rf")).alias(name) for name in joined.columns if name not in {date_col, "rf"}
    ]
    return joined.select([pl.col(date_col), *excess_exprs])
@dataclasses.dataclass(frozen=True, slots=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    Provides methods for analyzing and manipulating financial returns data,
    including resampling, truncation, and access to statistical metrics and
    visualizations via the ``stats`` and ``plots`` properties.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets
            as columns.
        benchmark (pl.DataFrame | None): Optional benchmark returns DataFrame.
            Defaults to None.
        index (pl.DataFrame): DataFrame containing the date index for the
            returns data.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

    def __post_init__(self) -> None:
        """Validate the Data object after initialization."""
        # You need at least two points
        if self.index.shape[0] < 2:
            raise ValueError("Index must contain at least two timestamps.")  # noqa: TRY003

        # Check index is monotonically increasing
        datetime_col = self.index[self.index.columns[0]]
        if not datetime_col.is_sorted():
            raise ValueError("Index must be monotonically increasing.")  # noqa: TRY003

        # Check row count matches returns
        if self.returns.shape[0] != self.index.shape[0]:
            raise ValueError("Returns and index must have the same number of rows.")  # noqa: TRY003

        # Check row count matches benchmark (if provided)
        if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:
            raise ValueError("Benchmark and index must have the same number of rows.")  # noqa: TRY003

    @classmethod
    def from_returns(
        cls,
        returns: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from returns and optional benchmark.

        Args:
            returns (NativeFrame): Financial returns data. First column should
                be the date column, remaining columns are asset returns.
            rf (float | NativeFrame): Risk-free rate. Defaults to 0.0 (no
                risk-free rate adjustment).

                - If float: Constant risk-free rate applied to all dates.
                - If NativeFrame: Time-varying risk-free rate with dates
                  matching returns.

            benchmark (NativeFrame | None): Benchmark returns. Defaults to
                None (no benchmark). First column should be the date column,
                remaining columns are benchmark returns.
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values in *returns* and *benchmark*.
                Defaults to ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate through all
                  downstream calculations.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value in the same column.

                Note: Affects only Polars ``null`` values (i.e. ``None`` /
                missing entries). IEEE-754 ``NaN`` values are **not** affected
                and continue to propagate as per IEEE-754 semantics.

        Returns:
            Data: Object containing excess returns and benchmark (if any),
                with methods for analysis and visualization through the ``stats``
                and ``plots`` properties.

        Raises:
            NullsInReturnsError: If *null_strategy* is ``"raise"`` and the
                data contains null values.
            ValueError: If there are no overlapping dates between returns and
                benchmark.

        Examples:
            Basic usage:

            ```python
            from jquantstats import Data
            import polars as pl

            returns = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, -0.02, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns)
            ```

            With benchmark and risk-free rate:

            ```python
            benchmark = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Market": [0.005, -0.01, 0.02]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_returns(returns=returns, benchmark=benchmark, rf=0.0002)
            ```

            Handling nulls automatically:

            ```python
            returns_with_nulls = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [0.01, None, 0.03]
            }).with_columns(pl.col("Date").str.to_date())

            # Drop rows with nulls (mirrors pandas/QuantStats behaviour)
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="drop")

            # Or forward-fill nulls
            data = Data.from_returns(returns=returns_with_nulls, null_strategy="forward_fill")
            ```

        """
        # Normalise both frames to polars before any further processing.
        returns_pl = _to_polars(returns)
        benchmark_pl = _to_polars(benchmark) if benchmark is not None else None
        rf_converted: float | pl.DataFrame
        if isinstance(rf, pl.DataFrame) or (not isinstance(rf, float) and not isinstance(rf, int)):
            rf_converted = _to_polars(rf)
        else:
            rf_converted = rf  # int is not float/DataFrame: _subtract_risk_free raises TypeError

        # Null handling happens before alignment so that dropped rows also
        # shrink the shared date index.
        returns_pl = _apply_null_strategy(returns_pl, date_col, "returns", null_strategy)
        if benchmark_pl is not None:
            benchmark_pl = _apply_null_strategy(benchmark_pl, date_col, "benchmark", null_strategy)

        if benchmark_pl is not None:
            # Align returns and benchmark on their common dates (inner join).
            joined_dates = returns_pl.join(benchmark_pl, on=date_col, how="inner").select(date_col)
            if joined_dates.is_empty():
                raise ValueError("No overlapping dates between returns and benchmark.")  # noqa: TRY003
            returns_pl = returns_pl.join(joined_dates, on=date_col, how="inner")
            benchmark_pl = benchmark_pl.join(joined_dates, on=date_col, how="inner")

        # The stored frames are excess returns: risk-free rate already removed.
        index = returns_pl.select(date_col)
        excess_returns = _subtract_risk_free(returns_pl, rf_converted, date_col).drop(date_col)
        excess_benchmark = (
            _subtract_risk_free(benchmark_pl, rf_converted, date_col).drop(date_col)
            if benchmark_pl is not None
            else None
        )

        return cls(returns=excess_returns, benchmark=excess_benchmark, index=index)

    @classmethod
    def from_prices(
        cls,
        prices: NativeFrame,
        rf: NativeFrameOrScalar = 0.0,
        benchmark: NativeFrame | None = None,
        date_col: str = "Date",
        null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,
    ) -> Data:
        """Create a Data object from prices and optional benchmark.

        Converts price levels to returns via percentage change and delegates
        to `from_returns`. The first row of each asset is dropped because no
        prior price is available to compute a return.

        Args:
            prices (NativeFrame): Price-level data. First column should be
                the date column; remaining columns are asset prices.
            rf (float | NativeFrame): Risk-free rate. Forwarded unchanged to
                `from_returns`. Defaults to 0.0 (no risk-free rate
                adjustment).
            benchmark (NativeFrame | None): Benchmark prices. Converted to
                returns in the same way as ``prices`` before being forwarded
                to `from_returns`. Defaults to None (no benchmark).
            date_col (str): Name of the date column in the DataFrames.
                Defaults to ``"Date"``.
            null_strategy ({"raise", "drop", "forward_fill"} | None): How to
                handle ``null`` (missing) values after converting prices to
                returns. Forwarded unchanged to `from_returns`. Defaults to
                ``None`` (nulls propagate through calculations).

                - ``None`` — no null checking; nulls propagate.
                - ``"raise"`` — raise `NullsInReturnsError` if any null is
                  found in the derived returns.
                - ``"drop"`` — silently drop every row that contains at least
                  one null.
                - ``"forward_fill"`` — fill each null with the most recent
                  non-null value.

                Note: Prices that contain nulls will produce null returns via
                ``pct_change()``. If you expect missing price entries, pass
                ``null_strategy="drop"`` or ``null_strategy="forward_fill"``.

        Returns:
            Data: Object containing excess returns derived from the supplied
                prices, with methods for analysis and visualization through the
                ``stats`` and ``plots`` properties.

        Examples:
            ```python
            from jquantstats import Data
            import polars as pl

            prices = pl.DataFrame({
                "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],
                "Asset1": [100.0, 101.0, 99.0]
            }).with_columns(pl.col("Date").str.to_date())

            data = Data.from_prices(prices=prices)
            ```

        """
        prices_pl = _to_polars(prices)
        asset_cols = [c for c in prices_pl.columns if c != date_col]
        # pct_change leaves the first row null; slice(1) drops it outright.
        returns_pl = prices_pl.with_columns([pl.col(c).pct_change().alias(c) for c in asset_cols]).slice(1)

        benchmark_returns: NativeFrame | None = None
        if benchmark is not None:
            benchmark_pl = _to_polars(benchmark)
            bench_cols = [c for c in benchmark_pl.columns if c != date_col]
            benchmark_returns = benchmark_pl.with_columns([pl.col(c).pct_change().alias(c) for c in bench_cols]).slice(
                1
            )

        # All alignment, rf subtraction and null handling is delegated.
        return cls.from_returns(
            returns=returns_pl,
            rf=rf,
            benchmark=benchmark_returns,
            date_col=date_col,
            null_strategy=null_strategy,
        )

    def __repr__(self) -> str:
        """Return a string representation of the Data object."""
        rows = len(self.index)
        date_cols = self.date_col
        if date_cols:
            date_column = date_cols[0]
            start = self.index[date_column].min()
            end = self.index[date_column].max()
            return f"Data(assets={self.assets}, rows={rows}, start={start}, end={end})"
        return f"Data(assets={self.assets}, rows={rows})"  # pragma: no cover # __post_init__ requires ≥1 index column

    @property
    def plots(self) -> DataPlots:
        """Provides access to visualization methods for the financial data.

        Returns:
            DataPlots: An instance of the DataPlots class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._plots import DataPlots

        return DataPlots(self)

    @property
    def stats(self) -> Stats:
        """Provides access to statistical analysis methods for the financial data.

        Returns:
            Stats: An instance of the Stats class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._stats import Stats

        return Stats(self)

    @property
    def reports(self) -> Reports:
        """Provides access to reporting methods for the financial data.

        Returns:
            Reports: An instance of the Reports class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._reports import Reports

        return Reports(self)

    @property
    def utils(self) -> DataUtils:
        """Provides access to utility transforms and conversions for the financial data.

        Returns:
            DataUtils: An instance of the DataUtils class initialized with this data.

        """
        # Imported lazily to avoid a circular import at module load time.
        from ._utils import DataUtils

        return DataUtils(self)

    @property
    def date_col(self) -> list[str]:
        """Return the column names of the index DataFrame.

        Returns:
            list[str]: List of column names in the index DataFrame, typically containing
                the date column name.

        """
        return list(self.index.columns)

    @property
    def assets(self) -> list[str]:
        """Return the combined list of asset column names from returns and benchmark.

        Returns:
            list[str]: List of all asset column names from both returns and benchmark
                (if available).

        """
        if self.benchmark is not None:
            return list(self.returns.columns) + list(self.benchmark.columns)
        return list(self.returns.columns)

    @property
    def all(self) -> pl.DataFrame:
        """Combine index, returns, and benchmark data into a single DataFrame.

        This property provides a convenient way to access all data in a single DataFrame,
        which is useful for analysis and visualization.

        Returns:
            pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data
                (if available) combined horizontally.

        """
        # Horizontal concat is safe: __post_init__ guarantees equal row counts.
        if self.benchmark is None:
            return pl.concat([self.index, self.returns], how="horizontal")
        else:
            return pl.concat([self.index, self.returns, self.benchmark], how="horizontal")

    def resample(self, every: str = "1mo") -> Data:
        """Resample returns and benchmark to a different frequency.

        Args:
            every (str): Resampling frequency (e.g., ``'1mo'``, ``'1y'``).
                Defaults to ``'1mo'``.

        Returns:
            Data: Resampled data at the requested frequency.

        """

        def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:
            """Resample a single DataFrame to the target frequency using compound returns."""
            dframe = self.index.hstack(dframe)  # Add the date column for resampling
            # Compound the per-period returns inside each bucket: prod(1 + r) - 1.
            return dframe.group_by_dynamic(
                index_column=self.index.columns[0], every=every, period=every, closed="right", label="right"
            ).agg(
                [
                    ((pl.col(col) + 1.0).product() - 1.0).alias(col)
                    for col in dframe.columns
                    if col != self.index.columns[0]
                ]
            )

        resampled_returns = resample_frame(self.returns)
        resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None
        # Both frames are grouped on the same index, so the returns' date
        # column serves as the new shared index.
        resampled_index = resampled_returns.select(self.index.columns[0])

        return Data(
            returns=resampled_returns.drop(self.index.columns[0]),
            benchmark=resampled_benchmark.drop(self.index.columns[0]) if resampled_benchmark is not None else None,
            index=resampled_index,
        )

    def describe(self) -> pl.DataFrame:
        """Return a tidy summary of shape, date range and asset names.

        Returns:
            pl.DataFrame: One row per asset with columns: asset, start, end,
                rows, has_benchmark.

        """
        date_column = self.date_col[0]
        start = self.index[date_column].min()
        end = self.index[date_column].max()
        rows = len(self.index)
        # One summary row per asset; scalar stats are repeated per row.
        return pl.DataFrame(
            {
                "asset": self.returns.columns,
                "start": [start] * len(self.returns.columns),
                "end": [end] * len(self.returns.columns),
                "rows": [rows] * len(self.returns.columns),
                "has_benchmark": [self.benchmark is not None] * len(self.returns.columns),
            }
        )

    def copy(self) -> Data:
        """Create a deep copy of the Data object.

        Returns:
            Data: A new Data object with copies of the returns and benchmark.

        """
        if self.benchmark is not None:
            return Data(returns=self.returns.clone(), benchmark=self.benchmark.clone(), index=self.index.clone())
        return Data(returns=self.returns.clone(), index=self.index.clone())

    def head(self, n: int = 5) -> Data:
        """Return the first n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the first n rows of the combined data.

        """
        benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None
        return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

    def tail(self, n: int = 5) -> Data:
        """Return the last n rows of the combined returns and benchmark data.

        Args:
            n (int, optional): Number of rows to return. Defaults to 5.

        Returns:
            Data: A new Data object containing the last n rows of the combined data.

        """
        benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None
        return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

    def truncate(
        self,
        start: date | datetime | str | int | None = None,
        end: date | datetime | str | int | None = None,
    ) -> Data:
        """Return a new Data object truncated to the inclusive [start, end] range.

        When the index is temporal (Date/Datetime), truncation is performed by
        comparing the date column against ``start`` and ``end`` values.

        When the index is integer-based, row slicing is used instead, and
        ``start`` and ``end`` must be non-negative integers. Passing
        non-integer bounds to an integer-indexed Data raises `TypeError`.

        Args:
            start: Optional lower bound (inclusive). A date/datetime value
                when the index is temporal; a non-negative `int` row
                index when the data has no temporal index.
            end: Optional upper bound (inclusive). Same type rules as
                ``start``.

        Returns:
            Data: A new Data object filtered to the specified range.

        Raises:
            TypeError: When the index is not temporal and a non-integer bound
                is supplied.

        """
        date_column = self.index.columns[0]
        is_temporal = self.index[date_column].dtype.is_temporal()

        if is_temporal:
            # Build one boolean mask from the optional bounds, then apply it
            # identically to index, returns and benchmark to keep them aligned.
            cond = pl.lit(True)
            if start is not None:
                cond = cond & (pl.col(date_column) >= pl.lit(start))
            if end is not None:
                cond = cond & (pl.col(date_column) <= pl.lit(end))
            mask = self.index.select(cond.alias("mask"))["mask"]
            new_index = self.index.filter(mask)
            new_returns = self.returns.filter(mask)
            new_benchmark = self.benchmark.filter(mask) if self.benchmark is not None else None
        else:
            if start is not None and not isinstance(start, int):
                raise TypeError(f"start must be an integer, got {type(start).__name__}.")  # noqa: TRY003
            if end is not None and not isinstance(end, int):
                raise TypeError(f"end must be an integer, got {type(end).__name__}.")  # noqa: TRY003
            # Inclusive end bound, hence the +1; clamp the length at zero so
            # an inverted range yields an empty slice rather than an error.
            row_start = start if start is not None else 0
            row_end = end + 1 if end is not None else self.index.height
            length = max(0, row_end - row_start)
            new_index = self.index.slice(row_start, length)
            new_returns = self.returns.slice(row_start, length)
            new_benchmark = self.benchmark.slice(row_start, length) if self.benchmark is not None else None

        return Data(returns=new_returns, benchmark=new_benchmark, index=new_index)

    @property
    def _periods_per_year(self) -> float:
        """Estimate the number of periods per year based on average frequency in the index.

        For temporal (Date/Datetime) indices, computes the mean gap between observations
        and converts to an annualised period count (e.g. ~252 for daily, ~52 for weekly).

        For integer indices (date-free portfolios), falls back to 252 trading days per year
        because integer diffs have no time meaning.
        """
        datetime_col = self.index[self.index.columns[0]]

        if not datetime_col.dtype.is_temporal():
            return 252.0

        sorted_dt = datetime_col.sort()
        diffs = sorted_dt.diff().drop_nulls()
        mean_diff = diffs.mean()

        if isinstance(mean_diff, timedelta):
            seconds = mean_diff.total_seconds()
        else:  # pragma: no cover # Polars always returns timedelta for temporal diff
            seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

        # Seconds in a (non-leap) year divided by the mean sample spacing.
        return (365 * 24 * 60 * 60) / seconds

    def items(self) -> Iterator[tuple[str, pl.Series]]:
        """Iterate over all assets and their corresponding data series.

        This method provides a convenient way to iterate over all assets in the data,
        yielding each asset name and its corresponding data series.

        Yields:
            tuple[str, pl.Series]: A tuple containing the asset name and its data series.

        """
        matrix = self.all

        for col in self.assets:
            yield col, matrix.get_column(col)