Coverage for src/jquantstats/data.py: 100%

1"""Financial returns data container and manipulation utilities."""

3from __future__ import annotations

5import dataclasses

6import warnings

7from collections.abc import Iterator

8from datetime import date, datetime, timedelta

9from typing import TYPE_CHECKING, Literal, cast

11import narwhals as nw

12import polars as pl

14from ._types import NativeFrame, NativeFrameOrScalar

15from .exceptions import NullsInReturnsError

17if TYPE_CHECKING:

18 from ._plots import DataPlots

19 from ._reports import Reports

20 from ._stats import Stats

21 from ._utils import DataUtils

def _to_polars(df: NativeFrame) -> pl.DataFrame:
    """Return *df* as a polars DataFrame.

    A frame that already is a ``pl.DataFrame`` is handed back untouched; any
    other input is wrapped by narwhals (eager frames only) and converted.
    """
    already_polars = isinstance(df, pl.DataFrame)
    return df if already_polars else nw.from_native(df, eager_only=True).to_polars()

31def _apply_null_strategy(

32 dframe: pl.DataFrame,

33 date_col: str,

34 frame_name: str,

35 null_strategy: Literal["raise", "drop", "forward_fill"] | None,

36) -> pl.DataFrame:

37 """Check for nulls in *dframe* and apply *null_strategy*.

39 Args:

40 dframe (pl.DataFrame): DataFrame to inspect. The date column is

41 excluded from the null scan.

42 date_col (str): Name of the column to treat as the date index

43 (excluded from null check).

44 frame_name (str): Descriptive name used in the error message

45 (e.g. ``"returns"``).

46 null_strategy ({"raise", "drop", "forward_fill"} | None): How to

47 handle null values:

49 - ``None`` — leave nulls as-is (nulls will propagate through

50 calculations).

51 - ``"raise"`` — raise `NullsInReturnsError` if any null is found.

52 - ``"drop"`` — drop every row that contains at least one null.

53 - ``"forward_fill"`` — fill each null with the most recent

54 non-null value in the same column.

56 Returns:

57 pl.DataFrame: The original DataFrame (``None`` / ``"raise"``), a

58 filtered DataFrame (``"drop"``), or a filled DataFrame

59 (``"forward_fill"``).

61 Raises:

62 NullsInReturnsError: When *null_strategy* is ``"raise"`` and nulls

63 are present.

65 """

66 if null_strategy is None:

67 return dframe

69 value_cols = [c for c in dframe.columns if c != date_col]

70 null_counts = dframe.select(value_cols).null_count().row(0)

71 cols_with_nulls = [col for col, count in zip(value_cols, null_counts, strict=False) if count > 0]

73 if not cols_with_nulls:

74 return dframe

76 if null_strategy == "raise":

77 raise NullsInReturnsError(frame_name, cols_with_nulls)

78 if null_strategy == "drop":

79 return dframe.drop_nulls(subset=value_cols)

80 # forward_fill

81 return dframe.with_columns([pl.col(c).forward_fill() for c in value_cols])

def interpolate(df: pl.DataFrame) -> pl.DataFrame:
    """Forward-fill interior gaps of every numeric column.

    For a numeric column the fill is restricted to the rows lying between its
    first and its last non-null observation (both inclusive). Leading and
    trailing nulls are therefore kept, as are numeric columns that are entirely
    null. Non-numeric columns pass through unchanged.

    Args:
        df: Input frame possibly containing nulls.

    Returns:
        pl.DataFrame: Frame with the same columns, in the same order, where
        numeric columns have had their interior nulls forward-filled.

    Examples:
        ```python
        import polars as pl
        from jquantstats import interpolate

        df = pl.DataFrame({"a": [None, 1.0, None, 3.0, None], "b": ["x", "y", "z", "w", "v"]})
        result = interpolate(df)
        # a: [None, 1.0, 1.0, 3.0, None]   (leading/trailing nulls untouched)
        # b: ["x", "y", "z", "w", "v"]     (non-numeric unchanged)
        ```

    """
    # Helper row-number column; keep wrapping the name until it is unused.
    idx_name = "__row_idx__"
    while idx_name in df.columns:
        idx_name = f"_{idx_name}_"

    def _column_expr(name: str) -> pl.Expr:
        """Build the output expression for a single column."""
        series = df[name]
        untouched = pl.col(name)
        if not series.dtype.is_numeric():
            return untouched

        present = series.is_not_null()
        if not present.any():
            return untouched

        head = present.arg_max()
        tail = present.reverse().arg_max()
        if head is None or tail is None:  # pragma: no cover
            return untouched

        # ``tail`` is counted from the end, so convert it to a forward position.
        first_pos = head
        last_pos = len(series) - 1 - tail

        inside = (pl.col(idx_name) >= pl.lit(first_pos)) & (pl.col(idx_name) <= pl.lit(last_pos))
        return pl.when(inside).then(pl.col(name).fill_null(strategy="forward")).otherwise(pl.col(name)).alias(name)

    expressions = [_column_expr(name) for name in df.columns]
    indexed = df.with_columns(pl.int_range(0, df.height).alias(idx_name))
    # Selecting only the built expressions drops the helper column again.
    return indexed.select(expressions)

141

142

def _subtract_risk_free(dframe: pl.DataFrame, rf: float | pl.DataFrame, date_col: str) -> pl.DataFrame:
    """Subtract the risk-free rate from every non-date column of *dframe*.

    Args:
        dframe (pl.DataFrame): Returns data with a date column and one or
            more asset return columns.
        rf (float | pl.DataFrame): Risk-free rate to subtract from returns.

            - If float: A constant risk-free rate applied to all dates.
            - If pl.DataFrame: A frame holding the date column and, as its
              second column, the time-varying risk-free rate. Any further
              columns are ignored.

        date_col (str): Name of the date column in both DataFrames; used as
            the join key.

    Returns:
        pl.DataFrame: The date column followed by the asset columns with the
        rate subtracted, under their original names. Rows whose date has no
        match in the rate frame are dropped (inner join).

    Raises:
        TypeError: If *rf* is neither a float nor a polars DataFrame.

    """
    asset_cols = [col for col in dframe.columns if col != date_col]

    # Internal name for the rate column. It must not clash with an asset
    # column (e.g. an asset literally called "rf"), otherwise the join would
    # suffix the rate column and the subtraction would use the wrong operands.
    rf_col = "rf"
    while rf_col in dframe.columns:
        rf_col = f"_{rf_col}_"

    if isinstance(rf, float):
        rf_dframe = dframe.select([pl.col(date_col), pl.lit(rf).alias(rf_col)])
    else:
        if not isinstance(rf, pl.DataFrame):
            raise TypeError("rf must be a float or DataFrame")  # noqa: TRY003
        rate_name = rf.columns[1]
        if rate_name != "rf":
            warnings.warn(
                f"Risk-free rate column '{rate_name}' has been renamed to 'rf' for internal alignment.",
                stacklevel=3,
            )
        # Keep only the join key and the rate so stray columns in the rate
        # frame cannot leak into the result as pseudo-assets.
        rf_dframe = rf.select([pl.col(date_col), pl.col(rate_name).alias(rf_col)])

    joined = dframe.join(rf_dframe, on=date_col, how="inner")
    return joined.select([pl.col(date_col)] + [(pl.col(col) - pl.col(rf_col)).alias(col) for col in asset_cols])

180

181

@dataclasses.dataclass(frozen=True, slots=True)
class Data:
    """A container for financial returns data and an optional benchmark.

    Provides methods for analyzing and manipulating financial returns data,
    including resampling, truncation, and access to statistical metrics and
    visualizations via the ``stats`` and ``plots`` properties.

    Instances are frozen. On construction ``__post_init__`` checks that the
    index has at least two rows, that its first column is sorted, and that
    ``returns`` (and ``benchmark``, when given) have the same number of rows
    as the index.

    Attributes:
        returns (pl.DataFrame): DataFrame containing returns data with assets
            as columns.
        index (pl.DataFrame): DataFrame containing the date index for the
            returns data.
        benchmark (pl.DataFrame | None): Optional benchmark returns DataFrame.
            Defaults to None.

    """

    returns: pl.DataFrame
    index: pl.DataFrame
    benchmark: pl.DataFrame | None = None

203

204 def __post_init__(self) -> None:

205 """Validate the Data object after initialization."""

206 # You need at least two points

207 if self.index.shape[0] < 2:

208 raise ValueError("Index must contain at least two timestamps.") # noqa: TRY003

209

210 # Check index is monotonically increasing

211 datetime_col = self.index[self.index.columns[0]]

212 if not datetime_col.is_sorted():

213 raise ValueError("Index must be monotonically increasing.") # noqa: TRY003

214

215 # Check row count matches returns

216 if self.returns.shape[0] != self.index.shape[0]:

217 raise ValueError("Returns and index must have the same number of rows.") # noqa: TRY003

218

219 # Check row count matches benchmark (if provided)

220 if self.benchmark is not None and self.benchmark.shape[0] != self.index.shape[0]:

221 raise ValueError("Benchmark and index must have the same number of rows.") # noqa: TRY003

222

223 @classmethod

224 def from_returns(

225 cls,

226 returns: NativeFrame,

227 rf: NativeFrameOrScalar = 0.0,

228 benchmark: NativeFrame | None = None,

229 date_col: str = "Date",

230 null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,

231 ) -> Data:

232 """Create a Data object from returns and optional benchmark.

233

234 Args:

235 returns (NativeFrame): Financial returns data. First column should

236 be the date column, remaining columns are asset returns.

237 rf (float | NativeFrame): Risk-free rate. Defaults to 0.0 (no

238 risk-free rate adjustment).

239

240 - If float: Constant risk-free rate applied to all dates.

241 - If NativeFrame: Time-varying risk-free rate with dates

242 matching returns.

243

244 benchmark (NativeFrame | None): Benchmark returns. Defaults to

245 None (no benchmark). First column should be the date column,

246 remaining columns are benchmark returns.

247 date_col (str): Name of the date column in the DataFrames.

248 Defaults to ``"Date"``.

249 null_strategy ({"raise", "drop", "forward_fill"} | None): How to

250 handle ``null`` (missing) values in *returns* and *benchmark*.

251 Defaults to ``None`` (nulls propagate through calculations).

252

253 - ``None`` — no null checking; nulls propagate through all

254 downstream calculations.

255 - ``"raise"`` — raise `NullsInReturnsError` if any null is

256 found.

257 - ``"drop"`` — silently drop every row that contains at least

258 one null.

259 - ``"forward_fill"`` — fill each null with the most recent

260 non-null value in the same column.

261

262 Note: Affects only Polars ``null`` values (i.e. ``None`` /

263 missing entries). IEEE-754 ``NaN`` values are **not** affected

264 and continue to propagate as per IEEE-754 semantics.

265

266 Returns:

267 Data: Object containing excess returns and benchmark (if any),

268 with methods for analysis and visualization through the ``stats``

269 and ``plots`` properties.

270

271 Raises:

272 NullsInReturnsError: If *null_strategy* is ``"raise"`` and the

273 data contains null values.

274 ValueError: If there are no overlapping dates between returns and

275 benchmark.

276

277 Examples:

278 Basic usage:

279

280 ```python

281 from jquantstats import Data

282 import polars as pl

283

284 returns = pl.DataFrame({

285 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],

286 "Asset1": [0.01, -0.02, 0.03]

287 }).with_columns(pl.col("Date").str.to_date())

288

289 data = Data.from_returns(returns=returns)

290 ```

291

292 With benchmark and risk-free rate:

293

294 ```python

295 benchmark = pl.DataFrame({

296 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],

297 "Market": [0.005, -0.01, 0.02]

298 }).with_columns(pl.col("Date").str.to_date())

299

300 data = Data.from_returns(returns=returns, benchmark=benchmark, rf=0.0002)

301 ```

302

303 Handling nulls automatically:

304

305 ```python

306 returns_with_nulls = pl.DataFrame({

307 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],

308 "Asset1": [0.01, None, 0.03]

309 }).with_columns(pl.col("Date").str.to_date())

310

311 # Drop rows with nulls (mirrors pandas/QuantStats behaviour)

312 data = Data.from_returns(returns=returns_with_nulls, null_strategy="drop")

313

314 # Or forward-fill nulls

315 data = Data.from_returns(returns=returns_with_nulls, null_strategy="forward_fill")

316 ```

317

318 """

319 returns_pl = _to_polars(returns)

320 benchmark_pl = _to_polars(benchmark) if benchmark is not None else None

321 rf_converted: float | pl.DataFrame

322 if isinstance(rf, pl.DataFrame) or (not isinstance(rf, float) and not isinstance(rf, int)):

323 rf_converted = _to_polars(rf)

324 else:

325 rf_converted = rf # int is not float/DataFrame: _subtract_risk_free raises TypeError

326

327 returns_pl = _apply_null_strategy(returns_pl, date_col, "returns", null_strategy)

328 if benchmark_pl is not None:

329 benchmark_pl = _apply_null_strategy(benchmark_pl, date_col, "benchmark", null_strategy)

330

331 if benchmark_pl is not None:

332 joined_dates = returns_pl.join(benchmark_pl, on=date_col, how="inner").select(date_col)

333 if joined_dates.is_empty():

334 raise ValueError("No overlapping dates between returns and benchmark.") # noqa: TRY003

335 returns_pl = returns_pl.join(joined_dates, on=date_col, how="inner")

336 benchmark_pl = benchmark_pl.join(joined_dates, on=date_col, how="inner")

337

338 index = returns_pl.select(date_col)

339 excess_returns = _subtract_risk_free(returns_pl, rf_converted, date_col).drop(date_col)

340 excess_benchmark = (

341 _subtract_risk_free(benchmark_pl, rf_converted, date_col).drop(date_col)

342 if benchmark_pl is not None

343 else None

344 )

345

346 return cls(returns=excess_returns, benchmark=excess_benchmark, index=index)

347

348 @classmethod

349 def from_prices(

350 cls,

351 prices: NativeFrame,

352 rf: NativeFrameOrScalar = 0.0,

353 benchmark: NativeFrame | None = None,

354 date_col: str = "Date",

355 null_strategy: Literal["raise", "drop", "forward_fill"] | None = None,

356 ) -> Data:

357 """Create a Data object from prices and optional benchmark.

358

359 Converts price levels to returns via percentage change and delegates

360 to `from_returns`. The first row of each asset is dropped because no

361 prior price is available to compute a return.

362

363 Args:

364 prices (NativeFrame): Price-level data. First column should be

365 the date column; remaining columns are asset prices.

366 rf (float | NativeFrame): Risk-free rate. Forwarded unchanged to

367 `from_returns`. Defaults to 0.0 (no risk-free rate

368 adjustment).

369 benchmark (NativeFrame | None): Benchmark prices. Converted to

370 returns in the same way as ``prices`` before being forwarded

371 to `from_returns`. Defaults to None (no benchmark).

372 date_col (str): Name of the date column in the DataFrames.

373 Defaults to ``"Date"``.

374 null_strategy ({"raise", "drop", "forward_fill"} | None): How to

375 handle ``null`` (missing) values after converting prices to

376 returns. Forwarded unchanged to `from_returns`. Defaults to

377 ``None`` (nulls propagate through calculations).

378

379 - ``None`` — no null checking; nulls propagate.

380 - ``"raise"`` — raise `NullsInReturnsError` if any null is

381 found in the derived returns.

382 - ``"drop"`` — silently drop every row that contains at least

383 one null.

384 - ``"forward_fill"`` — fill each null with the most recent

385 non-null value.

386

387 Note: Prices that contain nulls will produce null returns via

388 ``pct_change()``. If you expect missing price entries, pass

389 ``null_strategy="drop"`` or ``null_strategy="forward_fill"``.

390

391 Returns:

392 Data: Object containing excess returns derived from the supplied

393 prices, with methods for analysis and visualization through the

394 ``stats`` and ``plots`` properties.

395

396 Examples:

397 ```python

398 from jquantstats import Data

399 import polars as pl

400

401 prices = pl.DataFrame({

402 "Date": ["2023-01-01", "2023-01-02", "2023-01-03"],

403 "Asset1": [100.0, 101.0, 99.0]

404 }).with_columns(pl.col("Date").str.to_date())

405

406 data = Data.from_prices(prices=prices)

407 ```

408

409 """

410 prices_pl = _to_polars(prices)

411 asset_cols = [c for c in prices_pl.columns if c != date_col]

412 returns_pl = prices_pl.with_columns([pl.col(c).pct_change().alias(c) for c in asset_cols]).slice(1)

413

414 benchmark_returns: NativeFrame | None = None

415 if benchmark is not None:

416 benchmark_pl = _to_polars(benchmark)

417 bench_cols = [c for c in benchmark_pl.columns if c != date_col]

418 benchmark_returns = benchmark_pl.with_columns([pl.col(c).pct_change().alias(c) for c in bench_cols]).slice(

419 1

420 )

421

422 return cls.from_returns(

423 returns=returns_pl,

424 rf=rf,

425 benchmark=benchmark_returns,

426 date_col=date_col,

427 null_strategy=null_strategy,

428 )

429

430 def __repr__(self) -> str:

431 """Return a string representation of the Data object."""

432 rows = len(self.index)

433 date_cols = self.date_col

434 if date_cols:

435 date_column = date_cols[0]

436 start = self.index[date_column].min()

437 end = self.index[date_column].max()

438 return f"Data(assets={self.assets}, rows={rows}, start={start}, end={end})"

439 return f"Data(assets={self.assets}, rows={rows})" # pragma: no cover # __post_init__ requires ≥1 index column

440

441 @property

442 def plots(self) -> DataPlots:

443 """Provides access to visualization methods for the financial data.

444

445 Returns:

446 DataPlots: An instance of the DataPlots class initialized with this data.

447

448 """

449 from ._plots import DataPlots

450

451 return DataPlots(self)

452

453 @property

454 def stats(self) -> Stats:

455 """Provides access to statistical analysis methods for the financial data.

456

457 Returns:

458 Stats: An instance of the Stats class initialized with this data.

459

460 """

461 from ._stats import Stats

462

463 return Stats(self)

464

465 @property

466 def reports(self) -> Reports:

467 """Provides access to reporting methods for the financial data.

468

469 Returns:

470 Reports: An instance of the Reports class initialized with this data.

471

472 """

473 from ._reports import Reports

474

475 return Reports(self)

476

477 @property

478 def utils(self) -> DataUtils:

479 """Provides access to utility transforms and conversions for the financial data.

480

481 Returns:

482 DataUtils: An instance of the DataUtils class initialized with this data.

483

484 """

485 from ._utils import DataUtils

486

487 return DataUtils(self)

488

489 @property

490 def date_col(self) -> list[str]:

491 """Return the column names of the index DataFrame.

492

493 Returns:

494 list[str]: List of column names in the index DataFrame, typically containing

495 the date column name.

496

497 """

498 return list(self.index.columns)

499

500 @property

501 def assets(self) -> list[str]:

502 """Return the combined list of asset column names from returns and benchmark.

503

504 Returns:

505 list[str]: List of all asset column names from both returns and benchmark

506 (if available).

507

508 """

509 if self.benchmark is not None:

510 return list(self.returns.columns) + list(self.benchmark.columns)

511 return list(self.returns.columns)

512

513 @property

514 def all(self) -> pl.DataFrame:

515 """Combine index, returns, and benchmark data into a single DataFrame.

516

517 This property provides a convenient way to access all data in a single DataFrame,

518 which is useful for analysis and visualization.

519

520 Returns:

521 pl.DataFrame: A DataFrame containing the index, all returns data, and benchmark data

522 (if available) combined horizontally.

523

524 """

525 if self.benchmark is None:

526 return pl.concat([self.index, self.returns], how="horizontal")

527 else:

528 return pl.concat([self.index, self.returns, self.benchmark], how="horizontal")

529

530 def resample(self, every: str = "1mo") -> Data:

531 """Resample returns and benchmark to a different frequency.

532

533 Args:

534 every (str): Resampling frequency (e.g., ``'1mo'``, ``'1y'``).

535 Defaults to ``'1mo'``.

536

537 Returns:

538 Data: Resampled data at the requested frequency.

539

540 """

541

542 def resample_frame(dframe: pl.DataFrame) -> pl.DataFrame:

543 """Resample a single DataFrame to the target frequency using compound returns."""

544 dframe = self.index.hstack(dframe) # Add the date column for resampling

545

546 return dframe.group_by_dynamic(

547 index_column=self.index.columns[0], every=every, period=every, closed="right", label="right"

548 ).agg(

549 [

550 ((pl.col(col) + 1.0).product() - 1.0).alias(col)

551 for col in dframe.columns

552 if col != self.index.columns[0]

553 ]

554 )

555

556 resampled_returns = resample_frame(self.returns)

557 resampled_benchmark = resample_frame(self.benchmark) if self.benchmark is not None else None

558 resampled_index = resampled_returns.select(self.index.columns[0])

559

560 return Data(

561 returns=resampled_returns.drop(self.index.columns[0]),

562 benchmark=resampled_benchmark.drop(self.index.columns[0]) if resampled_benchmark is not None else None,

563 index=resampled_index,

564 )

565

566 def describe(self) -> pl.DataFrame:

567 """Return a tidy summary of shape, date range and asset names.

568

569 Returns:

570 pl.DataFrame: One row per asset with columns: asset, start, end,

571 rows, has_benchmark.

572

573 """

574 date_column = self.date_col[0]

575 start = self.index[date_column].min()

576 end = self.index[date_column].max()

577 rows = len(self.index)

578 return pl.DataFrame(

579 {

580 "asset": self.returns.columns,

581 "start": [start] * len(self.returns.columns),

582 "end": [end] * len(self.returns.columns),

583 "rows": [rows] * len(self.returns.columns),

584 "has_benchmark": [self.benchmark is not None] * len(self.returns.columns),

585 }

586 )

587

588 def copy(self) -> Data:

589 """Create a deep copy of the Data object.

590

591 Returns:

592 Data: A new Data object with copies of the returns and benchmark.

593

594 """

595 if self.benchmark is not None:

596 return Data(returns=self.returns.clone(), benchmark=self.benchmark.clone(), index=self.index.clone())

597 return Data(returns=self.returns.clone(), index=self.index.clone())

598

599 def head(self, n: int = 5) -> Data:

600 """Return the first n rows of the combined returns and benchmark data.

601

602 Args:

603 n (int, optional): Number of rows to return. Defaults to 5.

604

605 Returns:

606 Data: A new Data object containing the first n rows of the combined data.

607

608 """

609 benchmark_head = self.benchmark.head(n) if self.benchmark is not None else None

610 return Data(returns=self.returns.head(n), benchmark=benchmark_head, index=self.index.head(n))

611

612 def tail(self, n: int = 5) -> Data:

613 """Return the last n rows of the combined returns and benchmark data.

614

615 Args:

616 n (int, optional): Number of rows to return. Defaults to 5.

617

618 Returns:

619 Data: A new Data object containing the last n rows of the combined data.

620

621 """

622 benchmark_tail = self.benchmark.tail(n) if self.benchmark is not None else None

623 return Data(returns=self.returns.tail(n), benchmark=benchmark_tail, index=self.index.tail(n))

624

625 def truncate(

626 self,

627 start: date | datetime | str | int | None = None,

628 end: date | datetime | str | int | None = None,

629 ) -> Data:

630 """Return a new Data object truncated to the inclusive [start, end] range.

631

632 When the index is temporal (Date/Datetime), truncation is performed by

633 comparing the date column against ``start`` and ``end`` values.

634

635 When the index is integer-based, row slicing is used instead, and

636 ``start`` and ``end`` must be non-negative integers. Passing

637 non-integer bounds to an integer-indexed Data raises `TypeError`.

638

639 Args:

640 start: Optional lower bound (inclusive). A date/datetime value

641 when the index is temporal; a non-negative `int` row

642 index when the data has no temporal index.

643 end: Optional upper bound (inclusive). Same type rules as

644 ``start``.

645

646 Returns:

647 Data: A new Data object filtered to the specified range.

648

649 Raises:

650 TypeError: When the index is not temporal and a non-integer bound

651 is supplied.

652

653 """

654 date_column = self.index.columns[0]

655 is_temporal = self.index[date_column].dtype.is_temporal()

656

657 if is_temporal:

658 cond = pl.lit(True)

659 if start is not None:

660 cond = cond & (pl.col(date_column) >= pl.lit(start))

661 if end is not None:

662 cond = cond & (pl.col(date_column) <= pl.lit(end))

663 mask = self.index.select(cond.alias("mask"))["mask"]

664 new_index = self.index.filter(mask)

665 new_returns = self.returns.filter(mask)

666 new_benchmark = self.benchmark.filter(mask) if self.benchmark is not None else None

667 else:

668 if start is not None and not isinstance(start, int):

669 raise TypeError(f"start must be an integer, got {type(start).__name__}.") # noqa: TRY003

670 if end is not None and not isinstance(end, int):

671 raise TypeError(f"end must be an integer, got {type(end).__name__}.") # noqa: TRY003

672 row_start = start if start is not None else 0

673 row_end = end + 1 if end is not None else self.index.height

674 length = max(0, row_end - row_start)

675 new_index = self.index.slice(row_start, length)

676 new_returns = self.returns.slice(row_start, length)

677 new_benchmark = self.benchmark.slice(row_start, length) if self.benchmark is not None else None

678

679 return Data(returns=new_returns, benchmark=new_benchmark, index=new_index)

680

681 @property

682 def _periods_per_year(self) -> float:

683 """Estimate the number of periods per year based on average frequency in the index.

684

685 For temporal (Date/Datetime) indices, computes the mean gap between observations

686 and converts to an annualised period count (e.g. ~252 for daily, ~52 for weekly).

687

688 For integer indices (date-free portfolios), falls back to 252 trading days per year

689 because integer diffs have no time meaning.

690 """

691 datetime_col = self.index[self.index.columns[0]]

692

693 if not datetime_col.dtype.is_temporal():

694 return 252.0

695

696 sorted_dt = datetime_col.sort()

697 diffs = sorted_dt.diff().drop_nulls()

698 mean_diff = diffs.mean()

699

700 if isinstance(mean_diff, timedelta):

701 seconds = mean_diff.total_seconds()

702 else: # pragma: no cover # Polars always returns timedelta for temporal diff

703 seconds = cast(float, mean_diff) if mean_diff is not None else 1.0

704

705 return (365 * 24 * 60 * 60) / seconds

706

707 def items(self) -> Iterator[tuple[str, pl.Series]]:

708 """Iterate over all assets and their corresponding data series.

709

710 This method provides a convenient way to iterate over all assets in the data,

711 yielding each asset name and its corresponding data series.

712

713 Yields:

714 tuple[str, pl.Series]: A tuple containing the asset name and its data series.

715

716 """

717 matrix = self.all

718

719 for col in self.assets:

720 yield col, matrix.get_column(col)

Coverage for src/jquantstats/data.py: 100%

204 statements