Coverage for src/basanos/math/optimizer.py: 100%

141 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-23 05:58 +0000

1"""Correlation-aware risk position optimizer (Basanos). 

2 

3This module provides utilities to compute correlation-adjusted risk positions 

4from price data and expected-return signals. It relies on volatility-adjusted 

5returns to estimate a dynamic correlation matrix (via EWM), applies shrinkage 

6towards identity, and solves a normalized linear system per timestamp to 

7obtain stable positions. 

8 

9Performance characteristics 

10--------------------------- 

11Let *N* be the number of assets and *T* the number of timestamps. 

12 

13**Computational complexity** 

14 

15+----------------------------------+------------------+--------------------------------------+ 

16| Operation | Complexity | Bottleneck | 

17+==================================+==================+======================================+ 

18| EWM volatility (``ret_adj``, | O(T·N) | Linear in both T and N; negligible | 

19| ``vola``) | | | 

20+----------------------------------+------------------+--------------------------------------+ 

21| EWM correlation (``cor``) | O(T·N²) | ``ewm_covariance`` from | 

22| | | ``cvx.linalg`` over all N² pairs | 

23+----------------------------------+------------------+--------------------------------------+ 

24| Linear solve per timestamp | O(N³) | Cholesky / LU per row in | 

25| (``cash_position``) | * T solves | ``cash_position`` | 

26+----------------------------------+------------------+--------------------------------------+ 

27 

28**Memory usage** (peak, approximate) 

29 

30``ewm_covariance`` from ``cvx.linalg`` processes the input Polars DataFrame 

31and returns a dict of covariance matrices. Peak RAM ≈ **O(T · N²)** bytes. 

32Typical working sizes on a 16 GB machine: 

33 

34+--------+--------------------------+------------------------------------+ 

35| N | T (daily rows) | Peak memory (approx.) | 

36+========+==========================+====================================+ 

37| 50 | 252 (~1 yr) | ~70 MB | 

38+--------+--------------------------+------------------------------------+ 

39| 100 | 252 (~1 yr) | ~280 MB | 

40+--------+--------------------------+------------------------------------+ 

41| 100 | 2 520 (~10 yr) | ~2.8 GB | 

42+--------+--------------------------+------------------------------------+ 

43| 200 | 2 520 (~10 yr) | ~11 GB | 

44+--------+--------------------------+------------------------------------+ 

45| 500 | 2 520 (~10 yr) | ~70 GB ⚠ exceeds typical RAM | 

46+--------+--------------------------+------------------------------------+ 

47 

48**Practical limits (daily data)** 

49 

50* **≤ 150 assets, ≤ 5 years** — well within reach on an 8 GB laptop. 

51* **≤ 200 assets, ≤ 10 years** — requires ~11-12 GB (see the table above); feasible on a 16 GB 

52 workstation. 

53* **> 500 assets with multi-year history** — peak memory exceeds 16 GB; 

54 reduce the time range or switch to a chunked / streaming approach. 

55* **> 1 000 assets** — the O(N³) per-solve cost alone makes real-time 

56 optimization impractical even with adequate RAM. 

57 

58See ``BENCHMARKS.md`` for measured wall-clock timings across representative 

59dataset sizes. 

60 

61Internal structure 

62------------------ 

63The implementation is split across focused private modules to keep each file 

64readable and independently testable: 

65 

66* `_config` — `BasanosConfig` and all 

67 covariance-mode configuration classes. 

68* `_engine_solve` — private helpers providing the 

69 ``_iter_matrices`` and ``_iter_solve`` generators (per-timestamp solve 

70 logic). 

71* `_engine_diagnostics` — private helpers providing 

72 matrix-quality diagnostics (condition number, effective rank, solver 

73 residual, signal utilisation). 

74* `_engine_ic` — private helpers providing signal 

75 evaluation metrics (IC, Rank IC, ICIR, and summary statistics). 

76* This module — `BasanosEngine`, a single flat class that wires 

77 every method together in clearly delimited sections. 

78""" 

79 

80import dataclasses 

81import datetime 

82import logging 

83from typing import TYPE_CHECKING 

84 

85import numpy as np 

86import polars as pl 

87from cvx.linalg import cov_to_corr 

88from cvx.linalg.ewm_cov import ewm_covariance 

89from jquantstats import Portfolio 

90 

91from ..exceptions import ( 

92 ColumnMismatchError, 

93 ExcessiveNullsError, 

94 MissingDateColumnError, 

95 MonotonicPricesError, 

96 NonPositivePricesError, 

97 ShapeMismatchError, 

98) 

99from ._config import ( 

100 BasanosConfig, 

101 CovarianceConfig, 

102 CovarianceMode, 

103 EwmaShrinkConfig, 

104 SlidingWindowConfig, 

105) 

106from ._engine_diagnostics import _DiagnosticsMixin as _DiagnosticsMixin 

107from ._engine_ic import _SignalEvaluatorMixin as _SignalEvaluatorMixin 

108from ._engine_solve import _SolveMixin as _SolveMixin 

109from ._signal import vol_adj 

110 

111if TYPE_CHECKING: 

112 from ._config_report import ConfigReport 

113 

114_logger = logging.getLogger(__name__) 

115 

116 

def _validate_required_date_columns(prices: pl.DataFrame, mu: pl.DataFrame) -> None:
    """Check that ``prices`` and ``mu`` each carry a ``date`` column.

    The frames are inspected in order (``prices`` first), so a missing column
    in ``prices`` is reported before ``mu`` is looked at.

    Args:
        prices: Price frame whose column names are inspected.
        mu: Signal frame whose column names are inspected.

    Raises:
        MissingDateColumnError: Constructed with the name of the first frame
            (``"prices"`` or ``"mu"``) that has no ``date`` column.
    """
    for frame_name, frame in (("prices", prices), ("mu", mu)):
        if "date" not in frame.columns:
            raise MissingDateColumnError(frame_name)

123 

124 

def _validate_shape_and_column_sets(prices: pl.DataFrame, mu: pl.DataFrame) -> None:
    """Check that ``prices`` and ``mu`` agree in shape and in column names.

    Column names are compared as sets, so column order is not part of the
    check.

    Args:
        prices: Price frame.
        mu: Signal frame expected to mirror ``prices``.

    Raises:
        ShapeMismatchError: If the two ``shape`` tuples differ. Checked first.
        ColumnMismatchError: If the shapes match but the sets of column names
            do not.
    """
    prices_shape, mu_shape = prices.shape, mu.shape
    if prices_shape != mu_shape:
        raise ShapeMismatchError(prices_shape, mu_shape)

    if set(prices.columns) != set(mu.columns):
        raise ColumnMismatchError(prices.columns, mu.columns)

131 

132 

def _numeric_assets(prices: pl.DataFrame) -> list[str]:
    """Collect the names of the numeric columns of ``prices`` other than ``date``.

    Args:
        prices: Frame whose columns are scanned in their existing order.

    Returns:
        Column names, in frame order, whose dtype reports ``is_numeric()`` and
        whose name is not ``"date"``.
    """
    names: list[str] = []
    for name in prices.columns:
        if name == "date":
            continue
        if prices[name].dtype.is_numeric():
            names.append(name)
    return names

136 

137 

def _validate_positive_prices(prices: pl.DataFrame, assets: list[str]) -> None:
    """Reject any asset column that contains a price less than or equal to zero.

    Nulls are dropped before the comparison, so a column consisting only of
    nulls passes this check.

    Args:
        prices: Price frame.
        assets: Names of the columns in ``prices`` to inspect.

    Raises:
        NonPositivePricesError: For the first asset (in ``assets`` order) with
            at least one non-null value ``<= 0``.
    """
    for name in assets:
        observed = prices[name].drop_nulls()
        if observed.len() == 0:
            # Nothing left to compare once nulls are removed.
            continue
        if (observed <= 0).any():
            raise NonPositivePricesError(name)

144 

145 

def _validate_null_fraction(prices: pl.DataFrame, assets: list[str], max_nan_fraction: float) -> None:
    """Reject asset columns with too large a share of null entries.

    Args:
        prices: Price frame.
        assets: Names of the columns in ``prices`` to inspect.
        max_nan_fraction: Largest tolerated ratio ``null_count / height``; a
            column is rejected only when its ratio is strictly greater.

    Raises:
        ExcessiveNullsError: For the first asset (in ``assets`` order) whose
            null fraction exceeds ``max_nan_fraction``.
    """
    total_rows = prices.height
    if total_rows == 0:
        # An empty frame has no meaningful fraction (and would divide by zero).
        return

    for name in assets:
        fraction = prices[name].null_count() / total_rows
        if fraction > max_nan_fraction:
            raise ExcessiveNullsError(name, fraction, max_nan_fraction)

155 

156 

def _validate_non_monotonic_prices(prices: pl.DataFrame, assets: list[str]) -> None:
    """Reject asset series whose non-null prices never change direction.

    A series is rejected when every successive difference is ``>= 0`` or every
    successive difference is ``<= 0`` (a constant series satisfies both).
    Series with two or fewer non-null observations are not checked.

    Args:
        prices: Price frame.
        assets: Names of the columns in ``prices`` to inspect.

    Raises:
        MonotonicPricesError: For the first asset (in ``assets`` order) whose
            non-null prices are monotonic.
    """
    for name in assets:
        observed = prices[name].drop_nulls()
        if observed.len() <= 2:
            continue
        # ``diff`` leaves a null in the first slot; remove it before testing.
        steps = observed.diff().drop_nulls()
        if (steps >= 0).all() or (steps <= 0).all():
            raise MonotonicPricesError(name)

165 

166 

def _warn_short_sliding_window_data(prices: pl.DataFrame, cfg: "BasanosConfig") -> None:
    """Log a warning when ``prices`` has fewer than ``2 * cfg.window`` rows.

    Only applies when ``cfg.covariance_mode`` is
    ``CovarianceMode.sliding_window`` and ``cfg.window`` is not ``None``;
    otherwise the function returns without doing anything. Nothing is raised:
    the condition is reported through the module logger only.

    Args:
        prices: Price frame; only its row count is used.
        cfg: Engine configuration supplying ``covariance_mode`` and ``window``.
    """
    if cfg.covariance_mode != CovarianceMode.sliding_window or cfg.window is None:
        return

    window: int = cfg.window
    row_count = prices.height
    if row_count >= 2 * window:
        return

    _logger.warning(
        "Dataset length (%d rows) is less than 2 * window (%d). "
        "The first %d timestamps will yield zero positions during warm-up; "
        "consider using a longer history or reducing 'window'.",
        row_count,
        2 * window,
        window - 1,
    )

181 

182 

def _validate_inputs(prices: pl.DataFrame, mu: pl.DataFrame, cfg: "BasanosConfig") -> None:
    """Run every input check required before a `BasanosEngine` can be used.

    The checks run in a fixed order and the first failure aborts the rest:

    1. both frames contain a ``'date'`` column;
    2. both frames have the same shape and the same set of column names;
    3. no numeric asset column of ``prices`` holds a value ``<= 0``;
    4. no numeric asset column of ``prices`` has a null fraction above
       ``cfg.max_nan_fraction``;
    5. no numeric asset column of ``prices`` is monotonic (all successive
       differences ``>= 0`` or all ``<= 0``);
    6. a warning is logged if the data is short relative to a configured
       sliding window.

    Checks 3-5 apply to the numeric, non-``date`` columns of ``prices``.

    Args:
        prices: DataFrame of price levels per asset over time.
        mu: DataFrame of expected-return signals aligned with ``prices``.
        cfg: Engine configuration instance.

    Raises:
        MissingDateColumnError: If ``'date'`` is absent from either frame.
        ShapeMismatchError: If ``prices`` and ``mu`` have different shapes.
        ColumnMismatchError: If the column sets of the two frames differ.
        NonPositivePricesError: If any asset contains a non-positive price.
        ExcessiveNullsError: If any asset column exceeds ``cfg.max_nan_fraction``.
        MonotonicPricesError: If any asset price series is monotonically
            non-decreasing or non-increasing.

    Warns:
        Via the module logger (not the ``warnings`` module): when
        ``cfg.covariance_mode`` is ``CovarianceMode.sliding_window``,
        ``cfg.window`` is set, and ``prices.height < 2 * cfg.window``. This is
        a soft boundary, so no exception is raised for short data; the logged
        message points out that the first ``window - 1`` timestamps fall in
        the warm-up period.
    """
    # Structural checks come first so the later ones can index columns safely.
    _validate_required_date_columns(prices, mu)
    _validate_shape_and_column_sets(prices, mu)

    asset_names = _numeric_assets(prices)
    _validate_positive_prices(prices, asset_names)
    _validate_null_fraction(prices, asset_names, cfg.max_nan_fraction)
    _validate_non_monotonic_prices(prices, asset_names)

    _warn_short_sliding_window_data(prices, cfg)

222 

223 

224# --------------------------------------------------------------------------- 

225# Re-export config symbols so ``from basanos.math.optimizer import …`` keeps 

226# working for existing callers. 

227# --------------------------------------------------------------------------- 

228__all__ = [ 

229 "BasanosConfig", 

230 "BasanosEngine", 

231 "CovarianceConfig", 

232 "CovarianceMode", 

233 "EwmaShrinkConfig", 

234 "SlidingWindowConfig", 

235] 

236 

237 

238@dataclasses.dataclass(frozen=True) 

239class BasanosEngine(_DiagnosticsMixin, _SignalEvaluatorMixin, _SolveMixin): 

240 """Engine to compute correlation matrices and optimize risk positions. 

241 

242 Encapsulates price data and configuration to build EWM-based 

243 correlations, apply shrinkage, and solve for normalized positions. 

244 

245 Public methods are organised into clearly delimited sections (some 

246 inherited from the private mixin classes): 

247 

248 * **Core data access** — `assets`, `ret_adj`, `vola`, `cor`, `cor_tensor` 

249 * **Solve / position logic** — `cash_position`, `position_status`, 

250 `risk_position`, `position_leverage`, `warmup_state` 

251 * **Portfolio and performance** — `portfolio`, `naive_sharpe`, 

252 `sharpe_at_shrink`, `sharpe_at_window_factors` 

253 * **Matrix diagnostics** — `condition_number`, `effective_rank`, 

254 `solver_residual`, `signal_utilisation` 

255 * **Signal evaluation** — `ic(h)`, `rank_ic(h)`, `ic_mean(h)`, `ic_std(h)`, 

256 `icir(h)`, `rank_ic_mean(h)`, `rank_ic_std(h)` (``h`` defaults to 1) 

257 * **Reporting** — `config_report` 

258 

259 Data-flow diagram 

260 ----------------- 

261 

262 .. code-block:: text 

263 

264 prices (pl.DataFrame) 

265 

266 ├─ vol_adj ──► ret_adj (volatility-adjusted log returns) 

267 │ │ 

268 │ ├─ ewm_covariance ──► cor / cor_tensor 

269 │ │ │ 

270 │ │ └─ shrink2id / FactorModel 

271 │ │ │ 

272 │ vola covariance matrix 

273 │ │ │ 

274 └── mu ──────────┴── _iter_solve ──────────┘ 

275 

276 cash_position 

277 

278 ┌────────┴────────┐ 

279 portfolio diagnostics 

280 (Portfolio) (condition_number, 

281 effective_rank, 

282 solver_residual, 

283 signal_utilisation, 

284 ic, rank_ic, …) 

285 

286 Attributes: 

287 prices: Polars DataFrame of price levels per asset over time. Must 

288 contain a ``'date'`` column and at least one numeric asset column 

289 with strictly positive values that are not monotonically 

290 non-decreasing or non-increasing (i.e. successive price changes must take both signs). 

291 mu: Polars DataFrame of expected-return signals aligned with *prices*. 

292 Must share the same shape and column names as *prices*. 

293 cfg: Immutable `BasanosConfig` controlling EWMA half-lives, 

294 clipping, shrinkage intensity, and AUM. 

295 

296 Examples: 

297 Build an engine with two synthetic assets over 30 days and inspect the 

298 optimized positions and diagnostic properties. 

299 

300 >>> import numpy as np 

301 >>> import polars as pl 

302 >>> from basanos.math import BasanosConfig, BasanosEngine 

303 >>> dates = list(range(30)) 

304 >>> rng = np.random.default_rng(42) 

305 >>> prices = pl.DataFrame({ 

306 ... "date": dates, 

307 ... "A": np.cumprod(1 + rng.normal(0.001, 0.02, 30)) * 100.0, 

308 ... "B": np.cumprod(1 + rng.normal(0.001, 0.02, 30)) * 150.0, 

309 ... }) 

310 >>> mu = pl.DataFrame({ 

311 ... "date": dates, 

312 ... "A": rng.normal(0.0, 0.5, 30), 

313 ... "B": rng.normal(0.0, 0.5, 30), 

314 ... }) 

315 >>> cfg = BasanosConfig(vola=5, corr=10, clip=2.0, shrink=0.5, aum=1_000_000) 

316 >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg) 

317 >>> engine.assets 

318 ['A', 'B'] 

319 >>> engine.cash_position.shape 

320 (30, 3) 

321 >>> engine.position_leverage.columns 

322 ['date', 'leverage'] 

323 """ 

324 

325 prices: pl.DataFrame 

326 mu: pl.DataFrame 

327 cfg: BasanosConfig 

328 

def __post_init__(self) -> None:
    """Validate ``prices``, ``mu`` and ``cfg`` once the dataclass fields are set.

    Delegates to `_validate_inputs`; any exception it raises propagates out of
    the constructor.
    """
    _validate_inputs(prices=self.prices, mu=self.mu, cfg=self.cfg)

332 

333 # ------------------------------------------------------------------ 

334 # Core data-access properties 

335 # ------------------------------------------------------------------ 

336 

@property
def assets(self) -> list[str]:
    """List asset column names (numeric columns of ``prices`` excluding ``'date'``).

    Delegates to `_numeric_assets` so that the engine and the input validation
    always agree on which columns count as assets.

    Returns:
        Asset column names in the column order of ``self.prices``.
    """
    return _numeric_assets(self.prices)

341 

@property
def ret_adj(self) -> pl.DataFrame:
    """Return per-asset volatility-adjusted log returns clipped by ``cfg.clip``.

    Each asset column of ``self.prices`` is replaced by the expression that
    ``vol_adj`` builds for it from ``cfg.vola`` and ``cfg.clip``; all other
    columns (including ``date``) pass through unchanged.

    Returns:
        pl.DataFrame: Frame with the same columns as ``self.prices``.
    """
    lookback = self.cfg.vola
    clip = self.cfg.clip
    adjusted = [vol_adj(pl.col(name), vola=lookback, clip=clip) for name in self.assets]
    return self.prices.with_columns(adjusted)

352 

@property
def vola(self) -> pl.DataFrame:
    """Per-asset EWMA volatility of percentage returns.

    For every asset column the percent change is taken and an exponentially
    weighted standard deviation is applied with ``com = cfg.vola - 1``,
    ``adjust=True`` and ``min_samples = cfg.vola``. The result replaces the
    asset column under the same name, so the returned frame is aligned with
    ``self.prices``.

    Returns:
        pl.DataFrame: Frame with the columns of ``self.prices`` whose asset
        columns hold the volatility estimates.
    """
    lookback = self.cfg.vola
    estimates = [
        pl.col(name).pct_change().ewm_std(com=lookback - 1, adjust=True, min_samples=lookback).alias(name)
        for name in self.assets
    ]
    return self.prices.with_columns(estimates)

369 

@property
def cor(self) -> dict[datetime.date, np.ndarray]:
    """Compute per-timestamp EWM correlation matrices.

    Passes the volatility-adjusted returns (`ret_adj`) to ``ewm_covariance``
    from ``cvx.linalg`` with ``window = 2 * cfg.corr + 1`` and
    ``warmup = cfg.corr``, then converts each covariance matrix with
    ``cov_to_corr`` (second argument ``cfg.min_corr_denom``).

    Every value of ``self.prices["date"]`` becomes a key of the result. A date
    for which ``ewm_covariance`` returned no matrix maps to an all-NaN matrix
    (a fresh array per date).

    Returns:
        dict: Mapping ``date -> np.ndarray`` of shape (n_assets, n_assets).
        Keys are the values of the ``date`` column as produced by
        ``to_list()``.

    Performance:
        Delegates to ``ewm_covariance`` from ``cvx.linalg``.
        For large *N* or *T*, prefer ``cor_tensor`` to keep a single
        contiguous array rather than building a Python dict.
    """
    names = list(self.assets)
    size = len(names)
    corr_lookback = self.cfg.corr

    cov_by_date = ewm_covariance(
        self.ret_adj,
        assets=names,
        index_col="date",
        window=2 * corr_lookback + 1,
        warmup=corr_lookback,
    )

    matrices: dict = {}
    for stamp in self.prices["date"].to_list():
        if stamp in cov_by_date:
            matrices[stamp] = cov_to_corr(cov_by_date[stamp], self.cfg.min_corr_denom)
        else:
            # No covariance available for this date: placeholder of NaNs.
            matrices[stamp] = np.full((size, size), np.nan)
    return matrices

402 

@property
def cor_tensor(self) -> np.ndarray:
    """Return all correlation matrices stacked as a 3-D tensor.

    Converts the per-timestamp correlation dict (see `cor`) into a single
    contiguous NumPy array, so that the full history can be written to a flat
    ``.npy`` file with ``np.save`` and read back with ``np.load``.

    Returns:
        np.ndarray: Array of shape ``(T, N, N)`` where *T* is the number of
        entries in `cor` and *N* the number of assets. ``tensor[t]`` is the
        *t*-th matrix in the iteration order of `cor`. When `cor` is empty
        the result is an empty array of shape ``(0, N, N)``.

    Examples:
        >>> import tempfile, pathlib
        >>> import numpy as np
        >>> import polars as pl
        >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine
        >>> dates = pl.Series("date", list(range(100)))
        >>> rng0 = np.random.default_rng(0).lognormal(size=100)
        >>> rng1 = np.random.default_rng(1).lognormal(size=100)
        >>> prices = pl.DataFrame({"date": dates, "A": rng0, "B": rng1})
        >>> rng2 = np.random.default_rng(2).normal(size=100)
        >>> rng3 = np.random.default_rng(3).normal(size=100)
        >>> mu = pl.DataFrame({"date": dates, "A": rng2, "B": rng3})
        >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6)
        >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg)
        >>> tensor = engine.cor_tensor
        >>> with tempfile.TemporaryDirectory() as td:
        ...     path = pathlib.Path(td) / "cor.npy"
        ...     np.save(path, tensor)
        ...     loaded = np.load(path)
        >>> np.testing.assert_array_equal(tensor, loaded)
    """
    matrices = list(self.cor.values())
    if not matrices:
        # ``np.stack`` raises ValueError on an empty sequence; honour the
        # documented (T, N, N) shape with T == 0 instead.
        n_assets = len(self.assets)
        return np.empty((0, n_assets, n_assets), dtype=float)
    return np.stack(matrices, axis=0)

440 

441 # ------------------------------------------------------------------ 

442 # Internal solve helpers — inherited from _SolveMixin 

443 # ------------------------------------------------------------------ 

444 # (_compute_mask, _check_signal, _scale_to_cash, _row_early_check, 

445 # _denom_guard_yield, _compute_position, _replay_positions, 

446 # _iter_matrices, _iter_solve, warmup_state) 

447 # Implementations live in _engine_solve.py; patch targets remain in that 

448 # module's namespace, e.g. ``patch("basanos.math._engine_solve.solve")``. 

449 

450 # ------------------------------------------------------------------ 

451 # Position properties 

452 # ------------------------------------------------------------------ 

453 

@property
def cash_position(self) -> pl.DataFrame:
    r"""Optimize correlation-aware risk positions for each timestamp.

    Supports two covariance modes controlled by ``cfg.covariance_config``:

    * `EwmaShrinkConfig` (default): Computes EWMA correlations, applies
      linear shrinkage toward the identity, and solves a normalised linear
      system $C\,x = \mu$ per timestamp via Cholesky / LU.

    * `SlidingWindowConfig`: At each timestamp uses the
      ``cfg.covariance_config.window`` most recent vol-adjusted returns to fit
      a rank-``cfg.covariance_config.n_factors`` factor model via truncated
      SVD and solves the system via the Woodbury identity at $O(k^3 + kn)$
      rather than $O(n^3)$ per step.

    Non-finite or ill-posed cases yield zero positions for safety.

    The actual per-row work is done by ``_replay_positions``, which receives
    two NaN-initialised buffers (risk and cash positions) shaped like the
    asset price matrix, together with the EWMA volatilities, and fills the
    buffers in place. Only the cash buffer is returned from here.

    Returns:
        pl.DataFrame: DataFrame with columns ['date'] + asset names containing
        the per-timestamp cash positions (risk divided by EWMA volatility).

    Performance:
        For ``ewma_shrink``: dominant cost is ``self.cor`` (O(T·N²) time,
        O(T·N²) memory). The per-timestamp linear solve adds O(N³) per row.

        For ``sliding_window``: O(T·W·N·k) for sliding SVDs plus
        O(T·(k³ + kN)) for Woodbury solves. Memory is O(W·N) per step,
        independent of T.
    """
    names = self.assets
    price_matrix = self.prices.select(names).to_numpy()
    vola_matrix = self.vola.select(names).to_numpy()

    # Output buffers start as NaN and are filled in place by the solver replay.
    risk_buf = np.full_like(price_matrix, fill_value=np.nan, dtype=float)
    cash_buf = np.full_like(price_matrix, fill_value=np.nan, dtype=float)
    self._replay_positions(risk_buf, cash_buf, vola_matrix)

    # Overwrite only the asset columns; every other column is kept as is.
    replacements = [pl.lit(cash_buf[:, j]).alias(name) for j, name in enumerate(names)]
    return self.prices.with_columns(replacements)

502 

@property
def position_status(self) -> pl.DataFrame:
    """Per-timestamp reason code explaining each `cash_position` row.

    Labels every row with exactly one of four `SolveStatus` codes (which
    compare equal to their string equivalents):

    * ``'warmup'``: Insufficient history for the sliding-window covariance
      mode (``i + 1 < cfg.covariance_config.window``). Positions are ``NaN``
      for all assets at this timestamp.
    * ``'zero_signal'``: The expected-return vector ``mu`` was all-zeros (or
      all-NaN) at this timestamp; the optimizer short-circuited and returned
      zero positions without solving.
    * ``'degenerate'``: The normalisation denominator was non-finite or below
      ``cfg.denom_tol``, the Cholesky / Woodbury solve failed, or no asset had
      a finite price; positions were zeroed for safety.
    * ``'valid'``: The linear system was solved successfully and positions
      are non-trivially non-zero.

    The three non-``'valid'`` codes let downstream consumers (backtests, risk
    monitors) distinguish data gaps from signal silence from numerical
    ill-conditioning without re-inspecting ``mu`` or the engine configuration.

    The labels are the last element of each tuple yielded by ``_iter_solve``.

    Returns:
        pl.DataFrame: Two-column DataFrame ``{'date': ..., 'status': ...}``
        with one row per timestamp. The ``status`` column has ``Polars``
        dtype ``String``.
    """
    labels = [status for *_, status in self._iter_solve()]
    status_column = pl.Series(labels, dtype=pl.String)
    return pl.DataFrame({"date": self.prices["date"], "status": status_column})

536 

@property
def risk_position(self) -> pl.DataFrame:
    """Risk positions (before EWMA-volatility scaling) at each timestamp.

    Recovers the un-volatility-scaled position by multiplying the cash
    position element-wise by the per-asset EWMA volatility. Equivalently, it
    is the quantity solved by the correlation-adjusted linear system before
    dividing by ``vola``.

    Relationship to other properties::

        cash_position = risk_position / vola
        risk_position = cash_position * vola

    Returns:
        pl.DataFrame: DataFrame with columns ``['date'] + assets`` where
        each value is ``cash_position_i * vola_i`` at the given timestamp.
    """
    names = self.assets
    cash = self.cash_position.select(names).to_numpy()
    vol = self.vola.select(names).to_numpy()

    # Silence NumPy's "invalid value" floating-point warnings for this product.
    with np.errstate(invalid="ignore"):
        product = np.multiply(cash, vol)

    replacements = [pl.lit(product[:, j]).alias(name) for j, name in enumerate(names)]
    return self.prices.with_columns(replacements)

561 

@property
def position_leverage(self) -> pl.DataFrame:
    """L1 norm of cash positions (gross leverage) at each timestamp.

    Adds up the absolute cash positions across assets for every row using
    ``np.nansum``, so NaN positions are treated as zero and contribute
    nothing to gross leverage.

    Returns:
        pl.DataFrame: Two-column DataFrame ``{'date': ..., 'leverage': ...}``
        where ``leverage`` is the L1 norm of the cash-position vector.
    """
    cash = self.cash_position.select(self.assets).to_numpy()
    gross = np.nansum(np.abs(cash), axis=1)
    leverage_column = pl.Series(gross, dtype=pl.Float64)
    return pl.DataFrame({"date": self.prices["date"], "leverage": leverage_column})

578 

579 # ------------------------------------------------------------------ 

580 # Portfolio and performance 

581 # ------------------------------------------------------------------ 

582 

@property
def portfolio(self) -> Portfolio:
    """Construct a Portfolio from the optimized cash positions.

    The numeric, non-``date`` columns of `cash_position` are multiplied by
    ``cfg.position_scale`` and handed to ``Portfolio.from_cash_position``
    together with ``self.prices``, ``cfg.aum`` and ``cfg.cost_per_unit``.
    Forwarding ``cost_per_unit`` lets the portfolio's cost-related features
    work without any further configuration.

    Returns:
        Portfolio: Instance built from cash positions with AUM scaling.
    """
    positions = self.cash_position
    numeric_names = [
        name for name in positions.columns if name != "date" and positions[name].dtype.is_numeric()
    ]
    scale = self.cfg.position_scale
    scaled = positions.with_columns([pl.col(name) * scale for name in numeric_names])
    return Portfolio.from_cash_position(
        self.prices,
        scaled,
        aum=self.cfg.aum,
        cost_per_unit=self.cfg.cost_per_unit,
    )

600 

def sharpe_at_shrink(self, shrink: float) -> float:
    r"""Return the annualised portfolio Sharpe ratio for the given shrinkage weight.

    Constructs a new `BasanosEngine` with all parameters identical to
    ``self`` except that ``cfg.shrink`` is replaced by ``shrink``, then
    returns the annualised Sharpe ratio of the resulting portfolio.

    This is the canonical single-argument callable required by the benchmarks
    specification: ``f(λ) → Sharpe``. Use it to sweep λ across ``[0, 1]``
    and measure whether correlation adjustment adds value over the
    signal-proportional baseline (λ = 0) or the unregularised limit (λ = 1).

    Corner cases:
        * **λ = 0** — the shrunk matrix equals the identity, so the
          optimiser treats all assets as uncorrelated and positions are
          purely signal-proportional (no correlation adjustment).
        * **λ = 1** — the raw EWMA correlation matrix is used without
          shrinkage.

    Args:
        shrink: Retention weight λ ∈ [0, 1]. See the ``shrink`` field of
            `BasanosConfig` for full documentation.

    Returns:
        Annualised Sharpe ratio of the portfolio returns as a ``float``.
        Returns ``float("nan")`` when the Sharpe ratio cannot be computed
        (e.g. zero-variance returns), i.e. when the statistics hold no value
        under the ``"returns"`` key. A Sharpe ratio of exactly ``0.0`` is
        returned as ``0.0``.

    Raises:
        ValidationError: When ``shrink`` is outside [0, 1] (delegated to
            `BasanosConfig` field validation).

    Examples:
        >>> import numpy as np
        >>> import polars as pl
        >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine
        >>> dates = pl.Series("date", list(range(200)))
        >>> rng = np.random.default_rng(0)
        >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)})
        >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)})
        >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6)
        >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg)
        >>> s = engine.sharpe_at_shrink(0.5)
        >>> isinstance(s, float)
        True
    """
    new_cfg = self.cfg.replace(shrink=shrink)
    engine = BasanosEngine(prices=self.prices, mu=self.mu, cfg=new_cfg)
    sharpe = engine.portfolio.stats.sharpe().get("returns")
    # Compare against None explicitly: ``sharpe or nan`` would also map a
    # legitimate Sharpe ratio of 0.0 to NaN because 0.0 is falsy.
    return float("nan") if sharpe is None else float(sharpe)

650 

def sharpe_at_window_factors(self, window: int, n_factors: int) -> float:
    r"""Return the annualised portfolio Sharpe ratio for the given sliding-window parameters.

    Constructs a new `BasanosEngine` whose ``covariance_config`` is a
    `SlidingWindowConfig` built from the supplied ``window`` / ``n_factors``,
    keeping all other configuration identical to ``self``.

    Use this method to sweep ``(W, k)`` and compare the sliding-window
    estimator against the EWMA baseline (via `sharpe_at_shrink`).

    Args:
        window: Rolling window length $W \geq 1$.
            Rule of thumb: $W \geq 2 \cdot n_{\text{assets}}$.
        n_factors: Number of latent factors $k \geq 1$.

    Returns:
        Annualised Sharpe ratio of the portfolio returns as a ``float``.
        Returns ``float("nan")`` when the Sharpe ratio cannot be computed
        (e.g. not enough history to fill the first window), i.e. when the
        statistics hold no value under the ``"returns"`` key. A Sharpe ratio
        of exactly ``0.0`` is returned as ``0.0``.

    Raises:
        ValidationError: When ``window`` or ``n_factors`` fail field
            constraints (delegated to `BasanosConfig`).

    Examples:
        >>> import numpy as np
        >>> import polars as pl
        >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine
        >>> dates = pl.Series("date", list(range(200)))
        >>> rng = np.random.default_rng(0)
        >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)})
        >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)})
        >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6)
        >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg)
        >>> s = engine.sharpe_at_window_factors(window=40, n_factors=2)
        >>> isinstance(s, float)
        True
    """
    new_cfg = self.cfg.replace(
        covariance_config=SlidingWindowConfig(window=window, n_factors=n_factors),
    )
    engine = BasanosEngine(prices=self.prices, mu=self.mu, cfg=new_cfg)
    sharpe = engine.portfolio.stats.sharpe().get("returns")
    # Compare against None explicitly: ``sharpe or nan`` would also map a
    # legitimate Sharpe ratio of 0.0 to NaN because 0.0 is falsy.
    return float("nan") if sharpe is None else float(sharpe)

694 

@property
def naive_sharpe(self) -> float:
    r"""Sharpe ratio of the naïve equal-weight signal (μ = 1 for every asset/timestamp).

    Replaces the expected-return signal ``mu`` with a constant matrix of
    ones, then runs the optimiser with the current configuration and returns
    the annualised Sharpe ratio of the resulting portfolio.

    This provides the baseline answer to *"does the signal add value?"*:
    a real signal should produce a higher Sharpe than the naïve benchmark.
    Combined with `sharpe_at_shrink`, this yields a three-way comparison:

    +----------------------------------+------------------------------------------+
    | Benchmark                        | What it measures                         |
    +==================================+==========================================+
    | ``naive_sharpe``                 | No signal skill; pure correlation routing|
    +----------------------------------+------------------------------------------+
    | ``sharpe_at_shrink(0.0)``        | Signal skill, no correlation adj.        |
    +----------------------------------+------------------------------------------+
    | ``sharpe_at_shrink(cfg.shrink)`` | Signal + correlation adj.                |
    +----------------------------------+------------------------------------------+

    Returns:
        Annualised Sharpe ratio of the equal-weight portfolio as a ``float``.
        Returns ``float("nan")`` when the Sharpe ratio cannot be computed,
        i.e. when the statistics hold no value under the ``"returns"`` key.
        A Sharpe ratio of exactly ``0.0`` is returned as ``0.0``.

    Examples:
        >>> import numpy as np
        >>> import polars as pl
        >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine
        >>> dates = pl.Series("date", list(range(200)))
        >>> rng = np.random.default_rng(0)
        >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)})
        >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)})
        >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6)
        >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg)
        >>> s = engine.naive_sharpe
        >>> isinstance(s, float)
        True
    """
    # Overwrite every asset column of ``mu`` with the constant 1.0.
    naive_mu = self.mu.with_columns(pl.lit(1.0).alias(asset) for asset in self.assets)
    engine = BasanosEngine(prices=self.prices, mu=naive_mu, cfg=self.cfg)
    sharpe = engine.portfolio.stats.sharpe().get("returns")
    # Compare against None explicitly: ``sharpe or nan`` would also map a
    # legitimate Sharpe ratio of 0.0 to NaN because 0.0 is falsy.
    return float("nan") if sharpe is None else float(sharpe)

739 

740 # ------------------------------------------------------------------ 

741 # Reporting 

742 # ------------------------------------------------------------------ 

743 

@property
def config_report(self) -> "ConfigReport":
    """Return a `ConfigReport` facade for this engine.

    The report is built from both ``self.cfg`` and the engine itself. Because
    the engine is supplied, the report includes the full **lambda-sweep
    chart** — an interactive plot of the annualised Sharpe ratio as the
    shrinkage weight (λ) is swept across [0, 1] — in addition to the
    parameter table, shrinkage-guidance table, and theory section.

    ``ConfigReport`` is imported inside the property; at module level it is
    only imported under ``TYPE_CHECKING``.

    Returns:
        basanos.math._config_report.ConfigReport: Report facade with
        ``to_html()`` and ``save()`` methods.

    Examples:
        >>> import numpy as np
        >>> import polars as pl
        >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine
        >>> dates = pl.Series("date", list(range(200)))
        >>> rng = np.random.default_rng(0)
        >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)})
        >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)})
        >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6)
        >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg)
        >>> report = engine.config_report
        >>> html = report.to_html()
        >>> "Lambda" in html
        True
    """
    from ._config_report import ConfigReport

    report = ConfigReport(config=self.cfg, engine=self)
    return report

777 

778 # ------------------------------------------------------------------ 

779 # Matrix diagnostics — inherited from _DiagnosticsMixin 

780 # ------------------------------------------------------------------ 

781 # (condition_number, effective_rank, solver_residual, signal_utilisation) 

782 # Implementations live in _engine_diagnostics.py; patch targets remain in 

783 # that module's namespace, e.g. 

784 # ``patch("basanos.math._engine_diagnostics.solve")``. 

785 

786 # ------------------------------------------------------------------ 

787 # Signal evaluation — inherited from _SignalEvaluatorMixin 

788 # ------------------------------------------------------------------ 

789 # (_ic_series, ic, rank_ic, ic_mean, ic_std, icir, 

790 # rank_ic_mean, rank_ic_std) 

791 # Implementations live in _engine_ic.py.