Coverage for src / basanos / math / optimizer.py: 100%

349 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-19 05:23 +0000

1"""Correlation-aware risk position optimizer (Basanos). 

2 

3This module provides utilities to compute correlation-adjusted risk positions 

4from price data and expected-return signals. It relies on volatility-adjusted 

5returns to estimate a dynamic correlation matrix (via EWM), applies shrinkage 

6towards identity, and solves a normalized linear system per timestamp to 

7obtain stable positions. 

8 

9Performance characteristics 

10--------------------------- 

11Let *N* be the number of assets and *T* the number of timestamps. 

12 

13**Computational complexity** 

14 

15+----------------------------------+------------------+--------------------------------------+ 

16| Operation | Complexity | Bottleneck | 

17+==================================+==================+======================================+ 

18| EWM volatility (``ret_adj``, | O(T·N) | Linear in both T and N; negligible | 

19| ``vola``) | | | 

20+----------------------------------+------------------+--------------------------------------+ 

21| EWM correlation (``cor``) | O(T·N²) | ``lfilter`` over all N² asset pairs | 

22| | | simultaneously | 

23+----------------------------------+------------------+--------------------------------------+ 

24| Linear solve per timestamp | O(N³) | Cholesky / LU per row in | 

25| (``cash_position``) | * T solves | ``cash_position`` | 

26+----------------------------------+------------------+--------------------------------------+ 

27 

28**Memory usage** (peak, approximate) 

29 

30``_ewm_corr_numpy`` allocates roughly **14 float64 arrays** of shape 

31``(T, N, N)`` at peak (input sequences, IIR filter outputs, EWM components, 

32and the result tensor). Peak RAM ≈ **112 * T * N²** bytes. Typical 

33working sizes on a 16 GB machine: 

34 

35+--------+--------------------------+------------------------------------+ 

36| N | T (daily rows) | Peak memory (approx.) | 

37+========+==========================+====================================+ 

38| 50 | 252 (~1 yr) | ~70 MB | 

39+--------+--------------------------+------------------------------------+ 

40| 100 | 252 (~1 yr) | ~280 MB | 

41+--------+--------------------------+------------------------------------+ 

42| 100 | 2 520 (~10 yr) | ~2.8 GB | 

43+--------+--------------------------+------------------------------------+ 

44| 200 | 2 520 (~10 yr) | ~11 GB | 

45+--------+--------------------------+------------------------------------+ 

46| 500 | 2 520 (~10 yr) | ~70 GB ⚠ exceeds typical RAM | 

47+--------+--------------------------+------------------------------------+ 

48 

49**Practical limits (daily data)** 

50 

51* **≤ 150 assets, ≤ 5 years** — well within reach on an 8 GB laptop. 

52 * **≤ 200 assets, ≤ 10 years** — requires ~11-12 GB; feasible on a 16 GB 

53 workstation. 

54* **> 500 assets with multi-year history** — peak memory exceeds 16 GB; 

55 reduce the time range or switch to a chunked / streaming approach. 

56* **> 1 000 assets** — the O(N³) per-solve cost alone makes real-time 

57 optimization impractical even with adequate RAM. 

58 

59See ``BENCHMARKS.md`` for measured wall-clock timings across representative 

60dataset sizes. 

61""" 

62 

63import dataclasses 

64import logging 

65from typing import TYPE_CHECKING 

66 

67import numpy as np 

68import polars as pl 

69from pydantic import BaseModel, Field, ValidationInfo, field_validator 

70from scipy.signal import lfilter 

71from scipy.stats import spearmanr 

72 

73from ..analytics import Portfolio 

74from ..exceptions import ( 

75 ColumnMismatchError, 

76 ExcessiveNullsError, 

77 MissingDateColumnError, 

78 MonotonicPricesError, 

79 NonPositivePricesError, 

80 ShapeMismatchError, 

81 SingularMatrixError, 

82) 

83from ._linalg import inv_a_norm, solve, valid 

84from ._signal import shrink2id, vol_adj 

85 

86if TYPE_CHECKING: 

87 from ._config_report import ConfigReport 

88 

89_logger = logging.getLogger(__name__) 

90 

91 

92def _ewm_corr_numpy( 

93 data: np.ndarray, 

94 com: int, 

95 min_periods: int, 

96 min_corr_denom: float = 1e-14, 

97) -> np.ndarray: 

98 """Compute per-row EWM correlation matrices without pandas. 

99 

100 Matches ``pandas.DataFrame.ewm(com=com, min_periods=min_periods).corr()`` 

101 with the default ``adjust=True, ignore_na=False`` settings to within 

102 floating-point rounding error. 

103 

104 All five EWM components used to compute ``corr(i, j)`` — namely 

105 ``ewm(x_i)``, ``ewm(x_j)``, ``ewm(x_i²)``, ``ewm(x_j²)``, and 

106 ``ewm(x_i·x_j)`` — share the **same joint weight structure**: weights 

107 decay at every timestep (``ignore_na=False``) but a new observation is 

108 only added at timesteps where *both* ``x_i`` and ``x_j`` are finite. As 

109 a result the correlation for a pair is frozen once either asset goes 

110 missing, exactly mirroring pandas behaviour. 

111 

112 The EWM recurrence ``s[t] = β·s[t-1] + v[t]`` is an IIR filter and is 

113 solved for **all N² pairs simultaneously** via ``scipy.signal.lfilter`` 

114 — no Python loop over the T timesteps. 

115 

116 Args: 

117 data: Float array of shape ``(T, N)`` - typically volatility-adjusted 

118 log returns. 

119 com: EWM centre-of-mass (``alpha = 1 / (1 + com)``). 

120 min_periods: Minimum number of joint finite observations required 

121 before a correlation value is reported; earlier rows are NaN. 

122 min_corr_denom: Guard threshold below which the correlation denominator 

123 is treated as zero and the result is set to NaN. Defaults to 

124 ``1e-14``. 

125 

126 Returns: 

127 np.ndarray of shape ``(T, N, N)`` containing the per-row correlation 

128 matrices. Each matrix is symmetric with diagonal 1.0 (or NaN during 

129 warm-up). 

130 

131 Performance: 

132 **Time** — O(T·N²): ``lfilter`` processes all N² pairs simultaneously, 

133 so wall-clock time scales linearly with both T and N². 

134 

135 **Memory** — approximately 14 float64 arrays of shape ``(T, N, N)`` 

136 exist at peak, giving roughly ``112 * T * N²`` bytes. For 100 assets 

137 over 2 520 trading days (~10 years) that is ≈ 2.8 GB; for 500 assets 

138 the same period requires ≈ 70 GB, which exceeds typical workstation 

139 RAM. Reduce T or N before calling this function when working with 

140 large universes. 

141 """ 

142 _t_len, n_assets = data.shape 

143 beta = com / (1.0 + com) 

144 

145 fin = np.isfinite(data) # (T, N) bool 

146 xt_f = np.where(fin, data, 0.0) # (T, N) float - zeroed where not finite 

147 

148 # joint_fin[t, i, j] = True iff assets i and j are both finite at t 

149 joint_fin = fin[:, :, np.newaxis] & fin[:, np.newaxis, :] # (T, N, N) 

150 

151 # Build per-pair input sequences for the recurrence s[t] = beta*s[t-1] + v[t]. 

152 # 

153 # v_x[t, i, j] = x_i[t] where pair (i,j) jointly finite, else 0 

154 # v_x2[t, i, j] = x_i[t]^2 where jointly finite, else 0 

155 # v_xy[t, i, j] = x_i[t]*x_j[t] (xt_f is 0 for non-finite, so implicit mask) 

156 # v_w[t, i, j] = 1 where jointly finite, else 0 (weight indicator) 

157 # 

158 # By symmetry v_x[t,j,i] carries x_j[t] for pair (i,j), so s_x.swapaxes(1,2) 

159 # gives the EWM numerator of x_j without a separate v_y array. 

160 v_x = xt_f[:, :, np.newaxis] * joint_fin # (T, N, N) 

161 v_x2 = (xt_f * xt_f)[:, :, np.newaxis] * joint_fin # (T, N, N) 

162 v_xy = xt_f[:, :, np.newaxis] * xt_f[:, np.newaxis, :] # (T, N, N) 

163 v_w = joint_fin.astype(np.float64) # (T, N, N) 

164 

165 # Solve the IIR recurrence for every (i, j) pair in parallel. 

166 # lfilter([1], [1, -beta], v, axis=0) computes s[t] = beta*s[t-1] + v[t]. 

167 filt_a = np.array([1.0, -beta]) 

168 s_x = lfilter([1.0], filt_a, v_x, axis=0) # (T, N, N) 

169 s_x2 = lfilter([1.0], filt_a, v_x2, axis=0) # (T, N, N) 

170 s_xy = lfilter([1.0], filt_a, v_xy, axis=0) # (T, N, N) 

171 s_w = lfilter([1.0], filt_a, v_w, axis=0) # (T, N, N) 

172 

173 # Joint finite observation count per pair at each timestep (for min_periods) 

174 count = np.cumsum(joint_fin, axis=0) # (T, N, N) int64 

175 

176 # EWM means: running numerator / running weight denominator. 

177 # s_x.swapaxes(1,2)[t,i,j] = s_x[t,j,i] = EWM numerator of x_j for pair (i,j). 

178 with np.errstate(divide="ignore", invalid="ignore"): 

179 pos_w = s_w > 0 

180 ewm_x = np.where(pos_w, s_x / s_w, np.nan) # EWM(x_i) 

181 ewm_y = np.where(pos_w, s_x.swapaxes(1, 2) / s_w, np.nan) # EWM(x_j) 

182 ewm_x2 = np.where(pos_w, s_x2 / s_w, np.nan) # EWM(x_i^2) 

183 ewm_y2 = np.where(pos_w, s_x2.swapaxes(1, 2) / s_w, np.nan) # EWM(x_j^2) 

184 ewm_xy = np.where(pos_w, s_xy / s_w, np.nan) # EWM(x_i*x_j) 

185 

186 var_x = np.maximum(ewm_x2 - ewm_x * ewm_x, 0.0) 

187 var_y = np.maximum(ewm_y2 - ewm_y * ewm_y, 0.0) 

188 denom = np.sqrt(var_x * var_y) 

189 cov = ewm_xy - ewm_x * ewm_y 

190 

191 with np.errstate(divide="ignore", invalid="ignore"): 

192 result = np.where(denom > min_corr_denom, cov / denom, np.nan) 

193 

194 result = np.clip(result, -1.0, 1.0) 

195 

196 # Apply min_periods mask for all pairs 

197 result[count < min_periods] = np.nan 

198 

199 # Diagonal is exactly 1.0 where the asset has sufficient observations 

200 diag_idx = np.arange(n_assets) 

201 diag_count = count[:, diag_idx, diag_idx] # (T, N) 

202 result[:, diag_idx, diag_idx] = np.where(diag_count >= min_periods, 1.0, np.nan) 

203 

204 return result 

205 

206 

class BasanosConfig(BaseModel):
    r"""Configuration for correlation-aware position optimization.

    The required parameters (``vola``, ``corr``, ``clip``, ``shrink``, ``aum``)
    must be supplied by the caller. The optional parameters carry
    carefully chosen defaults whose rationale is described below.

    Shrinkage methodology
    ---------------------
    ``shrink`` controls linear shrinkage of the EWMA correlation matrix toward
    the identity:

    .. math::

        C_{\text{shrunk}} = \lambda \cdot C_{\text{EWMA}} + (1 - \lambda) \cdot I_n

    where :math:`\lambda` = ``shrink`` and :math:`I_n` is the identity.
    Shrinkage regularises the matrix when assets are few relative to the
    lookback (high concentration ratio :math:`n / T`), reducing the impact of
    extreme sample eigenvalues and improving the condition number of the matrix
    passed to the linear solver.

    **When to prefer strong shrinkage (low** ``shrink`` **/ high** ``1-shrink``\ **):**

    * Fewer than ~30 assets with a ``corr`` lookback shorter than 100 days.
    * High-volatility or crisis regimes where correlations spike and the sample
      matrix is less representative of the true structure.
    * Portfolios where estimation noise is more costly than correlation bias
      (e.g., when the signal-to-noise ratio of ``mu`` is low).

    **When to prefer light shrinkage (high** ``shrink``\ **):**

    * Many assets with a long lookback (low concentration ratio).
    * The EWMA correlation structure carries genuine diversification information
      that you want the solver to exploit.
    * Out-of-sample testing shows that position stability is not a concern.

    **Practical starting points (daily return data):**

    Here *n* = number of assets and *T* = ``cfg.corr`` (EWMA lookback).

    +-----------------------+-------------------+--------------------------------+
    | n (assets) / T (corr) | Suggested shrink  | Notes                          |
    +=======================+===================+================================+
    | n > 20, T < 40        | 0.3 - 0.5         | Near-singular matrix likely;   |
    |                       |                   | strong regularisation needed.  |
    +-----------------------+-------------------+--------------------------------+
    | n ~ 10, T ~ 60        | 0.5 - 0.7         | Balanced regime.               |
    +-----------------------+-------------------+--------------------------------+
    | n < 10, T > 100       | 0.7 - 0.9         | Well-conditioned sample;       |
    |                       |                   | light shrinkage for stability. |
    +-----------------------+-------------------+--------------------------------+

    See :func:`~basanos.math._signal.shrink2id` for the full theoretical
    background and academic references (Ledoit & Wolf, 2004; Chen et al., 2010).

    Default rationale
    -----------------
    ``profit_variance_init = 1.0``
        Unit variance is a neutral, uninformative starting point for the
        exponential moving average of realized P&L variance. Because the
        normalised risk positions are divided by the square root of this
        quantity, initialising at 1.0 leaves the first few positions
        unaffected until the EMA accumulates real data. Larger values
        would shrink initial positions; smaller values would inflate them.

    ``profit_variance_decay = 0.99``
        An EMA decay factor of λ = 0.99 corresponds to a half-life of
        ``log(0.5) / log(0.99) ≈ 69`` periods and an effective averaging
        window of ``1 / (1 - 0.99) = 100`` periods. For daily data this
        represents approximately 100 trading days (~5 months), a
        commonly used horizon for medium-frequency regime adaptation in
        systematic strategies.

    ``denom_tol = 1e-12``
        Positions are zeroed when the normalisation denominator
        ``inv_a_norm(μ, Σ)`` falls at or below this threshold. The
        value 1e-12 provides ample headroom above float64 machine
        epsilon (~2.2e-16) while remaining negligible relative to any
        economically meaningful signal magnitude.

    ``position_scale = 1e6``
        The dimensionless risk position is multiplied by this factor
        before being passed to :class:`~basanos.analytics.Portfolio`.
        A value of 1e6 means positions are expressed in units of one
        million of the base currency, a conventional denomination for
        institutional-scale portfolios where AUM is measured in hundreds
        of millions.

    ``min_corr_denom = 1e-14``
        The EWMA correlation denominator ``sqrt(var_x * var_y)`` is
        compared against this threshold; when at or below it the
        correlation is set to NaN rather than dividing by a near-zero
        value. The default 1e-14 is safely above float64 underflow
        while remaining negligible for any realistic return series.
        Advanced users may tighten this guard (larger value) when
        working with very-low-variance synthetic data.

    ``max_nan_fraction = 0.9``
        :class:`~basanos.exceptions.ExcessiveNullsError` is raised
        during construction when the null fraction in any asset price
        column **strictly exceeds** this threshold. The default 0.9
        permits up to 90 % missing prices (e.g., illiquid or recently
        listed assets in a long history) while rejecting columns that
        are almost entirely null and would contribute no useful
        information. Callers who want a stricter gate can lower this
        value; callers running on sparse data can raise it toward 1.0.

    Examples:
        >>> cfg = BasanosConfig(vola=32, corr=64, clip=3.0, shrink=0.5, aum=1e8)
        >>> cfg.vola
        32
        >>> cfg.corr
        64
    """

    vola: int = Field(..., gt=0, description="EWMA lookback for volatility normalization.")
    corr: int = Field(..., gt=0, description="EWMA lookback for correlation estimation.")
    clip: float = Field(..., gt=0.0, description="Clipping threshold for volatility adjustment.")
    shrink: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description=(
            "Retention weight λ for linear shrinkage of the EWMA correlation matrix toward "
            "the identity: C_shrunk = λ·C_ewma + (1-λ)·I. "
            "λ=1.0 uses the raw EWMA matrix (no shrinkage); λ=0.0 replaces it entirely "
            "with the identity (maximum shrinkage, positions are treated as uncorrelated). "
            "Values in [0.3, 0.8] are typical for daily financial return data. "
            "Lower values improve numerical stability when assets are many relative to the "
            "lookback (high concentration ratio n/T). See shrink2id() for full guidance."
        ),
    )
    aum: float = Field(..., gt=0.0, description="Assets under management for portfolio scaling.")
    profit_variance_init: float = Field(
        default=1.0,
        gt=0.0,
        description=(
            "Initial value for the profit variance EMA used in position sizing. "
            "Defaults to 1.0 (unit variance) so that the first positions are unscaled "
            "until real P&L data accumulates."
        ),
    )
    profit_variance_decay: float = Field(
        default=0.99,
        gt=0.0,
        lt=1.0,
        description=(
            "EMA decay factor λ for the realized P&L variance (higher = slower adaptation). "
            "The default 0.99 gives a half-life of ~69 periods and an effective window of "
            "100 periods, suitable for daily data."
        ),
    )
    denom_tol: float = Field(
        default=1e-12,
        gt=0.0,
        description=(
            "Minimum normalisation denominator; positions are zeroed at or below this value. "
            "The default 1e-12 is well above float64 machine epsilon (~2.2e-16) while "
            "remaining negligible for any economically meaningful signal."
        ),
    )
    position_scale: float = Field(
        default=1e6,
        gt=0.0,
        description=(
            "Multiplicative scaling factor applied to dimensionless risk positions to obtain "
            "cash positions in base-currency units. Defaults to 1e6 (one million), a "
            "conventional denomination for institutional portfolios."
        ),
    )
    min_corr_denom: float = Field(
        default=1e-14,
        gt=0.0,
        description=(
            "Guard threshold for the EWMA correlation denominator sqrt(var_x * var_y). "
            "When the denominator is at or below this value the correlation is set to NaN "
            "instead of dividing by a near-zero number. "
            "The default 1e-14 is safely above float64 underflow while being negligible for "
            "any realistic return variance."
        ),
    )
    max_nan_fraction: float = Field(
        default=0.9,
        gt=0.0,
        lt=1.0,
        description=(
            "Maximum tolerated fraction of null values in any asset price column. "
            "ExcessiveNullsError is raised during construction when the null fraction "
            "strictly exceeds this threshold. "
            "The default 0.9 allows up to 90 % missing prices while rejecting columns "
            "that are almost entirely null."
        ),
    )

    model_config = {"frozen": True, "extra": "forbid"}

    @property
    def report(self) -> "ConfigReport":
        """Return a :class:`~basanos.math._config_report.ConfigReport` facade for this config.

        Generates a self-contained HTML report summarising all configuration
        parameters, a shrinkage-guidance table, and a theory section on
        Ledoit-Wolf shrinkage.

        To also include a lambda-sweep chart (Sharpe vs λ), use
        :attr:`BasanosEngine.config_report` instead, which requires price and
        signal data.

        Returns:
            basanos.math._config_report.ConfigReport: Report facade with
            ``to_html()`` and ``save()`` methods.

        Examples:
            >>> from basanos.math.optimizer import BasanosConfig
            >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6)
            >>> report = cfg.report
            >>> html = report.to_html()
            >>> "Parameters" in html
            True
        """
        # Imported lazily to avoid a circular import at module load time.
        from ._config_report import ConfigReport

        return ConfigReport(config=self)

    @field_validator("corr")
    @classmethod
    def corr_greater_than_vola(cls, v: int, info: ValidationInfo) -> int:
        """Enforce ``corr >= vola`` for estimation stability.

        Pydantic v2 passes :class:`ValidationInfo`; fields declared earlier
        (here ``vola``) are available through ``info.data``. If ``vola``
        itself failed validation it is absent from ``info.data`` and this
        cross-field check is skipped.

        Raises:
            ValueError: If ``corr`` is smaller than ``vola``. Pydantic wraps
                this in a ``ValidationError`` for the caller.
        """
        vola = info.data.get("vola") if hasattr(info, "data") else None
        if vola is not None and v < vola:
            # A descriptive message: a bare ValueError produces an opaque
            # "Value error" entry in the pydantic ValidationError output.
            raise ValueError(
                f"corr ({v}) must be greater than or equal to vola ({vola}) "
                "for a stable correlation estimate"
            )
        return v

443 

444 

445@dataclasses.dataclass(frozen=True) 

446class BasanosEngine: 

447 """Engine to compute correlation matrices and optimize risk positions. 

448 

449 Encapsulates price data and configuration to build EWM-based 

450 correlations, apply shrinkage, and solve for normalized positions. 

451 

452 Attributes: 

453 prices: Polars DataFrame of price levels per asset over time. Must 

454 contain a ``'date'`` column and at least one numeric asset column 

455 with strictly positive values that are not monotonically 

456 non-decreasing or non-increasing (i.e. they must vary in sign). 

457 mu: Polars DataFrame of expected-return signals aligned with *prices*. 

458 Must share the same shape and column names as *prices*. 

459 cfg: Immutable :class:`BasanosConfig` controlling EWMA half-lives, 

460 clipping, shrinkage intensity, and AUM. 

461 

462 Examples: 

463 Build an engine with two synthetic assets over 30 days and inspect the 

464 optimized positions and diagnostic properties. 

465 

466 >>> import numpy as np 

467 >>> import polars as pl 

468 >>> from basanos.math import BasanosConfig, BasanosEngine 

469 >>> dates = list(range(30)) 

470 >>> rng = np.random.default_rng(42) 

471 >>> prices = pl.DataFrame({ 

472 ... "date": dates, 

473 ... "A": np.cumprod(1 + rng.normal(0.001, 0.02, 30)) * 100.0, 

474 ... "B": np.cumprod(1 + rng.normal(0.001, 0.02, 30)) * 150.0, 

475 ... }) 

476 >>> mu = pl.DataFrame({ 

477 ... "date": dates, 

478 ... "A": rng.normal(0.0, 0.5, 30), 

479 ... "B": rng.normal(0.0, 0.5, 30), 

480 ... }) 

481 >>> cfg = BasanosConfig(vola=5, corr=10, clip=2.0, shrink=0.5, aum=1_000_000) 

482 >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg) 

483 >>> engine.assets 

484 ['A', 'B'] 

485 >>> engine.cash_position.shape 

486 (30, 3) 

487 >>> engine.position_leverage.columns 

488 ['date', 'leverage'] 

489 """ 

490 

491 prices: pl.DataFrame 

492 mu: pl.DataFrame 

493 cfg: BasanosConfig 

494 

495 def __post_init__(self) -> None: 

496 """Validate basic invariants right after initialization. 

497 

498 Ensures both ``prices`` and ``mu`` contain a ``'date'`` column and 

499 share identical shapes/columns, which downstream computations rely on. 

500 Also checks for data quality issues that would cause silent failures 

501 downstream: non-positive prices, excessive NaN values, and monotonic 

502 (non-varying) price series. 

503 """ 

504 # ensure 'date' column exists in prices before any other validation 

505 if "date" not in self.prices.columns: 

506 raise MissingDateColumnError("prices") 

507 

508 # ensure 'date' column exists in mu as well (kept for symmetry and downstream assumptions) 

509 if "date" not in self.mu.columns: 

510 raise MissingDateColumnError("mu") 

511 

512 # check that prices and mu have the same shape 

513 if self.prices.shape != self.mu.shape: 

514 raise ShapeMismatchError(self.prices.shape, self.mu.shape) 

515 

516 # check that the columns of prices and mu are identical 

517 if not set(self.prices.columns) == set(self.mu.columns): 

518 raise ColumnMismatchError(self.prices.columns, self.mu.columns) 

519 

520 # check for non-positive prices: log returns require strictly positive prices 

521 for asset in self.assets: 

522 col = self.prices[asset].drop_nulls() 

523 if col.len() > 0 and (col <= 0).any(): 

524 raise NonPositivePricesError(asset) 

525 

526 # check for excessive NaN values: more than cfg.max_nan_fraction null in any asset column 

527 n_rows = self.prices.height 

528 if n_rows > 0: 

529 for asset in self.assets: 

530 nan_frac = self.prices[asset].null_count() / n_rows 

531 if nan_frac > self.cfg.max_nan_fraction: 

532 raise ExcessiveNullsError(asset, nan_frac, self.cfg.max_nan_fraction) 

533 

534 # check for monotonic price series: a strictly non-decreasing or non-increasing 

535 # series has no variance in its return sign, indicating malformed or synthetic data 

536 for asset in self.assets: 

537 col = self.prices[asset].drop_nulls() 

538 if col.len() > 2: 

539 diffs = col.diff().drop_nulls() 

540 if (diffs >= 0).all() or (diffs <= 0).all(): 

541 raise MonotonicPricesError(asset) 

542 

543 @property 

544 def assets(self) -> list[str]: 

545 """List asset column names (numeric columns excluding 'date').""" 

546 return [c for c in self.prices.columns if c != "date" and self.prices[c].dtype.is_numeric()] 

547 

548 @property 

549 def ret_adj(self) -> pl.DataFrame: 

550 """Return per-asset volatility-adjusted log returns clipped by cfg.clip. 

551 

552 Uses an EWMA volatility estimate with lookback ``cfg.vola`` to 

553 standardize log returns for each numeric asset column. 

554 """ 

555 return self.prices.with_columns( 

556 [vol_adj(pl.col(asset), vola=self.cfg.vola, clip=self.cfg.clip) for asset in self.assets] 

557 ) 

558 

559 @property 

560 def vola(self) -> pl.DataFrame: 

561 """Per-asset EWMA volatility of percentage returns. 

562 

563 Computes percent changes for each numeric asset column and applies an 

564 exponentially weighted standard deviation using the lookback specified 

565 by ``cfg.vola``. The result is a DataFrame aligned with ``self.prices`` 

566 whose numeric columns hold per-asset volatility estimates. 

567 """ 

568 return self.prices.with_columns( 

569 pl.col(asset) 

570 .pct_change() 

571 .ewm_std(com=self.cfg.vola - 1, adjust=True, min_samples=self.cfg.vola) 

572 .alias(asset) 

573 for asset in self.assets 

574 ) 

575 

576 @property 

577 def cor(self) -> dict[object, np.ndarray]: 

578 """Compute per-timestamp EWM correlation matrices. 

579 

580 Builds volatility-adjusted returns for all assets, computes an 

581 exponentially weighted correlation using a pure NumPy implementation 

582 (with window ``cfg.corr``), and returns a mapping from each timestamp 

583 to the corresponding correlation matrix as a NumPy array. 

584 

585 Returns: 

586 dict: Mapping ``date -> np.ndarray`` of shape (n_assets, n_assets). 

587 

588 Performance: 

589 Delegates to :func:`_ewm_corr_numpy`, which is O(T·N²) in both 

590 time and memory. The returned dict holds *T* references into the 

591 result tensor (one N*N view per date); no extra copies are made. 

592 For large *N* or *T*, prefer ``cor_tensor`` to keep a single 

593 contiguous array rather than building a Python dict. 

594 """ 

595 index = self.prices["date"] 

596 ret_adj_np = self.ret_adj.select(self.assets).to_numpy() 

597 tensor = _ewm_corr_numpy( 

598 ret_adj_np, 

599 com=self.cfg.corr, 

600 min_periods=self.cfg.corr, 

601 min_corr_denom=self.cfg.min_corr_denom, 

602 ) 

603 return {index[t]: tensor[t] for t in range(len(index))} 

604 

605 @property 

606 def cor_tensor(self) -> np.ndarray: 

607 """Return all correlation matrices stacked as a 3-D tensor. 

608 

609 Converts the per-timestamp correlation dict (see :py:attr:`cor`) into a 

610 single contiguous NumPy array so that the full history can be saved to 

611 a flat ``.npy`` file with :func:`numpy.save` and reloaded with 

612 :func:`numpy.load`. 

613 

614 Returns: 

615 np.ndarray: Array of shape ``(T, N, N)`` where *T* is the number of 

616 timestamps and *N* the number of assets. ``tensor[t]`` is the 

617 correlation matrix for the *t*-th date (same ordering as 

618 ``self.prices["date"]``). 

619 

620 Examples: 

621 >>> import tempfile, pathlib 

622 >>> import numpy as np 

623 >>> import polars as pl 

624 >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine 

625 >>> dates = pl.Series("date", list(range(100))) 

626 >>> rng0 = np.random.default_rng(0).lognormal(size=100) 

627 >>> rng1 = np.random.default_rng(1).lognormal(size=100) 

628 >>> prices = pl.DataFrame({"date": dates, "A": rng0, "B": rng1}) 

629 >>> rng2 = np.random.default_rng(2).normal(size=100) 

630 >>> rng3 = np.random.default_rng(3).normal(size=100) 

631 >>> mu = pl.DataFrame({"date": dates, "A": rng2, "B": rng3}) 

632 >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6) 

633 >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg) 

634 >>> tensor = engine.cor_tensor 

635 >>> with tempfile.TemporaryDirectory() as td: 

636 ... path = pathlib.Path(td) / "cor.npy" 

637 ... np.save(path, tensor) 

638 ... loaded = np.load(path) 

639 >>> np.testing.assert_array_equal(tensor, loaded) 

640 """ 

641 return np.stack(list(self.cor.values()), axis=0) 

642 

    @property
    def cash_position(self) -> pl.DataFrame:
        """Optimize correlation-aware risk positions for each timestamp.

        Computes EWMA correlations (via ``self.cor``), applies shrinkage toward
        the identity matrix with intensity ``cfg.shrink``, and solves a
        normalized linear system A x = mu per timestamp to obtain stable,
        scale-invariant positions. Non-finite or ill-posed cases yield zero
        positions for safety. Positions are additionally deflated by a running
        EMA of the realized P&L variance (seeded at
        ``cfg.profit_variance_init``, decayed by ``cfg.profit_variance_decay``).

        Returns:
            pl.DataFrame: DataFrame with columns ['date'] + asset names containing
            the per-timestamp cash positions (risk divided by EWMA volatility).

        Performance:
            Dominant cost is ``self.cor`` (O(T·N²) time, O(T·N²) memory — see
            :func:`_ewm_corr_numpy`). The per-timestamp linear solve via
            Cholesky / LU decomposition adds O(N³) per row for a total solve
            cost of O(T·N³). For *N* = 100 and *T* = 2 520 (~10 years daily)
            the solve contributes approximately 2.52 * 10^9 floating-point
            operations (upper bound; Cholesky is N³/3 + O(N²)); at *N* = 1 000
            this rises to roughly 2.52 * 10^12, making the per-row solve the
            dominant compute bottleneck for large universes.
        """
        # compute the correlation matrices
        cor = self.cor
        assets = self.assets

        # Compute risk positions row-by-row using correlation shrinkage (NumPy)
        prices_num = self.prices.select(assets).to_numpy()
        # Simple percent returns; row 0 stays zero because there is no prior price.
        returns_num = np.zeros_like(prices_num, dtype=float)
        returns_num[1:] = prices_num[1:] / prices_num[:-1] - 1.0

        mu = self.mu.select(assets).to_numpy()
        # Both position buffers start as NaN; rows/entries are only filled
        # where prices are finite and a position is actually computed.
        risk_pos_np = np.full_like(mu, fill_value=np.nan, dtype=float)
        cash_pos_np = np.full_like(mu, fill_value=np.nan, dtype=float)
        vola_np = self.vola.select(assets).to_numpy()

        # State for the realized-P&L variance EMA that deflates positions below.
        profit_variance = self.cfg.profit_variance_init
        lamb = self.cfg.profit_variance_decay

        # NOTE(review): enumerate(cor.keys()) aligns i with the t-th row only
        # if dates are unique — duplicate dates would collapse the dict and
        # misalign the loop; verify uniqueness is guaranteed upstream.
        for i, t in enumerate(cor.keys()):
            # get the mask of finite prices for this timestamp
            mask = np.isfinite(prices_num[i])

            # Compute profit contribution using only finite returns and available positions
            if i > 0:
                ret_mask = np.isfinite(returns_num[i]) & mask
                # Profit at time i comes from yesterday's cash position applied to today's returns
                if ret_mask.any():
                    # Recompute the FULL previous cash row from risk/vola here
                    # (overwriting the masked-only values written at the end of
                    # the previous iteration); entries with NaN vola become NaN
                    # and are zeroed by nan_to_num below.
                    with np.errstate(invalid="ignore"):
                        cash_pos_np[i - 1] = risk_pos_np[i - 1] / vola_np[i - 1]
                    lhs = np.nan_to_num(cash_pos_np[i - 1, ret_mask], nan=0.0)
                    rhs = np.nan_to_num(returns_num[i, ret_mask], nan=0.0)
                    profit = lhs @ rhs
                    # EMA update of realized P&L variance.
                    profit_variance = lamb * profit_variance + (1 - lamb) * profit**2
            # we have no price data at all for this timestamp
            if not mask.any():
                continue

            # get the correlation matrix for this timestamp
            corr_n = cor[t]

            # shrink the correlation matrix towards identity, then restrict to
            # the sub-matrix of assets with finite prices at this timestamp
            matrix = shrink2id(corr_n, lamb=self.cfg.shrink)[np.ix_(mask, mask)]

            # get the expected-return vector for this timestamp (NaN signals -> 0)
            expected_mu = np.nan_to_num(mu[i][mask])

            # Short-circuit when signal is zero - no position needed, skip norm computation
            if np.allclose(expected_mu, 0.0):
                pos = np.zeros_like(expected_mu)
            else:
                # Normalize solution; guard against zero/near-zero norm to avoid NaNs.
                # inv_a_norm returns float(np.nan) when no valid entries exist (never None).
                denom = inv_a_norm(expected_mu, matrix)

                if not np.isfinite(denom) or denom <= self.cfg.denom_tol:
                    _logger.warning(
                        "Positions zeroed at t=%s: normalisation denominator is degenerate "
                        "(denom=%s, denom_tol=%s). Check signal magnitude and covariance matrix.",
                        t,
                        denom,
                        self.cfg.denom_tol,
                        extra={
                            "context": {
                                "t": str(t),
                                "denom": denom,
                                "denom_tol": self.cfg.denom_tol,
                            }
                        },
                    )
                    pos = np.zeros_like(expected_mu)
                else:
                    # Correlation-adjusted solve, normalised by inv_a_norm.
                    pos = solve(matrix, expected_mu) / denom

            # Deflate by the realized-P&L variance EMA, then convert the risk
            # position to a cash position by dividing by EWMA volatility.
            risk_pos_np[i, mask] = pos / profit_variance
            with np.errstate(invalid="ignore"):
                cash_pos_np[i, mask] = risk_pos_np[i, mask] / vola_np[i, mask]

        # Build Polars DataFrame for risk positions (numeric columns only)
        cash_position = self.prices.with_columns(
            [(pl.lit(cash_pos_np[:, i]).alias(asset)) for i, asset in enumerate(assets)]
        )

        return cash_position

749 

750 @property 

751 def risk_position(self) -> pl.DataFrame: 

752 """Risk positions (before EWMA-volatility scaling) at each timestamp. 

753 

754 Derives the un-volatility-scaled position by multiplying the cash 

755 position by the per-asset EWMA volatility. Equivalently, this is 

756 the quantity solved by the correlation-adjusted linear system before 

757 dividing by ``vola``. 

758 

759 Relationship to other properties:: 

760 

761 cash_position = risk_position / vola 

762 risk_position = cash_position * vola 

763 

764 Returns: 

765 pl.DataFrame: DataFrame with columns ``['date'] + assets`` where 

766 each value is ``cash_position_i * vola_i`` at the given timestamp. 

767 """ 

768 assets = self.assets 

769 cp_np = self.cash_position.select(assets).to_numpy() 

770 vola_np = self.vola.select(assets).to_numpy() 

771 with np.errstate(invalid="ignore"): 

772 risk_pos = cp_np * vola_np 

773 return self.prices.with_columns([pl.lit(risk_pos[:, i]).alias(asset) for i, asset in enumerate(assets)]) 

774 

775 @property 

776 def position_leverage(self) -> pl.DataFrame: 

777 """L1 norm of cash positions (gross leverage) at each timestamp. 

778 

779 Sums the absolute values of all asset cash positions at each row. 

780 NaN positions are treated as zero (they contribute nothing to gross 

781 leverage). 

782 

783 Returns: 

784 pl.DataFrame: Two-column DataFrame ``{'date': ..., 'leverage': ...}`` 

785 where ``leverage`` is the L1 norm of the cash-position vector. 

786 """ 

787 assets = self.assets 

788 cp_np = self.cash_position.select(assets).to_numpy() 

789 leverage = np.nansum(np.abs(cp_np), axis=1) 

790 return pl.DataFrame({"date": self.prices["date"], "leverage": pl.Series(leverage, dtype=pl.Float64)}) 

791 

792 @property 

793 def condition_number(self) -> pl.DataFrame: 

794 """Condition number κ of the shrunk correlation matrix C_shrunk at each timestamp. 

795 

796 Applies the same shrinkage as ``cash_position`` (``cfg.shrink``) to the 

797 per-timestamp EWM correlation matrix and computes ``np.linalg.cond``. 

798 Only the sub-matrix corresponding to assets with finite prices at that 

799 timestamp is used; rows with no finite prices yield ``NaN``. 

800 

801 Returns: 

802 pl.DataFrame: Two-column DataFrame ``{'date': ..., 'condition_number': ...}``. 

803 """ 

804 cor = self.cor 

805 assets = self.assets 

806 prices_num = self.prices.select(assets).to_numpy() 

807 

808 kappas: list[float] = [] 

809 for i, (_t, corr_n) in enumerate(cor.items()): 

810 mask = np.isfinite(prices_num[i]) 

811 if not mask.any(): 

812 kappas.append(float(np.nan)) 

813 continue 

814 matrix = shrink2id(corr_n, lamb=self.cfg.shrink)[np.ix_(mask, mask)] 

815 _v, mat = valid(matrix) 

816 if not _v.any(): 

817 kappas.append(float(np.nan)) 

818 continue 

819 kappas.append(float(np.linalg.cond(mat))) 

820 

821 return pl.DataFrame({"date": self.prices["date"], "condition_number": pl.Series(kappas, dtype=pl.Float64)}) 

822 

823 @property 

824 def effective_rank(self) -> pl.DataFrame: 

825 r"""Effective rank of the shrunk correlation matrix C_shrunk at each timestamp. 

826 

827 Measures the true dimensionality of the portfolio by computing the 

828 entropy-based effective rank: 

829 

830 .. math:: 

831 

832 \\text{eff\\_rank} = \\exp\\!\\left(-\\sum_i p_i \\ln p_i\\right), 

833 \\quad p_i = \\frac{\\lambda_i}{\\sum_j \\lambda_j} 

834 

835 where :math:`\\lambda_i` are the eigenvalues of ``C_shrunk`` (restricted 

836 to assets with finite prices at that timestamp). A value equal to the 

837 number of assets indicates a perfectly uniform spectrum (identity-like 

838 matrix); a value of 1 indicates a rank-1 matrix dominated by one 

839 direction. 

840 

841 Returns: 

842 pl.DataFrame: Two-column DataFrame ``{'date': ..., 'effective_rank': ...}``. 

843 """ 

844 cor = self.cor 

845 assets = self.assets 

846 prices_num = self.prices.select(assets).to_numpy() 

847 

848 ranks: list[float] = [] 

849 for i, (_t, corr_n) in enumerate(cor.items()): 

850 mask = np.isfinite(prices_num[i]) 

851 if not mask.any(): 

852 ranks.append(float(np.nan)) 

853 continue 

854 matrix = shrink2id(corr_n, lamb=self.cfg.shrink)[np.ix_(mask, mask)] 

855 _v, mat = valid(matrix) 

856 if not _v.any(): 

857 ranks.append(float(np.nan)) 

858 continue 

859 eigvals = np.linalg.eigvalsh(mat) 

860 eigvals = np.clip(eigvals, 0.0, None) 

861 total = eigvals.sum() 

862 if total <= 0.0: 

863 ranks.append(float(np.nan)) 

864 continue 

865 p = eigvals / total 

866 p_pos = p[p > 0.0] 

867 entropy = float(-np.sum(p_pos * np.log(p_pos))) 

868 ranks.append(float(np.exp(entropy))) 

869 

870 return pl.DataFrame({"date": self.prices["date"], "effective_rank": pl.Series(ranks, dtype=pl.Float64)}) 

871 

872 @property 

873 def solver_residual(self) -> pl.DataFrame: 

874 r"""Per-timestamp solver residual ``‖C_shrunk·x - μ‖₂``. 

875 

876 After solving the normalised linear system ``C_shrunk · x = μ`` at 

877 each timestamp, this property reports the Euclidean residual norm. 

878 For a well-posed, well-conditioned system the residual is near machine 

879 epsilon; large values flag numerical difficulties (near-singular 

880 matrices, extreme condition numbers, or solver fall-back to LU). 

881 

882 Returns: 

883 pl.DataFrame: Two-column DataFrame ``{'date': ..., 'residual': ...}``. 

884 Zero is returned when ``μ`` is the zero vector (no solve is 

885 performed). ``NaN`` is returned when no asset has finite prices. 

886 """ 

887 cor = self.cor 

888 assets = self.assets 

889 mu_np = self.mu.select(assets).to_numpy() 

890 prices_num = self.prices.select(assets).to_numpy() 

891 

892 residuals: list[float] = [] 

893 for i, (t, corr_n) in enumerate(cor.items()): 

894 mask = np.isfinite(prices_num[i]) 

895 if not mask.any(): 

896 residuals.append(float(np.nan)) 

897 continue 

898 matrix = shrink2id(corr_n, lamb=self.cfg.shrink)[np.ix_(mask, mask)] 

899 expected_mu = np.nan_to_num(mu_np[i][mask]) 

900 if np.allclose(expected_mu, 0.0): 

901 residuals.append(0.0) 

902 continue 

903 try: 

904 x = solve(matrix, expected_mu) 

905 except SingularMatrixError: 

906 # The (shrunk) covariance matrix is degenerate — e.g. the EWM 

907 # window has not yet accumulated enough data, or the asset 

908 # returns are collinear. The linear system has no unique 

909 # solution, so the residual norm is undefined and NaN is the 

910 # correct sentinel. 

911 _logger.warning( 

912 "solver_residual: SingularMatrixError at t=%s - covariance matrix is " 

913 "degenerate; residual set to NaN.", 

914 t, 

915 ) 

916 residuals.append(float(np.nan)) 

917 continue 

918 finite_x = np.isfinite(x) 

919 if not finite_x.any(): 

920 residuals.append(float(np.nan)) 

921 continue 

922 residuals.append( 

923 float(np.linalg.norm(matrix[np.ix_(finite_x, finite_x)] @ x[finite_x] - expected_mu[finite_x])) 

924 ) 

925 

926 return pl.DataFrame({"date": self.prices["date"], "residual": pl.Series(residuals, dtype=pl.Float64)}) 

927 

928 @property 

929 def signal_utilisation(self) -> pl.DataFrame: 

930 r"""Per-asset signal utilisation: fraction of μ_i surviving the correlation filter. 

931 

932 For each asset *i* and timestamp *t*, computes 

933 

934 .. math:: 

935 

936 u_i = \\frac{(C_{\\text{shrunk}}^{-1}\\,\\mu)_i}{\\mu_i} 

937 

938 where :math:`C_{\\text{shrunk}}^{-1}\\,\\mu` is the unnormalised solve 

939 result. When :math:`C = I` (identity) all assets have utilisation 1. 

940 Off-diagonal correlations attenuate some assets (:math:`u_i < 1`) and 

941 may amplify negatively correlated ones (:math:`u_i > 1`). 

942 

943 A value of ``0.0`` is returned when the entire signal vector 

944 :math:`\\mu` is near zero at that timestamp (no solve is performed). 

945 ``NaN`` is returned for individual assets where :math:`|\\mu_i|` is 

946 below machine-epsilon precision or where prices are unavailable. 

947 

948 Returns: 

949 pl.DataFrame: DataFrame with columns ``['date'] + assets``. 

950 """ 

951 cor = self.cor 

952 assets = self.assets 

953 mu_np = self.mu.select(assets).to_numpy() 

954 prices_num = self.prices.select(assets).to_numpy() 

955 

956 _mu_tol = 1e-14 # treat |μ_i| below this as zero to avoid spurious large ratios 

957 n_assets = len(assets) 

958 util_np = np.full((self.prices.height, n_assets), np.nan) 

959 

960 for i, (t, corr_n) in enumerate(cor.items()): 

961 mask = np.isfinite(prices_num[i]) 

962 if not mask.any(): 

963 continue 

964 matrix = shrink2id(corr_n, lamb=self.cfg.shrink)[np.ix_(mask, mask)] 

965 expected_mu = np.nan_to_num(mu_np[i][mask]) 

966 if np.allclose(expected_mu, 0.0): 

967 util_np[i, mask] = 0.0 

968 continue 

969 try: 

970 x = solve(matrix, expected_mu) 

971 except SingularMatrixError: 

972 # The (shrunk) covariance matrix is degenerate — the portfolio 

973 # allocation is undefined at this timestamp. All asset 

974 # utilisations remain NaN (the array was pre-filled with NaN), 

975 # which correctly signals that no valid ratio could be computed. 

976 _logger.warning( 

977 "signal_utilisation: SingularMatrixError at t=%s - covariance matrix is " 

978 "degenerate; utilisation set to NaN.", 

979 t, 

980 ) 

981 continue 

982 with np.errstate(divide="ignore", invalid="ignore"): 

983 ratio = np.where(np.abs(expected_mu) > _mu_tol, x / expected_mu, np.nan) 

984 util_np[i, mask] = ratio 

985 

986 return self.prices.with_columns([pl.lit(util_np[:, j]).alias(asset) for j, asset in enumerate(assets)]) 

987 

988 @property 

989 def portfolio(self) -> Portfolio: 

990 """Construct a Portfolio from the optimized cash positions. 

991 

992 Converts the computed cash positions into a Portfolio using the 

993 configured AUM. 

994 

995 Returns: 

996 Portfolio: Instance built from cash positions with AUM scaling. 

997 """ 

998 cp = self.cash_position 

999 assets = [c for c in cp.columns if c != "date" and cp[c].dtype.is_numeric()] 

1000 scaled = cp.with_columns(pl.col(a) * self.cfg.position_scale for a in assets) 

1001 return Portfolio.from_cash_position(self.prices, scaled, aum=self.cfg.aum) 

1002 

1003 def sharpe_at_shrink(self, shrink: float) -> float: 

1004 r"""Return the annualised portfolio Sharpe ratio for the given shrinkage weight. 

1005 

1006 Constructs a new :class:`BasanosEngine` with all parameters identical to 

1007 ``self`` except that ``cfg.shrink`` is replaced by ``shrink``, then 

1008 returns the annualised Sharpe ratio of the resulting portfolio. 

1009 

1010 This is the canonical single-argument callable required by the benchmarks 

1011 specification: ``f(λ) → Sharpe``. Use it to sweep λ across ``[0, 1]`` 

1012 and measure whether correlation adjustment adds value over the 

1013 signal-proportional baseline (λ = 0) or the unregularised limit (λ = 1). 

1014 

1015 Corner cases: 

1016 * **λ = 0** — the shrunk matrix equals the identity, so the 

1017 optimiser treats all assets as uncorrelated and positions are 

1018 purely signal-proportional (no correlation adjustment). 

1019 * **λ = 1** — the raw EWMA correlation matrix is used without 

1020 shrinkage. 

1021 

1022 Args: 

1023 shrink: Retention weight λ ∈ [0, 1]. See 

1024 :attr:`BasanosConfig.shrink` for full documentation. 

1025 

1026 Returns: 

1027 Annualised Sharpe ratio of the portfolio returns as a ``float``. 

1028 Returns ``float("nan")`` when the Sharpe ratio cannot be computed 

1029 (e.g. zero-variance returns). 

1030 

1031 Raises: 

1032 ValidationError: When ``shrink`` is outside [0, 1] (delegated to 

1033 :class:`BasanosConfig` field validation). 

1034 

1035 Examples: 

1036 >>> import numpy as np 

1037 >>> import polars as pl 

1038 >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine 

1039 >>> dates = pl.Series("date", list(range(200))) 

1040 >>> rng = np.random.default_rng(0) 

1041 >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)}) 

1042 >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)}) 

1043 >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6) 

1044 >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg) 

1045 >>> s = engine.sharpe_at_shrink(0.5) 

1046 >>> isinstance(s, float) 

1047 True 

1048 """ 

1049 new_cfg = self.cfg.model_copy(update={"shrink": shrink}) 

1050 engine = BasanosEngine(prices=self.prices, mu=self.mu, cfg=new_cfg) 

1051 return float(engine.portfolio.stats.sharpe().get("returns") or float("nan")) 

1052 

1053 @property 

1054 def naive_sharpe(self) -> float: 

1055 r"""Sharpe ratio of the naïve equal-weight signal (μ = 1 for every asset/timestamp). 

1056 

1057 Replaces the expected-return signal ``mu`` with a constant matrix of 

1058 ones, then runs the optimiser with the current configuration and returns 

1059 the annualised Sharpe ratio of the resulting portfolio. 

1060 

1061 This provides the baseline answer to *"does the signal add value?"*: 

1062 a real signal should produce a higher Sharpe than the naïve benchmark. 

1063 Combined with :meth:`sharpe_at_shrink`, this yields a three-way 

1064 comparison: 

1065 

1066 +--------------------+----------------------------------------------+ 

1067 | Benchmark | What it measures | 

1068 +====================+==============================================+ 

1069 | ``naive_sharpe`` | No signal skill; pure correlation routing | 

1070 +--------------------+----------------------------------------------+ 

1071 | ``sharpe_at_shrink(0.0)`` | Signal skill, no correlation adj. | 

1072 +--------------------+----------------------------------------------+ 

1073 | ``sharpe_at_shrink(cfg.shrink)`` | Signal + correlation adj. | 

1074 +--------------------+----------------------------------------------+ 

1075 

1076 Returns: 

1077 Annualised Sharpe ratio of the equal-weight portfolio as a ``float``. 

1078 Returns ``float("nan")`` when the Sharpe ratio cannot be computed. 

1079 

1080 Examples: 

1081 >>> import numpy as np 

1082 >>> import polars as pl 

1083 >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine 

1084 >>> dates = pl.Series("date", list(range(200))) 

1085 >>> rng = np.random.default_rng(0) 

1086 >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)}) 

1087 >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)}) 

1088 >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6) 

1089 >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg) 

1090 >>> s = engine.naive_sharpe 

1091 >>> isinstance(s, float) 

1092 True 

1093 """ 

1094 naive_mu = self.mu.with_columns(pl.lit(1.0).alias(asset) for asset in self.assets) 

1095 engine = BasanosEngine(prices=self.prices, mu=naive_mu, cfg=self.cfg) 

1096 return float(engine.portfolio.stats.sharpe().get("returns") or float("nan")) 

1097 

1098 def _ic_series(self, use_rank: bool) -> pl.DataFrame: 

1099 """Compute the cross-sectional IC time series. 

1100 

1101 For each timestamp *t* (from 0 to T-2), correlates the signal vector 

1102 ``mu[t, :]`` with the one-period forward return vector 

1103 ``prices[t+1, :] / prices[t, :] - 1`` across all assets where both 

1104 quantities are finite. When fewer than two valid asset pairs are 

1105 available, the IC value is set to ``NaN``. 

1106 

1107 Args: 

1108 use_rank: When ``True`` the Spearman rank correlation is used 

1109 (Rank IC); when ``False`` the Pearson correlation is used (IC). 

1110 

1111 Returns: 

1112 pl.DataFrame: Two-column frame with ``date`` (signal date) and 

1113 either ``ic`` or ``rank_ic``. 

1114 """ 

1115 assets = self.assets 

1116 prices_np = self.prices.select(assets).to_numpy().astype(float) 

1117 mu_np = self.mu.select(assets).to_numpy().astype(float) 

1118 dates = self.prices["date"].to_list() 

1119 

1120 col_name = "rank_ic" if use_rank else "ic" 

1121 ic_values: list[float] = [] 

1122 ic_dates = [] 

1123 

1124 for t in range(len(dates) - 1): 

1125 fwd_ret = prices_np[t + 1] / prices_np[t] - 1.0 

1126 signal = mu_np[t] 

1127 

1128 # Both signal and forward return must be finite 

1129 mask = np.isfinite(signal) & np.isfinite(fwd_ret) 

1130 n_valid = int(mask.sum()) 

1131 

1132 if n_valid < 2: 

1133 ic_values.append(float("nan")) 

1134 elif use_rank: 

1135 corr, _ = spearmanr(signal[mask], fwd_ret[mask]) 

1136 ic_values.append(float(corr)) 

1137 else: 

1138 ic_values.append(float(np.corrcoef(signal[mask], fwd_ret[mask])[0, 1])) 

1139 

1140 ic_dates.append(dates[t]) 

1141 

1142 return pl.DataFrame({"date": ic_dates, col_name: pl.Series(ic_values, dtype=pl.Float64)}) 

1143 

1144 @property 

1145 def ic(self) -> pl.DataFrame: 

1146 """Cross-sectional Pearson Information Coefficient (IC) time series. 

1147 

1148 For each timestamp *t* (excluding the last), computes the Pearson 

1149 correlation between the signal ``mu[t, :]`` and the one-period forward 

1150 return ``prices[t+1, :] / prices[t, :] - 1`` across all assets where 

1151 both quantities are finite. 

1152 

1153 An IC value close to +1 means the signal ranked assets in the same 

1154 order as forward returns; close to -1 means the opposite; near 0 means 

1155 no predictive relationship. 

1156 

1157 Returns: 

1158 pl.DataFrame: Frame with columns ``['date', 'ic']``. ``date`` is 

1159 the timestamp at which the signal was observed. ``ic`` is a 

1160 ``Float64`` series (``NaN`` when fewer than 2 valid asset pairs 

1161 are available for a given timestamp). 

1162 

1163 See Also: 

1164 :py:attr:`rank_ic` — Spearman variant, more robust to outliers. 

1165 :py:attr:`ic_mean`, :py:attr:`ic_std`, :py:attr:`icir` — summary 

1166 statistics. 

1167 """ 

1168 return self._ic_series(use_rank=False) 

1169 

1170 @property 

1171 def rank_ic(self) -> pl.DataFrame: 

1172 """Cross-sectional Spearman Rank Information Coefficient time series. 

1173 

1174 Identical to :py:attr:`ic` but uses the Spearman rank correlation 

1175 instead of the Pearson correlation, making it more robust to fat-tailed 

1176 return distributions and outliers. 

1177 

1178 Returns: 

1179 pl.DataFrame: Frame with columns ``['date', 'rank_ic']``. 

1180 ``rank_ic`` is a ``Float64`` series. 

1181 

1182 See Also: 

1183 :py:attr:`ic` — Pearson variant. 

1184 :py:attr:`rank_ic_mean`, :py:attr:`rank_ic_std` — summary 

1185 statistics. 

1186 """ 

1187 return self._ic_series(use_rank=True) 

1188 

1189 @property 

1190 def ic_mean(self) -> float: 

1191 """Mean of the IC time series, ignoring NaN values. 

1192 

1193 Returns: 

1194 float: Arithmetic mean of all finite IC values, or ``NaN`` if 

1195 no finite values exist. 

1196 """ 

1197 arr = self.ic["ic"].drop_nulls().to_numpy() 

1198 finite = arr[np.isfinite(arr)] 

1199 return float(np.mean(finite)) if len(finite) > 0 else float("nan") 

1200 

1201 @property 

1202 def ic_std(self) -> float: 

1203 """Standard deviation of the IC time series, ignoring NaN values. 

1204 

1205 Uses ``ddof=1`` (sample standard deviation). 

1206 

1207 Returns: 

1208 float: Sample standard deviation of all finite IC values, or 

1209 ``NaN`` if fewer than 2 finite values exist. 

1210 """ 

1211 arr = self.ic["ic"].drop_nulls().to_numpy() 

1212 finite = arr[np.isfinite(arr)] 

1213 return float(np.std(finite, ddof=1)) if len(finite) > 1 else float("nan") 

1214 

1215 @property 

1216 def icir(self) -> float: 

1217 """Information Coefficient Information Ratio (ICIR). 

1218 

1219 Defined as ``IC mean / IC std``. A higher absolute ICIR indicates a 

1220 more consistent signal: the mean IC is large relative to its 

1221 variability. 

1222 

1223 Returns: 

1224 float: ``ic_mean / ic_std``, or ``NaN`` when ``ic_std`` is zero 

1225 or non-finite. 

1226 """ 

1227 mean = self.ic_mean 

1228 std = self.ic_std 

1229 if not np.isfinite(std) or std == 0.0: 

1230 return float("nan") 

1231 return float(mean / std) 

1232 

1233 @property 

1234 def rank_ic_mean(self) -> float: 

1235 """Mean of the Rank IC time series, ignoring NaN values. 

1236 

1237 Returns: 

1238 float: Arithmetic mean of all finite Rank IC values, or ``NaN`` 

1239 if no finite values exist. 

1240 """ 

1241 arr = self.rank_ic["rank_ic"].drop_nulls().to_numpy() 

1242 finite = arr[np.isfinite(arr)] 

1243 return float(np.mean(finite)) if len(finite) > 0 else float("nan") 

1244 

1245 @property 

1246 def rank_ic_std(self) -> float: 

1247 """Standard deviation of the Rank IC time series, ignoring NaN values. 

1248 

1249 Uses ``ddof=1`` (sample standard deviation). 

1250 

1251 Returns: 

1252 float: Sample standard deviation of all finite Rank IC values, or 

1253 ``NaN`` if fewer than 2 finite values exist. 

1254 """ 

1255 arr = self.rank_ic["rank_ic"].drop_nulls().to_numpy() 

1256 finite = arr[np.isfinite(arr)] 

1257 return float(np.std(finite, ddof=1)) if len(finite) > 1 else float("nan") 

1258 

1259 @property 

1260 def config_report(self) -> "ConfigReport": 

1261 """Return a :class:`~basanos.math._config_report.ConfigReport` facade for this engine. 

1262 

1263 Returns a :class:`~basanos.math._config_report.ConfigReport` that 

1264 includes the full **lambda-sweep chart** — an interactive plot of the 

1265 annualised Sharpe ratio as :attr:`~BasanosConfig.shrink` (λ) is swept 

1266 across [0, 1] — in addition to the parameter table, shrinkage-guidance 

1267 table, and theory section available from 

1268 :attr:`BasanosConfig.report`. 

1269 

1270 Returns: 

1271 basanos.math._config_report.ConfigReport: Report facade with 

1272 ``to_html()`` and ``save()`` methods. 

1273 

1274 Examples: 

1275 >>> import numpy as np 

1276 >>> import polars as pl 

1277 >>> from basanos.math.optimizer import BasanosConfig, BasanosEngine 

1278 >>> dates = pl.Series("date", list(range(200))) 

1279 >>> rng = np.random.default_rng(0) 

1280 >>> prices = pl.DataFrame({"date": dates, "A": rng.lognormal(size=200), "B": rng.lognormal(size=200)}) 

1281 >>> mu = pl.DataFrame({"date": dates, "A": rng.normal(size=200), "B": rng.normal(size=200)}) 

1282 >>> cfg = BasanosConfig(vola=10, corr=20, clip=3.0, shrink=0.5, aum=1e6) 

1283 >>> engine = BasanosEngine(prices=prices, mu=mu, cfg=cfg) 

1284 >>> report = engine.config_report 

1285 >>> html = report.to_html() 

1286 >>> "Lambda" in html 

1287 True 

1288 """ 

1289 from ._config_report import ConfigReport 

1290 

1291 return ConfigReport(config=self.cfg, engine=self)