Coverage for src/nncg/solver.py: 100%
133 statements
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 19:07 +0000
« prev ^ index » next coverage.py v7.15.0, created at 2026-07-03 19:07 +0000
1"""Non-negative conjugate gradients: the active-set / block-principal-pivoting loop.
3Solves the strictly convex non-negative quadratic program
5 min_{x >= 0} 1/2 x^T A x - b^T x, A symmetric positive definite,
7and its equality-augmented variant with a general linear system ``B x = c``,
8by wrapping a matrix-free conjugate-gradient inner solver in a primal-dual
9active-set outer loop. The working-set toggles are the principal pivots of the
10linear complementarity problem LCP(A, -b); guarding the fast block-pivot path
11with a least-index Bland fallback gives unconditional finite termination at
12the unique global minimiser — no non-degeneracy assumption (Theorem 5.1 of the
13accompanying paper). See https://github.com/Jebel-Quant/mean_variance_solvers.
15The quadratic term enters as a :class:`cvx.linalg.SymmetricOperator`, accessed
16only through block products: ``apply_free`` drives the CG inner solves,
17``matvec`` the reduced gradient, and ``solve_free`` the optional direct inner
18solver. Wrap an explicit SPD array in ``DenseOperator``; for the Gram case
19``A = M^T M + ridge I`` pass ``GramOperator(M, ridge)`` and the ``n x n``
20matrix is never formed.
21"""
23from __future__ import annotations
25from collections.abc import Callable
26from dataclasses import dataclass
27from typing import cast
29import numpy as np
30from cvx.linalg import Matrix, SymmetricOperator, Vector, cholesky_solve
31from numpy.typing import NDArray
33from .krylov import MatVec, cg, pcg
35SubSolve = Callable[[NDArray[np.int_], "Vector | None"], "tuple[Vector, Vector | None, int]"]
36"""Subproblem solve on a free set: ``(idx, x0) -> (x_F, lam, inner_iters)``."""
38ReducedGradient = Callable[["Vector", "Vector | None"], "Vector"]
39"""Reduced gradient of the subproblem: ``(x, lam) -> s``."""
41_NEEDS_OPERATOR = (
42 "the quadratic term must be a cvx.linalg.SymmetricOperator: wrap a dense SPD "
43 "array in DenseOperator(a), or pass GramOperator(M, ridge) for A = M'M + ridge*I"
44)
45_RCOND_MIN = 1e-12 # matches cvx-linalg's DEFAULT_COND_THRESHOLD of 1e12
48def _check_dimension(op: SymmetricOperator, b: Vector) -> None:
49 """Check that the operator and the linear term agree in dimension.
51 Args:
52 op: The symmetric operator ``A``.
53 b: The linear term ``b``.
55 Raises:
56 ValueError: When ``op.n != len(b)``.
57 """
58 if op.n != len(b):
59 msg = f"operator dimension {op.n} does not match len(b) = {len(b)}"
60 raise ValueError(msg)
63def _require_operator(a: object) -> SymmetricOperator:
64 """Validate that the quadratic term is a symmetric operator.
66 Args:
67 a: The candidate quadratic term.
69 Returns:
70 *a* unchanged when it is a :class:`cvx.linalg.SymmetricOperator`.
72 Raises:
73 TypeError: When *a* is anything else (e.g. a dense array).
74 """
75 if not isinstance(a, SymmetricOperator):
76 raise TypeError(_NEEDS_OPERATOR)
77 return a
80def _free_matvec(op: SymmetricOperator, idx: NDArray[np.int_]) -> MatVec:
81 """Return the free-block action ``v -> A[F, F] v`` of the operator.
83 The free-set restriction is hoisted out of the inner loop: when the
84 operator provides ``restricted`` (cvx-linalg >= 0.10), the pre-sliced
85 free-block operator is built once here and the returned callable is its
86 plain ``matvec``. Calling ``apply_free(idx, v)`` per CG iteration instead
87 re-gathers the operator's storage (e.g. the Gram factor columns) on every
88 call, which costs an order of magnitude more wall clock at identical
89 iteration counts. The fallback keeps older cvx-linalg releases working.
91 Args:
92 op: The symmetric operator ``A``.
93 idx: Integer positions of the free set ``F``.
95 Returns:
96 A callable computing ``A[F, F] @ v``; the reduced matrix is never
97 materialised.
98 """
99 restricted = getattr(op, "restricted", None)
100 if restricted is not None:
101 try:
102 return cast(MatVec, restricted(idx).matvec)
103 except NotImplementedError:
104 pass # backend without a pre-sliced form; fall back below
105 return lambda v: op.apply_free(idx, v)
108@dataclass(frozen=True)
109class Result:
110 """Outcome of an active-set solve.
112 Attributes:
113 x: The minimiser (or the final iterate if ``converged`` is False).
114 outer: Number of outer active-set steps taken.
115 inner: Total inner (CG/PCG) iterations across all outer steps; each
116 direct inner solve counts as one.
117 fallback: Number of least-index Bland fallback pivots taken.
118 converged: True when the KKT exit was reached; False when an
119 ``max_outer`` cap stopped the loop first.
120 free: Boolean mask of the final free set.
121 lam: Multipliers of the equality constraints (equality-augmented
122 solves only; None otherwise).
123 traj: The sequence of visited free sets as index tuples when
124 trajectory tracking was requested; None otherwise.
125 """
127 x: Vector
128 outer: int
129 inner: int
130 fallback: int
131 converged: bool
132 free: NDArray[np.bool_]
133 lam: Vector | None = None
134 traj: list[tuple[int, ...]] | None = None
137def _active_set_loop(
138 n: int,
139 sub_solve: SubSolve,
140 reduced_gradient: ReducedGradient,
141 tol: float,
142 p_max: int,
143 track: bool = False,
144 max_outer: int | None = None,
145 warm: tuple[NDArray[np.bool_], Vector] | None = None,
146) -> Result:
147 """Run the guarded primal-dual active-set loop shared by both solvers.
149 The driver owns everything the termination proof depends on: the primal
150 and dual violator tests, the batch exchange with its patience counter,
151 and the least-index Bland fallback. What is solved on each free set — a
152 single reduced system, or the equality-augmented saddle system — enters
153 through the ``sub_solve`` callback, with ``reduced_gradient`` supplying
154 the matching dual test quantity.
156 Args:
157 n: Problem dimension.
158 sub_solve: Callback ``(idx, x0) -> (x_F, lam, inner_iters)`` solving
159 the subproblem on the free set ``idx``. ``x0`` is a warm inner
160 guess restricted to ``idx`` (None on a cold start); ``lam`` are
161 the equality multipliers (None for the bound-only problem).
162 reduced_gradient: Callback ``(x, lam) -> s`` computing the reduced
163 gradient that drives the dual violator test.
164 tol: Threshold of the primal and dual violator tests.
165 p_max: Patience budget — non-improving batch steps tolerated before a
166 fallback pivot. Any value gives finite termination.
167 track: Record the free-set trajectory in ``Result.traj``.
168 max_outer: Optional cap on outer steps; when hit, the current iterate
169 is returned with ``converged=False``.
170 warm: Optional ``(free_mask, x_prev)`` pair from a previous solve.
171 Starts the loop from that free set and seeds every subproblem
172 solve from the newest iterate.
174 Returns:
175 A :class:`Result`; ``lam`` is whatever the last subproblem returned.
176 """
177 if warm is None:
178 free = np.ones(n, dtype=bool) # F = {1..n} initially
179 x_guess: Vector | None = None
180 else:
181 free = warm[0].copy()
182 x_guess = warm[1]
183 x = np.zeros(n)
184 lam: Vector | None = None
185 n_bar = n + 1
186 patience = p_max
187 outer = inner_total = fallback = 0
188 traj: list[tuple[int, ...]] | None = [] if track else None
189 converged = True
191 while True:
192 if max_outer is not None and outer >= max_outer:
193 converged = False
194 break
195 idx = np.flatnonzero(free)
196 if traj is not None:
197 traj.append(tuple(idx.tolist()))
198 x0 = x_guess[idx] if x_guess is not None else None
199 xf, lam, k_step = sub_solve(idx, x0)
200 outer += 1
201 inner_total += k_step
203 x = np.zeros(n)
204 x[idx] = xf
205 if x_guess is not None:
206 x_guess = x # warm mode: newest iterate seeds the next reduced solve
207 s = reduced_gradient(x, lam)
209 prim = np.flatnonzero(free & (x < -tol)) # D: free but negative
210 dual = np.flatnonzero((~free) & (s < -tol)) # V: bound but s < 0
211 viol = np.concatenate([prim, dual])
212 n_viol = viol.size
213 if n_viol == 0:
214 break # KKT satisfied -> unique global minimiser
216 if n_viol < n_bar or patience > 0: # fast path: progress, or patience remains
217 if n_viol < n_bar:
218 n_bar = n_viol
219 patience = p_max
220 else:
221 patience -= 1
222 free[prim] = False # batch exchange: drop all D, add all V
223 free[dual] = True
224 else: # anti-cycling fallback: single Bland least-index pivot
225 fallback += 1
226 i_star = int(viol.min())
227 free[i_star] = not free[i_star]
229 return Result(
230 x=x,
231 outer=outer,
232 inner=inner_total,
233 fallback=fallback,
234 converged=converged,
235 free=free,
236 lam=lam,
237 traj=traj,
238 )
241def kkt_violation(a: SymmetricOperator, b: Vector, x: Vector) -> float:
242 """Maximum violation of the KKT system of ``min_{x>=0} 1/2 x'Ax - b'x``.
244 Args:
245 a: The SPD operator ``A`` (a :class:`cvx.linalg.SymmetricOperator`).
246 b: The linear term ``b``.
247 x: Candidate solution.
249 Returns:
250 ``max`` of the negativity violations of ``x`` and of the reduced
251 gradient ``s = A x - b``, and of the complementarity products
252 ``|x_i s_i|``. Zero certifies the unique global minimiser.
253 """
254 op = _require_operator(a)
255 _check_dimension(op, b)
256 s = op.matvec(x) - b
257 return float(
258 max(
259 np.max(-x, initial=0.0),
260 np.max(-s, initial=0.0),
261 np.max(np.abs(x * s), initial=0.0),
262 )
263 )
266def solve_nnqp(
267 a: SymmetricOperator,
268 b: Vector,
269 tol: float = 1e-8,
270 cg_tol: float = 1e-10,
271 p_max: int = 3,
272 inner: str = "cg",
273 track: bool = False,
274 cg_maxit: int = 100_000,
275 max_outer: int | None = None,
276 warm: tuple[NDArray[np.bool_], Vector] | None = None,
277) -> Result:
278 """Minimise ``1/2 x^T A x - b^T x`` over ``x >= 0`` by the active-set loop.
280 Each free-block solve is matrix-free CG on ``v -> op.apply_free(F, v)``;
281 the reduced matrix is never materialised and ``A`` is never refactorised.
282 The batch block-pivot fast path is guarded by a least-index Bland
283 fallback, so termination at the unique global minimiser is unconditional
284 — no non-degeneracy assumption.
286 Args:
287 a: The SPD operator ``A`` (a :class:`cvx.linalg.SymmetricOperator`) —
288 ``DenseOperator`` for an explicit array, ``GramOperator(M, ridge)``
289 for ``A = M^T M + ridge I`` whose Gram matrix is never formed.
290 b: The linear term ``b``.
291 tol: Threshold of the primal and dual violator tests.
292 cg_tol: Relative residual tolerance of the inner solves. Keep it a
293 couple of orders below ``tol`` so the inexact loop makes the same
294 sign decisions as the exact one (Lemma 5.1 of the paper).
295 p_max: Patience budget — non-improving batch steps tolerated before a
296 fallback pivot. Any value gives finite termination.
297 inner: ``"cg"`` (matrix-free), ``"pcg"`` (Jacobi-preconditioned from
298 ``op.diag``), or ``"exact"`` (direct solve of each free block via
299 ``op.solve_free``). Match the inner solver to the backend: pick
300 ``"exact"`` when ``solve_free`` is structured and cheap — e.g.
301 ``FactorOperator``'s Woodbury solve at ``O(|F| r^2)`` — and CG
302 when only products are cheap (large dense ``A``, Gram factors
303 with many rows).
304 track: Record the free-set trajectory in ``Result.traj``.
305 cg_maxit: Iteration cap per inner solve.
306 max_outer: Optional cap on outer steps; when hit, the current iterate
307 is returned with ``converged=False``.
308 warm: Optional ``(free_mask, x_prev)`` pair from a previous solve.
309 Starts the loop from that free set and warm-starts every CG call
310 from the newest iterate — across a support-stable parameter step
311 the loop then terminates in a single outer step.
313 Returns:
314 A :class:`Result`; ``converged`` is True iff the KKT system was
315 satisfied to ``tol``, which certifies the unique global minimiser.
317 Raises:
318 TypeError: When ``a`` is not a :class:`cvx.linalg.SymmetricOperator`.
319 ValueError: When the operator dimension does not match ``len(b)``, or
320 when ``inner="exact"`` meets a numerically singular free block
321 (``op.rcond_free`` below 1e-12) — ``A`` is then not positive
322 definite on that free set; add a ridge.
323 NotImplementedError: When ``inner="pcg"`` meets a backend that does
324 not expose ``diag`` (propagated from ``cvx.linalg``).
325 """
326 op = _require_operator(a)
327 _check_dimension(op, b)
328 dinv: Vector | None = None # Jacobi preconditioner, read off op.diag on first use
330 def sub_solve(idx: NDArray[np.int_], x0: Vector | None) -> tuple[Vector, Vector | None, int]:
331 """Solve the reduced system ``A_F x_F = b_F`` with the chosen inner solver."""
332 nonlocal dinv
333 if inner == "exact":
334 rcond = op.rcond_free(idx)
335 if rcond < _RCOND_MIN:
336 msg = f"free block of size {idx.size} is numerically singular (rcond={rcond:.2e})"
337 raise ValueError(msg)
338 return op.solve_free(idx, b[idx]), None, 1
339 if inner == "pcg":
340 if dinv is None:
341 dinv = 1.0 / op.diag
342 xf, k_step = pcg(_free_matvec(op, idx), b[idx], dinv[idx], tol=cg_tol, maxit=cg_maxit)
343 return xf, None, k_step
344 xf, k_step = cg(_free_matvec(op, idx), b[idx], tol=cg_tol, maxit=cg_maxit, x0=x0)
345 return xf, None, k_step
347 def reduced_gradient(x: Vector, lam: Vector | None) -> Vector: # noqa: ARG001
348 """Return the reduced gradient ``s = A x - b``."""
349 return op.matvec(x) - b
351 return _active_set_loop(
352 len(b),
353 sub_solve,
354 reduced_gradient,
355 tol=tol,
356 p_max=p_max,
357 track=track,
358 max_outer=max_outer,
359 warm=warm,
360 )
363def solve_nnqp_eq(
364 a: SymmetricOperator,
365 b: Vector,
366 b_eq: Matrix,
367 c_eq: Vector,
368 tol: float = 1e-8,
369 cg_tol: float = 1e-10,
370 p_max: int = 3,
371 warm: tuple[NDArray[np.bool_], Vector] | None = None,
372) -> Result:
373 """Solve ``min 1/2 x^T A x - b^T x`` subject to ``x >= 0`` and ``B x = c``.
375 On each free set the saddle system is solved by eliminating the multiplier
376 ``lambda`` in R^p through the p-by-p Schur complement
377 ``S = B_F A_F^{-1} B_F^T``: the ``p + 1`` right-hand sides share the
378 operator ``A_F`` and are each one matrix-free CG solve, then
379 ``S lambda = c - B_F v0`` fixes the multipliers in closed form. The single
380 normalisation ``1^T x = beta`` is the ``p = 1`` case. ``B`` must have full
381 row rank on the visited free sets (automatic for ``p = 1``).
383 Args:
384 a: The SPD operator ``A`` (a :class:`cvx.linalg.SymmetricOperator`).
385 b: The linear term ``b``.
386 b_eq: Equality matrix ``B`` of shape ``(p, n)``, full row rank.
387 c_eq: Equality right-hand side ``c`` of shape ``(p,)``.
388 tol: Threshold of the primal and dual violator tests.
389 cg_tol: Relative residual tolerance of the inner CG solves.
390 p_max: Patience budget of the batch fast path.
391 warm: Optional ``(free_mask, x_prev)`` pair from a previous solve —
392 the same tuple :func:`solve_nnqp` accepts. Starts the loop from
393 that free set and seeds the ``v0`` solve of every saddle step
394 from the newest iterate; the ``v1`` columns are re-solved cold.
395 Across a support-stable parameter step the loop then terminates
396 in a single outer step.
398 Returns:
399 A :class:`Result` with the multipliers in ``lam``. The reduced
400 gradient underlying the dual test is ``s = A x - b - B^T lam``.
402 Raises:
403 TypeError: When ``a`` is not a :class:`cvx.linalg.SymmetricOperator`.
404 ValueError: When the operator dimension does not match ``len(b)``.
405 """
406 op = _require_operator(a)
407 _check_dimension(op, b)
408 p = b_eq.shape[0]
410 def sub_solve(idx: NDArray[np.int_], x0: Vector | None) -> tuple[Vector, Vector | None, int]:
411 """Solve the saddle system on the free set via the p-by-p Schur complement."""
412 matvec_f = _free_matvec(op, idx)
413 b_f = b_eq[:, idx]
414 v0, k0 = cg(matvec_f, b[idx], tol=cg_tol, x0=x0)
415 v1 = np.zeros((idx.size, p))
416 k_cols = 0
417 for j in range(p):
418 v1[:, j], kj = cg(matvec_f, b_f[j], tol=cg_tol)
419 k_cols += kj
420 schur = b_f @ v1 # p-by-p Schur complement, SPD
421 lam = cholesky_solve(schur, c_eq - b_f @ v0)
422 xf = v0 + v1 @ lam # x_F = A_F^{-1}(b_F + B_F^T lambda)
423 return xf, lam, k0 + k_cols
425 def reduced_gradient(x: Vector, lam: Vector | None) -> Vector:
426 """Return the constrained reduced gradient ``s = A x - b - B^T lam``."""
427 correction = b_eq.T @ lam if lam is not None else np.zeros_like(b)
428 return op.matvec(x) - b - correction
430 return _active_set_loop(len(b), sub_solve, reduced_gradient, tol=tol, p_max=p_max, warm=warm)