Coverage for src/cvx/linalg/operators/gram.py: 100%

1""":class:`GramOperator`: ``A = M.T @ M + ridge I`` represented matrix-free by its factor ``M``."""

3from __future__ import annotations

5import numpy as np

7from ..core.exceptions import DimensionMismatchError, NotAMatrixError

8from ..core.types import Matrix, Vector

9from ..decomposition.cholesky import cholesky_solve

10from .base import SymmetricOperator, _as_index

12_ALPHA_RANGE_MESSAGE = "alpha must lie in the interval [0, 1]"

13_RIDGE_NEGATIVE_MESSAGE = "ridge must be non-negative"

16class GramOperator(SymmetricOperator):

17 """Symmetric operator ``A = M.T @ M + ridge * I`` represented by its factor ``M``.

19 The ``n x n`` Gram matrix is never formed: products go through *M* directly,

20 at ``O(m * len(cols))`` per :meth:`block_matvec` and ``O(m * n)`` memory

21 rather than ``O(n**2)``. This is the least-squares / normal-equations

22 setting, where ``A = M.T @ M`` for an ``m x n`` factor *M*.

24 An optional Tikhonov ``ridge >= 0`` adds ``ridge * I`` to the Gram matrix. It

25 matters because ``M.T @ M`` has rank at most ``m``: when ``M`` has fewer rows

26 than the free set has entries the un-ridged free block is singular. A positive

27 ridge makes every free block positive definite, and :meth:`solve_free` then

28 switches to the Woodbury identity in the ``m``-dimensional row space whenever

29 that is cheaper than the ``len(free) x len(free)`` normal equations -- i.e.

30 when ``m < len(free)``.

32 For a general (non-scaled-identity) positive-definite target, use

33 :meth:`regularized` instead, which forms the convex combination

34 ``(1 - alpha) M.T M + alpha R.T R`` as a stacked-factor Gram matrix.

36 Args:

37 factor: The ``m x n`` factor ``M``.

38 ridge: A non-negative diagonal loading ``ridge`` added as ``ridge * I``.

40 Example:

41 >>> import numpy as np

42 >>> from cvx.linalg import GramOperator

43 >>> M = np.array([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0]])

44 >>> op = GramOperator(M)

45 >>> bound, free = np.array([0]), np.array([1, 2])

46 >>> A = M.T @ M

47 >>> np.allclose(op.block_matvec(bound, free, np.array([1.0, 1.0])), A[np.ix_(bound, free)] @ np.ones(2))

48 True

49 """

51 def __init__(self, factor: Matrix, ridge: float = 0.0) -> None:

52 """Store the ``m x n`` factor ``M`` and ridge (the Gram matrix is never formed)."""

53 factor = np.asarray(factor, dtype=np.float64)

54 if factor.ndim != 2:

55 raise NotAMatrixError(factor.ndim, func="GramOperator")

56 if ridge < 0.0:

57 raise ValueError(_RIDGE_NEGATIVE_MESSAGE)

58 self._m = factor

59 self._ridge = float(ridge)

61 @classmethod

62 def regularized(cls, factor: Matrix, alpha: float, root: Matrix) -> GramOperator:

63 """Build the regularised operator ``(1 - alpha) M.T M + alpha R.T R``.

65 The convex combination is realised as the Gram matrix of the stacked

66 factor ``[sqrt(1 - alpha) M; sqrt(alpha) R]``, so the result is again a

67 :class:`GramOperator` and every product stays matrix-free. With *root*

68 an ``r x n`` square root of a positive-definite target ``R.T @ R`` (e.g.

69 ``sqrt(lam) I`` for a scaled-identity ridge), the stacked factor has full

70 column rank whenever ``alpha > 0``, so :meth:`solve_free` is well posed

71 on any free set.

73 Args:

74 factor: The ``m x n`` factor ``M``.

75 alpha: Regularisation intensity in ``[0, 1]``.

76 root: An ``r x n`` matrix ``R`` with ``R.T @ R`` the target.

78 Returns:

79 A :class:`GramOperator` for ``(1 - alpha) M.T M + alpha R.T R``.

81 Example:

82 >>> import numpy as np

83 >>> from cvx.linalg import GramOperator

84 >>> M = np.array([[1.0, 2.0], [3.0, 4.0]])

85 >>> R = np.eye(2)

86 >>> op = GramOperator.regularized(M, 0.25, R)

87 >>> A = 0.75 * (M.T @ M) + 0.25 * (R.T @ R)

88 >>> np.allclose(op.matvec(np.array([1.0, -1.0])), A @ np.array([1.0, -1.0]))

89 True

90 """

91 factor = np.asarray(factor, dtype=np.float64)

92 root = np.asarray(root, dtype=np.float64)

93 if not 0.0 <= alpha <= 1.0:

94 raise ValueError(_ALPHA_RANGE_MESSAGE)

95 if factor.ndim != 2:

96 raise NotAMatrixError(factor.ndim, func="GramOperator.regularized")

97 if root.ndim != 2:

98 raise NotAMatrixError(root.ndim, func="GramOperator.regularized")

99 if root.shape[1] != factor.shape[1]:

100 raise DimensionMismatchError(root.shape[1], factor.shape[1])

101 stacked = np.vstack([np.sqrt(1.0 - alpha) * factor, np.sqrt(alpha) * root])

102 return cls(stacked)

103

104 @property

105 def n(self) -> int:

106 """Dimension of the operator (columns of the factor ``M``)."""

107 return int(self._m.shape[1])

108

109 @property

110 def diag(self) -> Vector:

111 """The diagonal ``||M[:, j]||**2 + ridge`` (squared column norms of the factor)."""

112 result: Vector = np.einsum("ij,ij->j", self._m, self._m) + self._ridge

113 return result

114

115 def matvec(self, x: Vector | Matrix) -> Vector | Matrix:

116 """Return ``A @ x = M.T @ (M @ x) + ridge * x`` without forming ``A``."""

117 product = self._m.T @ (self._m @ x)

118 return product + self._ridge * x if self._ridge else product

119

120 def restricted(self, free: object) -> GramOperator:

121 """Return ``GramOperator(M[:, free], ridge)``: the free block, pre-sliced."""

122 free = _as_index(free)

123 return GramOperator(np.ascontiguousarray(self._m[:, free]), ridge=self._ridge)

124

125 def block_matvec(self, rows: object, cols: object, v: Vector | Matrix) -> Vector | Matrix:

126 """Return ``A[rows, cols] @ v = M[:, rows].T @ (M[:, cols] @ v)`` plus the ridge overlap."""

127 rows = _as_index(rows)

128 cols = _as_index(cols)

129 product: Vector | Matrix = self._m[:, rows].T @ (self._m[:, cols] @ v)

130 if not self._ridge:

131 return product

132 # ridge * I couples only positions where a row index equals a column index.

133 _common, r_idx, c_idx = np.intersect1d(rows, cols, return_indices=True)

134 diag = np.zeros_like(product)

135 diag[r_idx] = (self._ridge * np.asarray(v)[c_idx].T).T

136 return product + diag

137

138 def solve_free(self, free: object, rhs: Vector | Matrix) -> Vector | Matrix:

139 """Solve the free block ``M_F.T M_F + ridge * I``, by Woodbury in row space when ``m < len(free)``."""

140 free = _as_index(free)

141 mf = self._m[:, free]

142 m_rows, n_free = mf.shape

143 if self._ridge and m_rows < n_free:

144 # Woodbury in the m-dimensional row space:

145 # (ridge I + M_F.T M_F)^{-1} rhs = (rhs - M_F.T (ridge I_m + M_F M_F.T)^{-1} (M_F rhs)) / ridge.

146 rhs_arr = np.asarray(rhs, dtype=np.float64)

147 capacitance = self._ridge * np.eye(m_rows) + mf @ mf.T

148 correction = mf.T @ cholesky_solve(capacitance, mf @ rhs_arr)

149 return (rhs_arr - correction) / self._ridge

150 block = mf.T @ mf

151 if self._ridge:

152 block = block + self._ridge * np.eye(n_free)

153 return cholesky_solve(block, rhs)

154

155 def rcond_free(self, free: object) -> float:

156 """Reciprocal condition number of the free block from the SVD of ``M[:, free]``.

157

158 The eigenvalues of ``M_F.T M_F + ridge I`` are ``sigma_i**2 + ridge`` for the

159 singular values ``sigma_i`` of ``M_F``, with extra ``ridge`` eigenvalues when

160 the free set outgrows the rank of ``M_F``. Reads the conditioning off the

161 ``(m, len(free))`` factor block without forming the ``len(free)`` square block,

162 so a rank-deficient free set correctly drives the result toward zero.

163 """

164 free = _as_index(free)

165 n_free = free.size

166 if n_free == 0:

167 return 1.0

168 singular_values = np.linalg.svd(self._m[:, free], compute_uv=False)

169 eig = singular_values**2

170 lam_max = (float(eig[0]) if eig.size else 0.0) + self._ridge

171 if lam_max <= 0.0:

172 return 0.0

173 lam_min = self._ridge if n_free > singular_values.shape[0] else float(eig[-1]) + self._ridge

174 return max(lam_min, 0.0) / lam_max