Numerical Stability¶
Numerical stability determines whether an algorithm produces reliable results despite unavoidable floating-point errors. A stable algorithm keeps errors bounded; an unstable one amplifies them catastrophically.
Stability vs Conditioning¶
Distinguish between problem sensitivity and algorithm reliability.
1. Problem Conditioning¶
A problem's condition number measures inherent sensitivity to input perturbations.
import numpy as np

# Well-conditioned matrix (identity-like)
A_good = np.eye(5) + 0.1 * np.random.randn(5, 5)
print(f"Well-conditioned: κ = {np.linalg.cond(A_good):.2f}")


def hilbert(n):
    """Return the n-by-n Hilbert matrix with entries H[i, j] = 1 / (i + j + 1)."""
    rows = []
    for i in range(n):
        rows.append([1 / (i + j + 1) for j in range(n)])
    return np.array(rows)


# Ill-conditioned matrix (Hilbert)
A_bad = hilbert(5)
print(f"Ill-conditioned: κ = {np.linalg.cond(A_bad):.2e}")

# Implication: solving Ax = b loses log10(κ) digits
print(f"\nExpected digit loss:")
print(f" Good matrix: ~{np.log10(np.linalg.cond(A_good)):.1f} digits")
print(f" Bad matrix: ~{np.log10(np.linalg.cond(A_bad)):.1f} digits")
2. Algorithm Stability¶
An algorithm is stable if it doesn't amplify errors beyond what the problem's conditioning requires.
import numpy as np


def solve_stable(A, b):
    """Stable: solve Ax = b via LU decomposition with partial pivoting."""
    return np.linalg.solve(A, b)


def solve_unstable(A, b):
    """Unstable: solve Ax = b by forming the explicit inverse of A."""
    A_inv = np.linalg.inv(A)
    return A_inv @ b


# Test on moderately ill-conditioned system
np.random.seed(42)
n = 50
A = np.random.randn(n, n)
A = A @ A.T + 0.1 * np.eye(n)  # Symmetric positive definite
x_true = np.random.randn(n)
b = A @ x_true

x_stable = solve_stable(A, b)
x_unstable = solve_unstable(A, b)

print(f"Condition number: {np.linalg.cond(A):.2e}")
print(f"Stable error: {np.linalg.norm(x_stable - x_true):.2e}")
print(f"Unstable error: {np.linalg.norm(x_unstable - x_true):.2e}")
3. Backward Stability¶
A backward stable algorithm computes the exact solution to a nearby problem: it returns \(\hat{x}\) satisfying
\[(A + \Delta A)\,\hat{x} = b + \Delta b,\]
where \(\|\Delta A\| / \|A\|\) and \(\|\Delta b\| / \|b\|\) are \(O(\epsilon)\).
import numpy as np


def check_backward_stability(A, b, x_computed):
    """
    Estimate the relative backward error of a computed solution to Ax = b.

    Returns ||A x - b|| / (||A|| ||x|| + ||b||); a value near machine
    epsilon indicates the algorithm that produced x_computed is
    backward stable.
    """
    residual_norm = np.linalg.norm(A @ x_computed - b)
    scale = np.linalg.norm(A) * np.linalg.norm(x_computed) + np.linalg.norm(b)
    return residual_norm / scale


# Test
np.random.seed(123)
A = np.random.randn(20, 20)
x_true = np.random.randn(20)
b = A @ x_true

x_solve = np.linalg.solve(A, b)
x_inv = np.linalg.inv(A) @ b

print(f"Machine epsilon: {np.finfo(float).eps:.2e}")
print(f"Backward error (solve): {check_backward_stability(A, b, x_solve):.2e}")
print(f"Backward error (inv): {check_backward_stability(A, b, x_inv):.2e}")
Stable Algorithms¶
Examples of numerically stable methods.
1. Gaussian Elimination with Pivoting¶
Partial pivoting prevents division by small numbers.
import numpy as np


def lu_no_pivot(A):
    """LU decomposition without pivoting (unstable): A = L @ U."""
    n = A.shape[0]
    L = np.eye(n)
    U = A.astype(float).copy()
    for k in range(n-1):
        for i in range(k+1, n):
            # Without row exchanges a (near-)zero pivot is fatal
            if abs(U[k, k]) < 1e-15:
                raise ValueError("Zero pivot encountered")
            L[i, k] = U[i, k] / U[k, k]
            U[i, k:] -= L[i, k] * U[k, k:]
    return L, U


def lu_partial_pivot(A):
    """LU decomposition with partial pivoting (stable): P @ A = L @ U."""
    n = A.shape[0]
    P = np.eye(n)
    L = np.zeros((n, n))
    U = A.astype(float).copy()
    for k in range(n-1):
        # Bring the largest remaining entry of column k into pivot position
        pivot_row = k + np.argmax(np.abs(U[k:, k]))
        # Exchange rows in U, the permutation record, and the finished part of L
        U[[k, pivot_row]] = U[[pivot_row, k]]
        P[[k, pivot_row]] = P[[pivot_row, k]]
        L[[k, pivot_row], :k] = L[[pivot_row, k], :k]
        # Zero out the entries below the pivot
        for i in range(k+1, n):
            L[i, k] = U[i, k] / U[k, k]
            U[i, k:] -= L[i, k] * U[k, k:]
    np.fill_diagonal(L, 1)
    return P, L, U


# Test on matrix that needs pivoting
A = np.array([[1e-20, 1.0],
              [1.0, 1.0]])
b = np.array([1.0, 2.0])

# Without pivoting: the tiny leading entry is rejected as a pivot
try:
    L, U = lu_no_pivot(A)
    x_no_pivot = np.linalg.solve(U, np.linalg.solve(L, b))
    print(f"No pivot solution: {x_no_pivot}")
except Exception as e:
    print(f"No pivot failed: {e}")

# With pivoting: stable (solve L y = P b, then U x = y)
P, L, U = lu_partial_pivot(A)
x_pivot = np.linalg.solve(U, np.linalg.solve(L, P @ b))
print(f"Pivot solution: {x_pivot}")
print(f"True solution: {np.linalg.solve(A, b)}")
2. QR Decomposition¶
QR factorization is more stable than forming the normal equations for least squares problems.
import numpy as np

# Least squares: minimize ||Ax - b||² by three routes of increasing stability
np.random.seed(42)
m, n = 100, 10
A = np.random.randn(m, n)
b = np.random.randn(m)

# Method 1: Normal equations A^T A x = A^T b (less stable: squares κ(A))
ATA = A.T @ A
ATb = A.T @ b
x_normal = np.linalg.solve(ATA, ATb)

# Method 2: QR decomposition, solve R x = Q^T b (more stable)
Q, R = np.linalg.qr(A)
x_qr = np.linalg.solve(R, Q.T @ b)

# Method 3: SVD-based least squares (most stable)
x_svd = np.linalg.lstsq(A, b, rcond=None)[0]

# All three minimize the same residual; their accuracy differs on hard problems
print("Residual norms:")
print(f" Normal equations: {np.linalg.norm(A @ x_normal - b):.10e}")
print(f" QR decomposition: {np.linalg.norm(A @ x_qr - b):.10e}")
print(f" SVD: {np.linalg.norm(A @ x_svd - b):.10e}")

# Condition numbers
print(f"\nCondition number of A: {np.linalg.cond(A):.2e}")
print(f"Condition number of A^TA: {np.linalg.cond(ATA):.2e}") # Squared!
3. Householder Reflections¶
Orthogonal transformations preserve norms.
import numpy as np


def householder_qr(A):
    """
    QR decomposition using Householder reflections.

    Parameters
    ----------
    A : (m, n) array
        Matrix to factor; not modified.

    Returns
    -------
    Q : (m, m) orthogonal ndarray.
    R : (m, n) upper-triangular ndarray, with A = Q @ R.
    """
    m, n = A.shape
    Q = np.eye(m)
    R = A.astype(float).copy()
    for k in range(min(m-1, n)):
        # Construct Householder vector for column k
        x = R[k:, k]
        norm_x = np.linalg.norm(x)
        if norm_x == 0.0:
            continue  # column already zero below the diagonal; nothing to reflect
        v = x.copy()
        # Bug fix: the original used np.sign(x[0]), but np.sign(0) == 0, so
        # when the leading entry was zero v stayed equal to x and the
        # reflection mapped x to -x instead of onto e_1 -- the subdiagonal
        # was never annihilated. Choose the sign explicitly (matching the
        # sign of x[0] to avoid cancellation).
        v[0] += norm_x if x[0] >= 0 else -norm_x
        v /= np.linalg.norm(v)
        # Apply the reflection H = I - 2 v v^T to the trailing submatrix
        R[k:, k:] -= 2 * np.outer(v, v @ R[k:, k:])
        # Accumulate Q as the product of the reflections
        Q_k = np.eye(m)
        Q_k[k:, k:] -= 2 * np.outer(v, v)
        Q = Q @ Q_k
    return Q, R


# Test orthogonality preservation
A = np.random.randn(10, 5)
Q, R = householder_qr(A)
print(f"Q orthogonality error: {np.linalg.norm(Q.T @ Q - np.eye(10)):.2e}")
print(f"Reconstruction error: {np.linalg.norm(Q @ R - A):.2e}")
Unstable Algorithms¶
Examples to avoid and their stable alternatives.
1. Gram-Schmidt Orthogonalization¶
Classical Gram-Schmidt loses orthogonality.
import numpy as np


def gram_schmidt_classical(A):
    """Classical Gram-Schmidt (unstable)."""
    m, n = A.shape
    Q = np.zeros((m, n))
    R = np.zeros((n, n))
    for col in range(n):
        # Project the ORIGINAL column against all previous Q columns;
        # accumulating against stale data is what destroys orthogonality.
        v = A[:, col].copy()
        for prev in range(col):
            R[prev, col] = Q[:, prev] @ A[:, col]
            v -= R[prev, col] * Q[:, prev]
        R[col, col] = np.linalg.norm(v)
        Q[:, col] = v / R[col, col]
    return Q, R


def gram_schmidt_modified(A):
    """Modified Gram-Schmidt (more stable)."""
    m, n = A.shape
    Q = A.astype(float).copy()
    R = np.zeros((n, n))
    for col in range(n):
        R[col, col] = np.linalg.norm(Q[:, col])
        Q[:, col] /= R[col, col]
        # Immediately remove this direction from every remaining column,
        # so later projections see already-orthogonalized data.
        for later in range(col+1, n):
            R[col, later] = Q[:, col] @ Q[:, later]
            Q[:, later] -= R[col, later] * Q[:, col]
    return Q, R


# Test on ill-conditioned matrix
np.random.seed(42)
A = np.random.randn(100, 10)
# Make columns nearly parallel
A[:, 1] = A[:, 0] + 1e-8 * np.random.randn(100)

Q_classical, _ = gram_schmidt_classical(A)
Q_modified, _ = gram_schmidt_modified(A)
Q_householder, _ = np.linalg.qr(A)

print("Orthogonality error (||Q^T Q - I||):")
print(f" Classical GS: {np.linalg.norm(Q_classical.T @ Q_classical - np.eye(10)):.2e}")
print(f" Modified GS: {np.linalg.norm(Q_modified.T @ Q_modified - np.eye(10)):.2e}")
print(f" Householder: {np.linalg.norm(Q_householder.T @ Q_householder - np.eye(10)):.2e}")
2. Polynomial Evaluation¶
Horner's method vs naive evaluation.
import numpy as np


def poly_naive(coeffs, x):
    """
    Naive polynomial evaluation (unstable for high degree).

    `coeffs` is ordered lowest degree first: p(x) = sum(coeffs[i] * x**i).
    Computing each power x**i independently amplifies rounding error.
    """
    return sum(c * x**i for i, c in enumerate(coeffs))


def poly_horner(coeffs, x):
    """
    Horner's method (stable).

    Same coefficient order as poly_naive (lowest degree first); evaluates
    via nested multiplication with n multiplies and n adds.
    """
    result = coeffs[-1]
    for c in reversed(coeffs[:-1]):
        result = result * x + c
    return result


# Test with Wilkinson polynomial (notoriously ill-conditioned)
# p(x) = (x-1)(x-2)...(x-20)
# (Removed unused import `from numpy.polynomial import polynomial as P`.)
roots = np.arange(1, 21)
coeffs = np.array([1.0])
for r in roots:
    # Multiplying by (x - r) is convolution with [1, -r]; the result is
    # highest-degree-first, matching np.polyval's convention.
    coeffs = np.convolve(coeffs, [1, -r])
# Evaluate near a root
x = 15.0 + 1e-8
# coeffs[::-1] converts to the lowest-first order our evaluators expect
naive_result = poly_naive(coeffs[::-1], x)
horner_result = poly_horner(coeffs[::-1], x)
numpy_result = np.polyval(coeffs, x)
print(f"Naive: {naive_result:.6e}")
print(f"Horner: {horner_result:.6e}")
print(f"NumPy: {numpy_result:.6e}")
3. Recurrence Relations¶
Some recurrences amplify errors exponentially.
import numpy as np


def fibonacci_recurrence(n):
    """Forward recurrence (unstable for large n with floats)."""
    if n <= 1:
        return float(n)
    prev, curr = 0.0, 1.0
    for _ in range(n - 1):
        prev, curr = curr, prev + curr
    return curr


def fibonacci_binet(n):
    """Binet formula (direct but loses precision)."""
    phi = (1 + np.sqrt(5)) / 2
    psi = (1 - np.sqrt(5)) / 2
    return (phi**n - psi**n) / np.sqrt(5)


def fibonacci_matrix(n):
    """Matrix exponentiation (more stable): F(n) = [[1,1],[1,0]]^n [0, 1]."""
    if n <= 1:
        return float(n)

    def _mult(X, Y):
        # Explicit 2x2 product (avoids generic matmul overhead)
        return np.array([[X[0,0]*Y[0,0] + X[0,1]*Y[1,0], X[0,0]*Y[0,1] + X[0,1]*Y[1,1]],
                         [X[1,0]*Y[0,0] + X[1,1]*Y[1,0], X[1,0]*Y[0,1] + X[1,1]*Y[1,1]]])

    def _pow(M, p):
        # Recursive square-and-multiply: O(log p) matrix products
        if p == 1:
            return M
        if p % 2 == 0:
            half = _pow(M, p // 2)
            return _mult(half, half)
        return _mult(M, _pow(M, p - 1))

    base = np.array([[1.0, 1.0], [1.0, 0.0]])
    return _pow(base, n)[0, 1]


# Compare for large n
for n in [50, 70, 80]:
    f_rec = fibonacci_recurrence(n)
    f_binet = fibonacci_binet(n)
    f_matrix = fibonacci_matrix(n)
    print(f"\nF({n}):")
    print(f" Recurrence: {f_rec:.0f}")
    print(f" Binet: {f_binet:.0f}")
    print(f" Matrix: {f_matrix:.0f}")
Preconditioning¶
Transform ill-conditioned problems to improve stability.
1. Diagonal Preconditioning¶
Scale rows/columns to improve conditioning.
import numpy as np
def diagonal_preconditioner(A):
"""Simple diagonal preconditioner."""
D = np.diag(1.0 / np.sqrt(np.diag(A @ A.T)))
return D
# Ill-conditioned system
A = np.array([[1e6, 1],
[1, 1e-6]])
b = np.array([1e6 + 1, 1 + 1e-6])
print(f"Original condition number: {np.linalg.cond(A):.2e}")
# Precondition
D = diagonal_preconditioner(A)
A_precond = D @ A
b_precond = D @ b
print(f"Preconditioned condition: {np.linalg.cond(A_precond):.2e}")
# Solve
x_original = np.linalg.solve(A, b)
x_precond = np.linalg.solve(A_precond, b_precond)
print(f"\nSolution (original): {x_original}")
print(f"Solution (preconditioned): {x_precond}")
2. Iterative Refinement¶
Improve solution by correcting residuals.
import numpy as np


def iterative_refinement(A, b, x0, max_iter=5):
    """Improve a solution of Ax = b by repeatedly solving for the residual correction."""
    x = x0.copy()
    for step in range(1, max_iter + 1):
        # Residual of the current iterate (ideally computed in higher precision)
        r = b - A @ x
        # Solve A dx = r and fold the correction into the iterate
        dx = np.linalg.solve(A, r)
        x = x + dx
        print(f" Iter {step}: residual norm = {np.linalg.norm(r):.2e}")
    return x


# Test
np.random.seed(42)
A = np.random.randn(50, 50)
x_true = np.random.randn(50)
b = A @ x_true

# Initial solve, then inject artificial error to simulate single precision
x_initial = np.linalg.solve(A, b)
x_initial += 1e-8 * np.random.randn(50)

print(f"Initial error: {np.linalg.norm(x_initial - x_true):.2e}")
x_refined = iterative_refinement(A, b, x_initial)
print(f"Refined error: {np.linalg.norm(x_refined - x_true):.2e}")
Stability in Machine Learning¶
Neural network training requires careful numerical handling.
1. Gradient Clipping¶
Prevent exploding gradients.
import numpy as np


def clip_gradients(grads, max_norm=1.0):
    """Rescale a list of gradient arrays so their global norm is at most max_norm."""
    # Global norm across every tensor in the list
    total_norm = np.sqrt(sum(np.sum(g**2) for g in grads))
    if total_norm <= max_norm:
        return grads  # already within bounds; return untouched
    scale = max_norm / total_norm
    return [g * scale for g in grads]


# Simulate gradient computation
np.random.seed(42)
grads = [np.random.randn(100, 100) * 10 for _ in range(3)]

original_norm = np.sqrt(sum(np.sum(g**2) for g in grads))
clipped_grads = clip_gradients(grads, max_norm=1.0)
clipped_norm = np.sqrt(sum(np.sum(g**2) for g in clipped_grads))

print(f"Original gradient norm: {original_norm:.2f}")
print(f"Clipped gradient norm: {clipped_norm:.2f}")
2. Log-Sum-Exp Trick¶
Stable softmax computation.
import numpy as np


def softmax_naive(x):
    """Naive softmax (overflow-prone): np.exp(x) overflows for large entries."""
    exp_x = np.exp(x)
    return exp_x / np.sum(exp_x)


def softmax_stable(x):
    """Numerically stable softmax: shifting by max(x) leaves the result unchanged
    mathematically but keeps every exponent <= 0, so exp never overflows."""
    x_shifted = x - np.max(x)  # Prevent overflow
    exp_x = np.exp(x_shifted)
    return exp_x / np.sum(exp_x)


# Test with large values
x = np.array([1000, 1001, 1002])
print("Naive softmax:")
try:
    # Bug fix: NumPy only *warns* on overflow by default and returns
    # inf/inf = nan, so the original bare `except:` never fired and the demo
    # silently printed [nan nan nan]. Promote overflow to an exception so the
    # failure is actually demonstrated, and catch the specific error type.
    with np.errstate(over="raise"):
        print(f" {softmax_naive(x)}")
except FloatingPointError:
    print(" Overflow!")
print("Stable softmax:")
print(f" {softmax_stable(x)}")
# Verify: should sum to 1
print(f" Sum: {np.sum(softmax_stable(x))}")
3. Batch Normalization¶
Stabilize training by normalizing activations.
import numpy as np


def batch_norm(x, eps=1e-5):
    """Normalize each feature to zero mean / unit variance over the batch axis."""
    # Per-feature statistics across the batch (axis 0)
    mean = np.mean(x, axis=0)
    var = np.var(x, axis=0)
    # eps keeps the denominator positive even for constant features
    normalized = (x - mean) / np.sqrt(var + eps)
    return normalized, mean, var


# Simulate activations with varying scales
np.random.seed(42)
activations = np.random.randn(32, 100) * np.random.uniform(0.01, 100, 100)

print(f"Before normalization:")
print(f" Mean range: [{activations.mean(axis=0).min():.2f}, {activations.mean(axis=0).max():.2f}]")
print(f" Std range: [{activations.std(axis=0).min():.2f}, {activations.std(axis=0).max():.2f}]")

normalized, _, _ = batch_norm(activations)
print(f"\nAfter normalization:")
print(f" Mean range: [{normalized.mean(axis=0).min():.4f}, {normalized.mean(axis=0).max():.4f}]")
print(f" Std range: [{normalized.std(axis=0).min():.4f}, {normalized.std(axis=0).max():.4f}]")
Testing for Stability¶
Verify your implementations are numerically sound.
1. Perturbation Testing¶
Check sensitivity to small input changes.
import numpy as np
def test_stability(func, x, n_tests=100, perturbation=1e-10):
"""Test function stability under perturbations."""
base_result = func(x)
max_amplification = 0
for _ in range(n_tests):
# Random perturbation
delta = perturbation * np.random.randn(*np.atleast_1d(x).shape)
perturbed_x = x + delta
perturbed_result = func(perturbed_x)
# Measure amplification
input_change = np.linalg.norm(delta) / np.linalg.norm(x)
output_change = np.linalg.norm(perturbed_result - base_result) / np.linalg.norm(base_result)
if input_change > 0:
amplification = output_change / input_change
max_amplification = max(max_amplification, amplification)
return max_amplification
# Test matrix inversion stability
np.random.seed(42)
A = np.random.randn(10, 10)
amp = test_stability(np.linalg.inv, A)
cond = np.linalg.cond(A)
print(f"Condition number: {cond:.2f}")
print(f"Max amplification: {amp:.2f}")
print(f"Ratio: {amp/cond:.2f}") # Should be ~1 for stable algorithm
2. Backward Error Check¶
Verify computed result solves a nearby problem.
import numpy as np


def backward_error_test(A, b, x_computed):
    """
    Relative backward error of a computed solution to Ax = b:
    ||A x - b|| / (||A|| ||x|| + ||b||). A value near machine epsilon
    indicates backward stability.
    """
    r = A @ x_computed - b
    denom = np.linalg.norm(A) * np.linalg.norm(x_computed) + np.linalg.norm(b)
    return np.linalg.norm(r) / denom


# Compare several solvers on the same random system
np.random.seed(42)
A = np.random.randn(100, 100)
x_true = np.random.randn(100)
b = A @ x_true

methods = [
    ("np.linalg.solve", lambda A, b: np.linalg.solve(A, b)),
    ("np.linalg.inv @", lambda A, b: np.linalg.inv(A) @ b),
    ("np.linalg.lstsq", lambda A, b: np.linalg.lstsq(A, b, rcond=None)[0]),
]

print(f"Machine epsilon: {np.finfo(float).eps:.2e}\n")
for name, solver in methods:
    be = backward_error_test(A, b, solver(A, b))
    print(f"{name:20s}: backward error = {be:.2e}")