Cholesky Decomposition¶
Cholesky decomposition factors symmetric positive definite matrices.
Basic Decomposition¶
1. linalg.cholesky¶
import numpy as np
from scipy import linalg
def main():
# Symmetric positive definite matrix
A = np.array([[4, 2, 1],
[2, 5, 2],
[1, 2, 6]])
L = linalg.cholesky(A, lower=True)
print("A =")
print(A)
print()
print("L (lower Cholesky factor):")
print(L)
print()
print("Verify L @ L.T:")
print(L @ L.T)
if __name__ == "__main__":
main()
2. Mathematical Form¶
\[A = LL^T\]
where \(L\) is lower triangular with positive diagonal entries.
3. Upper Form¶
import numpy as np
from scipy import linalg
def main():
A = np.array([[4, 2, 1],
[2, 5, 2],
[1, 2, 6]])
# Upper triangular form: A = U.T @ U
U = linalg.cholesky(A, lower=False)
print("U (upper Cholesky factor):")
print(U)
print()
print("Verify U.T @ U:")
print(U.T @ U)
if __name__ == "__main__":
main()
Requirements¶
1. Symmetric Positive Definite¶
import numpy as np
from scipy import linalg
def main():
# Check if matrix is SPD
A = np.array([[4, 2],
[2, 3]])
# Method 1: Check eigenvalues
eigvals = np.linalg.eigvalsh(A)
print(f"Eigenvalues: {eigvals}")
print(f"All positive: {np.all(eigvals > 0)}")
print()
# Method 2: Try Cholesky (fails if not SPD)
try:
L = linalg.cholesky(A, lower=True)
print("Cholesky succeeded - matrix is SPD")
except linalg.LinAlgError:
print("Cholesky failed - matrix is not SPD")
if __name__ == "__main__":
main()
2. Non-SPD Matrix¶
import numpy as np
from scipy import linalg
def main():
# Not positive definite
A = np.array([[1, 2],
[2, 1]]) # Has negative eigenvalue
print(f"Eigenvalues: {np.linalg.eigvalsh(A)}")
try:
L = linalg.cholesky(A, lower=True)
except linalg.LinAlgError as e:
print(f"Error: {e}")
if __name__ == "__main__":
main()
3. Creating SPD Matrices¶
import numpy as np
from scipy import linalg
def main():
# Method 1: A @ A.T is always SPD (if A has full row rank)
B = np.random.randn(3, 5)
A1 = B @ B.T
# Method 2: Covariance matrix
data = np.random.randn(100, 3)
A2 = np.cov(data.T)
# Method 3: Add to diagonal for numerical stability
C = np.random.randn(3, 3)
A3 = C @ C.T + 0.1 * np.eye(3)
# All should work
for i, A in enumerate([A1, A2, A3], 1):
L = linalg.cholesky(A, lower=True)
print(f"Matrix {i}: Cholesky succeeded")
if __name__ == "__main__":
main()
Solving Linear Systems¶
1. cho_factor and cho_solve¶
import numpy as np
from scipy import linalg
def main():
A = np.array([[4, 2, 1],
[2, 5, 2],
[1, 2, 6]])
b = np.array([7, 9, 9])
# Factor
c, low = linalg.cho_factor(A)
# Solve
x = linalg.cho_solve((c, low), b)
print(f"Solution: x = {x}")
print(f"Verify A @ x = {A @ x}")
if __name__ == "__main__":
main()
2. Multiple Solves¶
import numpy as np
from scipy import linalg
def main():
A = np.array([[4, 2],
[2, 3]])
# Factor once
c, low = linalg.cho_factor(A)
# Solve multiple systems
for b in [[6, 5], [4, 3], [2, 1]]:
b = np.array(b)
x = linalg.cho_solve((c, low), b)
print(f"b = {b} -> x = {x}")
if __name__ == "__main__":
main()
3. vs LU Performance¶
import numpy as np
from scipy import linalg
import time
def main():
n = 1000
A = np.random.randn(n, n)
A = A @ A.T + n * np.eye(n) # SPD
b = np.random.randn(n)
# LU solve
start = time.perf_counter()
lu, piv = linalg.lu_factor(A)
x1 = linalg.lu_solve((lu, piv), b)
lu_time = time.perf_counter() - start
# Cholesky solve
start = time.perf_counter()
c, low = linalg.cho_factor(A)
x2 = linalg.cho_solve((c, low), b)
cho_time = time.perf_counter() - start
print(f"LU time: {lu_time:.4f} sec")
print(f"Cholesky time: {cho_time:.4f} sec")
print(f"Speedup: {lu_time/cho_time:.2f}x")
if __name__ == "__main__":
main()
Determinant and Log-Determinant¶
1. Determinant via Cholesky¶
import numpy as np
from scipy import linalg
def main():
A = np.array([[4, 2, 1],
[2, 5, 2],
[1, 2, 6]])
L = linalg.cholesky(A, lower=True)
# det(A) = det(L)^2 = (prod of diagonal)^2
det_L = np.prod(np.diag(L))
det_A = det_L ** 2
print(f"det(A) via Cholesky: {det_A}")
print(f"det(A) direct: {np.linalg.det(A)}")
if __name__ == "__main__":
main()
2. Log-Determinant (Numerically Stable)¶
import numpy as np
from scipy import linalg
def main():
# Large SPD matrix
n = 100
A = np.random.randn(n, n)
A = A @ A.T + n * np.eye(n)
L = linalg.cholesky(A, lower=True)
# log|A| = 2 * sum(log(diag(L)))
log_det = 2 * np.sum(np.log(np.diag(L)))
print(f"log|A| via Cholesky: {log_det:.4f}")
print(f"log|A| via slogdet: {np.linalg.slogdet(A)[1]:.4f}")
if __name__ == "__main__":
main()
Applications¶
1. Multivariate Normal Sampling¶
import numpy as np
from scipy import linalg
def main():
# Sample from N(mu, Sigma)
mu = np.array([1, 2])
Sigma = np.array([[1.0, 0.5],
[0.5, 1.0]])
L = linalg.cholesky(Sigma, lower=True)
# Generate samples: x = mu + L @ z, where z ~ N(0, I)
n_samples = 1000
z = np.random.randn(n_samples, 2)
samples = mu + z @ L.T
print(f"Sample mean: {samples.mean(axis=0)}")
print(f"Sample cov:\n{np.cov(samples.T)}")
if __name__ == "__main__":
main()
2. Gaussian Process Regression¶
import numpy as np
from scipy import linalg
def main():
# Kernel matrix (must be SPD)
X = np.linspace(0, 1, 10).reshape(-1, 1)
K = np.exp(-0.5 * ((X - X.T) ** 2) / 0.1)
K += 1e-6 * np.eye(len(X)) # Numerical stability
y = np.sin(2 * np.pi * X.flatten()) + 0.1 * np.random.randn(10)
# Solve K @ alpha = y using Cholesky
L = linalg.cholesky(K, lower=True)
alpha = linalg.cho_solve((L, True), y)
print(f"Coefficients: {alpha[:5]}...")
if __name__ == "__main__":
main()
3. Portfolio Optimization¶
import numpy as np
from scipy import linalg
def main():
# Covariance matrix of returns
Sigma = np.array([[0.04, 0.01, 0.02],
[0.01, 0.09, 0.03],
[0.02, 0.03, 0.16]])
# Check it's valid covariance (SPD)
L = linalg.cholesky(Sigma, lower=True)
print("Valid covariance matrix")
print()
# Simulate correlated returns
n_days = 252
z = np.random.randn(n_days, 3)
returns = z @ L.T
print(f"Simulated correlation:\n{np.corrcoef(returns.T).round(2)}")
if __name__ == "__main__":
main()
Summary¶
1. Functions¶
| Function | Description |
|---|---|
linalg.cholesky(A, lower=True) |
Compute L where A = LL^T |
linalg.cho_factor(A) |
Factor for solving |
linalg.cho_solve((c, low), b) |
Solve using factorization |
2. Key Properties¶
- Requires symmetric positive definite matrix
- About 2x faster than LU for SPD matrices
- Numerically stable for SPD systems
- Half the storage of LU (only L needed)