Skip to content

Numerical Errors

Floating-point arithmetic introduces systematic errors that can accumulate and compromise computational results. Understanding these error sources is essential for writing reliable numerical code.

Round-Off Error

Every floating-point operation may introduce a small relative rounding error, bounded by machine epsilon.

1. Operation Model

IEEE 754 guarantees that basic operations are correctly rounded:

\[x \odot y = (x * y)(1 + \delta), \quad |\delta| \leq \epsilon\]

where \(\odot\) is the floating-point operation and \(*\) is the exact mathematical operation.

import sys

# Machine epsilon: the gap between 1.0 and the next representable double.
eps = sys.float_info.epsilon
print(f"Machine epsilon: {eps:.2e}")

a, b = 1.0, 1e-16

# Mathematically a + b = 1.0000000000000001, but 1e-16 is less than half
# an ulp of 1.0, so the rounded sum collapses back to exactly 1.0.
result = a + b
print(f"1.0 + 1e-16 = {result}")
print(f"Error bound: {eps:.2e}")

2. Error Accumulation

Errors compound over many operations.

# Repeated operations accumulate error
def sum_naive(n):
    """Add 1/n to an accumulator n times; mathematically this is exactly 1."""
    step = 1.0 / n
    acc = 0.0
    count = 0
    while count < n:
        acc += step
        count += 1
    return acc

# Watch the accumulated rounding error of sum_naive grow with n.
sizes = (10, 100, 1000, 10000, 100000)
for n in sizes:
    result = sum_naive(n)
    error = abs(result - 1.0)
    print(f"n={n:>6}: result={result:.15f}, error={error:.2e}")

# Error grows roughly as sqrt(n) * epsilon

3. Kahan Summation

Compensated summation reduces accumulated error.

def sum_kahan(values):
    """Kahan (compensated) summation: capture the rounding error of each
    addition in a correction term and feed it back into the next step."""
    total, lost = 0.0, 0.0

    for value in values:
        adjusted = value - lost               # re-inject previously lost bits
        new_total = total + adjusted          # big + small: low bits may drop
        lost = (new_total - total) - adjusted # what this addition dropped
        total = new_total

    return total

# Head-to-head: the built-in sum vs. Kahan on 100000 identical terms.
n = 100000
values = [1.0 / n] * n

naive_result = sum(values)
kahan_result = sum_kahan(values)

print(f"Naive sum: {naive_result:.15f}")
print(f"Kahan sum: {kahan_result:.15f}")
print(f"Naive error: {abs(naive_result - 1.0):.2e}")
print(f"Kahan error: {abs(kahan_result - 1.0):.2e}")

Catastrophic Cancellation

Subtracting nearly equal numbers amplifies relative error dramatically.

1. Mechanism

When \(a \approx b\), the subtraction \(a - b\) loses significant digits.

# Two inputs that agree in their first 15 significant digits.
a = 1.000000000000001
b = 1.000000000000000

# Subtracting them cancels every shared leading digit.
diff = a - b
print(f"a = {a:.15f}")
print(f"b = {b:.15f}")
print(f"a - b = {diff:.15e}")

# The operands each carried ~16 significant digits, but the difference
# keeps only about one: we expect 1e-15 and get rounding noise instead.

2. Classic Example: Quadratic Formula

The standard quadratic formula suffers from cancellation.

import math

def quadratic_standard(a, b, c):
    """Textbook quadratic formula; cancels badly when b*b >> 4*a*c."""
    root = math.sqrt(b*b - 4*a*c)
    # When root is nearly |b|, one of the two numerators loses most digits.
    return (-b + root) / (2*a), (-b - root) / (2*a)

def quadratic_stable(a, b, c):
    """Cancellation-free quadratic roots via the sign-matched q trick."""
    root = math.sqrt(b*b - 4*a*c)
    # Combine quantities of like sign so no near-equal subtraction occurs;
    # the second root then follows from the product x1*x2 = c/a.
    q = -0.5 * (b + root) if b >= 0 else -0.5 * (b - root)
    return q / a, c / q

# Stress case: b*b dwarfs 4ac, so -b + disc cancels almost completely.
a, b, c = 1, 1e8, 1
print("Standard formula:")
x1, x2 = quadratic_standard(a, b, c)
print(f"  x1 = {x1:.10e}")
print(f"  x2 = {x2:.10e}")

print("\nStable formula:")
x1, x2 = quadratic_stable(a, b, c)
print(f"  x1 = {x1:.10e}")
print(f"  x2 = {x2:.10e}")

# By Vieta's formulas, the product of the roots must equal c/a = 1.
std_x1, std_x2 = quadratic_standard(a, b, c)
stb_x1, stb_x2 = quadratic_stable(a, b, c)
print(f"\nProduct check (should be 1):")
print(f"  Standard: {std_x1 * std_x2}")
print(f"  Stable: {stb_x1 * stb_x2}")

3. Variance Calculation

Naive variance formula is unstable.

import math

def variance_naive(data):
    """Naive two-pass variance (unstable for large means)."""
    n = len(data)
    s1 = sum(data)
    s2 = sum(x * x for x in data)
    # sum(x^2) - (sum x)^2 / n subtracts two huge, nearly equal numbers
    # whenever the mean is large relative to the spread.
    return (s2 - s1 * s1 / n) / (n - 1)

def variance_welford(data):
    """Welford's single-pass, numerically stable sample variance."""
    count = 0
    mean = 0.0
    m2 = 0.0  # running sum of squared deviations from the current mean

    for x in data:
        count += 1
        d_before = x - mean       # deviation from the old mean
        mean += d_before / count
        d_after = x - mean        # deviation from the updated mean
        m2 += d_before * d_after

    if count > 1:
        return m2 / (count - 1)
    return 0.0

# Five points with spread 1..5 riding on a huge 1e9 offset.
offsets = [1, 2, 3, 4, 5]
data = [1e9 + x for x in offsets]

print(f"Naive variance:   {variance_naive(data):.10f}")
print(f"Welford variance: {variance_welford(data):.10f}")
print(f"True variance:    {2.5:.10f}")  # var([1,2,3,4,5])

4. Detecting Cancellation

Monitor significant digit loss.

import math

def significant_digits(x, y, result):
    """Estimate significant digits lost in x - y = result."""
    if result == 0:
        # Every digit cancelled.
        return float('inf')

    double_digits = 15  # decimal digits carried by IEEE double precision

    # Orders of magnitude dropped going from the operands to the difference.
    lost = math.log10(max(abs(x), abs(y)) / abs(result))

    return max(0, double_digits - lost)

# Progressively closer pairs: the closer x and y, the fewer digits survive.
pairs = [
    (1.0, 0.5),           # No cancellation
    (1.0, 0.99),          # Mild
    (1.0, 0.999999),      # Moderate
    (1.0, 0.999999999999),  # Severe
]

for x, y in pairs:
    diff = x - y
    remaining = significant_digits(x, y, diff)
    print(f"{x} - {y} = {diff:.2e}, ~{remaining:.1f} digits remain")

Loss of Associativity

Floating-point addition and multiplication are not associative.

1. Addition Non-Associativity

\((a + b) + c \neq a + (b + c)\) in floating-point.

# The grouping alone decides whether the 1.0 survives.
a, b, c = 1e16, -1e16, 1.0

left = (a + b) + c   # cancel first, then add 1      -> 1.0
right = a + (b + c)  # 1.0 vanishes into -1e16 first -> 0.0

print(f"(a + b) + c = {left}")
print(f"a + (b + c) = {right}")
print(f"Difference:  {abs(left - right)}")

# The culprit: 1.0 is below half an ulp of 1e16, so b + c rounds to b.
print(f"\nb + c = {b + c}")  # -1e16, not -9999999999999999

2. Multiplication Non-Associativity

Also affects multiplication with extreme values.

# Near the overflow limit the grouping decides inf vs. finite.
a = 1e200
b = 1e200
c = 1e-200

left = (a * b) * c   # a * b -> inf, and inf * 1e-200 stays inf
right = a * (b * c)  # b * c ~ 1, so every intermediate stays in range

print(f"(a * b) * c = {left}")
print(f"a * (b * c) = {right}")

3. Summation Order Effects

Order of summation affects result.

import numpy as np

# Build a data set whose entries span twenty orders of magnitude.
np.random.seed(42)
small = np.random.randn(1000) * 1e-10
large = np.random.randn(1000) * 1e10
data = np.concatenate([small, large])

# The same 2000 numbers, accumulated in four different orders.
sum_original = sum(data)
sum_sorted_asc = sum(sorted(data))
sum_sorted_desc = sum(sorted(data, reverse=True))
sum_abs_sorted = sum(sorted(data, key=abs))

print(f"Original order:    {sum_original:.10e}")
print(f"Ascending sort:    {sum_sorted_asc:.10e}")
print(f"Descending sort:   {sum_sorted_desc:.10e}")
print(f"By magnitude:      {sum_abs_sorted:.10e}")
print(f"NumPy pairwise:    {np.sum(data):.10e}")

4. Parallel Reduction Issues

Different reduction trees give different results.

import numpy as np

def reduce_left(arr):
    """Strictly sequential left-to-right sum."""
    acc = arr[0]
    for value in arr[1:]:
        acc = acc + value
    return acc

def reduce_pairwise(arr):
    """Tree-shaped sum: split in half, reduce each half, add the results."""
    n = len(arr)
    if n == 1:
        return arr[0]
    if n == 2:
        return arr[0] + arr[1]
    half = n // 2
    return reduce_pairwise(arr[:half]) + reduce_pairwise(arr[half:])

# Same 1024 float32 values, reduced sequentially and as a tree.
np.random.seed(123)
data = np.random.randn(1024).astype(np.float32)

left_result = reduce_left(data)
pairwise_result = reduce_pairwise(data)

print(f"Left reduction:     {left_result}")
print(f"Pairwise reduction: {pairwise_result}")
print(f"Difference:         {abs(left_result - pairwise_result):.2e}")

Overflow and Underflow

Extreme values exceed representable range.

1. Overflow to Infinity

Results exceeding maximum representable value become infinity.

import sys
import math

# Largest finite double (~1.8e308); anything bigger rounds to inf.
max_float = sys.float_info.max
print(f"Max float: {max_float:.6e}")

# Overflow: doubling the maximum leaves the representable range.
result = max_float * 2
print(f"max * 2 = {result}")  # inf

# Overflow in an intermediate even though the true result (1e200) fits.
a = 1e200
b = 1e200
c = 1e-200

# Bad: a * b overflows to inf before c can scale it back down.
# No try/except is needed here: float multiplication in Python never
# raises on overflow, it silently yields inf -- the previous bare
# "except: pass" could never trigger and would only hide real errors.
bad = (a * b) * c
print(f"(a*b)*c = {bad}")  # inf

# Good: reorder to avoid overflow
good = a * (b * c)
print(f"a*(b*c) = {good}")  # 1e200

2. Underflow to Zero

Very small values become zero.

import sys

# Smallest positive *normal* double; subnormals extend below it.
min_float = sys.float_info.min
print(f"Min positive float: {min_float:.6e}")

# Walk down through the subnormal range until the value flushes to zero.
tiny = 1e-308
steps = 0
while steps < 20:
    tiny /= 10
    print(f"{tiny:.6e}", end=" ")
    if tiny == 0:
        print("(underflow)")
        break
    steps += 1
print()

# Underflow in an intermediate even though the true result fits.
a = 1e-200
b = 1e-200
c = 1e200

# Bad: a * b underflows to zero, and multiplying by c cannot rescue a zero.
bad = (a * b) * c  # 0 * 1e200 = 0
print(f"(a*b)*c = {bad}")

# Good: group b * c first so every intermediate stays in range.
good = a * (b * c)  # 1e-200 * 1 = 1e-200
print(f"a*(b*c) = {good}")

3. Log-Space Computation

Avoid overflow/underflow using logarithms.

import math
import numpy as np

# Problem: multiply many factors far below the underflow threshold; the
# true answer (1e-1000) is not representable as a double at all.
small_values = [1e-100] * 10

# A direct left-to-right product flushes to zero partway through.
direct_product = math.prod(small_values)
print(f"Direct product: {direct_product}")  # 0.0 (underflow)

# In log space the product becomes a perfectly well-scaled sum.
log_sum = sum(math.log(v) for v in small_values)
print(f"Log of product: {log_sum}")  # -2302.58...
print(f"Exact: 10 * log(1e-100) = {10 * math.log(1e-100)}")

# For probabilities, use log-sum-exp
def logsumexp(log_values):
    """Numerically stable log(sum(exp(x)))."""
    # Shift by the maximum so every exponent is <= 0: no overflow, and at
    # least one exp() term equals exactly 1.
    peak = max(log_values)
    shifted_total = sum(math.exp(v - peak) for v in log_values)
    return peak + math.log(shifted_total)

# Probabilities around e^-1000 would underflow; their logs are harmless.
log_probs = [-1000, -1001, -1002]
print(f"logsumexp: {logsumexp(log_probs)}")
print(f"numpy:     {np.logaddexp.reduce(log_probs)}")

Error Propagation

Understanding how errors compound through calculations.

1. Forward Error Analysis

Track how input errors affect output.

import math

def propagate_error(f, x, dx):
    """
    Estimate output error given input error.
    Uses first-order Taylor expansion.
    """
    # Central difference with a step scaled to the magnitude of x.
    h = 1e-8 * max(abs(x), 1)
    slope = (f(x + h) - f(x - h)) / (2 * h)

    # First-order propagation: dy ~ |f'(x)| * dx.
    return f(x), abs(slope) * dx

# sin has slope ~1 near 0, so input errors pass through almost unchanged.
x = 0.001
dx = 1e-10

y, dy = propagate_error(math.sin, x, dx)
print(f"sin({x}) = {y:.10e}")
print(f"Input error:  {dx:.2e}")
print(f"Output error: {dy:.2e}")
print(f"Amplification: {dy/dx:.2f}x")

# Near pi/2 the derivative of sin is ~0, so input errors are damped.
x = math.pi / 2 - 0.001
y, dy = propagate_error(math.sin, x, dx)
print(f"\nsin({x:.4f}) = {y:.10f}")
print(f"Output error: {dy:.2e}")
print(f"Amplification: {dy/dx:.2f}x")

2. Condition Number

Measures sensitivity of a problem to input perturbations.

\[\kappa = \left| \frac{x f'(x)}{f(x)} \right|\]
import math

def condition_number(f, x, h=1e-8):
    """Relative condition number |x f'(x) / f(x)|, f' by central difference."""
    fx = f(x)
    if fx == 0:
        # Any perturbation is infinite relative to a zero output.
        return float('inf')

    slope = (f(x + h) - f(x - h)) / (2 * h)

    return abs(x * slope / fx)

# sqrt halves relative error: its condition number is 1/2 everywhere.
print(f"sqrt(100) condition: {condition_number(math.sqrt, 100):.2f}")

# Subtracting 1 near x = 1: tiny relative input changes, huge output swings.
print(f"(x-1) at x=1.001 condition: {condition_number(lambda x: x-1, 1.001):.2f}")

# tan blows up at pi/2, and so does its condition number.
print(f"tan near π/2 condition: {condition_number(math.tan, 1.57):.2f}")

3. Backward Error Analysis

Characterize computed result as exact result of perturbed input.

def backward_error(f, x, computed_result):
    """
    Find perturbation δ such that f(x + δ) = computed_result exactly.
    Uses Newton's method.
    """
    # Solve f(x_pert) = computed_result for x_pert, starting from x.
    x_pert = x
    for _ in range(10):
        residual = f(x_pert) - computed_result
        step = 1e-10 * max(abs(x_pert), 1)
        slope = (f(x_pert + step) - f(x_pert - step)) / (2 * step)
        if abs(slope) < 1e-15:
            break  # derivative too flat for a reliable Newton update
        x_pert -= residual / slope

    return abs(x_pert - x)

# Simulate a computed sqrt(2) whose output is off by 1e-10.
import math
x = 2.0
exact = math.sqrt(x)
computed = exact + 1e-10  # Simulate small error

# How much would the *input* need to move to explain that output exactly?
delta = backward_error(math.sqrt, x, computed)
print(f"sqrt({x}) computed with error 1e-10")
print(f"Backward error (input perturbation): {delta:.2e}")
print(f"Relative backward error: {delta/x:.2e}")

Mitigation Strategies

Techniques to minimize numerical error impact.

1. Algorithm Selection

Choose stable algorithms for sensitive operations.

import numpy as np

# Solving linear system Ax = b
np.random.seed(42)
n = 100

# Identity plus small noise: comfortably well-conditioned.
A_good = np.eye(n) + 0.1 * np.random.randn(n, n)
b = np.random.randn(n)

# Three ways to get x; forming the explicit inverse is the one to avoid.
x_solve = np.linalg.solve(A_good, b)
x_lstsq = np.linalg.lstsq(A_good, b, rcond=None)[0]
x_inv = np.linalg.inv(A_good) @ b  # Don't do this!

print("Residual norms:")
print(f"  solve:  {np.linalg.norm(A_good @ x_solve - b):.2e}")
print(f"  lstsq:  {np.linalg.norm(A_good @ x_lstsq - b):.2e}")
print(f"  inv:    {np.linalg.norm(A_good @ x_inv - b):.2e}")

2. Scaling

Normalize data to avoid extreme magnitudes.

import numpy as np

# Problem: dot product of vectors with very large entries.
a = np.array([1e150, 1e150, 1e150])
b = np.array([1e150, 1e150, 1e150])

# Each elementwise product is 1e300, so the direct sum (3e300) sits only
# a couple of orders of magnitude below the float64 overflow threshold
# (~1.8e308); slightly larger entries or longer vectors would hit inf.
direct = np.dot(a, b)
print(f"Direct dot product: {direct}")  # 3e300 -- dangerously close to overflow

# Scaled computation: normalize first so intermediates are O(1), then
# reapply the two scale factors at the very end.
scale_a = np.linalg.norm(a)
scale_b = np.linalg.norm(b)
a_normalized = a / scale_a
b_normalized = b / scale_b

# NOTE(review): np.linalg.norm squares the entries too, so this rescaling
# only helps while those squares are finite (entries below ~1e154).
scaled_dot = np.dot(a_normalized, b_normalized) * scale_a * scale_b
print(f"Scaled dot product: {scaled_dot:.2e}")

3. Extended Precision

Use higher precision for critical calculations.

from decimal import Decimal, getcontext

# Fifty significant digits for all Decimal arithmetic below.
getcontext().prec = 50

# Binary floats cannot represent 0.1 or 0.2 exactly.
a = 0.1 + 0.2
print(f"Float: 0.1 + 0.2 = {a}")
print(f"Float: 0.1 + 0.2 == 0.3? {a == 0.3}")

# Decimal stores base-10 digits, so these literals are exact.
a_dec = Decimal('0.1') + Decimal('0.2')
print(f"\nDecimal: 0.1 + 0.2 = {a_dec}")
print(f"Decimal: 0.1 + 0.2 == 0.3? {a_dec == Decimal('0.3')}")

# Leibniz series for π/4, evaluated with 50-digit arithmetic.
pi_approx = sum(Decimal((-1)**k) / Decimal(2*k + 1) for k in range(1000)) * 4
print(f"\nπ approximation: {pi_approx}")

4. Interval Arithmetic

Track error bounds explicitly.

class Interval:
    """Simple interval arithmetic."""

    def __init__(self, lo, hi=None):
        # A single argument denotes the degenerate interval [lo, lo].
        self.lo = lo
        self.hi = lo if hi is None else hi

    def __add__(self, other):
        # Endpoints add independently.
        return Interval(self.lo + other.lo, self.hi + other.hi)

    def __mul__(self, other):
        # The product's range is spanned by the four endpoint products.
        corners = (self.lo * other.lo, self.lo * other.hi,
                   self.hi * other.lo, self.hi * other.hi)
        return Interval(min(corners), max(corners))

    def __repr__(self):
        return f"[{self.lo:.6f}, {self.hi:.6f}]"

# Bracket 0.1 and 0.2 with one half-ulp of slack on each side.
eps = 2**-53
x = Interval(0.1 - eps, 0.1 + eps)
y = Interval(0.2 - eps, 0.2 + eps)

result = x + y
print(f"0.1 + 0.2 ∈ {result}")
print(f"Contains 0.3? {result.lo <= 0.3 <= result.hi}")