Local vs Global Optimization¶

One of the fundamental challenges in optimization is that local minimization methods can get trapped in local minima — points that are lower than nearby points but not the global minimum. Understanding this problem and how to avoid it is crucial for real-world optimization.

The Local Minima Problem¶

A cost function landscape can be visualized as a terrain with multiple valleys. Local minimization methods are like hiking downhill: they follow the slope and stop at the bottom of whichever valley they start in, without knowing if there's a deeper valley elsewhere.

Understanding Local Minima¶

import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize

# Function with multiple local minima
def multi_minima(x):
    """Function with several local and one global minimum."""
    return np.sin(x) + 0.1 * x

# Plot the function
x = np.linspace(-2*np.pi, 2*np.pi, 1000)
y = multi_minima(x)

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(x, y, 'b-', linewidth=2, label='Objective function')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.set_title('Function with Multiple Local Minima')
ax.grid(True, alpha=0.3)

# Find local minima from different starting points
starting_points = [-4*np.pi, -2*np.pi, 0, 2*np.pi, 4*np.pi]
colors = ['red', 'orange', 'green', 'purple', 'brown']

for start, color in zip(starting_points, colors):
    result = optimize.minimize(multi_minima, start, method='BFGS')
    ax.plot(result.x, result.fun, 'o', color=color, markersize=10,
            label=f'Start: {start:.1f}')
    ax.plot(start, multi_minima(start), 'x', color=color, markersize=8)

ax.legend()
plt.tight_layout()
plt.show()

The Rosenbrock Challenge¶

The Rosenbrock function perfectly illustrates why local methods fail:

\[f(x, y) = (1-x)^2 + 100(y-x^2)^2\]

The global minimum is at \((1, 1)\), but the function has narrow, curved valley that gradient-based methods struggle to follow.

import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt

def rosenbrock(x):
    return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2

# Local minimization
starting_points = [
    np.array([-1, -1]),
    np.array([0, 0]),
    np.array([2, 2])
]

fig = plt.figure(figsize=(14, 5))

# 3D visualization
ax1 = fig.add_subplot(131, projection='3d')
x = np.linspace(-2, 3, 100)
y = np.linspace(-1, 4, 100)
X, Y = np.meshgrid(x, y)
Z = (1 - X)**2 + 100 * (Y - X**2)**2
ax1.plot_surface(X, Y, Z, cmap='viridis', alpha=0.7)
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_zlabel('f(x, y)')

# Contour plot with optimization paths
ax2 = fig.add_subplot(132)
contour = ax2.contourf(X, Y, Z, levels=30, cmap='viridis')
ax2.plot(1, 1, 'r*', markersize=20, label='Global minimum')

for i, start in enumerate(starting_points):
    result = optimize.minimize(rosenbrock, start, method='BFGS')
    ax2.plot(start[0], start[1], 'go', markersize=8)
    ax2.plot(result.x[0], result.x[1], 'rs', markersize=8)
    ax2.annotate(f'Start {i+1}', xy=start, xytext=(start[0]+0.1, start[1]+0.1))

ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_title('Local Optimization Paths')
ax2.legend()

# Cost function value from different starting points
ax3 = fig.add_subplot(133)
costs = []
for start in starting_points:
    result = optimize.minimize(rosenbrock, start, method='BFGS')
    costs.append(result.fun)

ax3.bar(range(len(starting_points)), costs)
ax3.set_xlabel('Starting Point')
ax3.set_ylabel('Final Cost')
ax3.set_title('Local Minima Different Values')
ax3.set_yscale('log')

plt.tight_layout()
plt.show()

Global Optimization Methods¶

When local minima are a concern, global optimization methods search more broadly across the parameter space.

Differential Evolution¶

Differential evolution uses a population-based approach: multiple candidate solutions evolve over iterations by mutating and recombining.

Characteristics: - Population-based (maintains many candidate solutions) - No gradient required - Very robust, rarely gets stuck in local minima - Slower than local methods (more function evaluations) - Works on non-smooth and discontinuous functions - Good for black-box optimization

from scipy import optimize
import numpy as np

def multi_peak(x):
    """Function with many local minima."""
    return np.sum(np.sin(x) * np.exp(-0.1 * np.sum(x**2)))

# Local method gets stuck
result_local = optimize.minimize(
    multi_peak,
    x0=[0, 0],
    method='BFGS'
)

# Global method finds true minimum
result_global = optimize.differential_evolution(
    multi_peak,
    bounds=[(-5, 5), (-5, 5)],
    seed=42
)

print(f"Local minimization:")
print(f"  Value: {result_local.fun:.6f}")
print(f"  x: {result_local.x}")
print(f"\nGlobal (differential_evolution):")
print(f"  Value: {result_global.fun:.6f}")
print(f"  x: {result_global.x}")
print(f"\nFunction evaluations - Local: {result_local.nfev}, Global: {result_global.nfev}")

When to use: - Black-box optimization (no gradient available) - Function has many local minima - Non-smooth or discontinuous functions - Can afford extra function evaluations

Basin-Hopping¶

Basin-hopping combines local minimization with random jumps in parameter space. It minimizes locally, then makes a random step and minimizes again from the new location.

Characteristics: - Local minimization with randomization - Escapes local minima by jumping to new regions - Faster than population-based methods - Good balance between speed and global search - Can use any local minimization method

from scipy import optimize
import numpy as np

def f(x):
    """Challenging optimization problem."""
    return (x[0] - 2)**2 * (1 + np.sin(5*x[1])) + x[1]**2

# Local method
result_local = optimize.minimize(f, x0=[0, 0], method='L-BFGS-B')

# Basin hopping
minimizer_kwargs = {"method": "L-BFGS-B"}
result_basin = optimize.basinhopping(
    f,
    x0=[0, 0],
    minimizer_kwargs=minimizer_kwargs,
    niter=100,
    seed=42
)

print(f"Local optimization:")
print(f"  Value: {result_local.fun:.6f}")
print(f"  x: {result_local.x}")
print(f"\nBasin-hopping:")
print(f"  Value: {result_basin.fun:.6f}")
print(f"  x: {result_basin.x}")

When to use: - Need faster global optimization than differential_evolution - Problem has distinct basins - Can provide good local minimizer - Function evaluations are expensive

Dual Annealing¶

Dual annealing is a variant of simulated annealing that combines global and local search strategies.

Characteristics: - Simulated annealing variant - Probabilistically accepts uphill moves to escape local minima - Temperature gradually decreases - Combines with local optimization - Good for continuous optimization

from scipy import optimize
import numpy as np

def nonconvex(x):
    """Highly non-convex function."""
    return (x[0]**2 + x[1]**2 - 5)**2 + (x[0] - x[1])**2

# Local minimization
result_local = optimize.minimize(nonconvex, [0, 0], method='L-BFGS-B')

# Dual annealing
result_annealing = optimize.dual_annealing(
    nonconvex,
    bounds=[(-3, 3), (-3, 3)],
    seed=42
)

print(f"Local: f = {result_local.fun:.6f}")
print(f"Dual annealing: f = {result_annealing.fun:.6f}")

When to use: - Moderate-sized global optimization - Bounds are naturally defined - Don't want to tune population size (like in differential_evolution)

Practical Strategies for Global Optimization¶

Strategy 1: Multi-Start Local Optimization¶

Run local optimization from many random starting points and keep the best:

from scipy import optimize
import numpy as np

def objective(x):
    return np.sin(x[0]) * np.cos(x[1]) + 0.1 * x[0]**2

best_result = None
best_value = float('inf')

# Try from 20 random starting points
np.random.seed(42)
for i in range(20):
    x0 = np.random.uniform(-2*np.pi, 2*np.pi, 2)
    result = optimize.minimize(objective, x0, method='L-BFGS-B')

    if result.fun < best_value:
        best_value = result.fun
        best_result = result

print(f"Best found: f = {best_result.fun:.6f}")
print(f"At: {best_result.x}")

First scan the space coarsely, then refine promising regions:

from scipy import optimize
import numpy as np
import matplotlib.pyplot as plt

def objective(x):
    return (x[0] - 2.5)**2 * np.sin(x[1]) + (x[1] - 3)**2

# Coarse grid search
x = np.linspace(-1, 6, 10)
y = np.linspace(-1, 6, 10)
X, Y = np.meshgrid(x, y)
Z = np.zeros_like(X)
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        Z[i, j] = objective([X[i, j], Y[i, j]])

# Find best grid point
best_idx = np.unravel_index(np.argmin(Z), Z.shape)
x0 = np.array([X[best_idx], Y[best_idx]])

print(f"Coarse grid best: f = {Z[best_idx]:.6f} at {x0}")

# Refine locally
result = optimize.minimize(objective, x0, method='L-BFGS-B')
print(f"Refined: f = {result.fun:.6f} at {result.x}")

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Grid search
ax = axes[0]
contour = ax.contourf(X, Y, Z, levels=20, cmap='viridis')
ax.plot(x0[0], x0[1], 'r*', markersize=20, label='Grid best')
ax.set_title('Coarse Grid Search')
plt.colorbar(contour, ax=ax)

# After refinement
Z_fine = np.zeros_like(X)
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        Z_fine[i, j] = objective([X[i, j], Y[i, j]])

ax = axes[1]
contour = ax.contourf(X, Y, Z_fine, levels=20, cmap='viridis')
ax.plot(result.x[0], result.x[1], 'r*', markersize=20, label='Refined')
ax.set_title('After Local Refinement')
plt.colorbar(contour, ax=ax)

plt.tight_layout()
plt.show()

Strategy 3: Combining Multiple Approaches¶

Use different methods and compare results:

from scipy import optimize
import numpy as np

def objective(x):
    return (x[0] - 2)**2 + 100*(x[1] - x[0]**2)**2

x0 = [0, 0]
bounds = [(-5, 5), (-5, 5)]

# Try multiple methods
results = {}

# Differential evolution
results['DE'] = optimize.differential_evolution(objective, bounds)

# Basin hopping
results['BH'] = optimize.basinhopping(objective, x0,
                                      minimizer_kwargs={'method': 'L-BFGS-B'},
                                      niter=100)

# Dual annealing
results['DA'] = optimize.dual_annealing(objective, bounds)

# Local L-BFGS-B
results['LBFGS'] = optimize.minimize(objective, x0, method='L-BFGS-B', bounds=bounds)

# Compare results
print("Method Comparison:")
print("-" * 50)
for method, result in results.items():
    value = result.fun if hasattr(result, 'fun') else result.get('fun', None)
    print(f"{method:<10}: f = {value:.2e}")

# Find and report best
best_method = min(results.items(), key=lambda x: x[1].fun if hasattr(x[1], 'fun') else float('inf'))
print(f"\nBest method: {best_method[0]}")
print(f"Optimal value: {best_method[1].fun:.2e}")

Multimodal Optimization: Image Registration Example¶

The problem of aligning images demonstrates local minima challenges and solutions:

from scipy import optimize
import numpy as np
import matplotlib.pyplot as plt

# Simulate an error surface with multiple local minima
# (similar to image alignment MSE landscape)
def alignment_error(shift):
    """
    Simulated image alignment error function.
    Has multiple local minima due to image periodicity.
    """
    main_error = (shift - 5)**2
    periodic_ripples = 2 * np.cos(shift)
    return main_error + periodic_ripples

# Plot the error surface
shifts = np.linspace(-5, 15, 1000)
errors = np.array([alignment_error(s) for s in shifts])

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(shifts, errors, 'b-', linewidth=2, label='Alignment error')
ax.set_xlabel('Image shift (pixels)')
ax.set_ylabel('MSE')
ax.set_title('Image Alignment Error Surface')
ax.grid(True, alpha=0.3)

# Local method from poor starting point
result_local = optimize.minimize(alignment_error, x0=-2, method='BFGS')
ax.plot(result_local.x, result_local.fun, 'rs', markersize=10,
        label='Local opt (x0=-2)')

# Global method
result_global = optimize.differential_evolution(alignment_error, bounds=[(-5, 15)])
ax.plot(result_global.x, result_global.fun, 'g*', markersize=15,
        label='Global opt')

ax.legend()
plt.tight_layout()
plt.show()

print(f"Local from x0=-2: shift = {result_local.x[0]:.2f}, error = {result_local.fun:.4f}")
print(f"Global: shift = {result_global.x[0]:.2f}, error = {result_global.fun:.4f}")

Summary¶

Key Insights:

Local methods are fast but can get stuck in poor local minima
Global methods explore more thoroughly but cost more function evaluations
Differential evolution is robust and works on non-smooth functions
Basin-hopping balances speed and global search capability
Multi-start local optimization is simple and often effective
Combine strategies: Use coarse grid search → local refinement → verify globally

Choose based on: - Problem complexity and number of local minima - Computational budget (function evaluation cost) - Need for accuracy vs speed - Whether you can provide bounds or initial guesses