Local vs Global Optimization¶
One of the fundamental challenges in optimization is that local minimization methods can get trapped in local minima — points that are lower than nearby points but not the global minimum. Understanding this problem and how to avoid it is crucial for real-world optimization.
The Local Minima Problem¶
A cost function landscape can be visualized as a terrain with multiple valleys. Local minimization methods are like hiking downhill: they follow the slope and stop at the bottom of whichever valley they start in, without knowing if there's a deeper valley elsewhere.
Understanding Local Minima¶
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
# Function with multiple local minima
def multi_minima(x):
"""Function with several local and one global minimum."""
return np.sin(x) + 0.1 * x
# Plot the function
x = np.linspace(-2*np.pi, 2*np.pi, 1000)
y = multi_minima(x)
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(x, y, 'b-', linewidth=2, label='Objective function')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.set_title('Function with Multiple Local Minima')
ax.grid(True, alpha=0.3)
# Find local minima from different starting points
starting_points = [-4*np.pi, -2*np.pi, 0, 2*np.pi, 4*np.pi]
colors = ['red', 'orange', 'green', 'purple', 'brown']
for start, color in zip(starting_points, colors):
result = optimize.minimize(multi_minima, start, method='BFGS')
ax.plot(result.x, result.fun, 'o', color=color, markersize=10,
label=f'Start: {start:.1f}')
ax.plot(start, multi_minima(start), 'x', color=color, markersize=8)
ax.legend()
plt.tight_layout()
plt.show()
The Rosenbrock Challenge¶
The Rosenbrock function perfectly illustrates why local methods fail:
The global minimum is at \((1, 1)\), but the function has narrow, curved valley that gradient-based methods struggle to follow.
import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt
def rosenbrock(x):
return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2
# Local minimization
starting_points = [
np.array([-1, -1]),
np.array([0, 0]),
np.array([2, 2])
]
fig = plt.figure(figsize=(14, 5))
# 3D visualization
ax1 = fig.add_subplot(131, projection='3d')
x = np.linspace(-2, 3, 100)
y = np.linspace(-1, 4, 100)
X, Y = np.meshgrid(x, y)
Z = (1 - X)**2 + 100 * (Y - X**2)**2
ax1.plot_surface(X, Y, Z, cmap='viridis', alpha=0.7)
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.set_zlabel('f(x, y)')
# Contour plot with optimization paths
ax2 = fig.add_subplot(132)
contour = ax2.contourf(X, Y, Z, levels=30, cmap='viridis')
ax2.plot(1, 1, 'r*', markersize=20, label='Global minimum')
for i, start in enumerate(starting_points):
result = optimize.minimize(rosenbrock, start, method='BFGS')
ax2.plot(start[0], start[1], 'go', markersize=8)
ax2.plot(result.x[0], result.x[1], 'rs', markersize=8)
ax2.annotate(f'Start {i+1}', xy=start, xytext=(start[0]+0.1, start[1]+0.1))
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_title('Local Optimization Paths')
ax2.legend()
# Cost function value from different starting points
ax3 = fig.add_subplot(133)
costs = []
for start in starting_points:
result = optimize.minimize(rosenbrock, start, method='BFGS')
costs.append(result.fun)
ax3.bar(range(len(starting_points)), costs)
ax3.set_xlabel('Starting Point')
ax3.set_ylabel('Final Cost')
ax3.set_title('Local Minima Different Values')
ax3.set_yscale('log')
plt.tight_layout()
plt.show()
Global Optimization Methods¶
When local minima are a concern, global optimization methods search more broadly across the parameter space.
Differential Evolution¶
Differential evolution uses a population-based approach: multiple candidate solutions evolve over iterations by mutating and recombining.
Characteristics: - Population-based (maintains many candidate solutions) - No gradient required - Very robust, rarely gets stuck in local minima - Slower than local methods (more function evaluations) - Works on non-smooth and discontinuous functions - Good for black-box optimization
from scipy import optimize
import numpy as np
def multi_peak(x):
"""Function with many local minima."""
return np.sum(np.sin(x) * np.exp(-0.1 * np.sum(x**2)))
# Local method gets stuck
result_local = optimize.minimize(
multi_peak,
x0=[0, 0],
method='BFGS'
)
# Global method finds true minimum
result_global = optimize.differential_evolution(
multi_peak,
bounds=[(-5, 5), (-5, 5)],
seed=42
)
print(f"Local minimization:")
print(f" Value: {result_local.fun:.6f}")
print(f" x: {result_local.x}")
print(f"\nGlobal (differential_evolution):")
print(f" Value: {result_global.fun:.6f}")
print(f" x: {result_global.x}")
print(f"\nFunction evaluations - Local: {result_local.nfev}, Global: {result_global.nfev}")
When to use: - Black-box optimization (no gradient available) - Function has many local minima - Non-smooth or discontinuous functions - Can afford extra function evaluations
Basin-Hopping¶
Basin-hopping combines local minimization with random jumps in parameter space. It minimizes locally, then makes a random step and minimizes again from the new location.
Characteristics: - Local minimization with randomization - Escapes local minima by jumping to new regions - Faster than population-based methods - Good balance between speed and global search - Can use any local minimization method
from scipy import optimize
import numpy as np
def f(x):
"""Challenging optimization problem."""
return (x[0] - 2)**2 * (1 + np.sin(5*x[1])) + x[1]**2
# Local method
result_local = optimize.minimize(f, x0=[0, 0], method='L-BFGS-B')
# Basin hopping
minimizer_kwargs = {"method": "L-BFGS-B"}
result_basin = optimize.basinhopping(
f,
x0=[0, 0],
minimizer_kwargs=minimizer_kwargs,
niter=100,
seed=42
)
print(f"Local optimization:")
print(f" Value: {result_local.fun:.6f}")
print(f" x: {result_local.x}")
print(f"\nBasin-hopping:")
print(f" Value: {result_basin.fun:.6f}")
print(f" x: {result_basin.x}")
When to use: - Need faster global optimization than differential_evolution - Problem has distinct basins - Can provide good local minimizer - Function evaluations are expensive
Dual Annealing¶
Dual annealing is a variant of simulated annealing that combines global and local search strategies.
Characteristics: - Simulated annealing variant - Probabilistically accepts uphill moves to escape local minima - Temperature gradually decreases - Combines with local optimization - Good for continuous optimization
from scipy import optimize
import numpy as np
def nonconvex(x):
"""Highly non-convex function."""
return (x[0]**2 + x[1]**2 - 5)**2 + (x[0] - x[1])**2
# Local minimization
result_local = optimize.minimize(nonconvex, [0, 0], method='L-BFGS-B')
# Dual annealing
result_annealing = optimize.dual_annealing(
nonconvex,
bounds=[(-3, 3), (-3, 3)],
seed=42
)
print(f"Local: f = {result_local.fun:.6f}")
print(f"Dual annealing: f = {result_annealing.fun:.6f}")
When to use: - Moderate-sized global optimization - Bounds are naturally defined - Don't want to tune population size (like in differential_evolution)
Practical Strategies for Global Optimization¶
Strategy 1: Multi-Start Local Optimization¶
Run local optimization from many random starting points and keep the best:
from scipy import optimize
import numpy as np
def objective(x):
return np.sin(x[0]) * np.cos(x[1]) + 0.1 * x[0]**2
best_result = None
best_value = float('inf')
# Try from 20 random starting points
np.random.seed(42)
for i in range(20):
x0 = np.random.uniform(-2*np.pi, 2*np.pi, 2)
result = optimize.minimize(objective, x0, method='L-BFGS-B')
if result.fun < best_value:
best_value = result.fun
best_result = result
print(f"Best found: f = {best_result.fun:.6f}")
print(f"At: {best_result.x}")
Strategy 2: Coarse Grid Search + Local Refinement¶
First scan the space coarsely, then refine promising regions:
from scipy import optimize
import numpy as np
import matplotlib.pyplot as plt
def objective(x):
return (x[0] - 2.5)**2 * np.sin(x[1]) + (x[1] - 3)**2
# Coarse grid search
x = np.linspace(-1, 6, 10)
y = np.linspace(-1, 6, 10)
X, Y = np.meshgrid(x, y)
Z = np.zeros_like(X)
for i in range(X.shape[0]):
for j in range(X.shape[1]):
Z[i, j] = objective([X[i, j], Y[i, j]])
# Find best grid point
best_idx = np.unravel_index(np.argmin(Z), Z.shape)
x0 = np.array([X[best_idx], Y[best_idx]])
print(f"Coarse grid best: f = {Z[best_idx]:.6f} at {x0}")
# Refine locally
result = optimize.minimize(objective, x0, method='L-BFGS-B')
print(f"Refined: f = {result.fun:.6f} at {result.x}")
# Visualize
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
# Grid search
ax = axes[0]
contour = ax.contourf(X, Y, Z, levels=20, cmap='viridis')
ax.plot(x0[0], x0[1], 'r*', markersize=20, label='Grid best')
ax.set_title('Coarse Grid Search')
plt.colorbar(contour, ax=ax)
# After refinement
Z_fine = np.zeros_like(X)
for i in range(X.shape[0]):
for j in range(X.shape[1]):
Z_fine[i, j] = objective([X[i, j], Y[i, j]])
ax = axes[1]
contour = ax.contourf(X, Y, Z_fine, levels=20, cmap='viridis')
ax.plot(result.x[0], result.x[1], 'r*', markersize=20, label='Refined')
ax.set_title('After Local Refinement')
plt.colorbar(contour, ax=ax)
plt.tight_layout()
plt.show()
Strategy 3: Combining Multiple Approaches¶
Use different methods and compare results:
from scipy import optimize
import numpy as np
def objective(x):
return (x[0] - 2)**2 + 100*(x[1] - x[0]**2)**2
x0 = [0, 0]
bounds = [(-5, 5), (-5, 5)]
# Try multiple methods
results = {}
# Differential evolution
results['DE'] = optimize.differential_evolution(objective, bounds)
# Basin hopping
results['BH'] = optimize.basinhopping(objective, x0,
minimizer_kwargs={'method': 'L-BFGS-B'},
niter=100)
# Dual annealing
results['DA'] = optimize.dual_annealing(objective, bounds)
# Local L-BFGS-B
results['LBFGS'] = optimize.minimize(objective, x0, method='L-BFGS-B', bounds=bounds)
# Compare results
print("Method Comparison:")
print("-" * 50)
for method, result in results.items():
value = result.fun if hasattr(result, 'fun') else result.get('fun', None)
print(f"{method:<10}: f = {value:.2e}")
# Find and report best
best_method = min(results.items(), key=lambda x: x[1].fun if hasattr(x[1], 'fun') else float('inf'))
print(f"\nBest method: {best_method[0]}")
print(f"Optimal value: {best_method[1].fun:.2e}")
Multimodal Optimization: Image Registration Example¶
The problem of aligning images demonstrates local minima challenges and solutions:
from scipy import optimize
import numpy as np
import matplotlib.pyplot as plt
# Simulate an error surface with multiple local minima
# (similar to image alignment MSE landscape)
def alignment_error(shift):
"""
Simulated image alignment error function.
Has multiple local minima due to image periodicity.
"""
main_error = (shift - 5)**2
periodic_ripples = 2 * np.cos(shift)
return main_error + periodic_ripples
# Plot the error surface
shifts = np.linspace(-5, 15, 1000)
errors = np.array([alignment_error(s) for s in shifts])
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(shifts, errors, 'b-', linewidth=2, label='Alignment error')
ax.set_xlabel('Image shift (pixels)')
ax.set_ylabel('MSE')
ax.set_title('Image Alignment Error Surface')
ax.grid(True, alpha=0.3)
# Local method from poor starting point
result_local = optimize.minimize(alignment_error, x0=-2, method='BFGS')
ax.plot(result_local.x, result_local.fun, 'rs', markersize=10,
label='Local opt (x0=-2)')
# Global method
result_global = optimize.differential_evolution(alignment_error, bounds=[(-5, 15)])
ax.plot(result_global.x, result_global.fun, 'g*', markersize=15,
label='Global opt')
ax.legend()
plt.tight_layout()
plt.show()
print(f"Local from x0=-2: shift = {result_local.x[0]:.2f}, error = {result_local.fun:.4f}")
print(f"Global: shift = {result_global.x[0]:.2f}, error = {result_global.fun:.4f}")
Summary¶
Key Insights:
- Local methods are fast but can get stuck in poor local minima
- Global methods explore more thoroughly but cost more function evaluations
- Differential evolution is robust and works on non-smooth functions
- Basin-hopping balances speed and global search capability
- Multi-start local optimization is simple and often effective
- Combine strategies: Use coarse grid search → local refinement → verify globally
Choose based on: - Problem complexity and number of local minima - Computational budget (function evaluation cost) - Need for accuracy vs speed - Whether you can provide bounds or initial guesses