Rosenbrock’s banana function

import numpy as np
import pyensmallen

\[ f(x, y) = (a - x)^2 + b(y - x^2)^2 \]

The global optimum \(f = 0\) is reached at \((x, y) = (a, a^2)\).

def rosenbrock(x, a=2, b=100.0):
    """Coupled n-dimensional Rosenbrock ("banana") function.

    f(x) = sum_i [ (a - x_i)^2 + b * (x_{i+1} - x_i)^2 ]  summed over
    i = 0 .. n-2.  The header formula documents the `b` coefficient, so it
    is exposed as a parameter here; the default b=100.0 preserves the
    original hard-coded behavior.  For the 2-D case the minimum f = 0 is
    at (a, a^2).

    Parameters
    ----------
    x : np.ndarray
        Point at which to evaluate, length >= 2.
    a : float, optional
        Location parameter of the quadratic term (default 2).
    b : float, optional
        Weight of the "valley" coupling term (default 100.0).

    Returns
    -------
    float
        Function value at ``x``.
    """
    return np.sum((a - x[:-1]) ** 2 + b * (x[1:] - x[:-1] ** 2) ** 2)


def rosenbrock_gradient(x, a=2, b=100.0):
    """Analytic gradient of the coupled n-dimensional Rosenbrock function.

    Matches ``rosenbrock(x, a, b)``: element j receives
    -2(a - x_j) - 4b x_j (x_{j+1} - x_j^2) from term j and
    +2b (x_j - x_{j-1}^2) from term j-1.  The ``b`` coefficient is
    exposed as a parameter for consistency with ``rosenbrock``; the
    default b=100.0 reproduces the original hard-coded constants
    (400 = 4b, 200 = 2b).

    Parameters
    ----------
    x : np.ndarray
        Point at which to evaluate, length >= 2 (should be a float
        array; an integer ``x`` would yield an integer gradient buffer).
    a : float, optional
        Location parameter (default 2).
    b : float, optional
        Valley-coupling weight (default 100.0).

    Returns
    -------
    np.ndarray
        Gradient vector, same shape as ``x``.
    """
    grad = np.zeros_like(x)

    # First element: only appears in term i=0.
    grad[0] = -2 * (a - x[0]) - 4 * b * x[0] * (x[1] - x[0] ** 2)

    # Middle elements: contributions from their own term and the previous one.
    grad[1:-1] = (
        -2 * (a - x[1:-1])
        + 2 * b * (x[1:-1] - x[:-2] ** 2)
        - 4 * b * x[1:-1] * (x[2:] - x[1:-1] ** 2)
    )

    # Last element: only appears in term i=n-2, via the coupling factor.
    grad[-1] = 2 * b * (x[-1] - x[-2] ** 2)

    return grad


def objective_function(x, grad):
    """Combined value/gradient callback in the shape pyensmallen expects.

    Fills ``grad`` in place with the Rosenbrock gradient at ``x`` and
    returns the Rosenbrock function value at ``x``.
    """
    np.copyto(grad, rosenbrock_gradient(x))
    return rosenbrock(x)

L-BFGS

# Starting point for the search
initial_x = np.array([-1.2, 1.0])

# Set up the L-BFGS optimizer: 10 correction pairs, up to 1000 iterations
lbfgs = pyensmallen.L_BFGS(numBasis=10, maxIterations=1000)

# Run the optimization from the starting point
result = lbfgs.optimize(objective_function, initial_x)

print("Optimized parameters:", result)
print("Objective value:", rosenbrock(result))
Optimized parameters: [2. 4.]
Objective value: 2.7026507009290854e-19

Converges to the optimum \((2, 4)\) to machine precision.

Adam

# Random 2-D starting point, reused by all the adaptive optimizers below
initial_w = np.random.randn(2)

# Adam with a generous iteration budget
adam_opt = pyensmallen.Adam(maxIterations=100_000)
result = adam_opt.optimize(objective_function, initial_w)
result
array([1.92537744, 3.70689221])
# Same run with AdaMax, from the same starting point
adamax_opt = pyensmallen.AdaMax(maxIterations=100_000)
result = adamax_opt.optimize(objective_function, initial_w)
result
array([1.95266621, 3.81279654])
# Same run with OptimisticAdam
oadam_opt = pyensmallen.OptimisticAdam(maxIterations=100_000)
result = oadam_opt.optimize(objective_function, initial_w)
result
array([1.79112035, 3.20632978])
# Same run with AMSGrad
amsgrad_opt = pyensmallen.AMSGrad(maxIterations=100_000)
result = amsgrad_opt.optimize(objective_function, initial_w)
result
array([1.10277253, 1.2126956 ])
# Same run with Nadam
nadam_opt = pyensmallen.Nadam(maxIterations=100_000)
result = nadam_opt.optimize(objective_function, initial_w)
result
array([1.73307663, 3.00434859])