import numpy as np
import pyensmallen as pe

# Regression via ERM in pyensmallen
# --- Linear Regression ----------------------------------------------------
# Simulate y = X @ beta with known coefficients so the optimizer's recovered
# parameters can be compared against the truth.
np.random.seed(42)
n, k = 1_000_000, 20

# Linear Regression Data
X_linear = np.random.randn(n, k)
print(true_params_linear := np.random.rand(k))
y_linear = X_linear @ true_params_linear
# Printed true_params_linear (notebook output):
# [0.51639859 0.94598022 0.23380001 0.55162275 0.97811966 0.24254699
#  0.64702478 0.70271041 0.26476461 0.77362184 0.7817448  0.36874977
#  0.72697004 0.06518613 0.72705723 0.38967364 0.03826155 0.39386005
#  0.0438693  0.72142769]
# %%time  -- IPython cell magic from the original notebook; kept as a comment
# because it is not valid Python in a plain .py file.
linear_start = np.random.rand(k)  # random initial guess for the optimizer
optimizer = pe.L_BFGS()
result_linear_ens = optimizer.optimize(
    # ensmallen-style objective: fills `gradient` in place, returns the loss
    lambda params, gradient: pe.losses.linear_obj(params, gradient, X_linear, y_linear),
    linear_start,
)
result_linear_ens
# Notebook timing output:
#   CPU times: user 5.76 s, sys: 188 ms, total: 5.95 s
#   Wall time: 378 ms
# Recovered coefficients (match true_params_linear to print precision):
# array([0.51639859, 0.94598022, 0.23380001, 0.55162275, 0.97811966,
#        0.24254699, 0.64702478, 0.70271041, 0.26476461, 0.77362184,
#        0.7817448 , 0.36874977, 0.72697004, 0.06518613, 0.72705723,
#        0.38967364, 0.03826155, 0.39386005, 0.0438693 , 0.72142769])
# --- Logistic Regression --------------------------------------------------
from scipy.special import expit

# Logistic Regression Data
n, k = 10_000, 20
X_logistic = np.random.randn(n, k)
print(true_params_logistic := np.random.rand(k))
p = expit(X_logistic @ true_params_logistic)  # P(y = 1 | x)
y_logistic = np.random.binomial(1, p)
# Printed true_params_logistic (notebook output):
# [0.89732722 0.75831337 0.56717163 0.34694649 0.80717784 0.58244036
#  0.87299492 0.41850594 0.1207063  0.98533514 0.94064507 0.7165698
#  0.34148517 0.21317874 0.24622957 0.77088703 0.99525454 0.92697675
#  0.85896413 0.96032642]
# pyensmallen
# %%time  -- IPython cell magic from the original notebook; not valid in a .py file.
logistic_start = np.random.rand(k)
X_logistic2 = np.ascontiguousarray(
    X_logistic
)  # Ensure C-contiguous array for better performance
y_logistic2 = y_logistic.ravel()
optimizer = pe.L_BFGS()
result_logistic_ens = optimizer.optimize(
    lambda params, gradient: pe.losses.logistic_obj(
        params, gradient, X_logistic2, y_logistic2
    ),
    logistic_start,
)
result_logistic_ens
# Notebook timing output:
#   CPU times: user 139 ms, sys: 1.94 ms, total: 141 ms
#   Wall time: 8.74 ms
# Estimated coefficients (close to, not equal to, true_params_logistic —
# the MLE on a finite sample carries sampling noise):
# array([0.91787916, 0.78073762, 0.60735961, 0.36399418, 0.72193698,
#        0.59354801, 0.85939579, 0.36224349, 0.13497476, 0.9933936 ,
#        0.94503475, 0.6627938 , 0.35080571, 0.24550555, 0.23167949,
#        0.781631  , 0.95323063, 0.92981989, 0.91296721, 0.99777022])
# --- Poisson Regression ---------------------------------------------------
n, k = 100_000, 10

# Poisson Regression Data
X_poisson = np.random.randn(n, k)
print(true_params_poisson := np.random.rand(k))
lambda_ = np.exp(X_poisson @ true_params_poisson)  # Poisson rate, log link
y_poisson = np.random.poisson(lambda_)
# Printed true_params_poisson (notebook output):
# [0.46622737 0.27893256 0.1526658  0.04174791 0.46506248 0.03016092
#  0.94782085 0.33601329 0.11498335 0.72554644]
# %%time  -- IPython cell magic from the original notebook; not valid in a .py file.
poisson_start = np.random.rand(k)
optimizer = pe.L_BFGS()
result_poisson_ens = optimizer.optimize(
    # NOTE(review): the other two fits call pe.losses.linear_obj /
    # pe.losses.logistic_obj, but this one calls pe.poisson_obj at the top
    # level. The notebook output shows it ran successfully, so the alias
    # evidently exists — but confirm against the pyensmallen API whether
    # pe.losses.poisson_obj is the canonical spelling for consistency.
    lambda params, gradient: pe.poisson_obj(params, gradient, X_poisson, y_poisson),
    poisson_start,
)
result_poisson_ens
# Notebook timing output:
#   CPU times: user 482 ms, sys: 968 μs, total: 483 ms
#   Wall time: 30.4 ms
# Estimated coefficients (close to true_params_poisson up to sampling noise):
# array([0.46785229, 0.27548319, 0.15101541, 0.03977385, 0.46631077,
#        0.03272198, 0.94846943, 0.33680279, 0.1201882 , 0.72804329])