From 2db8e263b46a42c68c7f1eb11ab0726f9dcea630 Mon Sep 17 00:00:00 2001 From: Piero Paialunga Date: Fri, 30 May 2025 17:16:23 -0400 Subject: [PATCH] Adding the GPR Notebook for Hyperparameter tuning --- GPR_Optimization.py | 181 ++++++++++++++++++++++++++++++++ public/GPR_Optimization.html | 82 +++++++++++++++ public/index.html | 16 +-- public/llm/diffbot_llm.html | 82 --------------- public/llm/lchain_deepseek.html | 82 --------------- public/llm/lchain_ollama.html | 82 --------------- public/temp.html | 82 +++++++++++++++ public/temp/temp.html | 82 +++++++++++++++ 8 files changed, 431 insertions(+), 258 deletions(-) create mode 100644 GPR_Optimization.py create mode 100644 public/GPR_Optimization.html delete mode 100644 public/llm/diffbot_llm.html delete mode 100644 public/llm/lchain_deepseek.html delete mode 100644 public/llm/lchain_ollama.html create mode 100644 public/temp.html create mode 100644 public/temp/temp.html diff --git a/GPR_Optimization.py b/GPR_Optimization.py new file mode 100644 index 00000000..7d6f814e --- /dev/null +++ b/GPR_Optimization.py @@ -0,0 +1,181 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "marimo", +# "matplotlib==3.10.3", +# "numpy==2.2.6", +# "scikit-learn==1.6.1", +# "scipy==1.15.3", +# ] +# /// + +import marimo + +__generated_with = "0.13.7" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import numpy as np + import matplotlib.pyplot as plt + from sklearn.gaussian_process import GaussianProcessRegressor + from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel as C + + def black_box_function(x): + return - (np.sin(3*x) + 0.5 * x) + return ( + C, + GaussianProcessRegressor, + Matern, + WhiteKernel, + black_box_function, + np, + plt, + ) + + +@app.cell +def _(black_box_function, np, plt): + X = np.linspace(0, 5.5, 1000).reshape(-1, 1) + y = black_box_function(X) + plt.plot(X, y) + plt.title("Black-box function") + plt.xlabel("x") + plt.ylabel("f(x)") + plt.show() + return X, y + + +@app.cell +def _(black_box_function, np): + X_grid = np.linspace(0, 2, 100).reshape(-1, 1) + y_grid = black_box_function(X_grid) + x_best = X_grid[np.argmax(y_grid)] + return + + +@app.cell +def _(black_box_function, np): + # Initial sample points (simulate prior evaluations) + X_sample = np.array([[1.0], [3.0], [5.5]]) + y_sample = black_box_function(X_sample) + return X_sample, y_sample + + +@app.cell +def _(C, GaussianProcessRegressor, Matern, WhiteKernel, X_sample, y_sample): + # Define the kernel + kernel = C(1.0) * Matern(length_scale=1.0, nu=2.5) + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e1)) + + # Create and fit the Gaussian Process model + gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) + gpr.fit(X_sample, y_sample) + return (gpr,) + + +@app.cell +def _(X, X_sample, gpr, plt, y, y_sample): + # Predict across the domain + mu, std = gpr.predict(X, return_std=True) + + # Plot the result + plt.figure(figsize=(10, 5)) + plt.plot(X, y, 'k--', label="True function") + plt.plot(X, mu, 'b-', label="GPR mean") + plt.fill_between(X.ravel(), mu - std, mu + std, alpha=0.3, label="Uncertainty") + plt.scatter(X_sample, y_sample, c='red', label="Samples") + plt.legend() + plt.title("Gaussian Process Fit") + plt.xlabel("x") + plt.ylabel("f(x)") + plt.show() + return + + +@app.cell +def _(np): + from scipy.stats import norm + + def expected_improvement(X, X_sample, y_sample, model, xi=0.01): + mu, std = model.predict(X, return_std=True) + mu_sample_opt = np.min(y_sample) + + with np.errstate(divide='warn'): + imp = mu_sample_opt - mu - xi # because we are minimizing + Z = imp / std + ei = imp * norm.cdf(Z) + std * norm.pdf(Z) + ei[std == 0.0] = 0.0 + + return ei + + return (expected_improvement,) + + +@app.cell +def _(X, X_sample, expected_improvement, gpr, np, plt, y_sample): + ei = expected_improvement(X, X_sample, y_sample, gpr) + + plt.figure(figsize=(10, 4)) + plt.plot(X, ei, label="Expected Improvement") + plt.axvline(X[np.argmax(ei)], color='r', linestyle='--', label="Next sample point") + plt.title("Acquisition Function (Expected Improvement)") + plt.xlabel("x") + plt.ylabel("EI(x)") + plt.legend() + plt.show() + + return + + +@app.cell +def _(X, black_box_function, expected_improvement, gpr, np): + def bayesian_optimization(n_iter=10): + # Initial data + X_sample = np.array([[1.0], [2.5], [4.0]]) + y_sample = black_box_function(X_sample) + + for i in range(n_iter): + gpr.fit(X_sample, y_sample) + ei = expected_improvement(X, X_sample, y_sample, gpr) + x_next = X[np.argmax(ei)].reshape(-1, 1) + + # Evaluate the function at the new point + y_next = black_box_function(x_next) + + # Add the new sample to our dataset + X_sample = np.vstack((X_sample, x_next)) + y_sample = np.append(y_sample, y_next) + return X_sample, y_sample + + return (bayesian_optimization,) + + +@app.cell +def _(bayesian_optimization): + X_opt, y_opt = bayesian_optimization(n_iter=10) + + return X_opt, y_opt + + +@app.cell +def _(X, X_opt, black_box_function, plt, y_opt): + # Plot final sampled points + plt.plot(X, black_box_function(X), 'k--', label="True function") + plt.scatter(X_opt, y_opt, c='red', label="Sampled Points") + plt.title("Bayesian Optimization with Gaussian Process") + plt.xlabel("x") + plt.ylabel("f(x)") + plt.legend() + plt.show() + + return + + +@app.cell +def _(): + return + + +if __name__ == "__main__": + app.run() diff --git a/public/GPR_Optimization.html b/public/GPR_Optimization.html new file mode 100644 index 00000000..6bb3dc95 --- /dev/null +++ b/public/GPR_Optimization.html @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GPR Optimization + + + + + + +
+ + + + + + diff --git a/public/index.html b/public/index.html index b5373537..6c8c1cd4 100644 --- a/public/index.html +++ b/public/index.html @@ -85,22 +85,14 @@

polars vs pandas

pyspark parametrize

View the notebook -
  • -

    diffbot llm

    - View the notebook -
  • -
  • -

    lchain deepseek

    - View the notebook -
  • -
  • -

    lchain ollama

    - View the notebook -
  • pydantic ai examples

    View the notebook
  • +
  • +

    temp

    + View the notebook +
  • \ No newline at end of file diff --git a/public/llm/diffbot_llm.html b/public/llm/diffbot_llm.html deleted file mode 100644 index b073304c..00000000 --- a/public/llm/diffbot_llm.html +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diffbot llm - - - - - - -
    - - - - - - diff --git a/public/llm/lchain_deepseek.html b/public/llm/lchain_deepseek.html deleted file mode 100644 index dc95501e..00000000 --- a/public/llm/lchain_deepseek.html +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lchain deepseek - - - - - - -
    - - - - - - diff --git a/public/llm/lchain_ollama.html b/public/llm/lchain_ollama.html deleted file mode 100644 index a985a626..00000000 --- a/public/llm/lchain_ollama.html +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lchain ollama - - - - - - -
    - - - - - - diff --git a/public/temp.html b/public/temp.html new file mode 100644 index 00000000..32f06346 --- /dev/null +++ b/public/temp.html @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + temp + + + + + + +
    + + + + + + diff --git a/public/temp/temp.html b/public/temp/temp.html new file mode 100644 index 00000000..82e61e0b --- /dev/null +++ b/public/temp/temp.html @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + temp + + + + + + +
    + + + + + +