Commit 23c1980d authored by Fredrik Lindsten's avatar Fredrik Lindsten

Update 4 files

- /presentations/DoubleDescent_Presentation_Fine_grained_bias_variance_decomposition.pdf
- /presentations/dd_classification_ensembling_bagging.ipynb
- /presentations/dd_regression_ensembling_bagging.ipynb
- /presentations/dd_fine_grained_bv_decomposition_ensembling_bagging.ipynb
parent eff97681
%% Cell type:code id: tags:
```
import torch
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import math
```
%% Cell type:markdown id: tags:
### True Model
$$x_i \in \mathbb{R}^{n_0}, \text{ with } x_{i,j} \sim \mathcal{U}(-5, 5)$$
$$y_i = x_{i, 1} + \sin(x_{i, 1}) + \varepsilon_i, \text{ where } \varepsilon_i \sim \mathcal{N}(0, 0.1^2) \text{ w.p. } 0.95 \text{ and } \mathcal{N}(0, 2^2) \text{ w.p. } 0.05$$
%% Cell type:code id: tags:
```
def gen_data(n=100, x_dim=25, unif_scale=5, eps_mix_weights=[0.95, 0.05], eps_sigmas=[0.1, 2]):
    x = 2*unif_scale*torch.rand((n, x_dim)) - unif_scale
    # Noise is a two-component Gaussian scale mixture: pick a component per sample, then draw
    eps_choice = torch.distributions.categorical.Categorical(
        probs=torch.tensor(eps_mix_weights)).sample(torch.Size([n]))
    eps = torch.tensor([eps_sigmas[c]*torch.randn(1).item() for c in eps_choice])
    y = x[:, 0] + torch.sin(x[:, 0]) + eps
    return x, y
```
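%% Cell type:markdown id: tags:
A quick shape check of `gen_data` (a minimal sketch added here for illustration; the sizes are arbitrary):
%% Cell type:code id: tags:
```
# Hypothetical sanity check: gen_data should return (n, x_dim) covariates and (n,) labels
x, y = gen_data(n=8, x_dim=25)
print(x.shape, y.shape)  # expected: torch.Size([8, 25]) torch.Size([8])
```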
%% Cell type:code id: tags:
```
n_runs = 4
fig, ax = plt.subplots(n_runs//2, 2, figsize=(15, 10))
fig.subplots_adjust(hspace=0.3)
for run in range(n_runs//2):
    # Left column: runs 1 and 2; right column: runs 3 and 4
    x, y = gen_data(2**10)
    ax[run, 0].scatter(x[:, 0].numpy(), y.numpy())
    ax[run, 0].set_title(rf'$y(x)$ - $\log_2(n) = 10$ - Run {run + 1}')
    ax[run, 0].set_xlabel('$x_1$')
    x, y = gen_data(2**10)
    ax[run, 1].scatter(x[:, 0].numpy(), y.numpy())
    ax[run, 1].set_title(rf'$y(x)$ - $\log_2(n) = 10$ - Run {run + 3}')
    ax[run, 1].set_xlabel('$x_1$')
plt.savefig('regr.png', dpi=300)
```
%% Output
[Figure: 2x2 grid of scatter plots of $y$ against $x_1$ for four independent draws of `gen_data(2**10)`]
%% Cell type:markdown id: tags:
### Inference Model (Discriminative)
$\textbf{Inputs}$:
Training data $\{x_i \in \mathbb{R}^{n_0}, y_i \in \mathbb{R}\}_{i = 1}^{m}$ and test data $\{x_{t_i} \in \mathbb{R}^{n_0}\}_{i = 1}^{m}$
$\textbf{Model}$:
Random Fourier features for a single set of training or test covariates $x \in \mathbb{R}^{n_0}$ are computed as
$$\phi_j(x) = \sqrt{\frac{2}{j}}\cos(w_j^\top x + b_j), \text{ with } j \in \{1, 2, \dots, n_1\}, \; w_j \sim \mathcal{N}(0, (2\lambda)^2\mathbb{I}_{n_0}) \text{ and } b_j \sim \mathcal{U}(0, 2\pi)$$
Let:
- $\Phi(x) \in \mathbb{R}^{n_1}$ be the random Fourier features of $x$
- $X \in \mathbb{R}^{n_0 \times m}$ be the training covariates
- $y \in \mathbb{R}^m$ be the training labels
- $X_t \in \mathbb{R}^{n_0 \times m}$ be the test covariates
Define the kernel function $k(X, X') := \frac{1}{n_1}\Phi(X)^\top\Phi(X') \in \mathbb{R}^{m \times m}$, where $\Phi$ is applied column-wise so that $\Phi(X) \in \mathbb{R}^{n_1 \times m}$.
We predict the test labels $\hat{y}_t$ as:
$$\hat{y}_t = K_x^\top K^{-1} y, \text{ with } K = k(X, X) \text{ and } K_x = k(X, X_t)$$
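In the implementation below, a small jitter $\epsilon\,\mathbb{I}_m$ (with $\epsilon = 10^{-6}$) is added to $K$ for numerical stability, so the predictor actually computed is the lightly regularized
$$\hat{y}_t = K_x^\top \left(K + \epsilon\,\mathbb{I}_m\right)^{-1} y$$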
%% Cell type:code id: tags:
```
def get_fourier_features(n1, x, b, w, device="cuda"):
    # b: (n1,) random phases and w: (n0, n1) random frequencies, sampled by the caller
    cosine_term = torch.cos(w.T @ x.T + b.unsqueeze(-1))  # Size: (n1, m)
    multiplier = torch.sqrt(2*torch.tensor([1./j for j in range(1, n1 + 1)]).unsqueeze(-1).expand((n1, x.shape[0]))).type(torch.float64).to(device)  # Size: (n1, m)
    return cosine_term*multiplier  # Size: (n1, m)
def run_ensembles(n0, m, k_P_values, data_gen_fn, lambda_=1.0, r_values=None, device="cuda", seed=123, eps=1e-6):
    if r_values is None:
        r_values = np.logspace(-2, 1, 10)
    torch.manual_seed(seed)
    # Generate test data
    X_test, y_test = data_gen_fn(m, n0)
    # Data sampling (training data)
    X, y = data_gen_fn(m, n0)
    # Set precision and push data to device
    X_test = X_test.type(torch.float64).to(device)
    y_test = y_test.type(torch.float64).to(device)
    X = X.type(torch.float64).to(device)
    y = y.type(torch.float64).to(device)
    # Begin ensembling
    ensemble_results = {}
    for k_P in k_P_values:
        print(f"Evaluating for k_P={k_P}...")
        mses = []
        for r in tqdm(r_values):
            aggregate_labels = torch.zeros(m, dtype=torch.float64).to(device)
            for _ in range(k_P):
                # Model init - each ensemble member draws its own random features
                n1 = int(m * r)
                b = 2*torch.pi*torch.rand(n1).type(torch.float64).to(device)  # Size: (n1,)
                w = 2*lambda_*torch.randn((n0, n1)).type(torch.float64).to(device)  # Size: (n0, n1)
                Phi_X = get_fourier_features(n1, X, b, w, device)
                Phi_X_test = get_fourier_features(n1, X_test, b, w, device)
                # Fit model and compute prediction y_hat
                K_X = Phi_X.T @ Phi_X / n1
                K_X += eps * torch.eye(m, dtype=torch.float64).to(device)  # jitter for numerical stability
                K_X_test = Phi_X.T @ Phi_X_test / n1
                y_hat = (y.unsqueeze(0) @ torch.linalg.solve(K_X, K_X_test)).squeeze(0)  # Size: (m,)
                # Aggregate
                aggregate_labels += y_hat
            aggregate_labels /= k_P
            mse = torch.mean((aggregate_labels - y_test)**2).item()
            mses.append(mse)
        ensemble_results[k_P] = mses
    return ensemble_results
def run_bagging(n0, m, k_D_values, data_gen_fn, lambda_=1.0, r_values=None, device="cuda", seed=123, eps=1e-6):
    if r_values is None:
        r_values = np.logspace(-2, 1, 10)
    torch.manual_seed(seed)
    # Generate test data
    X_test, y_test = data_gen_fn(m, n0)
    # Set precision and push test data to device
    X_test = X_test.type(torch.float64).to(device)
    y_test = y_test.type(torch.float64).to(device)
    # Begin bagging
    bagging_results = {}
    for k_D in k_D_values:
        print(f"Evaluating for k_D={k_D}...")
        mses = []
        for r in tqdm(r_values):
            # Model init - one shared set of random features per r
            n1 = int(m * r)
            b = 2*torch.pi*torch.rand(n1).type(torch.float64).to(device)  # Size: (n1,)
            w = 2*lambda_*torch.randn((n0, n1)).type(torch.float64).to(device)  # Size: (n0, n1)
            Phi_X_test = get_fourier_features(n1, X_test, b, w, device)
            aggregate_labels = torch.zeros(m, dtype=torch.float64).to(device)
            for _ in range(k_D):
                # Generate a fresh training set for each bag
                X, y = data_gen_fn(m, n0)
                # Set precision and push train data to device
                X = X.type(torch.float64).to(device)
                y = y.type(torch.float64).to(device)
                # Get train Fourier features
                Phi_X = get_fourier_features(n1, X, b, w, device)
                # Fit model and compute prediction y_hat
                K_X = Phi_X.T @ Phi_X / n1
                K_X += eps * torch.eye(m, dtype=torch.float64).to(device)  # jitter for numerical stability
                K_X_test = Phi_X.T @ Phi_X_test / n1
                y_hat = (y.unsqueeze(0) @ torch.linalg.solve(K_X, K_X_test)).squeeze(0)  # Size: (m,)
                # Aggregate
                aggregate_labels += y_hat
            aggregate_labels /= k_D
            mse = torch.mean((aggregate_labels - y_test)**2).item()
            mses.append(mse)
        bagging_results[k_D] = mses
    return bagging_results
def run_true_bagging(n0, m, k_D_values, data_gen_fn, lambda_=1.0, r_values=None, device="cuda", seed=123, eps=1e-6):
    if r_values is None:
        r_values = np.logspace(-2, 1, 10)
    torch.manual_seed(seed)
    # Generate test data
    X_test, y_test = data_gen_fn(m, n0)
    # Set precision and push test data to device
    X_test = X_test.type(torch.float64).to(device)
    y_test = y_test.type(torch.float64).to(device)
    # Generate train data (a single data set, resampled with replacement below)
    X, y = data_gen_fn(m, n0)
    # Set precision and push train data to device
    X = X.type(torch.float64).to(device)
    y = y.type(torch.float64).to(device)
    # Begin bagging
    bagging_results = {}
    for k_D in k_D_values:
        print(f"Evaluating for k_D={k_D}...")
        mses = []
        for r in tqdm(r_values):
            # Model init - one shared set of random features per r
            n1 = int(m * r)
            b = 2*torch.pi*torch.rand(n1).type(torch.float64).to(device)  # Size: (n1,)
            w = 2*lambda_*torch.randn((n0, n1)).type(torch.float64).to(device)  # Size: (n0, n1)
            Phi_X_test = get_fourier_features(n1, X_test, b, w, device)
            aggregate_labels = torch.zeros(m, dtype=torch.float64).to(device)
            for _ in range(k_D):
                # Draw a bootstrap sample of the training data
                sample_indices = torch.randint(low=0, high=m, size=(m,))
                X_sample = X[sample_indices, :]
                y_sample = y[sample_indices]
                Phi_X = get_fourier_features(n1, X_sample, b, w, device)
                # Fit model on the bootstrap sample and compute prediction y_hat
                K_X = Phi_X.T @ Phi_X / n1
                K_X += eps * torch.eye(m, dtype=torch.float64).to(device)  # jitter for numerical stability
                K_X_test = Phi_X.T @ Phi_X_test / n1
                y_hat = (y_sample.unsqueeze(0) @ torch.linalg.solve(K_X, K_X_test)).squeeze(0)  # Size: (m,)
                # Aggregate
                aggregate_labels += y_hat
            aggregate_labels /= k_D
            mse = torch.mean((aggregate_labels - y_test)**2).item()
            mses.append(mse)
        bagging_results[k_D] = mses
    return bagging_results
def plot_ensembles_bagging(ensemble_results, bagging_results, true_bagging_results, k_D_values, k_P_values, r_values=None, save_filename="results.png"):
    if r_values is None:
        r_values = np.logspace(-2, 1, 10)
    fig, ax = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
    for k_D, mses in bagging_results.items():
        ax[0].plot(r_values, mses, '--o')
    ax[0].legend([f"$k_D={k}$" for k in k_D_values])
    ax[0].set_xlabel("$n_1/m$")
    ax[0].set_ylabel(r"$E_{\mathrm{test}}$")
    ax[0].set_xscale("log")
    ax[0].set_yscale("log")
    ax[0].set_title("Bagging")
    ax[0].grid()
    for k_D, mses in true_bagging_results.items():
        ax[1].plot(r_values, mses, '--o')
    ax[1].legend([f"$k_D={k}$" for k in k_D_values])
    ax[1].set_xlabel("$n_1/m$")
    ax[1].set_xscale("log")
    ax[1].set_yscale("log")
    ax[1].set_title("Bootstrap Bagging")
    ax[1].grid()
    for k_P, mses in ensemble_results.items():
        ax[2].plot(r_values, mses, '--o')
    ax[2].legend([f"$k_P={k}$" for k in k_P_values])
    ax[2].set_xlabel("$n_1/m$")
    ax[2].set_xscale("log")
    ax[2].set_yscale("log")
    ax[2].set_title("Ensembles")
    ax[2].grid()
    plt.savefig(save_filename, dpi=300)
```
%% Cell type:markdown id: tags:
### Experiments
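Before the full experiment below (which targets a CUDA device and takes several minutes), a reduced-scale smoke test can verify the pipeline end to end. This is a minimal sketch, not part of the original experiments; the sizes and `r_values` are arbitrary, and `device="cpu"` avoids the CUDA requirement.
%% Cell type:code id: tags:
```
# Hypothetical smoke test: tiny problem sizes, CPU only
small_results = run_ensembles(
    n0=2**4, m=2**6, k_P_values=[1, 2], data_gen_fn=gen_data,
    r_values=np.logspace(-1, 1, 3), device="cpu",
)
print(small_results)  # {1: [...], 2: [...]} - one test MSE per value of r
```
%% Cell type:markdown id: tags: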
%% Cell type:code id: tags:
```
k_D_values = k_P_values = [1, 2, 5, 10, 25]
n0 = 2 ** 10
m = 2 ** 11
ensemble_results = run_ensembles(n0, m, k_P_values, gen_data)
bagging_results = run_bagging(n0, m, k_D_values, gen_data)
true_bagging_results = run_true_bagging(n0, m, k_D_values, gen_data)
plot_ensembles_bagging(ensemble_results, bagging_results, true_bagging_results, k_D_values, k_P_values, save_filename="experiment_regr.png")
```
%% Output
Evaluating for k_P=1...
100%|██████████| 10/10 [00:10<00:00, 1.04s/it]
Evaluating for k_P=2...
100%|██████████| 10/10 [00:13<00:00, 1.36s/it]
Evaluating for k_P=5...
100%|██████████| 10/10 [00:33<00:00, 3.36s/it]
Evaluating for k_P=10...
100%|██████████| 10/10 [01:05<00:00, 6.59s/it]
Evaluating for k_P=25...
100%|██████████| 10/10 [02:44<00:00, 16.49s/it]
Evaluating for k_D=1...
100%|██████████| 10/10 [00:06<00:00, 1.48it/s]
Evaluating for k_D=2...
100%|██████████| 10/10 [00:12<00:00, 1.23s/it]
Evaluating for k_D=5...
100%|██████████| 10/10 [00:28<00:00, 2.87s/it]
Evaluating for k_D=10...
100%|██████████| 10/10 [00:56<00:00, 5.62s/it]
Evaluating for k_D=25...
100%|██████████| 10/10 [02:17<00:00, 13.75s/it]
Evaluating for k_D=1...
100%|██████████| 10/10 [00:06<00:00, 1.58it/s]
Evaluating for k_D=2...
100%|██████████| 10/10 [00:11<00:00, 1.17s/it]
Evaluating for k_D=5...
100%|██████████| 10/10 [00:26<00:00, 2.68s/it]
Evaluating for k_D=10...
100%|██████████| 10/10 [00:52<00:00, 5.25s/it]
Evaluating for k_D=25...
100%|██████████| 10/10 [02:09<00:00, 12.91s/it]