Supervised NSM¶
This notebook demonstrates supervised non-negative similarity matching (NSM) on the MNIST digits, using a convolutional encoder.
In [1]:
Copied!
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
import torch.utils.data
import torch.nn.functional as F
from torch import nn
from torchvision import datasets, transforms
from sklearn.linear_model import SGDClassifier
from pynsm import SupervisedSimilarityMatching, extract_embeddings
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
import torch.utils.data
import torch.nn.functional as F
from torch import nn
from torchvision import datasets, transforms
from sklearn.linear_model import SGDClassifier
from pynsm import SupervisedSimilarityMatching, extract_embeddings
Load dataset and create data loaders¶
We use standard torchvision calls to load the MNIST dataset.
In [2]:
Copied!
# Each image is converted to a tensor when it is read from the dataset.
transform = transforms.ToTensor()

# Both splits share the same storage folder, download flag and transform;
# only the `train` switch differs.
mnist_kwargs = dict(root="data", download=True, transform=transform)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

# Human-readable class names: the digits "0" through "9".
classes = list(map(str, range(10)))
# Each image is converted to a tensor when it is read from the dataset.
transform = transforms.ToTensor()

# Both splits share the same storage folder, download flag and transform;
# only the `train` switch differs.
mnist_kwargs = dict(root="data", download=True, transform=transform)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

# Human-readable class names: the digits "0" through "9".
classes = list(map(str, range(10)))
Create training and test data loaders.
In [3]:
Copied!
batch_size = 100

# Only the training split is shuffled; the test split is read in stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data, shuffle=True, batch_size=batch_size
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data, shuffle=False, batch_size=batch_size
)
batch_size = 100

# Only the training split is shuffled; the test split is read in stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data, shuffle=True, batch_size=batch_size
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data, shuffle=False, batch_size=batch_size
)
Show sample images to test the loading process.
In [4]:
Copied!
# Seed torch's RNG before drawing the first batch from the shuffled loader.
torch.manual_seed(42)
X_batch, y_batch = next(iter(train_loader))

# The batch-wide intensity range is used as a common gray scale for all panels.
X_min = X_batch.min()
X_max = X_batch.max()
print(f"batch min={X_min:.3g}, mean={X_batch.mean():.3g}, max={X_max:.3g}")

# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(8, 8))
# `ax.flat` walks the grid row by row, so panel k shows batch element k.
for batch_idx, crt_ax in enumerate(ax.flat):
    crt_X = X_batch[batch_idx]
    crt_ax.imshow(crt_X.numpy().squeeze(), vmin=X_min, vmax=X_max, cmap="gray")
    crt_ax.set_title(classes[y_batch[batch_idx].item()])
# Seed torch's RNG before drawing the first batch from the shuffled loader.
torch.manual_seed(42)
X_batch, y_batch = next(iter(train_loader))

# The batch-wide intensity range is used as a common gray scale for all panels.
X_min = X_batch.min()
X_max = X_batch.max()
print(f"batch min={X_min:.3g}, mean={X_batch.mean():.3g}, max={X_max:.3g}")

# create a grid of 3x3 images
fig, ax = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(8, 8))
# `ax.flat` walks the grid row by row, so panel k shows batch element k.
for batch_idx, crt_ax in enumerate(ax.flat):
    crt_X = X_batch[batch_idx]
    crt_ax.imshow(crt_X.numpy().squeeze(), vmin=X_min, vmax=X_max, cmap="gray")
    crt_ax.set_title(classes[y_batch[batch_idx].item()])
batch min=0, mean=0.136, max=1
Train supervised convolutional NSM¶
In [5]:
Copied!
# Seed torch's RNG before the model is built and the shuffled loader is iterated.
torch.manual_seed(42)

n_epochs = 3
num_labels = len(classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on {device}.")

# the encoder layer is convolutional
encoder = nn.Conv2d(
    in_channels=1, out_channels=50, kernel_size=6, stride=1, padding=0, bias=False
)
# NOTE(review): the third positional argument (50) presumably is the output
# size and is meant to match the encoder's 50 channels — confirm against pynsm.
model = SupervisedSimilarityMatching(
    encoder, num_labels, 50, label_bias=False, iteration_projection=torch.nn.ReLU()
).to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

t0 = time.time()
running_loss = []  # one entry per batch, accumulated across all epochs
for epoch in range(n_epochs):
    pbar = tqdm(train_loader, desc=f"epoch {epoch + 1} / {n_epochs}")
    sample = 0  # number of samples processed before the current batch
    for images, labels in pbar:
        images = images.to(device)
        # the model receives one-hot float labels instead of integer class indices
        labels = F.one_hot(labels, num_classes=num_labels).to(device).float()

        # forward pass, then the model's own loss on inputs, labels and outputs
        outputs = model(images, labels)
        loss = model.loss(images, labels, outputs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss.append(loss.item())
        pbar.set_postfix({"sample": sample, "loss": running_loss[-1]}, refresh=False)
        sample += len(images)
t1 = time.time()
print(f"Training took {t1 - t0:.2f} seconds.")
# Seed torch's RNG before the model is built and the shuffled loader is iterated.
torch.manual_seed(42)

n_epochs = 3
num_labels = len(classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on {device}.")

# the encoder layer is convolutional
encoder = nn.Conv2d(
    in_channels=1, out_channels=50, kernel_size=6, stride=1, padding=0, bias=False
)
# NOTE(review): the third positional argument (50) presumably is the output
# size and is meant to match the encoder's 50 channels — confirm against pynsm.
model = SupervisedSimilarityMatching(
    encoder, num_labels, 50, label_bias=False, iteration_projection=torch.nn.ReLU()
).to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

t0 = time.time()
running_loss = []  # one entry per batch, accumulated across all epochs
for epoch in range(n_epochs):
    pbar = tqdm(train_loader, desc=f"epoch {epoch + 1} / {n_epochs}")
    sample = 0  # number of samples processed before the current batch
    for images, labels in pbar:
        images = images.to(device)
        # the model receives one-hot float labels instead of integer class indices
        labels = F.one_hot(labels, num_classes=num_labels).to(device).float()

        # forward pass, then the model's own loss on inputs, labels and outputs
        outputs = model(images, labels)
        loss = model.loss(images, labels, outputs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss.append(loss.item())
        pbar.set_postfix({"sample": sample, "loss": running_loss[-1]}, refresh=False)
        sample += len(images)
t1 = time.time()
print(f"Training took {t1 - t0:.2f} seconds.")
Running on cpu.
epoch 1 / 3: 100%|██████████| 600/600 [01:47<00:00, 5.57it/s, sample=59900, loss=-.184] epoch 2 / 3: 100%|██████████| 600/600 [01:46<00:00, 5.66it/s, sample=59900, loss=-.216] epoch 3 / 3: 100%|██████████| 600/600 [01:46<00:00, 5.66it/s, sample=59900, loss=-.233]
Training took 319.80 seconds.
Show learning curve.
In [6]:
Copied!
# Plot the loss recorded for every training batch, in the order it was logged.
fig, ax = plt.subplots()
ax.plot(running_loss, linewidth=1.0)
ax.set(xlabel="batch", ylabel="loss")
sns.despine(ax=ax, offset=10)
# Plot the loss recorded for every training batch, in the order it was logged.
fig, ax = plt.subplots()
ax.plot(running_loss, linewidth=1.0)
ax.set(xlabel="batch", ylabel="loss")
sns.despine(ax=ax, offset=10)
Showcase some of the convolutional filters.
In [7]:
Copied!
# Pull the weights of the first encoder out of the model as a NumPy array.
filters = model.encoders[0].weight.detach().cpu().numpy()  # type: ignore

# A 7x7 grid shows filters 0..48 (input channel 0 of each), one per panel.
fig, ax = plt.subplots(7, 7, sharex=True, sharey=True, figsize=(8, 8))
for filter_idx, crt_ax in enumerate(ax.flat):
    crt_filter = filters[filter_idx, 0]
    # symmetric color limits keep zero at the center of the diverging colormap
    crt_max = np.abs(crt_filter).max()
    crt_ax.imshow(crt_filter, vmin=-crt_max, vmax=crt_max, cmap="RdBu")
# Pull the weights of the first encoder out of the model as a NumPy array.
filters = model.encoders[0].weight.detach().cpu().numpy()  # type: ignore

# A 7x7 grid shows filters 0..48 (input channel 0 of each), one per panel.
fig, ax = plt.subplots(7, 7, sharex=True, sharey=True, figsize=(8, 8))
for filter_idx, crt_ax in enumerate(ax.flat):
    crt_filter = filters[filter_idx, 0]
    # symmetric color limits keep zero at the center of the diverging colormap
    crt_max = np.abs(crt_filter).max()
    crt_ax.imshow(crt_filter, vmin=-crt_max, vmax=crt_max, cmap="RdBu")
Test how well the pre-trained network can help with classification¶
We add a max-pooling operation to the output of our convolutional NSM module, then check how well a linear SVM trained on this pooled output classifies the digits.
In [8]:
Copied!
# Chain the trained model with a 2x2 max-pooling stage for inference.
pooling = nn.MaxPool2d(kernel_size=2, stride=2)
inference_model = nn.Sequential(model, pooling)

# Embed the training split and report the wall-clock time it took.
t0 = time.time()
train_embed = extract_embeddings(inference_model, train_loader, progress=tqdm)
t1 = time.time()
print(f"Embedding training set took {t1 - t0:.2f} seconds.")

# Same for the test split.
t0 = time.time()
test_embed = extract_embeddings(inference_model, test_loader, progress=tqdm)
t1 = time.time()
print(f"Embedding test set took {t1 - t0:.2f} seconds.")
# Chain the trained model with a 2x2 max-pooling stage for inference.
pooling = nn.MaxPool2d(kernel_size=2, stride=2)
inference_model = nn.Sequential(model, pooling)

# Embed the training split and report the wall-clock time it took.
t0 = time.time()
train_embed = extract_embeddings(inference_model, train_loader, progress=tqdm)
t1 = time.time()
print(f"Embedding training set took {t1 - t0:.2f} seconds.")

# Same for the test split.
t0 = time.time()
test_embed = extract_embeddings(inference_model, test_loader, progress=tqdm)
t1 = time.time()
print(f"Embedding test set took {t1 - t0:.2f} seconds.")
100%|██████████| 600/600 [01:35<00:00, 6.31it/s]
Embedding training set took 95.24 seconds.
100%|██████████| 100/100 [00:15<00:00, 6.43it/s]
Embedding test set took 15.61 seconds.
We use scikit-learn's `SGDClassifier`, whose default hinge loss yields a linear SVM, to fit a classifier to the embedded images.
In [9]:
Copied!
n_train = len(train_embed.output)
n_test = len(test_embed.output)

# Flatten every embedding into one feature vector per image. These get their
# own names so the MNIST dataset bound to `train_data` earlier in the notebook
# is not overwritten (the data-loader cell can then still be re-run).
train_features = train_embed.output.reshape(n_train, -1)
test_features = test_embed.output.reshape(n_test, -1)

classifier = SGDClassifier(random_state=123)
classifier.fit(train_features, train_embed.label)

# `score` returns the mean accuracy, so these values are accuracies, not errors.
train_accuracy = classifier.score(train_features, train_embed.label)
print(f"Accuracy on {n_train} training images: {100 * train_accuracy:.1f}%.")

test_accuracy = classifier.score(test_features, test_embed.label)
print(f"Accuracy on {n_test} test images: {100 * test_accuracy:.1f}%.")
n_train = len(train_embed.output)
n_test = len(test_embed.output)

# Flatten every embedding into one feature vector per image. These get their
# own names so the MNIST dataset bound to `train_data` earlier in the notebook
# is not overwritten (the data-loader cell can then still be re-run).
train_features = train_embed.output.reshape(n_train, -1)
test_features = test_embed.output.reshape(n_test, -1)

classifier = SGDClassifier(random_state=123)
classifier.fit(train_features, train_embed.label)

# `score` returns the mean accuracy, so these values are accuracies, not errors.
train_accuracy = classifier.score(train_features, train_embed.label)
print(f"Accuracy on {n_train} training images: {100 * train_accuracy:.1f}%.")

test_accuracy = classifier.score(test_features, test_embed.label)
print(f"Accuracy on {n_test} test images: {100 * test_accuracy:.1f}%.")
Accuracy on 60000 training images: 99.2%. Accuracy on 10000 test images: 98.4%.
In [ ]:
Copied!