# deep learning imports
import torch
from torch import nn
import torch.nn.functional as fun


# Traffic Sign Classifier
class TrafficSignClassifier(nn.Module):
    """CNN classifier preceded by a spatial transformer network (STN).

    Every input batch is resized to 32x32, warped by an affine transform
    predicted from the image itself, and then classified by three
    convolutional stages followed by two fully connected layers.

    Args:
        number_classes: Size of the output layer (number of classes).
    """

    def __init__(self, number_classes: int = 43) -> None:
        super().__init__()

        # CNN layers. With a 32x32 input the spatial size evolves as
        # 32 -> 28 -> 14 (conv1 + pool), 14 -> 12 -> 6 (conv2 + pool),
        # 6 -> 4 -> 2 (conv3 + pool), hence the 250 * 2 * 2 features of fc1.
        self.conv1 = nn.Conv2d(3, 100, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(100)
        self.conv2 = nn.Conv2d(100, 150, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(150)
        self.conv3 = nn.Conv2d(150, 250, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(250)
        self.conv_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(250 * 2 * 2, 350)
        self.fc2 = nn.Linear(350, number_classes)

        # Localization network of the STN. With a 32x32 input the spatial
        # size evolves as 32 -> 26 -> 13 (conv + pool), 13 -> 9 -> 4
        # (conv + pool), hence the 10 * 4 * 4 features fed to fc_loc.
        self.localization = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(inplace=True),
        )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 4 * 4, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 3 * 2),
        )

        # Initialize the last regressor layer so that it outputs the identity
        # transformation: zero weights, bias equal to the flattened 2x3
        # identity matrix. Done under no_grad rather than through ``.data``
        # so the in-place writes stay visible to autograd's version tracking.
        final_layer = self.fc_loc[2]
        with torch.no_grad():
            final_layer.weight.zero_()
            final_layer.bias.copy_(
                torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float)
            )

    # Spatial transformer network forward function
    def stn(self, x: torch.Tensor) -> torch.Tensor:
        """Warp ``x`` with the affine transform predicted from ``x`` itself.

        Args:
            x: Batch of 3-channel images. The regressor expects the
                localization features to flatten to 10 * 4 * 4 values per
                sample, which is what a 32x32 input produces.

        Returns:
            A tensor with the same size as ``x``, resampled on the
            predicted affine grid.
        """
        features = self.localization(x)
        # Flatten per sample (batch dimension kept fixed) so an unexpected
        # input size fails in the Linear layer instead of silently being
        # folded into the batch dimension.
        features = torch.flatten(features, start_dim=1)
        theta = self.fc_loc(features).view(-1, 2, 3)

        grid = fun.affine_grid(theta, x.size(), align_corners=True)
        return fun.grid_sample(x, grid, align_corners=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Classify a batch of images.

        Args:
            x: Batch of 3-channel images of any spatial size; they are
                resized to 32x32 before any other processing.

        Returns:
            Log-probabilities with one row per sample and
            ``number_classes`` columns.
        """
        # transform the input
        # align_corners=False is the value the default resolves to for
        # bilinear mode; it is spelled out to make the choice explicit.
        x = fun.interpolate(
            x, size=(32, 32), mode="bilinear", align_corners=False
        )
        x = self.stn(x)

        # Perform forward pass: conv -> leaky ReLU -> 2x2 max-pool ->
        # batch norm -> channel dropout, three times.
        x = self.bn1(fun.max_pool2d(fun.leaky_relu(self.conv1(x)), 2))
        x = self.conv_drop(x)
        x = self.bn2(fun.max_pool2d(fun.leaky_relu(self.conv2(x)), 2))
        x = self.conv_drop(x)
        x = self.bn3(fun.max_pool2d(fun.leaky_relu(self.conv3(x)), 2))
        x = self.conv_drop(x)

        # Flatten the 250 x 2 x 2 feature maps per sample for the classifier.
        x = torch.flatten(x, start_dim=1)
        x = fun.relu(self.fc1(x))
        x = fun.dropout(x, training=self.training)
        x = self.fc2(x)
        return fun.log_softmax(x, dim=1)