While training a neural network for a supervised learning problem, the objective of the network is to minimize the loss function. The loss function, also known as the error, cost, or optimization function, compares the predictions with the ground truth during the forward pass. The output of this loss function is then used to optimize the weights during the backward pass, so the loss function is crucial in training the network: by choosing the right loss function, we force the network to optimize towards the desired predictions.
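As a quick illustration of what a loss function computes, the following NumPy sketch evaluates binary cross-entropy, the loss we will use below, on a handful of made-up predictions; the values are purely illustrative.

import numpy as np

# Binary cross-entropy computed by hand on illustrative values
y_true = np.array([1., 0., 1., 1.])        # ground truth
y_pred = np.array([0.9, 0.2, 0.6, 0.3])    # predictions from the forward pass
eps = 1e-7                                 # avoid log(0)
bce = -np.mean(y_true * np.log(y_pred + eps) + (1. - y_true) * np.log(1. - y_pred + eps))
print(bce)  # the quantity the backward pass tries to reduce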
We will train a network architecture with and without class weights in the loss function to account for imbalanced classes.
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
Using TensorFlow backend.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Extract all 9s and 100 examples of 4s
y_train_9 = y_train[y_train == 9]
y_train_4 = y_train[y_train == 4][:100]
X_train_9 = X_train[y_train == 9]
X_train_4 = X_train[y_train == 4][:100]
X_train = np.concatenate((X_train_9, X_train_4), axis=0)
y_train = np.concatenate((y_train_9, y_train_4), axis=0)
y_test_9 = y_test[y_test == 9]
y_test_4 = y_test[y_test == 4]
X_test_9 = X_test[y_test == 9]
X_test_4 = X_test[y_test == 4]
X_test = np.concatenate((X_test_9, X_test_4), axis=0)
y_test = np.concatenate((y_test_9, y_test_4), axis=0)
X_train = X_train.astype('float32')/255.
X_test = X_test.astype('float32')/255.
X_train = X_train.reshape(len(X_train), np.prod(X_train.shape[1:]))
X_test = X_test.reshape(len(X_test), np.prod(X_test.shape[1:]))
X_test.shape
(1991, 784)
y_train_binary = y_train == 9
y_test_binary = y_test == 9
print(np.unique(y_train_binary, return_counts=True))
(array([False, True]), array([ 100, 5949]))
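The counts confirm the imbalance: only 100 examples of the 4s (False) against 5,949 of the 9s (True). Instead of hard-coding class weights, as we do below, they can also be derived from these counts; a minimal sketch using scikit-learn's compute_class_weight (this step is not part of the original recipe):

from sklearn.utils.class_weight import compute_class_weight

# Weights inversely proportional to the class frequencies
classes = np.unique(y_train_binary)
weights = compute_class_weight('balanced', classes=classes, y=y_train_binary)
class_weight_auto = dict(zip(classes, weights))
print(class_weight_auto)  # roughly {False: 30.2, True: 0.51}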
# Fully connected network with heavy dropout and a single sigmoid output for the binary classification task
model = Sequential()
model.add(Dense(512, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.75))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.75))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.75))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
opt = Adam()
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['binary_accuracy'])
# Stop training when the validation loss has not improved for 5 epochs
callbacks = [EarlyStopping(monitor='val_loss', patience=5)]
# Baseline: both classes contribute equally to the loss
class_weight_equal = {False: 1., True: 1.}
# Weight errors on the underrepresented 4s (False) 100 times more heavily than errors on the 9s (True)
class_weight_imbalanced = {False: 100., True: 1.}
n_epochs = 1000
batch_size = 512
validation_split = 0.01
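Passing a class_weight dictionary to fit, as we do next, scales each example's contribution to the loss by the weight of its class. The same effect can be built directly into the loss function; the sketch below, using the Keras backend, shows one way to do this (weighted_binary_crossentropy is our own helper, not a Keras API):

from keras import backend as K

def weighted_binary_crossentropy(zero_weight, one_weight):
    # Scale the per-sample cross-entropy by a weight that depends on the true class
    def loss(y_true, y_pred):
        bce = K.binary_crossentropy(y_true, y_pred)
        weights = y_true * one_weight + (1. - y_true) * zero_weight
        return K.mean(weights * bce, axis=-1)
    return loss

# This could replace the string loss in compile, for example:
# model.compile(loss=weighted_binary_crossentropy(100., 1.), optimizer=opt, metrics=['binary_accuracy'])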
# Train with equal class weights (the unweighted baseline)
model.fit(X_train, y_train_binary, epochs=n_epochs,
          batch_size=batch_size, shuffle=True, validation_split=validation_split, class_weight=class_weight_equal,
          callbacks=callbacks, verbose=0
         )
preds_equal = model.predict(X_test)
confusion_matrix(y_test_binary, np.round(preds_equal), labels=[True, False])
array([[1009, 0],
[ 982, 0]])
# Re-compile with a fresh optimizer (the weights trained above are kept) and train again, this time with the imbalanced class weights
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
model.fit(X_train, y_train_binary, epochs=n_epochs,
          batch_size=batch_size, shuffle=True, validation_split=validation_split, class_weight=class_weight_imbalanced,
          callbacks=callbacks, verbose=0
         )
preds_imbalanced = model.predict(X_test)
confusion_matrix(y_test_binary, np.round(preds_imbalanced), labels=[True, False])
array([[1007, 2],
[ 420, 562]])
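With equal class weights, the network collapses to always predicting the majority class: every one of the 982 fours in the test set is classified as a 9. With the imbalanced class weights, more than half of the fours are recovered at the cost of a couple of misclassified nines. To quantify this beyond the confusion matrices, per-class precision and recall can be compared; a minimal sketch, assuming preds_equal and preds_imbalanced are still in memory:

from sklearn.metrics import classification_report

for name, preds in [('equal class weights', preds_equal), ('imbalanced class weights', preds_imbalanced)]:
    print(name)
    print(classification_report(y_test_binary, np.round(preds).astype(bool).ravel()))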