When training a neural network for a supervised learning problem, the objective of the network is to minimize the loss function. The loss function (also known as the error, cost function, or optimization function) compares the predictions with the ground truth during the forward pass. The output of the loss function is then used to update the weights during
the backward pass, so the loss function is crucial in training the network. By choosing the correct loss function, we force the network to optimize towards the desired predictions.
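
For the binary classification task in this recipe we will use binary cross-entropy (see the `model.compile` call below). As a quick illustration that is not part of the original code, the snippet below computes the same quantity with NumPy: for a ground truth y in {0, 1} and a predicted probability p, the per-example loss is -(y*log(p) + (1-y)*log(1-p)), so confident mistakes are penalized heavily.

    import numpy as np

    def binary_crossentropy(y_true, y_pred, eps=1e-7):
        # Clip predictions to avoid log(0)
        y_pred = np.clip(y_pred, eps, 1 - eps)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    # Confident correct predictions give a small loss, confident mistakes a large one
    print(binary_crossentropy(np.array([1., 0.]), np.array([0.9, 0.1])))  # ~0.105
    print(binary_crossentropy(np.array([1., 0.]), np.array([0.1, 0.9])))  # ~2.303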

We will train the same network architecture with and without class weights in the loss function to account for the unbalanced classes.

    import numpy as np
    from matplotlib import pyplot as plt
    from sklearn.metrics import confusion_matrix

    from keras.datasets import mnist
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    from keras.optimizers import Adam
    from keras.callbacks import EarlyStopping

Using TensorFlow backend.



    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # Extract all 9s and 100 examples of 4s
    y_train_9 = y_train[y_train == 9]
    y_train_4 = y_train[y_train == 4][:100]
    X_train_9 = X_train[y_train == 9]
    X_train_4 = X_train[y_train == 4][:100]
    X_train = np.concatenate((X_train_9, X_train_4), axis=0)
    y_train = np.concatenate((y_train_9, y_train_4), axis=0)

    y_test_9 = y_test[y_test == 9]
    y_test_4 = y_test[y_test == 4]
    X_test_9 = X_test[y_test == 9]
    X_test_4 = X_test[y_test == 4]
    X_test = np.concatenate((X_test_9, X_test_4), axis=0)
    y_test = np.concatenate((y_test_9, y_test_4), axis=0)


    X_train = X_train.astype('float32')/255.
    X_test = X_test.astype('float32')/255.
    X_train = X_train.reshape(len(X_train), np.prod(X_train.shape[1:]))
    X_test = X_test.reshape(len(X_test), np.prod(X_test.shape[1:]))


    X_test.shape




(1991, 784)




    y_train_binary = y_train == 9
    y_test_binary = y_test == 9
    print(np.unique(y_train_binary, return_counts=True))

(array([False,  True]), array([ 100, 5949]))
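
The training set is heavily unbalanced: 5,949 nines versus only 100 fours. In this recipe we pick the class weights by hand, but as a side note scikit-learn can derive inverse-frequency weights automatically; a small sketch, assuming `compute_class_weight` is available in the installed scikit-learn version:

    from sklearn.utils.class_weight import compute_class_weight

    classes = np.unique(y_train_binary)
    weights = compute_class_weight(class_weight='balanced',
                                   classes=classes, y=y_train_binary)
    # 'balanced' assigns n_samples / (n_classes * class_count) per class,
    # so the rare False class (the 4s) receives the larger weight
    print(dict(zip(classes, weights)))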



    model = Sequential()
    model.add(Dense(512, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dropout(0.75))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.75))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.75))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    opt = Adam()

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['binary_accuracy'])


    callbacks = [EarlyStopping(monitor='val_loss', patience=5)]


    # Weight both classes equally, or weight the rare 4s (False) 100x more
    # heavily than the 9s (True)
    class_weight_equal = {False: 1., True: 1.}
    class_weight_imbalanced = {False: 100., True: 1.}
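
Keras uses these dictionaries to scale each training sample's contribution to the loss according to the weight of its true class. A minimal sketch of the idea (not part of the original recipe; `apply_class_weight` is a hypothetical helper):

    # Sketch: per-sample losses are multiplied by the weight of the sample's
    # true class, so errors on the rare class count 100x as much here
    def apply_class_weight(per_sample_loss, y_true, class_weight):
        sample_weights = np.where(y_true, class_weight[True], class_weight[False])
        return np.mean(sample_weights * per_sample_loss)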


    n_epochs = 1000
    batch_size = 512
    validation_split = 0.01

    model.fit(X_train, y_train_binary, epochs=n_epochs,
              batch_size=batch_size, shuffle=True,
              validation_split=validation_split,
              class_weight=class_weight_equal,
              callbacks=callbacks, verbose=0
             )




<keras.callbacks.History at 0x12708e828>




    preds_equal = model.predict(X_test)
    confusion_matrix(y_test_binary, np.round(preds_equal), labels=[True, False])

    # array([[1009,    0],
    #        [ 982,    0]])




array([[1009,    0],
       [ 982,    0]])
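
With equal class weights the network simply predicts the majority class: all 982 fours in the test set are classified as nines. A per-class report makes this explicit; the snippet below is an optional check using scikit-learn's `classification_report` on the predictions computed above:

    from sklearn.metrics import classification_report

    # Recall for the False class (the 4s) is 0 in the run shown above
    print(classification_report(y_test_binary,
                                np.round(preds_equal).ravel().astype(bool),
                                target_names=['4', '9']))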




    # Recompile and train again, this time with the imbalanced class weights.
    # Note that compiling does not reset the weights learned in the previous run.
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])

    model.fit(X_train, y_train_binary, epochs=n_epochs,
              batch_size=batch_size, shuffle=True,
              validation_split=validation_split,
              class_weight=class_weight_imbalanced,
              callbacks=callbacks, verbose=0
             )

    preds_imbalanced = model.predict(X_test)
    confusion_matrix(y_test_binary, np.round(preds_imbalanced), labels=[True, False])

    # array([[1009,    3],
    #        [ 546,  436]])




array([[1007,    2],
       [ 420,  562]])
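
By weighting the loss in favour of the rare class, the network now recovers most of the 4s at the cost of misclassifying a handful of 9s. As an optional follow-up that is not part of the original recipe, we can compare the recall on the 4s for both runs directly from the confusion matrices:

    # Minority-class (4s) recall with equal vs. imbalanced class weights
    for name, preds in [('equal', preds_equal), ('imbalanced', preds_imbalanced)]:
        cm = confusion_matrix(y_test_binary, np.round(preds), labels=[True, False])
        print(name, cm[1, 1] / cm[1].sum())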