    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt

    from keras.models import Sequential
    from keras.layers import Dense
    from keras.utils import to_categorical
    from keras.callbacks import Callback

    from keras.datasets import mnist

    SEED = 2017
    np.random.seed(SEED)  # fix the NumPy RNG so runs are repeatable

Using TensorFlow backend.



    # Load MNIST; the second split is the held-out test set, used here as X_val/y_val
    (X_train, y_train), (X_val, y_val) = mnist.load_data()
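
MNIST ships as 60,000 training and 10,000 test images of 28x28 grayscale pixels. A quick shape check (a minimal sanity check, not part of the original run) confirms what was loaded:

    # Raw data: integer pixel values in [0, 255]
    print(X_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
    print(X_val.shape, y_val.shape)      # (10000, 28, 28) (10000,)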


    # Plot the first occurrence of each label, with the label count in the title
    unique_labels = set(y_train)
    y_list = y_train.tolist()
    plt.figure(figsize=(12, 12))

    for i, label in enumerate(unique_labels, start=1):
        image = X_train[y_list.index(label)]
        plt.subplot(10, 10, i)
        plt.axis('off')
        plt.title("{0}: ({1})".format(label, y_list.count(label)))
        plt.imshow(image, cmap='gray')
    plt.show()

[figure: first training image of each digit 0-9, with per-label counts in the titles]

    # Normalize data
    X_train = X_train.astype('float32')/255.
    X_val = X_val.astype('float32')/255.

    # One-Hot-Encode labels
    n_classes = 10
    y_train = to_categorical(y_train, n_classes)
    y_val = to_categorical(y_val, n_classes)

    # Flatten data - we treat each 28x28 image as a flat vector of 784 values
    X_train = np.reshape(X_train, (60000, 784))
    X_val = np.reshape(X_val, (10000, 784))
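
After these steps the inputs are float vectors in [0, 1] and the labels are one-hot vectors. A quick check (illustrative, not in the original notebook):

    # Preprocessed data: float32 in [0, 1], flattened to 784 values per image
    print(X_train.shape, X_train.dtype, X_train.min(), X_train.max())  # (60000, 784) float32 0.0 1.0
    print(y_train.shape)  # (60000, 10)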


    model_sigmoid = Sequential()
    model_sigmoid.add(Dense(700, input_dim=784, activation='sigmoid'))
    model_sigmoid.add(Dense(700, activation='sigmoid'))
    model_sigmoid.add(Dense(700, activation='sigmoid'))
    model_sigmoid.add(Dense(700, activation='sigmoid'))
    model_sigmoid.add(Dense(700, activation='sigmoid')) 
    model_sigmoid.add(Dense(350, activation='sigmoid')) 
    model_sigmoid.add(Dense(100, activation='sigmoid')) 
    model_sigmoid.add(Dense(10, activation='softmax'))

    # Compile model with SGD
    model_sigmoid.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
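
This network stacks seven hidden layers, roughly 2.8 million trainable parameters in total. To verify the architecture and parameter counts, one can print a summary (an optional check, not part of the original run):

    # Prints one row per layer with its output shape and parameter count
    model_sigmoid.summary()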


    model_relu = Sequential()
    model_relu.add(Dense(700, input_dim=784, activation='relu'))
    model_relu.add(Dense(700, activation='relu'))
    model_relu.add(Dense(700, activation='relu'))
    model_relu.add(Dense(700, activation='relu'))
    model_relu.add(Dense(700, activation='relu')) 
    model_relu.add(Dense(350, activation='relu')) 
    model_relu.add(Dense(100, activation='relu')) 
    model_relu.add(Dense(10, activation='softmax'))

    # Compile model with SGD
    model_relu.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
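
The two models are identical except for the hidden activation, which makes the experiment a clean test of gradient flow. The sigmoid's derivative, sigmoid(x) * (1 - sigmoid(x)), peaks at 0.25, so each of the seven hidden layers can shrink the backpropagated gradient by a factor of four or more, while ReLU passes gradients through unchanged wherever the activation is positive. A small numeric sketch of that bound (illustrative only, not part of the original notebook):

    # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)) attains its maximum 0.25 at x = 0
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    x = np.linspace(-10, 10, 1001)
    print((sigmoid(x) * (1 - sigmoid(x))).max())  # 0.25

    # Backpropagating through 7 sigmoid layers scales the gradient by at most 0.25**7
    print(0.25 ** 7)  # ~6.1e-05 -- the gradient all but vanishes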


    class HistoryLoss(Callback):
        """Keras callback that records the training loss after every batch."""
        def on_train_begin(self, logs={}):
            self.losses = []

        def on_batch_end(self, batch, logs={}):
            self.losses.append(logs.get('loss'))


    n_epochs = 10
    batch_size = 256
    validation_split = 0.2

    history_sigmoid = HistoryLoss()
    model_sigmoid.fit(X_train, y_train, epochs=n_epochs, batch_size=batch_size,
                      callbacks=[history_sigmoid],
                      validation_split=validation_split, verbose=2)

    history_relu = HistoryLoss()
    model_relu.fit(X_train, y_train, epochs=n_epochs, batch_size=batch_size,
                   callbacks=[history_relu],
                   validation_split=validation_split, verbose=2)

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
2s - loss: 2.3185 - acc: 0.1101 - val_loss: 2.3024 - val_acc: 0.1060
Epoch 2/10
1s - loss: 2.3013 - acc: 0.1140 - val_loss: 2.3021 - val_acc: 0.1060
Epoch 3/10
1s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3019 - val_acc: 0.1060
Epoch 4/10
1s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3021 - val_acc: 0.1060
Epoch 5/10
1s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3020 - val_acc: 0.1060
Epoch 6/10
1s - loss: 2.3013 - acc: 0.1138 - val_loss: 2.3018 - val_acc: 0.1060
Epoch 7/10
1s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3019 - val_acc: 0.1060
Epoch 8/10
1s - loss: 2.3013 - acc: 0.1138 - val_loss: 2.3025 - val_acc: 0.1060
Epoch 9/10
1s - loss: 2.3013 - acc: 0.1140 - val_loss: 2.3024 - val_acc: 0.1060
Epoch 10/10
1s - loss: 2.3014 - acc: 0.1140 - val_loss: 2.3022 - val_acc: 0.1060
Train on 48000 samples, validate on 12000 samples
Epoch 1/10
1s - loss: 2.2131 - acc: 0.3867 - val_loss: 2.0198 - val_acc: 0.5864
Epoch 2/10
1s - loss: 1.3267 - acc: 0.7039 - val_loss: 0.6861 - val_acc: 0.8211
Epoch 3/10
1s - loss: 0.5515 - acc: 0.8399 - val_loss: 0.4470 - val_acc: 0.8649
Epoch 4/10
1s - loss: 0.4001 - acc: 0.8837 - val_loss: 0.3537 - val_acc: 0.8963
Epoch 5/10
1s - loss: 0.3317 - acc: 0.9035 - val_loss: 0.2970 - val_acc: 0.9147
Epoch 6/10
1s - loss: 0.2924 - acc: 0.9134 - val_loss: 0.3054 - val_acc: 0.9042
Epoch 7/10
1s - loss: 0.2649 - acc: 0.9221 - val_loss: 0.2413 - val_acc: 0.9286
Epoch 8/10
1s - loss: 0.2429 - acc: 0.9281 - val_loss: 0.2283 - val_acc: 0.9338
Epoch 9/10
1s - loss: 0.2212 - acc: 0.9343 - val_loss: 0.2058 - val_acc: 0.9380
Epoch 10/10
1s - loss: 0.2057 - acc: 0.9396 - val_loss: 0.2193 - val_acc: 0.9347
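
The sigmoid network never escapes chance level: its loss stays at ln(10) ≈ 2.30, the cross-entropy of a uniform guess over ten classes, and its accuracy at roughly 11%. The ReLU network reaches about 93% validation accuracy in the same ten epochs. To confirm the gap on the held-out test images in X_val, one could evaluate both models (a minimal sketch; exact numbers vary with initialization):

    # Evaluate both models on the held-out set (returns [loss, accuracy])
    sigmoid_loss, sigmoid_acc = model_sigmoid.evaluate(X_val, y_val, verbose=0)
    relu_loss, relu_acc = model_relu.evaluate(X_val, y_val, verbose=0)
    print("sigmoid: loss={:.4f}, acc={:.4f}".format(sigmoid_loss, sigmoid_acc))
    print("relu:    loss={:.4f}, acc={:.4f}".format(relu_loss, relu_acc))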






    plt.plot(np.arange(len(history_sigmoid.losses)), history_sigmoid.losses, label='sigmoid')
    plt.plot(np.arange(len(history_relu.losses)), history_relu.losses, label='relu')
    plt.title('Losses')
    plt.xlabel('number of batches')
    plt.ylabel('loss')
    plt.legend(loc=1)
    plt.show()

[figure: per-batch training loss for the sigmoid and ReLU networks]

    # get_weights() returns [kernel, bias] per Dense layer; index 0 is the weight matrix
    w_sigmoid = []
    w_relu = []
    for i in range(len(model_sigmoid.layers)):
        w_sigmoid.append(np.max(model_sigmoid.layers[i].get_weights()[0]))
        w_relu.append(np.max(model_relu.layers[i].get_weights()[0]))


    fig, ax = plt.subplots()

    index = np.arange(len(model_sigmoid.layers))
    bar_width = 0.35

    plt.bar(index, w_sigmoid, bar_width, label='sigmoid', color='b', alpha=0.4)
    plt.bar(index + bar_width, w_relu, bar_width, label='relu', color='r', alpha=0.4)
    plt.title('Weights across layers')
    plt.xlabel('layer number')
    plt.ylabel('maximum weight')
    plt.legend(loc=0)

    plt.xticks(index + bar_width / 2, np.arange(8))
    plt.show()

[figure: maximum weight per layer, sigmoid vs. ReLU]