import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout
Data preparation
# Dataset can be downloaded at https://archive.ics.uci.edu/ml/machine-learning-databases/00275/
data = pd.read_csv('Data/bike-sharing/hour.csv')
# Feature engineering
ohe_features = ['season', 'weathersit', 'mnth', 'hr', 'weekday']
for feature in ohe_features:
    dummies = pd.get_dummies(data[feature], prefix=feature, drop_first=False)
    data = pd.concat([data, dummies], axis=1)
drop_features = ['instant', 'dteday', 'season', 'weathersit', 'weekday', 'atemp', 'mnth', 'workingday', 'hr', 'casual', 'registered']
data = data.drop(drop_features, axis=1)
norm_features = ['cnt', 'temp', 'hum', 'windspeed']
scaled_features = {}
for feature in norm_features:
    mean, std = data[feature].mean(), data[feature].std()
    scaled_features[feature] = [mean, std]
    data.loc[:, feature] = (data[feature] - mean) / std
# Save the final month for testing
test_data = data[-31*24:]
data = data[:-31*24]
# Extract the target field
target_fields = ['cnt']
features, targets = data.drop(target_fields, axis=1), data[target_fields]
test_features, test_targets = test_data.drop(target_fields, axis=1), test_data[target_fields]
# Create a validation set from the last 30 days of the remaining data
X_train, y_train = features[:-30*24], targets[:-30*24]
X_val, y_val = features[-30*24:], targets[-30*24:]
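Before training, it is worth confirming that the three splits line up as intended; this snippet only inspects the data we just built:
print('Training set:  ', X_train.shape, y_train.shape)
print('Validation set:', X_val.shape, y_val.shape)
print('Test set:      ', test_features.shape, test_targets.shape)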
Without dropout
model = Sequential()
model.add(Dense(250, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(150, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(1, activation='linear'))
# Compile model
model.compile(loss='mse', optimizer='sgd', metrics=['mse'])
n_epochs = 1000
batch_size = 1024
history = model.fit(X_train.values, y_train['cnt'],
                    validation_data=(X_val.values, y_val['cnt']),
                    batch_size=batch_size, epochs=n_epochs, verbose=0)
plt.plot(np.arange(len(history.history['loss'])), history.history['loss'], label='training')
plt.plot(np.arange(len(history.history['val_loss'])), history.history['val_loss'], label='validation')
plt.title('Overfit on Bike Sharing dataset')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend(loc=0)
plt.show()
The gap between the training and validation curves means that the network has fit the features of the training set very well but fails to generalize to the validation data. This is what overfitting looks like.
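To put a rough number on that gap, we can compare the final training and validation losses from the history object (exact values will differ between runs):
print('Final training loss:  ', history.history['loss'][-1])
print('Final validation loss:', history.history['val_loss'][-1])
print('Generalization gap:   ', history.history['val_loss'][-1] - history.history['loss'][-1])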
print('Minimum loss: ', min(history.history['val_loss']),
'\nAfter ', np.argmin(history.history['val_loss']), ' epochs')
# Minimum loss: 0.129907280207
# After 980 epochs
Adding dropout
model_drop = Sequential()
model_drop.add(Dense(250, input_dim=X_train.shape[1], activation='relu'))
model_drop.add(Dropout(0.20))
model_drop.add(Dense(150, activation='relu'))
model_drop.add(Dropout(0.20))
model_drop.add(Dense(50, activation='relu'))
model_drop.add(Dropout(0.20))
model_drop.add(Dense(25, activation='relu'))
model_drop.add(Dropout(0.20))
model_drop.add(Dense(1, activation='linear'))
# Compile model
model_drop.compile(loss='mse', optimizer='sgd', metrics=['mse'])
history_drop = model_drop.fit(X_train.values, y_train['cnt'],
                              validation_data=(X_val.values, y_val['cnt']),
                              batch_size=batch_size, epochs=n_epochs, verbose=0)
plt.plot(np.arange(len(history_drop.history['loss'])), history_drop.history['loss'], label='training')
plt.plot(np.arange(len(history_drop.history['val_loss'])), history_drop.history['val_loss'], label='validation')
plt.title('Use dropout for Bike Sharing dataset')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend(loc=0)
plt.show()
The gap between the training and validation curves has closed. This means the network is no longer memorizing the training data; it has retained only the patterns that generalize.
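To compare the two runs directly, we can overlay the validation curves of both models, reusing the two history objects from above:
plt.plot(history.history['val_loss'], label='validation (no dropout)')
plt.plot(history_drop.history['val_loss'], label='validation (dropout)')
plt.title('Effect of dropout on validation loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend(loc=0)
plt.show()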
print('Minimum loss: ', min(history_drop.history['val_loss']),
'\nAfter ', np.argmin(history_drop.history['val_loss']), ' epochs')
# Minimum loss: 0.126063346863
# After 998 epochs
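Finally, the test month we set aside during data preparation has not been touched yet. A minimal sketch of how it could be evaluated, including converting the standardized predictions back to ride counts with the scaling parameters stored in scaled_features (variable names reuse those defined above):
test_scores = model_drop.evaluate(test_features.values, test_targets['cnt'], verbose=0)
print('Test loss (MSE on standardized counts):', test_scores[0])
# Undo the standardization to express predictions in the original units
mean, std = scaled_features['cnt']
predictions = model_drop.predict(test_features.values) * std + mean
actual = test_targets['cnt'] * std + mean
Note that Dropout layers are only active during training; at evaluation and prediction time Keras disables them automatically, so no extra configuration is needed here.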