import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.callbacks import EarlyStopping
from keras.datasets import imdb
n_words = 1000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=n_words)
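# num_words keeps only the 1,000 most frequent words; rarer words are
# replaced by the out-of-vocabulary index 2 (the load_data default), which
# is why 2 appears so often in the examples printed below.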
print('Train seq: {}'.format(len(X_train)))
print('Test seq: {}'.format(len(X_test)))
print('Train example: \n{}'.format(X_train[0]))
print('\nTest example: \n{}'.format(X_test[0]))
# Note: the data is already preprocessed (each review is a list of integer word indices)
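# The indices are frequency ranks offset by 3, because load_data reserves
# 0 for padding, 1 for the start marker and 2 for out-of-vocabulary words
# (the defaults start_char=1, oov_char=2, index_from=3). A minimal sketch
# for mapping a review back to words:
word_index = imdb.get_word_index()
index_word = {index + 3: word for word, index in word_index.items()}
print('Decoded example: \n{}'.format(
    ' '.join(index_word.get(i, '?') for i in X_train[0])))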
# Pad sequences with max_len
max_len = 200
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)
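# By default pad_sequences pre-pads shorter reviews with zeros and
# truncates longer ones from the front (padding='pre', truncating='pre'),
# so every review now has exactly max_len indices:
print('Padded train shape: {}'.format(X_train.shape))  # (25000, 200)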
# Define network architecture and compile
model = Sequential()
model.add(Embedding(n_words, 50, input_length=max_len))
model.add(Dropout(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(250, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
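# Sanity check on the parameter counts reported by model.summary():
# - embedding_1: n_words * 50 = 1,000 * 50 = 50,000
# - lstm_1: 4 * (input_dim + units + 1) * units = 4 * (50 + 100 + 1) * 100 = 60,400
#   (the factor 4 covers the input, forget and output gates plus the cell update)
# - dense_1: 100 * 250 + 250 = 25,250; dense_2: 250 * 1 + 1 = 251
# Total: 50,000 + 60,400 + 25,250 + 251 = 135,901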
callbacks = [EarlyStopping(monitor='val_acc', patience=3)]
batch_size = 128
n_epochs = 100
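# validation_split=0.2 below holds out the last 20% of the training set
# (5,000 reviews) for validation, which the training log reflects.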
model.fit(X_train, y_train, batch_size=batch_size, epochs=n_epochs, validation_split=0.2, callbacks=callbacks)
print('Accuracy on test set: {}'.format(model.evaluate(X_test, y_test)[1]))
# Accuracy on test set: 0.82884
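# A minimal sketch for scoring new reviews with the trained model; any
# list of word-index sequences works, here we reuse the first test review:
padded = sequence.pad_sequences([X_test[0]], maxlen=max_len)
print('P(positive): {}'.format(model.predict(padded)[0][0]))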
Using TensorFlow backend.
Train seq: 25000
Test seq: 25000
Train example:
[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]
Test example:
[1, 591, 202, 14, 31, 6, 717, 10, 10, 2, 2, 5, 4, 360, 7, 4, 177, 2, 394, 354, 4, 123, 9, 2, 2, 2, 10, 10, 13, 92, 124, 89, 488, 2, 100, 28, 2, 14, 31, 23, 27, 2, 29, 220, 468, 8, 124, 14, 286, 170, 8, 157, 46, 5, 27, 239, 16, 179, 2, 38, 32, 25, 2, 451, 202, 14, 6, 717]
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_1 (Embedding)      (None, 200, 50)           50000
_________________________________________________________________
dropout_1 (Dropout)          (None, 200, 50)           0
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               60400
_________________________________________________________________
dense_1 (Dense)              (None, 250)               25250
_________________________________________________________________
dropout_2 (Dropout)          (None, 250)               0
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 251
=================================================================
Total params: 135,901
Trainable params: 135,901
Non-trainable params: 0
_________________________________________________________________
Train on 20000 samples, validate on 5000 samples
Epoch 1/100
20000/20000 [==============================] - 49s 2ms/step - loss: 0.5634 - acc: 0.6982 - val_loss: 0.4351 - val_acc: 0.8004
Epoch 2/100
20000/20000 [==============================] - 48s 2ms/step - loss: 0.4342 - acc: 0.8023 - val_loss: 0.3923 - val_acc: 0.8278
Epoch 3/100
4736/20000 [======>.......................] - ETA: 34s - loss: 0.4127 - acc: 0.8254