import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
# Historical price data can be downloaded from Yahoo Finance
# (e.g. https://ca.finance.yahoo.com/quote/%5EIXIC/). Any index or stock
# can be used instead; the file loaded below is for AAPL.
# Load the price history from the local CSV export.
csv_path = '~/Desktop/LargeFiles/Yahoo/AAPL.csv'
data = pd.read_csv(csv_path)

# Keep only the modelling columns, ordered so that 'Close' (the value
# predicted further down) is the last column.
feature_order = ['Open', 'High', 'Low', 'Volume', 'Close']
data = data.loc[:, feature_order]

# Notebook display of the first rows.
data.head()
Open | High | Low | Volume | Close | |
---|---|---|---|---|---|
0 | 27.881428 | 28.600000 | 27.774286 | 153832000 | 28.461428 |
1 | 28.104286 | 28.338572 | 27.367144 | 189413000 | 27.435715 |
2 | 27.518572 | 28.000000 | 27.264286 | 212576700 | 27.922857 |
3 | 27.955715 | 28.268572 | 27.714285 | 119567700 | 27.731428 |
4 | 28.059999 | 28.214285 | 27.821428 | 158221700 | 28.027143 |
# Notebook display of the frame's (rows, columns) dimensions.
data.shape
(2015, 5)
# Windowing and training hyper-parameters.
sequence_length = 21  # 20 preceding rows as input + 1 row holding the target
n_features = len(data.columns)
val_ratio = 0.1
n_epochs = 300
batch_size = 512

# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same ndarray and exists on every pandas version.
data = data.values

# Build overlapping windows of `sequence_length` consecutive rows.
# NOTE(review): the range stops one short of the last possible window
# (len(data) - sequence_length + 1 windows exist); left unchanged so the
# sample counts match the recorded run.
data_processed = np.array(
    [
        data[start:start + sequence_length]
        for start in range(len(data) - sequence_length)
    ]
)

# Split in row order: the first (1 - val_ratio) share of the windows is used
# for training, the remainder for validation.
val_split = int(round((1 - val_ratio) * data_processed.shape[0]))
train = data_processed[:val_split, :]
val = data_processed[val_split:, :]
print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))
Training data: (1795, 21, 5)
Validation data: (199, 21, 5)
# Remember the 3-D window shapes so they can be restored after scaling.
train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape

# MinMaxScaler works on 2-D input, so each window is flattened into a single
# row; every (timestep, feature) position becomes its own scaled column.
train_flat = train.reshape(train_samples, train_nx * train_ny)
val_flat = val.reshape(val_samples, val_nx * val_ny)

# Fit on the training windows only, then apply the same scaling to both sets.
preprocessor = MinMaxScaler()
preprocessor.fit(train_flat)
train = preprocessor.transform(train_flat).reshape(
    train_samples, train_nx, train_ny
)
val = preprocessor.transform(val_flat).reshape(val_samples, val_nx, val_ny)

# Inputs are all rows of a window except the last; the target is the last
# feature of the last row.
X_train, y_train = train[:, :-1], train[:, -1, -1]
X_val, y_val = val[:, :-1], val[:, -1, -1]

# Make the (samples, timesteps, features) layout explicit for the LSTM.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], n_features)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], n_features)
# Two stacked LSTM layers with dropout, followed by a single linear output
# unit that regresses the scaled target value.
model = Sequential([
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(units=128, input_shape=(X_train.shape[1:]), return_sequences=True),
    Dropout(0.5),
    # The second LSTM returns only its final output.
    LSTM(128, return_sequences=False),
    Dropout(0.25),
    Dense(units=1),
    Activation("linear"),
])
model.compile(optimizer="adam", loss="mse")

# Train on the training windows; `history` holds the value returned by fit().
history = model.fit(
    X_train,
    y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    verbose=1,
)
Epoch 1/300
1795/1795 [==============================] - 2s 1ms/step - loss: 0.1033
Epoch 2/300
1795/1795 [==============================] - 1s 524us/step - loss: 0.0294
...
Epoch 300/300
1795/1795 [==============================] - 1s 582us/step - loss: 0.0011
# Predict the scaled target for every validation window.
preds_val = model.predict(X_val)

# Per-sample residuals in scaled space (actual - predicted), computed in one
# vectorised step; predictions come back as a single-column 2-D array.
diff = list(y_val - preds_val[:, 0])

# The scaler was fit on flattened windows, so its last column corresponds to
# the last feature ('Close') of the last timestep, i.e. the prediction target.
# Indexing with -1 instead of the hard-coded 104 (= 21 * 5 - 1) keeps this
# correct if sequence_length or the number of features changes.
real_min = preprocessor.data_min_[-1]
real_max = preprocessor.data_max_[-1]
print(real_min)
print(real_max)

# Undo the min-max scaling to recover values in the original units.
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min
30.101429
144.770004
# Overlay predicted and actual values (both in original units) on the
# current axes.
ax = plt.gca()
ax.plot(preds_real, label='Predictions')
ax.plot(y_val_real, label='Actual values')
ax.set_xlabel('test')
ax.legend(loc=0)  # loc=0 lets matplotlib choose the legend position
plt.show()