import glob
import re
import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.models import Model
from keras.layers import Flatten, Dense, Input, GlobalAveragePooling2D, GlobalMaxPooling2D, Activation
from keras.layers import Convolution2D, MaxPooling2D
from keras import optimizers
from keras import backend as K
from keras.callbacks import EarlyStopping
SEED = 2017
Using TensorFlow backend.
# Data can be downloaded at http://vis-www.cs.umass.edu/lfw/#download
DATA_DIR = 'Data/lfw/'
images = glob.glob(DATA_DIR + '*/*.jpg')
plt.figure(figsize=(10, 10))
n_examples = 5
for i in range(n_examples):
    rand = np.random.randint(len(images))
    image_name = re.search(DATA_DIR + '(.+?)/', images[rand], re.IGNORECASE).group(1).replace('_', ' ')
    img = cv2.imread(images[rand])
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.subplot(n_examples, n_examples, i+1)
    plt.title(image_name)
    plt.imshow(img)
plt.show()
images_arnold = glob.glob(DATA_DIR + 'Arnold_Schwarzenegger/*.jpg')
plt.figure(figsize=(10, 10))
for i in range(n_examples):
    image_name = re.search(DATA_DIR + '(.+?)/', images_arnold[i], re.IGNORECASE).group(1).replace('_', ' ')
    img = cv2.imread(images_arnold[i])
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.subplot(n_examples, n_examples, i+1)
    plt.title(image_name)
    plt.imshow(img)
plt.show()
labels = np.asarray([re.search(DATA_DIR + '(.+?)/', image, re.IGNORECASE).group(1) for image in images])
print('Number of images: {}'.format(len(labels)))
print('Number of unique labels: {}'.format(len(np.unique(labels))))
Number of images: 13233
Number of unique labels: 5749
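# Optional check (not in the original listing): LFW is heavily imbalanced --
# most identities have only a single image. This sketch counts images per
# label with np.unique; the variable names are illustrative only.
unique_labels, label_counts = np.unique(labels, return_counts=True)
order = np.argsort(label_counts)[::-1]
for name, count in zip(unique_labels[order][:5], label_counts[order][:5]):
    print('{}: {} images'.format(name, count))
print('Identities with a single image: {}'.format(np.sum(label_counts == 1)))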
encoder = LabelBinarizer()
encoder.fit(labels)
y = encoder.transform(labels).astype(float)
X_train, X_val, y_train, y_val = train_test_split(images, y, test_size=0.2, random_state=SEED)
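# Quick shape check after the split (illustrative, not part of the original
# code): X_train and X_val hold file paths, while y_train and y_val hold
# one-hot vectors with one column per unique label.
print('Training examples: {}'.format(len(X_train)))
print('Validation examples: {}'.format(len(X_val)))
print('One-hot vector length: {}'.format(y_train.shape[1]))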
input_shape = (250, 250, 3)
img_input = Input(shape=input_shape)
inputs = img_input
# Block 1
x = Convolution2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(img_input)
x = Convolution2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)
# Block 2
x = Convolution2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x)
x = Convolution2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x)
# Block 3
x = Convolution2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x)
x = Convolution2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x)
x = Convolution2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(x)
# Block 4
x = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x)
x = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x)
x = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(x)
# Block 5
x = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x)
x = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x)
x = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(x)
x = Flatten(name='flatten')(x)
x = Dense(4096, name='fc6')(x)
x = Activation('relu', name='fc6/relu')(x)
x = Dense(4096, name='fc7')(x)
x = Activation('relu', name='fc7/relu')(x)
x = Dense(len(y[0]), name='fc8')(x)
x = Activation('softmax', name='fc8/softmax')(x)
model = Model(inputs, x)
opt = optimizers.Adam()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
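# Optional: print the VGG16-style architecture and its parameter count before
# training; model.summary() is a standard Keras call and its (long) output is
# omitted here.
model.summary()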
def random_shifts(image, shift_max_x=100, shift_max_y=100):
    # image.shape returns (height, width, channels)
    height, width, _ = image.shape
    shift_x = np.random.randint(shift_max_x)
    shift_y = np.random.randint(shift_max_y)
    M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    # cv2.warpAffine expects the output size as (width, height)
    return cv2.warpAffine(image, M, (width, height))
def random_flip(image, p_flip=0.5):
    rand = np.random.random()
    if rand < p_flip:
        image = cv2.flip(image, 1)
    return image
def scale_image(image, scale_range=[0.6, 1.4]):
    # image.shape returns (height, width, channels)
    height, width, _ = image.shape
    scale_x = np.random.uniform(low=scale_range[0], high=scale_range[1])
    scale_y = np.random.uniform(low=scale_range[0], high=scale_range[1])
    # Scale around the image centre
    scale_matrix = np.array([[scale_x, 0., (1. - scale_x) * width / 2.],
                             [0., scale_y, (1. - scale_y) * height / 2.]],
                            dtype=np.float32)
    # cv2.warpAffine expects the output size as (width, height)
    return cv2.warpAffine(image, scale_matrix, (width, height), flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_REFLECT_101)
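# Illustrative check of the augmentation functions (not in the original
# listing): apply them to one random image and plot the original next to the
# augmented result. The functions draw from np.random, so output varies per run.
sample = cv2.cvtColor(cv2.imread(images[np.random.randint(len(images))]), cv2.COLOR_BGR2RGB)
augmented = scale_image(random_flip(random_shifts(sample, 10, 10)), [0.8, 1.2])
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plt.title('original')
plt.imshow(sample)
plt.subplot(1, 2, 2)
plt.title('augmented')
plt.imshow(augmented)
plt.show()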
img_rows = img_cols = 250
img_channels = 3
def batchgen(x, y, batch_size, transform=False):
    # Create empty numpy arrays
    images = np.zeros((batch_size, img_rows, img_cols, img_channels))
    class_id = np.zeros((batch_size, len(y[0])))
    while 1:
        for n in range(batch_size):
            # Sample a random image and apply the augmentations if requested
            i = np.random.randint(len(x))
            x_ = cv2.imread(x[i])
            x_ = cv2.cvtColor(x_, cv2.COLOR_BGR2RGB)
            if transform:
                x_ = random_shifts(x_, 10, 10)
                x_ = random_flip(x_)
                x_ = scale_image(x_, [0.8, 1.2])
            images[n] = x_
            class_id[n] = y[i]
        yield images, class_id
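# Sanity check (illustrative, not in the original listing): pull one batch
# from the generator and confirm the shapes match the network input and the
# one-hot labels. No rescaling is applied, matching the original code.
sample_x, sample_y = next(batchgen(X_train, y_train, 4, transform=True))
print(sample_x.shape)  # expected: (4, 250, 250, 3)
print(sample_y.shape)  # expected: (4, 5749)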
batch_size = 32
n_epochs = 1000
s_epoch = len(X_train) // batch_size
val_steps = len(X_val) // batch_size
callbacks = [EarlyStopping(monitor='val_acc', patience=5)]
train_generator = batchgen(X_train, y_train, batch_size, True)
val_generator = batchgen(X_val, y_val, batch_size, False)
history = model.fit_generator(train_generator,
                              steps_per_epoch=s_epoch,
                              epochs=n_epochs,
                              validation_data=val_generator,
                              validation_steps=val_steps,
                              callbacks=callbacks)
Epoch 1/1000
330/330 [==============================] - 881s - loss: 15.0265 - acc: 0.0342 - val_loss: 14.8600 - val_acc: 0.0446
Epoch 2/1000
330/330 [==============================] - 868s - loss: 15.0487 - acc: 0.0402 - val_loss: 14.8502 - val_acc: 0.0507
Epoch 3/1000
301/330 [==========================>...] - ETA: 70s - loss: 15.0633 - acc: 0.0384
test_generator = batchgen(X_val, y_val, batch_size, False)
# The generator samples validation images at random, so draw one batch
# manually and keep its labels; comparing against y_val directly would
# misalign predictions and labels (and mismatch in length).
X_test, y_test = next(test_generator)
preds = model.predict(X_test)
y_val_ = [np.argmax(x) for x in y_test]
y_preds = [np.argmax(x) for x in preds]
accuracy_score(y_val_, y_preds)
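# Illustrative extension (an assumption, not part of the original recipe):
# score the entire validation split by reading the images in order so the
# predictions stay aligned with y_val. This loads every validation image into
# memory at once (roughly 2 GB as float32), so adapt it to your hardware.
X_val_images = np.zeros((len(X_val), img_rows, img_cols, img_channels), dtype=np.float32)
for i, path in enumerate(X_val):
    X_val_images[i] = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
val_preds = model.predict(X_val_images, batch_size=batch_size)
print(accuracy_score([np.argmax(v) for v in y_val],
                     [np.argmax(p) for p in val_preds]))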