Also referred to as a downsampling layer, with max pooling being the most popular variant, this layer takes a filter (normally of size 2×2) and a stride of the same length, slides it across the input volume, and outputs the maximum value within each subregion the filter covers.

    import numpy as np
    from matplotlib import pyplot as plt

    from keras.utils import np_utils
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Flatten
    # NOTE: EarlyStopping was imported twice in the original; consolidated here.
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from keras.layers import Conv2D, MaxPooling2D
    from keras.optimizers import Adam

    # Load the MNIST handwritten-digit dataset (downloads on first use).
    from keras.datasets import mnist
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Add an explicit single-channel axis: Keras Conv2D expects
    # (samples, rows, cols, channels) with the TensorFlow backend.
    img_rows, img_cols = X_train[0].shape[0], X_train[0].shape[1]
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

    # Scale pixel intensities from [0, 255] to [0, 1].
    X_train = X_train.astype('float32')/255.
    X_test = X_test.astype('float32')/255.

    # One-hot encode the integer labels for categorical cross-entropy.
    n_classes = len(set(y_train))
    y_train = np_utils.to_categorical(y_train, n_classes)
    y_test = np_utils.to_categorical(y_test, n_classes)

    # Three conv + max-pooling stages with widening filter banks,
    # followed by a dropout-regularized dense classifier head.
    model = Sequential()

    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    # Single-output model: the loss is a plain string, not a list
    # (the original wrapped it in a one-element list).
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.summary()

    # Stop training once validation accuracy stops improving for 5 epochs.
    # NOTE(review): 'val_acc' is the metric name in classic Keras; newer
    # tf.keras releases expose it as 'val_accuracy' — confirm against the
    # installed version.
    callbacks = [EarlyStopping(monitor='val_acc', patience=5)]

    batch_size = 128
    n_epochs = 200  # upper bound; early stopping usually halts far sooner

    # Hold out 20% of the training data for validation.
    model.fit(X_train, y_train, batch_size=batch_size, epochs=n_epochs, verbose=1, validation_split=0.2, callbacks=callbacks)

    # Final evaluation on the untouched test set.
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

Using TensorFlow backend.