ConvNets for TNBC cell morphologies¶

Imports¶

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from sklearn.model_selection import train_test_split
from IPython.display import set_matplotlib_formats
# Ask the IPython inline backend to emit figures in SVG format.
set_matplotlib_formats('svg')
In [2]:
# img_size: side length (pixels) of the images fed to the loader and the network.
# downsize_factor: ratio between the original crop size and img_size.
img_size, downsize_factor = 28, 4
original_size = downsize_factor * img_size

# Folder name encodes "<original size>d<downsize factor>".
ground_truth_folder = './output/ground_truth/%dd%d' % (
    original_size,
    downsize_factor,
)
In [3]:
# Project-local loader, called with the ground-truth folder and the target image size.
from src.cnn import get_data
# X is used as the image data and Y as the labels in the splits further down.
# NOTE(review): presumably X is (N, img_size, img_size, 1) and Y is (N,) -- confirm in src.cnn.
X, Y = get_data(ground_truth_folder, IMG_SIZE=img_size)
In [4]:
# Project-local plotting helper; draws onto the figure passed in.
from src.vis_utils import plot_array
# Visual check of the loaded data before any preprocessing is applied.
fig = plt.figure(figsize=(8, 6))
# num_classes=9 matches the NUM_LABELS=9 used when the network is built.
# NOTE(review): presumably shows sample images per class -- confirm in src.vis_utils.
plot_array(fig, X, Y, num_classes=9)
plt.show()

Data preprocessing¶

In [5]:
# Hold out 400 samples as the test set; the fixed seed makes the split repeatable.
Xtr, Xte, Ytr, Yte = train_test_split(X, Y, random_state=42, test_size=400)

# Report the resulting shapes (single-argument print works on Python 2 and 3).
print('{} {}'.format('Training data shape: ', Xtr.shape))
print('{} {}'.format('Training labels shape: ', Ytr.shape))
print('{} {}'.format('Test data shape: ', Xte.shape))
print('{} {}'.format('Test labels shape: ', Yte.shape))
Training data shape:  (6130, 28, 28, 1)
Training labels shape:  (6130,)
Test data shape:  (400, 28, 28, 1)
Test labels shape:  (400,)
In [6]:
# Per-pixel mean over every non-test sample. At this point Xtr still contains
# the samples that are split off for validation later; the test set is excluded.
mean_image = Xtr.mean(axis=0)

# Centre both sets in place using the same mean image.
Xte -= mean_image
Xtr -= mean_image
In [7]:
%matplotlib inline
fig, ax = plt.subplots(figsize=(4, 4))
ax.axis('off')
ax.imshow(mean_image[:,:,0])
Out[7]:
<matplotlib.image.AxesImage at 0x129cff210>
In [8]:
Xtr, Xval, Ytr, Yval = train_test_split(Xtr, Ytr, test_size=130, random_state=42)

print 'Training data shape: ', Xtr.shape
print 'Training labels shape: ', Ytr.shape
print 'Training data shape: ', Xval.shape
print 'Training labels shape: ', Yval.shape
Training data shape:  (6000, 28, 28, 1)
Training labels shape:  (6000,)
Training data shape:  (130, 28, 28, 1)
Training labels shape:  (130,)

Model Training¶

In [9]:
from src.cnn import ConvolutionalNeuralNetwork

# Build the network for single-channel img_size x img_size inputs and 9 labels.
# The validation and test set sizes are taken from the arrays split above.
model = ConvolutionalNeuralNetwork(
    IMG_SIZE=img_size,
    NUM_CHANNELS=1,
    NUM_LABELS=9,
    BATCH_SIZE=64,
    NUM_VALIDATION=len(Xval),
    NUM_TEST=len(Xte),
)
Model variables initialised
Model architecture initialised
Computational graph initialised
In [10]:
# Train on the training split for 1500 iterations; the validation split is
# passed alongside (the log below reports a validation error every 100 steps).
model.train(Xtr, Ytr, Xval, Yval, max_iters=1500)
Step 0 of 1500
Mini-batch loss: 6.86424 Error: 93.75000 Learning rate: 0.01000
Validation error: 93.8%
Step 100 of 1500
Mini-batch loss: 4.01758 Error: 26.56250 Learning rate: 0.00950
Validation error: 25.4%
Step 200 of 1500
Mini-batch loss: 3.66268 Error: 18.75000 Learning rate: 0.00902
Validation error: 25.4%
Step 300 of 1500
Mini-batch loss: 3.71915 Error: 25.00000 Learning rate: 0.00857
Validation error: 20.0%
Step 400 of 1500
Mini-batch loss: 3.77912 Error: 26.56250 Learning rate: 0.00815
Validation error: 17.7%
Step 500 of 1500
Mini-batch loss: 3.57935 Error: 23.43750 Learning rate: 0.00774
Validation error: 20.8%
Step 600 of 1500
Mini-batch loss: 3.72132 Error: 21.87500 Learning rate: 0.00735
Validation error: 17.7%
Step 700 of 1500
Mini-batch loss: 3.56777 Error: 23.43750 Learning rate: 0.00698
Validation error: 22.3%
Step 800 of 1500
Mini-batch loss: 3.86288 Error: 25.00000 Learning rate: 0.00663
Validation error: 14.6%
Step 900 of 1500
Mini-batch loss: 3.37087 Error: 14.06250 Learning rate: 0.00630
Validation error: 19.2%
Step 1000 of 1500
Mini-batch loss: 3.47066 Error: 17.18750 Learning rate: 0.00599
Validation error: 14.6%
Step 1100 of 1500
Mini-batch loss: 3.31114 Error: 15.62500 Learning rate: 0.00569
Validation error: 19.2%
Step 1200 of 1500
Mini-batch loss: 3.36550 Error: 20.31250 Learning rate: 0.00540
Validation error: 16.9%
Step 1300 of 1500
Mini-batch loss: 3.49178 Error: 23.43750 Learning rate: 0.00513
Validation error: 14.6%
Step 1400 of 1500
Mini-batch loss: 3.44717 Error: 18.75000 Learning rate: 0.00488
Validation error: 13.8%

Inference¶

In [29]:
# Predict labels for the held-out test set.
predictions = model.test_model(Xte)

# Count how many predictions agree with the true test labels.
total = len(predictions)
correct = (predictions == Yte).sum()

# Error is the percentage of test samples that were misclassified.
print('Test error: %.02f%%' % (100 * (1 - float(correct) / float(total))))
Test error: 10.50%
In [28]:
from src.vis_utils import plot_confusion_matrix

# Class names, passed to the plotting helper alongside the matrix.
# NOTE(review): presumably ordered by integer label index -- confirm against the data loader.
classes = ['interphase', 'large', 'prometaphase', 'metaphase', 'bright',
           'anaphase', 'early anaphase', 'polylobed', 'apoptosis']
num_classes = len(classes)

# Rows index the true label, columns the predicted label.
confusion_matrix = np.zeros((num_classes, num_classes), np.int32)

# `predictions` were computed on the test set (Xte), so they must be compared
# against the test labels Yte. Indexing the training labels (Ytr) here pairs
# each prediction with the label of an unrelated sample.
for true_label, predicted_label in zip(Yte, predictions):
    confusion_matrix[true_label][predicted_label] += 1

fig, ax = plt.subplots(figsize=(8, 6))
plot_confusion_matrix(ax, confusion_matrix, classes, fontsize=15)