# Ground Truth

## Imports

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import set_matplotlib_formats

from src.ground_truth import AnnotationReader, GroundTruth
# Render inline matplotlib figures as SVG.
# NOTE(review): IPython.display.set_matplotlib_formats is deprecated in newer
# IPython releases in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats — confirm against the
# installed IPython version.
set_matplotlib_formats('svg')
In [2]:
# Root folder of the input data. Set the HCS_INPUT_DIR environment variable to
# run the notebook on another machine; the default is the original local path.
INPUT_DIR = os.environ.get('HCS_INPUT_DIR', '/Users/jcboyd/Documents/hcs/input/')

# Both folders are handed to AnnotationReader below. The trailing '' component
# keeps the trailing path separator that the original string paths carried.
annotations_folder = os.path.join(INPUT_DIR, 'annotations', '')
ch5_folder = os.path.join(INPUT_DIR, 'hdf5', '')
In [3]:
# Class names, in class-id order. The class ids in the annotation data are
# 1-based (1..9 in the per-class counts), so classes[i - 1] presumably names
# class id i — TODO confirm.
classes = [
    'interphase',
    'large',
    'prometaphase',
    'metaphase',
    'bright',
    'anaphase',
    'early anaphase',
    'polylobed',
    'apoptosis',
]

## Read annotation data

In [4]:
# Build a reader over the annotation and CH5 folders, then load the annotations.
# NOTE(review): read_file=False presumably forces the annotations to be parsed
# again rather than loaded from a previously saved file — confirm against
# AnnotationReader.read_annotation_data.
ar = AnnotationReader(annotations_folder, ch5_folder)
ar.read_annotation_data(read_file=False)
# Preview the ground-truth table: bounding-box edges (top/bottom/left/right),
# class id, well and field for each annotated sample.
ar.df_ground_truth.head()
[####################################################################################################>] (100%)
Out[4]:
bottom class field left right top well
0 78 1 01 359 391 20 A01
1 589 1 01 180 226 543 A01
2 659 1 01 731 789 611 A01
3 578 1 01 839 902 534 A01
4 641 1 01 528 580 602 A01
In [5]:
# Persist the annotation data read above. No destination is passed here, so
# the output location is decided inside AnnotationReader.save_data.
ar.save_data()
Saving data...
Done!
In [6]:
# Number of annotated samples per class id.
class_groups = ar.df_ground_truth.groupby('class')
class_groups['class'].count()
Out[6]:
class
1    4273
2     519
3     170
4     211
5     342
6     112
7      16
8     481
9     406
Name: class, dtype: int64

## Quality check

In [7]:
# Second annotation set ("annotations_control") used for the quality check.
# It gets its own variable so that `annotations_folder` keeps pointing at the
# main annotations: rebinding it here would make the earlier reader cell load
# the control set if it were re-run after this one.
control_annotations_folder = '/Users/jcboyd/Documents/hcs/input/annotations_control/'
ar2 = AnnotationReader(control_annotations_folder, ch5_folder)
# NOTE(review): called with default arguments, unlike the main reader, which
# passes read_file=False — confirm the difference is intentional.
ar2.read_annotation_data()
[####################################################################################################>] (100%)
In [8]:
# Number of control-set samples per class id.
control_class_groups = ar2.df_ground_truth.groupby('class')
control_class_groups['class'].count()
Out[8]:
class
1    289
2    203
3     43
4     44
5     91
8    156
9    120
Name: class, dtype: int64
In [9]:
# Compare the control annotations (ar2) against the main annotations (ar).
# NOTE(review): the plotting cell reads comparison[key][0] as the "Agree" count
# and comparison[key][1] as the "Disagree" count for each key — confirm against
# AnnotationReader.make_comparison.
comparison = ar2.make_comparison(ar)
In [10]:
# Stacked bar chart of agreement between the two annotation sets, one bar per
# key of `comparison`: element [0] is drawn as "Agree", element [1] stacked on
# top of it as "Disagree".
agrees = [comparison[key][0] for key in comparison.keys()]
disagrees = [comparison[key][1] for key in comparison.keys()]

# Number of bars, derived from the data rather than hard-coded so the bar
# positions always match the number of values being plotted.
N = len(agrees)
ind = np.arange(N)
width = 0.35

p1 = plt.bar(ind, agrees, width, color='red')
p2 = plt.bar(ind, disagrees, width, bottom=agrees, color='blue')

plt.ylabel('Total')
plt.title('Comparison of annotations by class')
# Tick labels: names of the classes present in the control annotations
# (class ids 1-5, 8 and 9, i.e. 0-based indices 0-4, 7 and 8).
# NOTE(review): assumes the keys of `comparison` iterate in this same class
# order — confirm.
plt.xticks(ind, np.array(classes)[[0, 1, 2, 3, 4, 7, 8]], rotation='vertical')
plt.legend((p1[0], p2[0]), ('Agree', 'Disagree'))
# Fixed y-range; any stacked bar taller than 180 is clipped at the top.
plt.gca().set_ylim([0, 180])

plt.show()

## Explore bounding box data

In [11]:
# Bounding-box extents, one value per annotated sample, computed column-wise
# instead of iterating over the rows twice.
heights = (ar.df_ground_truth['bottom'] - ar.df_ground_truth['top']).tolist()
widths = (ar.df_ground_truth['right'] - ar.df_ground_truth['left']).tolist()

fig = plt.figure(figsize=(6, 3))

# One histogram per series: heights on the left, widths on the right.
for i, data in enumerate([heights, widths]):
    fig.add_subplot(1, 2, i + 1)
    plt.title(['heights', 'widths'][i])
    # Plot this panel's own series (`data`), so that the "widths" panel
    # actually shows the widths rather than the heights again.
    plt.hist(data, bins=20, color=['red', 'blue'][i])

plt.tight_layout()
plt.show()
In [12]:
# Upper percentile of the bounding-box heights and widths, shown as a
# (height, width) pair.
percentile = 99
tuple(np.percentile(extents, percentile) for extents in (heights, widths))
Out[12]:
(120.71000000000004, 121.0)

## Export crops

In [13]:
# Export settings; both values are passed to export_ground_truth and are also
# encoded in the output folder name.
# NOTE(review): IMG_SIZE is presumably the side length of each exported crop in
# pixels, and DOWNSCALE the factor by which crops are shrunk — confirm against
# GroundTruth.export_ground_truth.
IMG_SIZE = 112
DOWNSCALE = 4
# Resolves to './output/ground_truth/112d4' for the values above.
output = './output/ground_truth/%dd%d' % (IMG_SIZE, DOWNSCALE)
# Export the ground-truth crops for the main annotation set, reading image
# data from the CH5 folder.
gg = GroundTruth(ar.df_ground_truth, ch5_folder)
gg.export_ground_truth(output, IMG_SIZE=IMG_SIZE, downsize_factor=DOWNSCALE)
[####################################################################################################>] (100%)