Ground Truth¶

Imports¶

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
from src.ground_truth import AnnotationReader, GroundTruth
# Ask IPython's inline backend to render matplotlib figures as SVG.
# NOTE(review): recent IPython releases deprecate importing this helper from
# IPython.display in favour of matplotlib_inline.backend_inline -- confirm
# against the IPython version this notebook is pinned to.
set_matplotlib_formats('svg')

# Absolute input locations used by the readers below: the annotation files
# and the HDF5 (ch5) files.
annotations_folder = '/Users/jcboyd/Documents/hcs/input/annotations/'
ch5_folder = '/Users/jcboyd/Documents/hcs/input/hdf5/'

# Human-readable class names, looked up by position when labelling plots.
# NOTE(review): position i appears to correspond to numeric class label i + 1
# in the annotation tables -- confirm against the annotation format.
classes = [
    'interphase',
    'large',
    'prometaphase',
    'metaphase',
    'bright',
    'anaphase',
    'early anaphase',
    'polylobed',
    'apoptosis',
]

Read annotation data¶

# Build a reader over the annotation folder and the HDF5 (ch5) folder, then
# load the annotations; the resulting table is read from ar.df_ground_truth.
ar = AnnotationReader(annotations_folder, ch5_folder)
# NOTE(review): read_file=False presumably forces a fresh parse rather than
# loading previously saved data -- confirm in src.ground_truth.
ar.read_annotation_data(read_file=False)
# Preview the first rows of the ground-truth table.
ar.df_ground_truth.head()

[####################################################################################################>] (100%)

# Persist the reader's data.
# NOTE(review): the destination is chosen inside AnnotationReader and is not
# visible from this notebook -- confirm where it writes.
ar.save_data()

Saving data...
Done!

# Number of ground-truth rows per numeric class label.
ar.df_ground_truth.groupby('class')['class'].count()

class
1    4273
2     519
3     170
4     211
5     342
6     112
7      16
8     481
9     406
Name: class, dtype: int64

Quality check¶

# Load a second set of annotations from the annotations_control folder; it is
# compared against the first reader (ar) further down.
# NOTE: this rebinds the notebook-global annotations_folder, so re-running the
# first reader cell after this one would read the control folder instead.
annotations_folder = '/Users/jcboyd/Documents/hcs/input/annotations_control/'
ar2 = AnnotationReader(annotations_folder, ch5_folder)
# Called with default arguments, unlike the first reader (read_file=False).
ar2.read_annotation_data()

[####################################################################################################>] (100%)

# Number of control-set rows per numeric class label.
ar2.df_ground_truth.groupby('class')['class'].count()

class
1    289
2    203
3     43
4     44
5     91
8    156
9    120
Name: class, dtype: int64

# Compare the control annotations (ar2) against the main annotation set (ar).
comparison = ar2.make_comparison(ar)

# Bar geometry: one stacked bar per compared class.
# NOTE(review): N and the label positions below are hard-coded; they assume
# comparison has exactly seven entries in the order of the listed classes.
N = 7
width = 0.35
ind = np.arange(N)

# Element 0 of each comparison entry is drawn as "Agree", element 1 as
# "Disagree".
agrees = [comparison[key][0] for key in comparison.keys()]
disagrees = [comparison[key][1] for key in comparison.keys()]

# Names of the seven plotted classes, picked from `classes` by position.
tick_labels = [classes[position] for position in (0, 1, 2, 3, 4, 7, 8)]

ax = plt.gca()
p1 = ax.bar(ind, agrees, width, color='red')
# Stack the disagreements on top of the agreements.
p2 = ax.bar(ind, disagrees, width, bottom=agrees, color='blue')

ax.set_ylabel('Total')
ax.set_title('Comparison of annotations by class')
ax.set_xticks(ind)
ax.set_xticklabels(tick_labels, rotation='vertical')
ax.legend((p1[0], p2[0]), ('Agree', 'Disagree'))
ax.set_ylim([0, 180])

plt.show()

Explore bounding box data¶

# Bounding-box extents for every annotation, computed column-wise rather than
# with two row-by-row iterrows() passes over the table.
boxes = ar.df_ground_truth
heights = (boxes['bottom'] - boxes['top']).tolist()
widths = (boxes['right'] - boxes['left']).tolist()

fig = plt.figure(figsize=(6, 3))

# One histogram per dimension, side by side.
for i, data in enumerate([heights, widths]):
    fig.add_subplot(1, 2, i + 1)
    plt.title(['heights', 'widths'][i])
    # Plot this subplot's own series; plotting `heights` unconditionally
    # would make the "widths" panel a copy of the "heights" panel.
    plt.hist(data, bins=20, color=['red', 'blue'][i])

plt.tight_layout()
plt.show()

# 99th percentile of the box heights and widths, shown as a
# (height, width) pair.
percentile = 99
tuple(np.percentile(extent, percentile) for extent in (heights, widths))

(120.71000000000004, 121.0)

Export crops¶

# Crop size and downscale factor handed to the exporter; both are also encoded
# in the output directory name (here ./output/ground_truth/112d4).
# NOTE(review): whether IMG_SIZE is measured before or after downscaling is
# not visible here -- confirm in GroundTruth.export_ground_truth.
IMG_SIZE = 112
DOWNSCALE = 4
output = './output/ground_truth/%dd%d' % (IMG_SIZE, DOWNSCALE)
# Export from the ground-truth table of the main reader, reading image data
# from the HDF5 (ch5) folder.
gg = GroundTruth(ar.df_ground_truth, ch5_folder)
gg.export_ground_truth(output, IMG_SIZE=IMG_SIZE, downsize_factor=DOWNSCALE)

[####################################################################################################>] (100%)

	bottom	class	field	left	right	top	well
0	78	1	01	359	391	20	A01
1	589	1	01	180	226	543	A01
2	659	1	01	731	789	611	A01
3	578	1	01	839	902	534	A01
4	641	1	01	528	580	602	A01

	bottom	class	field	left	right	top	well
0	78	1	01	359	391	20	A01
1	589	1	01	180	226	543	A01
2	659	1	01	731	789	611	A01
3	578	1	01	839	902	534	A01
4	641	1	01	528	580	602	A01

	bottom	class	field	left	right	top	well
0	78	1	01	359	391	20	A01
1	589	1	01	180	226	543	A01
2	659	1	01	731	789	611	A01
3	578	1	01	839	902	534	A01
4	641	1	01	528	580	602	A01

	bottom	class	field	left	right	top	well
0	78	1	01	359	391	20	A01
1	589	1	01	180	226	543	A01
2	659	1	01	731	789	611	A01
3	578	1	01	839	902	534	A01
4	641	1	01	528	580	602	A01