-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathwellplate_tools_pandas.py
566 lines (434 loc) · 18.4 KB
/
wellplate_tools_pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
# coding: utf-8
import pandas as pd
import csv
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
# from ipy_progressbar import ProgressBar
import progressbar
try:
from IPython.display import display
from ipywidgets import FloatProgress
except:
print('Could not import IPython.Display or ipywidgets')
import os
def extract_labels(nr, nc):
    """
    Build the column and row labels for a wellplate of the given layout.

    Column labels are the one-based column numbers as strings ('1' - '48'),
    row labels are 'A' - 'Z' followed by 'AA' - 'AF' (32 rows in total),
    which is enough for plates up to the 1536 well format.

    :param nr: number of rows of the wellplate, e.g. 8 (A-H) for a 96 wellplate
    :param nc: number of columns of the wellplate, e.g. 12 (1-12) for a 96 wellplate
    :return: lx, ly - list of column labels and list of row labels
    """
    # all available column labels: '1' ... '48'
    column_labels = [str(number) for number in range(1, 49)]

    # all available row labels: the alphabet first, then 'AA' ... 'AF'
    single_letters = [chr(ord('A') + offset) for offset in range(26)]
    double_letters = ['A' + letter for letter in single_letters[:6]]
    row_labels = single_letters + double_letters

    # only keep as many labels as the plate layout requires
    return column_labels[:nc], row_labels[:nr]
def convert_row_index(rowid):
    """
    Translate a row label of a wellplate into its one-based row number.

    Example: 'A' -> 1, 'B' -> 2, ..., 'Z' -> 26, 'AA' -> 27, ..., 'AF' -> 32

    :param rowid: string representing the row of a wellplate, e.g. 'B'
    :return: rowindex - one-based integer index of the row
    :raises ValueError: if rowid is not one of the 32 known row labels
    """
    # known row labels: the alphabet followed by 'AA' ... 'AF'
    letters = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    known_rowids = letters + ['A' + letter for letter in letters[:6]]

    # list positions are zero-based, the row index from ZEN IAS is one-based
    zero_based = known_rowids.index(rowid)
    return zero_based + 1
def get_well_all_parameters(df, wellid,
                            colname='all',
                            wellID_key='WellID',
                            rowID_key='RowID',
                            colID_key='ColumnID'):
    """
    Gets all or specific columns for a specific well.

    With colname='all' every column is returned. Otherwise the result contains
    the three well info columns followed by the requested column(s).

    :param df: original dataframe
    :param wellid: wellID, e.g. 'B4'
    :param colname: 'all', a single column name or a list of column names
    :param wellID_key: name of the column holding the well IDs
    :param rowID_key: name of the column holding the row indices
    :param colID_key: name of the column holding the column indices
    :return: new_df - new dataframe containing only data for a specific well
    """
    new_df = df.loc[df[wellID_key] == wellid]
    info_columns = [wellID_key, rowID_key, colID_key]

    if isinstance(colname, list):
        # several columns requested: flatten them behind the well info columns
        # (a nested list is not a valid column selector) and skip duplicates
        extra_columns = [name for name in colname if name not in info_columns]
        return new_df[info_columns + extra_columns]

    if colname != 'all':
        new_df = new_df[info_columns + [colname]]

    return new_df
def get_well_row(df, rowid, rowID_key='RowID'):
    """
    Select all entries of the dataframe that belong to one row of the wellplate.

    :param df: original dataframe containing all wells
    :param rowid: label of the row to be extracted, converted to its
        one-based index via convert_row_index
    :param rowID_key: name of the column holding the row indices
    :return: df_row - dataframe that only contains data for a specific row of a wellplate
    """
    # translate the row label into the one-based index stored in the table
    wanted_row = convert_row_index(rowid)

    # boolean mask marking all entries of the requested row
    is_wanted = df[rowID_key] == wanted_row
    return df.loc[is_wanted]
def convert_array_to_heatmap(hmarray, nr, nc):
    """
    Wrap an array into a dataframe labelled like a wellplate.

    :param hmarray: array with one value per well
    :param nr: number of rows of the wellplate
    :param nc: number of columns of the wellplate
    :return: heatmap_dataframe - dataframe with the row labels as index
        and the column labels as columns
    """
    # look up the labels matching the plate layout
    column_labels, row_labels = extract_labels(nr, nc)
    return pd.DataFrame(hmarray, columns=column_labels, index=row_labels)
def rename_columns(dfs, paramlist, verbose=False):
    """
    Rename the leading columns of a dataframe by position.

    The i-th column receives the i-th name from paramlist. The dataframe is
    modified in place and also returned.

    :param dfs: dataframe whose columns are renamed
    :param paramlist: new names for the first len(paramlist) columns
    :param verbose: if True every renaming is printed
    :return: dfs - the same dataframe with renamed columns
    """
    # Work on the complete list of names and assign it once at the end.
    # Renaming by name one column at a time would also rename any other
    # column that already carries that name (e.g. ['a', 'b'] -> ['b', 'c']).
    new_names = list(dfs.columns)

    for position, new_name in enumerate(paramlist):
        if position >= len(new_names):
            # more names than columns: nothing left to rename
            print('Column not find inside table for renaming. Doing nothing.')
            continue

        if verbose:
            print('Renamed : ', new_names[position], ' to ', new_name)

        new_names[position] = new_name

    # keep the name of the column index while swapping the labels
    dfs.columns = pd.Index(new_names, name=dfs.columns.name)

    return dfs
def create_heatmap_list_arrays(numparams, nr, nc):
    """
    Create one NaN-filled array per measured parameter plus one extra array
    for the object number. Every array has the shape of the wellplate.

    :param numparams: number of measured parameters w/o the actual object number
    :param nr: number of rows of the wellplate
    :param nc: number of columns of the wellplate
    :return: heatmaplist_array - list of numparams + 1 arrays of shape (nr, nc)
    """
    # one array for the object number + one for every measured parameter
    return [np.full([nr, nc], np.nan) for _ in range(numparams + 1)]
def fill_heatmaps(dfs, numparams, num_nonmp, nr, nc,
                  statfunc='mean',
                  showbar=False,
                  verbose=False,
                  wellID_key='WellID',
                  rowID_key='RowID',
                  colID_key='ColumnID'):
    """
    Create dictionary containing heatmaps (dataframes) for all measured parameters

    1) Determine how many wells actually contain data
    2) Loop over all wells
    3) Extract only data from current well from dataframe and calc statistics
    4) Save the results in a dictionary containing entries for all wells
    5) Fill the arrays with the values for the measured parameters
    6) Create a dictionary that contains heatmaps (dataframes) for all measured parameters

    The columns 'ID', 'Index' and 'ParentID' are removed from the statistics
    table if present. Every column of the statistics table from the fourth
    one onwards yields a heatmap, i.e. the well info columns are expected to
    be the first three columns.

    :param dfs: input data frame
    :param numparams: number of measured parameters except the object number
    :param num_nonmp: position of the first measured parameter column in dfs,
        i.e. the number of leading columns that are not measured parameters
    :param nr: number of rows of well plate --> 96 plate = 8
    :param nc: number of columns of well plate --> 96 plate = 12
    :param statfunc: statistics to calculate: 'mean', 'median', 'min' or 'max';
        for any other value the parameter columns stay NaN
    :param showbar: if True a progress bar is shown while processing the wells
    :param verbose: if True more output will be shown
    :param wellID_key: name of the column holding the well IDs
    :param rowID_key: name of the column holding the row indices
    :param colID_key: name of the column holding the column indices
    :return: heatmap_dict - dictionary containing one dataframe for every measured
        parameter plus one entry for the heatmap containing the object numbers
    :return: welldata_dict - dictionary containing entries for every well analyzed
        with the values calculated by the statistical function
    """
    welldata_dict = {}
    heatmap_dict = {}

    print('---------------------------------------------------')
    print('wellID_key : ', wellID_key)
    print('Found keys:')
    print(dfs.keys())
    print('---------------------------------------------------')

    # get all wells containing some data
    wells_real = dfs[wellID_key].value_counts()
    num_wells = len(wells_real)

    # table with one row per well that receives the calculated statistics
    df_stats = pd.DataFrame(index=range(num_wells), columns=dfs.columns)

    try:
        df_stats.drop(['ID', 'Index'], axis=1, inplace=True)
    except KeyError:
        print('Did not find ID and Index key in dataframe.')

    try:
        df_stats.drop(['ParentID'], axis=1, inplace=True)
    except KeyError:
        print('Did not find ParentID key in dataframe.')

    # create an additional column for the object numbers
    df_obj = pd.DataFrame(index=range(num_wells), columns=['ObjectNumbers'])

    # the progress display is optional: prefer the notebook widget and fall
    # back to the console progress bar when the widget cannot be created
    fp = None
    bar = None
    if showbar:
        try:
            fp = FloatProgress(min=1,
                               max=num_wells,
                               step=1,
                               description='Processing Wells',
                               orientation='horizontal')
            display(fp)
        except Exception:
            fp = None
            bar = progressbar.ProgressBar(max_value=num_wells)

    # the measured parameters are selected by position; a well subset has
    # the same columns as the complete table
    colnames = dfs.columns[list(range(num_nonmp, num_nonmp + numparams))]
    supported_stats = ('mean', 'median', 'min', 'max')

    # iterate over all wells that were detected and do the statistics
    for well in range(num_wells):

        if fp is not None:
            fp.value += 1
        elif bar is not None:
            bar.update(well)

        current_wellid = wells_real.index[well]

        if verbose:
            print("Found data for wells : ", current_wellid)

        # get all data for the current well from the overall dataframe
        df_tmp = get_well_all_parameters(dfs, current_wellid,
                                         wellID_key=wellID_key,
                                         rowID_key=rowID_key,
                                         colID_key=colID_key)

        # write directly into the table - assigning to df_stats.iloc[well][...]
        # may only modify a temporary copy of the row
        df_stats.at[well, wellID_key] = current_wellid
        df_stats.at[well, rowID_key] = df_tmp[rowID_key].iloc[0]
        df_stats.at[well, colID_key] = df_tmp[colID_key].iloc[0]

        if statfunc in supported_stats:
            # restrict the statistics to the measured parameters so that
            # text columns (e.g. the well ID) never enter the calculation
            stats_out = getattr(df_tmp[colnames], statfunc)(axis=0)
            for col in colnames:
                if col in df_stats.columns:
                    df_stats.at[well, col] = stats_out[col]

        # number of entries = number of objects inside the current well
        df_obj.at[well, 'ObjectNumbers'] = df_tmp.shape[0]

    # join the data frame with object numbers to df_stats
    df_stats = pd.concat([df_stats, df_obj], axis=1)

    # create welldata_dict using the well ID as key
    for well in range(num_wells):
        wellid = df_stats[wellID_key].iloc[well]
        welldata_dict[wellid] = df_stats.iloc[well]

    # the first three columns hold the well info, everything else is a heatmap
    for heatmap_name in df_stats.columns[3:]:

        # create heatmap based on the platetype
        heatmap_array = np.full([nr, nc], np.nan)
        print('HeatMap: ', heatmap_name)

        # transfer the values to the position of their well; row and column
        # indices inside the table are one-based
        for rowindex, colindex, hm_value in zip(df_stats[rowID_key],
                                                df_stats[colID_key],
                                                df_stats[heatmap_name]):
            heatmap_array[int(rowindex) - 1, int(colindex) - 1] = hm_value

        # convert array to heatmap dataframe
        heatmap_dict[heatmap_name] = convert_array_to_heatmap(heatmap_array, nr, nc)

    return heatmap_dict, welldata_dict
def showheatmap(heatmap, parameter2display,
                fontsize_title=12,
                fontsize_label=10,
                colormap='Blues',
                linecolor='black',
                linewidth=1.0,
                save=False,
                savename='Heatmap.png',
                robust=True,
                filename='test.csv',
                dpi=100,
                apeer=False):
    """
    Display a single heatmap and optionally save it as an image.

    :param heatmap: dataframe representing the wellplate to be displayed
    :param parameter2display: name of the parameter, used as title of the plot
    :param fontsize_title: font size of the title
    :param fontsize_label: font size of the tick labels (axes and colorbar)
    :param colormap: colormap used for the heatmap
    :param linecolor: color of the lines separating the wells
    :param linewidth: width of the lines separating the wells
    :param save: if True the figure is saved
    :param savename: name of the saved image, only used if apeer is True
    :param robust: passed on to seaborn.heatmap
    :param filename: if apeer is False the name of the saved image is
        derived from this name: <filename w/o extension>_HM_<parameter>.png
    :param dpi: resolution of the saved image
    :param apeer: if True savename is used as it is
    :return: savename - name of the saved image or False if nothing was saved
    """
    # create figure with subplots
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))

    # create the heatmap
    ax = sns.heatmap(heatmap,
                     ax=ax,
                     cmap=colormap,
                     linecolor=linecolor,
                     linewidths=linewidth,
                     square=True,
                     robust=robust,
                     annot=False,
                     cbar_kws={"shrink": 0.68})

    # customize the plot to your needs
    ax.set_title(parameter2display,
                 fontsize=fontsize_title,
                 fontweight='normal')

    # set the size of all tick labels at once - accessing tick.label is not
    # possible anymore with current matplotlib versions
    ax.tick_params(axis='both', which='major', labelsize=fontsize_label)

    # the colorbar is the last axes of the figure - modify its labels as well
    cax = fig.axes[-1]
    cax.tick_params(labelsize=fontsize_label)

    if not save:
        return False

    if not apeer:
        # strip the extension whatever its length is
        savename = os.path.splitext(filename)[0] + '_HM_' + parameter2display + '.png'

    fig.savefig(savename,
                dpi=dpi,
                orientation='portrait',
                transparent=False,
                facecolor=None)
    print('Heatmap image saved as: ', savename)

    return savename
def showheatmap_all(heatmap_dict, subplots,
                    fontsize_title=16,
                    fontsize_label=12,
                    colormap='Blues',
                    linecolor='black',
                    linewidth=1.0,
                    save=False,
                    robust=True,
                    filename='Test.czi',
                    dpi=100,
                    deletelast=False):
    """
    Display all heatmaps of a dictionary inside one figure and optionally
    save the figure as an image.

    :param heatmap_dict: dictionary with the heatmaps (dataframes), the keys
        are used as titles of the subplots
    :param subplots: grid of subplots as [rows, columns]
    :param fontsize_title: font size of the subplot titles
    :param fontsize_label: font size of the tick labels
    :param colormap: colormap used for the heatmaps
    :param linecolor: color of the lines separating the wells
    :param linewidth: width of the lines separating the wells
    :param save: if True the figure is saved as <filename w/o extension>_HM_all.png
    :param robust: passed on to seaborn.heatmap
    :param filename: name used to derive the name of the saved image
    :param dpi: resolution of the saved image
    :param deletelast: if True the last subplot of the grid is removed
    :return: savename - name of the saved image or False if nothing was saved
    """
    # create figure with subplots - squeeze=False always yields a 2D array of
    # axes, so single-row, single-column and 1x1 grids can be indexed as well
    fig, axn = plt.subplots(subplots[0], subplots[1],
                            figsize=(12, 10),
                            squeeze=False)

    # cycle over all heatmaps and fill the subplots one after another
    for plotid, (key, heatmap) in enumerate(heatmap_dict.items()):

        # create the actual heatmap
        ax = sns.heatmap(heatmap, ax=axn.flat[plotid],
                         cmap=colormap,
                         linecolor=linecolor,
                         linewidths=linewidth,
                         square=True,
                         robust=robust,
                         annot=False,
                         cbar_kws={"shrink": 1.0})

        # customize the plot to your needs
        ax.set_title(key, fontsize=fontsize_title)

        # set the size of all tick labels at once - accessing tick.label is
        # not possible anymore with current matplotlib versions
        ax.tick_params(axis='both', which='major', labelsize=fontsize_label)

    # delete last subplot for an uneven number of parameters
    if deletelast:
        axn[subplots[0] - 1, subplots[1] - 1].remove()

    # modify the layout so that titles do not overlap
    plt.tight_layout()

    if not save:
        return False

    # strip the extension whatever its length is
    savename = os.path.splitext(filename)[0] + '_HM_all.png'
    fig.savefig(savename,
                dpi=dpi,
                orientation='portrait',
                transparent=False,
                facecolor=None)
    print('Heatmap image saved as: ', savename)

    return savename
def getwellIDfromfilename(filename):
    """
    Derive the well information from the name of an image file.
    The parsing is tied to the naming scheme shown below and has to be
    adapted if a different scheme is used.

    Example:
    ------------------------------------
    filename = nuc-B-04.tif

    colid:    04
    colindex: 4
    rowid:    B
    rowindex: 2
    wellid:   B4
    ------------------------------------

    :param filename: filename of the image where the name contains the wellinfo
    :return: wellid - well identifier, e.g. B4
    :return: rowindex - index of the row starting with 1
    :return: colindex - index of column starting with 1
    """
    # strip the folder and the extension from the filename
    stem = os.path.splitext(os.path.basename(filename))[0]

    # the column is stored in the last two characters of the name
    num_col_digits = 2
    colindex = int(stem[-num_col_digits:])

    # the row is the single character two positions in front of the column digits
    row_pos = -num_col_digits - 2
    rowid = stem[row_pos:row_pos + 1]
    rowindex = convert_row_index(rowid)

    wellid = rowid + str(colindex)

    return wellid, rowindex, colindex
def addWellinfoColumns(dataframe):
    """
    Insert the columns WellID, RowID and ColumnID at the beginning of the
    dataframe. All rows get the placeholder values 'A1', 1 and 1.
    The dataframe is modified in place.

    :param dataframe: input pandas dataframe
    :return: dataframe - the same dataframe with the three additional columns
    """
    # column name and placeholder value in the order of insertion
    placeholders = (('WellID', 'A1'), ('RowID', 1), ('ColumnID', 1))

    for position, (column_name, default_value) in enumerate(placeholders):
        dataframe.insert(position, column_name, default_value)

    return dataframe
def wellinfo2dataframe(df, colname_with_info):
    """
    This function adds the columns WellID, RowID and ColumnID to the beginning
    of the dataframe and fills them based on the image filename of every row.

    :param df: input pandas dataframe (modified in place)
    :param colname_with_info: name of the column containing the image
        filenames the well information is derived from
    :return: df - modified dataframe
    """
    # add wellinfo to the first 3 columns
    df = addWellinfoColumns(df)

    # get the well info based on the image filename for every row
    wellinfos = [getwellIDfromfilename(name) for name in df[colname_with_info]]

    # replace the placeholder columns as a whole - DataFrame.set_value does
    # not exist anymore and writing complete columns is independent of the
    # index labels of the dataframe
    df['WellID'] = [info[0] for info in wellinfos]
    df['RowID'] = [info[1] for info in wellinfos]
    df['ColumnID'] = [info[2] for info in wellinfos]

    return df
def getrowandcolumn(platetype=96):
    """
    Look up the layout of a wellplate based on its total number of wells.

    :param platetype: number total wells of plate (6, 12, 24, 48, 96, 384 or 1536),
        anything that can be converted by int() is accepted
    :return: nr - number of rows of wellplate
    :return: nc - number of columns for wellplate
    :raises ValueError: if the plate type is not supported
    """
    # total number of wells --> (rows, columns)
    layouts = {6: (2, 3),
               12: (3, 4),
               24: (4, 6),
               48: (6, 8),
               96: (8, 12),
               384: (16, 24),
               1536: (32, 48)}

    platetype = int(platetype)

    if platetype not in layouts:
        # fail with a clear message instead of an unbound local variable
        raise ValueError('Unsupported plate type: ' + str(platetype) +
                         ' - supported are: ' + str(sorted(layouts)))

    nr, nc = layouts[platetype]

    return nr, nc
def remove_units(df):
    """
    Drop the row with the index label 0 from the table, which is the row
    holding the units. The dataframe is modified in place.

    :param df: input dataframe
    :return: df - the same dataframe without the row labelled 0
    """
    units_row_label = 0
    df.drop(index=[units_row_label], inplace=True)

    return df
def convert_dec_sep(df, np):
    """
    Convert columns storing numbers as text with a decimal comma into
    float columns. The dataframe is modified in place.

    Columns that cannot be converted are left untouched and reported.

    :param df: input dataframe
    :param np: position of the first column to be converted, all columns from
        there to the end of the table are processed (the name shadows the
        usual numpy alias inside this function)
    :return: df - the same dataframe with converted columns
    """
    for col_idx in range(np, len(df.columns)):

        try:
            converted = (df.iloc[:, col_idx]
                         .str.replace(',', '.', regex=False)
                         .astype('float'))
        except (AttributeError, ValueError, TypeError):
            # not a text column or not every entry is a number
            print('No correction of types possible for column: ', df.columns[col_idx])
            continue

        # Replace the column as a whole. Assigning via df.iloc[:, col_idx]
        # writes into the existing column and keeps (or fails on) its dtype.
        if hasattr(df, 'isetitem'):
            df.isetitem(col_idx, converted)
        else:
            df[df.columns[col_idx]] = converted

    return df
def check_separator(csvfile):
    """
    Determine the separator used inside a CSV file.

    The file is opened with the python engine of pandas and sep=None, which
    makes the parser detect the separator. The detected value is read from
    the dialect of the underlying reader.

    NOTE(review): this relies on non-public attributes of the pandas reader
    (reader._engine.data.dialect) - verify after pandas upgrades.

    :param csvfile: file to be checked, passed on to pandas.read_csv
    :return: sep - detected separator
    """
    reader = pd.read_csv(csvfile, sep=None, engine='python', iterator=True)

    try:
        sep = reader._engine.data.dialect.delimiter
    finally:
        # release the file handle even if the lookup above fails
        reader.close()

    return sep
def determine_plotgrid(num_parameter, columns=2):
    """
    Determine the grid of subplots required to show all parameters.

    :param num_parameter: number of parameters (plots) to be shown
    :param columns: number of columns of the grid
    :return: plotgrid - list [plotrows, columns]
    :return: empty - True if the last row of the grid is not completely filled
    """
    # number of completely filled rows and number of plots left over
    full_rows, remainder = divmod(num_parameter, columns)

    # leftover plots need one additional row, which then has empty positions
    empty = bool(remainder != 0)
    plotrows = int(full_rows) + 1 if empty else int(full_rows)

    plotgrid = [plotrows, columns]

    return plotgrid, empty