#!/usr/bin/env python
# Functions of the image analysis pipeline. This should be imported from the main_env
# imports
import os, sys, time, random, string, shutil, math, itertools, pickle, scipy, zipfile, matplotlib
import copy as cp
from datetime import date
import pandas as pd
from openpyxl.styles import PatternFill, Font
from openpyxl.styles.borders import Border, Side
import matplotlib.colors as mcolors
import multiprocessing as multiproc
import numpy as np
from PIL import Image as PIL_Image
from PIL import ImageEnhance, ImageDraw, ImageFont, ImageColor
import PIL
from sklearn.metrics import auc as sklearn_auc
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from scipy.cluster import hierarchy
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable
import traceback
from PIL import ImageFile, ImageStat
# set parms for matplotlib
#plt.rcParams['font.family'] = 'Arial'
matplotlib.use('Agg')
# define dirs
ScriptsDir = "/workdir_app/scripts"
CondaDir = "/opt/conda"
# general variables
PipelineName = "Q-PHAST"
blank_spot_names = {"h2o", "h20", "water", "empty", "blank"}
allowed_image_endings = {"tiff", "jpg", "jpeg", "png", "tif", "gif"}
#parms_colonyzer = ("greenlab", "lc", "diffims") # original, most testing based on this
#parms_colonyzer = ("") # no extra parms
# functions
def get_date_and_time_for_print():
"""Gets the date of today"""
current_day = date.today().strftime("%d/%m/%Y")
current_time = time.strftime("%H:%M:%S", time.localtime())
return "[%s, %s]"%(current_day, current_time)
def print_with_runtime(x):
"""prints with runtime info"""
#str_print = "%s %s"%(get_date_and_time_for_print(), x)
#run_cmd_simple("echo '%s'"%str_print)
print(x) # this does not include the time, which is good because docker does not have the correct time
def id_generator(size=10, chars=string.ascii_uppercase + string.digits, already_existing_ids=set()):
""" already_existing_ids is a set that indicates whihc IDs can't be picked """
ID = ''.join(random.choice(chars) for _ in range(size))
while ID in already_existing_ids:
ID = ''.join(random.choice(chars) for _ in range(size))
return ID
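# Illustrative usage (hypothetical values): id_generator(size=5, already_existing_ids={"AAAAA"})
# keeps sampling until it returns a 5-character uppercase/digit ID different from "AAAAA", e.g. "K3G9Q".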
def remove_file(f):
if os.path.isfile(f):
try: run_cmd("rm %s > /dev/null 2>&1"%f)
except: pass
def delete_folder(f):
if os.path.isdir(f): shutil.rmtree(f)
def make_folder(f):
if not os.path.isdir(f): os.mkdir(f)
def delete_file_or_folder(f):
"""Takes a path and removes it"""
if os.path.isdir(f): shutil.rmtree(f)
if os.path.isfile(f): os.unlink(f)
def run_cmd_simple(cmd):
"""Runs os.system in cmd"""
out_stat = os.system(cmd)
if out_stat!=0: raise ValueError("\n%s\n did not finish correctly. Out status: %i"%(cmd, out_stat))
def run_cmd(cmd, env='main_env'):
"""This function runs a cmd with a given env"""
# define the cmds
SOURCE_CONDA_CMD = "source %s/etc/profile.d/conda.sh > /dev/null 2>&1"%CondaDir
cmd_prefix = "%s && conda activate %s > /dev/null 2>&1 &&"%(SOURCE_CONDA_CMD, env)
# define the cmd
cmd_to_run = "%s %s"%(cmd_prefix, cmd)
# define a tmpdir to write the bash scripts
tmpdir = '/workdir_app/.tmpdir_cmds'
os.makedirs(tmpdir, exist_ok=True)
# define a bash script to print the cmd and run
nchars = 15
already_existing_ids = {f.split(".sh")[0] for f in os.listdir(tmpdir) if len(f)==(nchars+3) and f.endswith(".sh")} # +3 for the ".sh"
bash_script = "%s/%s.sh"%(tmpdir, id_generator(size=nchars, already_existing_ids=already_existing_ids, chars=string.ascii_uppercase))
# write the bash script
open(bash_script, "w").write(cmd_to_run+"\n")
# run
out_stat = os.system("bash %s"%bash_script)
if out_stat!=0: raise ValueError("\n%s\n did not finish correctly. Out status: %i"%(cmd_to_run, out_stat))
# remove the script
run_cmd_simple("rm %s"%bash_script)
def get_matplotlib_color_as_hex(c):
"""Takes a matplotlib color and returns a hex"""
# find the rgb
if c in mcolors.BASE_COLORS: return mcolors.rgb2hex(mcolors.BASE_COLORS[c])
elif c in mcolors.CSS4_COLORS: return mcolors.CSS4_COLORS[c]
else: raise ValueError("invalid color %s"%c)
def save_colored_plate_layout(df, filename):
# saves colored plate layout
# define the tmp
filename_tmp = "%s.tmp.xlsx"%filename
# edit the excel
with pd.ExcelWriter(filename_tmp, engine="openpyxl") as writer:
# define the sheet_name
sheet_name = "sheet1"
# Export DataFrame content
df.to_excel(writer, sheet_name=sheet_name)
# define the sheet
sheet = writer.sheets[sheet_name]
# map each col to the fields
Icol_to_field = dict(zip(range(len(df.columns)) , df.columns))
# go through each row
for Irow, row_tuple in enumerate(sheet):
# define the real Irow
real_Irow = Irow-1
if real_Irow<0: continue
# go through each col
for Icol, cell in enumerate(row_tuple):
# define the real Icol
real_Icol = Icol-1
if real_Icol<0 or real_Irow<0: continue
# define the value
col_name = Icol_to_field[real_Icol]
value = df[col_name].iloc[real_Irow]
# define the color
if value.lower()=="pool": color = "red"
elif value.lower() in blank_spot_names: color = "white"
else:
if (real_Icol in {0, 1, 2, 3, 4, 5} and real_Irow in {0, 1, 2, 3}): color = "c"
elif (real_Icol in {6, 7, 8, 9, 10, 11} and real_Irow in {4, 5, 6, 7}): color = "c"
else: color = "orange"
# format
cell.fill = PatternFill("solid", start_color=get_matplotlib_color_as_hex(color)[1:]) # change background color
cell.border = Border(left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style="thin"))
cell.font = Font(name='Calibri', size=8, bold=False, italic=False, vertAlign=None, underline='none', strike=False, color=get_matplotlib_color_as_hex("black")[1:])
os.rename(filename_tmp, filename)
def file_is_empty(path):
"""ask if a file is empty or does not exist """
if not os.path.isfile(path):
return_val = True
elif os.stat(path).st_size==0:
return_val = True
else:
return_val = False
return return_val
def run_get_plate_layout(strains_excel, drugs_excel, outdir):
"""Gets the plate layouts to perform the experiment from the list of strains and drugs. It writes the results into outdir. It generates the 'plate_layout.xlsx' (the picture of the actual table), the 'plate_layout_long.xlsx' (the long format layout)"""
##### LOAD AND DEBUG #####
#print_with_runtime("Debugging inputs to design plate layout ...")
# load
df_strains = pd.read_excel(strains_excel)
df_drugs = pd.read_excel(drugs_excel)
# debug and format
if set(df_strains.columns)!={"strain"}: raise ValueError("The strains excel should have these columns: 'strain'")
if set(df_drugs.columns)!={"plate_batch", "plate", "drug", "concentration"}: raise ValueError("The drugs excel should have these columns: 'plate_batch', 'plate', 'drug', 'concentration'")
if len(df_strains)!=24: raise ValueError("the strains excel should have 24 strains")
if len(df_drugs)!=len(df_drugs[["plate_batch", "plate"]].drop_duplicates()): raise ValueError("The combination of plate_batch and plate should be unique")
if len(df_drugs)!=len(df_drugs[["drug", "concentration"]].drop_duplicates()): raise ValueError("The combination of drug and concentration should be unique")
df_strains["strain"] = df_strains.strain.apply(lambda s: s.rstrip())
for strain in set(df_strains.strain):
if " " in strain: raise ValueError("The strain names should not have spaces. '%s' is incorrect"%strain)
for k in df_drugs.keys():
if any(pd.isna(df_drugs[k])): raise ValueError("The drugs table should be a perfect rectangle. Column %s has empty cells"%k)
for k in df_strains.keys():
if any(pd.isna(df_strains[k])): raise ValueError("The strains table should be a perfect rectangle. Column %s has empty cells"%k)
df_drugs["concentration"] = df_drugs["concentration"].apply(lambda x: str(x).replace(",", ".")) # format as floats the concentration
for f, function_format in [("plate_batch", str), ("plate", int), ("drug", str), ("concentration", float)]:
try: df_drugs[f] = df_drugs[f].apply(function_format)
except: raise ValueError("The '%s' column should be formattable as %s"%(f, function_format))
if sum(df_drugs.concentration==0)!=1: raise ValueError("There should be only one plate with a concentration of 0.0")
strange_plates = set(df_drugs.plate).difference({1, 2, 3, 4})
if len(strange_plates)>0: raise ValueError("There are strange numbers in 'plate': %s"%strange_plates)
df_strains["strain"] = df_strains.strain.apply(lambda x: x.rstrip().lstrip())
##########################
######### CREATE PLATE LAYOUT ##########
#print_with_runtime("Getting plate layout...")
# create df
df_plate_layout = pd.DataFrame(index=list("ABCDEFGH"), columns=list(range(1, 13)))
# define all strains
all_strains = list(df_strains.strain)
# fill the first quadrant
I = 0
for row in ["A", "B", "C", "D"]:
for col in range(1, 6+1):
df_plate_layout.loc[row, col] = all_strains[I]; I+=1
# fill the second quadrant, mirror of the first
I = 0
for row in ["A", "B", "C", "D"]:
for col in reversed(range(7, 12+1)):
df_plate_layout.loc[row, col] = all_strains[I]; I+=1
# fill the third quadrant, mirror of the first
I = 0
for row in ["E", "F", "G", "H"]:
for col in reversed(range(1, 6+1)):
df_plate_layout.loc[row, col] = all_strains[I]; I+=1
# fill the fourth quadrant, which is the same as the first
I = 0
for row in ["E", "F", "G", "H"]:
for col in range(7, 12+1):
df_plate_layout.loc[row, col] = all_strains[I]; I+=1
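# Illustrative layout (assuming strains named s1..s24): each quadrant contains all 24 strains, and
# quadrants 2 and 3 mirror quadrant 1, so e.g. row A reads s1 s2 s3 s4 s5 s6 | s6 s5 s4 s3 s2 s1
# and row E reads s6 s5 s4 s3 s2 s1 | s1 s2 s3 s4 s5 s6.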
# save excel colored
save_colored_plate_layout(df_plate_layout, "%s/plate_layout.xlsx"%outdir)
########################################
########## GET THE LONG PLATE LAYOUT #############
#print_with_runtime("Getting plate layout in long format...")
# change the index
df_plate_layout.index = list(range(1, 9))
# create the long df for the plate
df_plate_layout_long_core = pd.concat([pd.DataFrame({"column":[col]*8, "strain":df_plate_layout[col], "row":list(df_plate_layout.index)}) for col in df_plate_layout.columns]).sort_values(by=["row", "column"]).reset_index(drop=True)
# create a single df_plate_layout_long with a copy of df_plate_layout_long_core for each combination of plate_batch, plate
def get_df_plate_layout_long_one_row_df_drugs(r):
df = cp.deepcopy(df_plate_layout_long_core)
for f in r.keys(): df[f] = r[f]
return df
df_plate_layout_long = pd.concat([get_df_plate_layout_long_one_row_df_drugs(r) for I,r in df_drugs.iterrows()])
# add the 'bad_spot' field, which allows manual tuning
df_plate_layout_long["bad_spot"] = 'F'
# checks
if len(df_plate_layout_long)!=len(df_plate_layout_long.drop_duplicates()): raise ValueError("The df should be unique")
# save
plate_layout_long_file = "%s/plate_layout_long.xlsx"%outdir; plate_layout_long_file_tmp = "%s.tmp.xlsx"%plate_layout_long_file
df_plate_layout_long[["plate_batch", "plate", "row", "column", "strain", "drug", "concentration", "bad_spot"]].reset_index(drop=True).to_excel(plate_layout_long_file_tmp, index=False)
os.rename(plate_layout_long_file_tmp, plate_layout_long_file)
##################################################
def get_fullpath(x):
"""Takes a path and substitutes it bu the full path"""
# normal
if x.startswith("/"): return x
# a ./
elif x.startswith("./"): return "%s/%s"%(os.getcwd(), "/".join(x.split("/")[1:]))
# others (including ../)
else: return "%s/%s"%(os.getcwd(), x)
def soft_link_files(origin, target):
"""This function takes an origin file and makes it accessible through a link (target)"""
if file_is_empty(target):
# rename as full paths
origin = get_fullpath(origin)
target = get_fullpath(target)
# check that the origin exists
if file_is_empty(origin): raise ValueError("The origin %s should exist"%origin)
# remove previous link
try: run_cmd("rm %s > /dev/null 2>&1"%target)
except: pass
soft_linking_std = "%s.softlinking.std"%(target)
run_cmd("ln -s %s %s > %s 2>&1"%(origin, target, soft_linking_std))
remove_file(soft_linking_std)
# check that it worked
if file_is_empty(target): raise ValueError("The target %s should exist"%target)
def get_dir(filename): return "/".join(filename.split("/")[0:-1])
def get_file(filename): return filename.split("/")[-1]
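# Illustrative examples: get_dir("/output/batch1/img.tif") returns "/output/batch1" and
# get_file("/output/batch1/img.tif") returns "img.tif".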
def process_image_rotation_and_contrast(Iimage, nimages, raw_image, processed_image):
"""Generates a processed image based on raw image that has enhanced contrast and left rotation."""
# log
image_short_name = "<images>/%s/%s"%(get_dir(raw_image).split("/")[-1], get_file(raw_image))
#print_with_runtime("Improving contrast and rotating image %i/%i: %s"%(Iimage, nimages, image_short_name))
if file_is_empty(processed_image):
# define the imageJ binary
imageJ_binary = "/workdir_app/Fiji.app/ImageJ-linux64"
# define tmp files
processed_image_tmp = "%s.tmp.%s"%(processed_image, processed_image.split(".")[-1]); remove_file(processed_image_tmp)
# create a macro to change the image
lines = ['raw_image = "%s";'%raw_image,
'processed_image = "%s";'%processed_image_tmp,
'open(raw_image);',
'run("Flip Vertically");',
'run("Rotate 90 Degrees Left");'
'run("Enhance Contrast...", "saturated=0.3");',
'saveAs("tif", "%s");'%(processed_image_tmp),
'close();'
]
macro_file = "%s.processing_script.ijm"%raw_image
remove_file(macro_file)
open(macro_file, "w").write("\n".join(lines)+"\n")
# run the macro
imageJ_std = "%s.generating.std"%processed_image_tmp
run_cmd("%s --headless -macro %s > %s 2>&1"%(imageJ_binary, macro_file, imageJ_std))
# check that the macro ended well
error_lines = [l for l in open(imageJ_std, "r").readlines() if any([x in l.lower() for x in {"error", "fatal"}])]
if len(error_lines)>0:
raise ValueError("imageJ did not work on '%s'. Check '%s' to see what happened."%(image_short_name, imageJ_std.replace("/output", "<output dir>")));
# clean
for f in [macro_file, imageJ_std]: remove_file(f)
# keep
os.rename(processed_image_tmp, processed_image)
def generates_image_w_appended_image_on_the_right(input_image_file, output_image_file, appended_image_file, image_ending):
"""Generates an output_image_file, which is the result of input_image_file + appended_image_file"""
if file_is_empty(output_image_file):
# load image
input_image = PIL_Image.open(input_image_file)
appended_image = PIL_Image.open(appended_image_file)
# get dimensions
input_image_w, input_image_h = input_image.size
appended_image_w, appended_image_h = appended_image.size
# create the image
max_h = max([input_image_h, appended_image_h])
total_w = input_image_w + appended_image_w
output_image = PIL_Image.new('RGB', (total_w, max_h))
# Paste each image into the new image side by side
output_image.paste(input_image, (0, 0))
output_image.paste(appended_image, (input_image_w, 0))
# Save the concatenated image
output_image_file_tmp = "%s.tmp.%s"%(output_image_file, image_ending)
output_image.save(output_image_file_tmp)
os.rename(output_image_file_tmp, output_image_file)
def process_image_rotation_all_images_batch(Ibatch, nbatches, raw_outdir, processed_outdir, plate_batch, expected_images, image_ending, enhance_image_contrast, image_highest_contrast):
"""Runs the processing of images for all images in one batch"""
# log
log_txt = "Processing images for batch %i/%i: %s"%(Ibatch, nbatches, plate_batch)
if enhance_image_contrast is True: log_txt += " (increasing contrast)"
print_with_runtime(log_txt)
# if there are no processed files
if not os.path.isdir(processed_outdir):
# clean
delete_folder(processed_outdir)
# make a tmp folder where to save the processed images
processed_outdir_tmp = "%s_tmp"%processed_outdir
delete_folder(processed_outdir_tmp); make_folder(processed_outdir_tmp)
# make a folder with images of the highest contrast appended
merged_images_dir = "%s_merged_images"%processed_outdir
make_folder(merged_images_dir)
# define the width and the height of all images
image_w, image_h = PIL_Image.open("%s/%s"%(raw_outdir, expected_images[0])).size
# get the merged_images
inputs_fn = [("%s/%s"%(raw_outdir, img), "%s/%s"%(merged_images_dir, img), image_highest_contrast, image_ending) for img in expected_images]
run_function_in_parallel(inputs_fn, generates_image_w_appended_image_on_the_right)
# define the contrast as based on enhance_image_contrast
if enhance_image_contrast is True:
#line_contrast = 'run("Enhance Contrast...", "saturated=0.3");', # initial, uneven contrast
lines_contrast = ['run("Enhance Contrast...", "saturated=0.3 stretch");'] # even contrast, better than equalize
#line_contrast = 'run("Enhance Contrast...", "saturated=0.3 equalize");', # similar, even contrast. The problem is that it is too bright
else: lines_contrast = []
# create a macro to change the image
header = [
'input_dir = "%s/";'%(merged_images_dir),
'list_images = getFileList(input_dir);',
'setBatchMode(true);',
'for (i=0; i<list_images.length; i++) {',
' open(input_dir+list_images[i]);',
]
footer = [
' makeRectangle(0, 0, %i, %i);'%(image_w, image_h),
' run("Crop");',
' run("Flip Vertically");',
' run("Rotate 90 Degrees Left");'
' processed_image_name = "%s/" + replace(list_images[i], "%s", "tif");'%(processed_outdir_tmp, image_ending),
' saveAs("tif", processed_image_name);',
' close();',
'}',
'setBatchMode(false);'
]
lines = header + lines_contrast + footer
run_imageJ_macro(lines, "%s.processing_script.ijm"%raw_outdir, delete_files=False)
# clean
delete_folder(merged_images_dir)
# at the end save
os.rename(processed_outdir_tmp, processed_outdir)
# check that all images are there
missing_images = set(expected_images).difference(set(os.listdir(processed_outdir)))
if len(missing_images)>0: raise ValueError("There are missing images: %s"%missing_images)
for f in expected_images:
if file_is_empty("%s/%s"%(processed_outdir, f)): raise ValueError("image %s should exist"%f)
# clean
def process_image_rotation_and_contrast_PIL(Iimage, nimages, raw_image, processed_image):
"""Generates a processed image based on raw image that has enhanced contrast and left rotation. This is like process_image_rotation_and_contrast (this is what we originally did) but with PIL. This does not work as well as ImageJ."""
# log
image_short_name = "<images>/%s/%s"%(get_dir(raw_image).split("/")[-1], get_file(raw_image))
#print_with_runtime("Improving contrast and rotating image %i/%i: %s"%(Iimage, nimages, image_short_name))
if file_is_empty(processed_image):
# define tmp files
processed_image_tmp = "%s.tmp.%s"%(processed_image, processed_image.split(".")[-1]); remove_file(processed_image_tmp)
# load image
image_object = PIL_Image.open(raw_image)
# rotate 90 deg counter clockwise
image_object = image_object.rotate(90, PIL.Image.NEAREST, expand = 1)
# enhance contrast (saturated=0.3)
#image brightness enhancer
enhancer = ImageEnhance.Contrast(image_object)
image_object_processed = enhancer.enhance(10)
# save
image_object_processed.save(processed_image_tmp)
# keep
os.rename(processed_image_tmp, processed_image)
def get_yyyymmddhhmm_tuple_one_image_name(filename):
"""Returns a tuple with the yyyy, mm, dd, hh, mm for one image name"""
# get the numbers_str with all images
numbers_str = ""
recording_numbers = False
for x in get_file(filename):
# start recording once you find some number
if recording_numbers is False and x.isdigit() and int(x)>0: recording_numbers = True
# if you are recording
if recording_numbers is True and x.isdigit(): numbers_str += x
# check
if len(numbers_str)!=12: raise ValueError("We can't define a YYYYMMDDHHMM for file %s"%get_file(filename))
# get tuple
numbers_tuple = (numbers_str[0:4], numbers_str[4:6], numbers_str[6:8], numbers_str[8:10], numbers_str[10:12])
numbers_tuple = tuple([int(x) for x in numbers_tuple])
# checks
for idx, (name, expected_range) in enumerate([("year", (2000, 2500)), ("month", (1, 12)), ("day", (1, 31)), ("hour", (0, 24)), ("minute", (0, 60))]):
if numbers_tuple[idx]<expected_range[0] or numbers_tuple[idx]>expected_range[1]: print_with_runtime("WARNING: For file %s the parsed %s is %i, which may be incorrect."%(get_file(filename), name, numbers_tuple[idx]))
return numbers_tuple
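# Illustrative example (hypothetical filename): get_yyyymmddhhmm_tuple_one_image_name("img_0_20190716_1448.tif")
# starts collecting digits at the first non-zero digit and returns (2019, 7, 16, 14, 48).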
def get_int_as_str_two_digits(x):
"""Returns the int as a string with two digits"""
x = str(x)
if len(x)==1: x = "0%s"%x
if len(x)!=2: raise ValueError("%s is invalid"%x)
return x
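# Illustrative examples: get_int_as_str_two_digits(7) returns "07", get_int_as_str_two_digits(12) returns "12",
# and anything longer than two digits raises ValueError.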
def get_manual_coords(colonizer_coordinates_one_spot, coordinate_obtention_dir_plate):
"""Gets manual coords into colonizer_coordinates_one_spot"""
# run parametryzer
parametryzer_std = "%s/parametryzer.std"%coordinate_obtention_dir_plate
run_cmd("%s/envs/colonyzer_env/bin/parametryzer > %s 2>&1"%(CondaDir, parametryzer_std), env="colonyzer_env")
# checks
if file_is_empty(colonizer_coordinates_one_spot): raise ValueError("%s should exist. Make sure that you clicked the spots or check %s"%(colonizer_coordinates_one_spot, parametryzer_std))
remove_file(parametryzer_std)
def get_automatic_coords(colonizer_coordinates_one_spot, coordinate_obtention_dir_plate, latest_image, plate_batch, plate):
"""Gets automatic coords into colonizer_coordinates_one_spot"""
# run colonyzer
colonyzer_std = "%s/colonyzer.std"%coordinate_obtention_dir_plate
try:
run_cmd("colonyzer --fmt 96 --remove > %s 2>&1"%colonyzer_std, env="colonyzer_env")
auto_colonyzer_worked = True
except:
print_with_runtime("WARNING: Automatic spot location did not work!! You have to set manually the spots")
auto_colonyzer_worked = False
get_manual_coords(colonizer_coordinates_one_spot, coordinate_obtention_dir_plate)
# create the auto file
if auto_colonyzer_worked is True:
# define the coordinates of the upper left and bottom right spots
df_coords = get_tab_as_df_or_empty_df("%s/Output_Data/%s.out"%(coordinate_obtention_dir_plate, latest_image.rstrip(".tif"))).set_index(["Row", "Column"], drop=True)
automatic_coords_str = ",".join([str(int(round(pos, 0))) for pos in (df_coords.loc[1, 1].x, df_coords.loc[1, 1].y, df_coords.loc[8, 12].x, df_coords.loc[8, 12].y)])
# create the colonizer_coordinates_one_spot file as parametryzer does
lines_parametryzer_output = ["# misc",
"default,96,%s,%s"%(automatic_coords_str, date.today().strftime("%Y-%m-%d")),
"#",
"%s,96,%s"%(latest_image, automatic_coords_str)]
open(colonizer_coordinates_one_spot, "w").write("\n".join(lines_parametryzer_output)+"\n")
# show the automatic positioning of the image
print_with_runtime("This is the automatic location of the spots in the image...")
coords_image = "%s/Output_Images/%s_AREA.png"%(coordinate_obtention_dir_plate, latest_image.rstrip(".tif"))
coords_image_w, coords_image_h = PIL_Image.open(coords_image).size
coords_image_resized = "%s.resized.png"%coords_image
coords_image_resized_object = PIL_Image.open(coords_image).resize((int(coords_image_w*0.4), int(coords_image_h*0.4)))
coords_image_resized_w, coords_image_resized_h = coords_image_resized_object.size
coords_image_resized_object.save(coords_image_resized)
display_image_std = "%s/display_image.std"%coordinate_obtention_dir_plate
run_cmd("%s/display_image.py %s %i %i 'automatic location for %s-plate%i' > %s 2>&1"%(ScriptsDir, coords_image_resized, coords_image_resized_w, coords_image_resized_h, plate_batch, plate, display_image_std), env="colonyzer_env")
remove_file(display_image_std)
# clean
for folder in ["Output_Images", "Output_Data", "Output_Reports"]: delete_folder("%s/%s"%(coordinate_obtention_dir_plate, folder))
remove_file(colonyzer_std)
def generate_colonyzer_coordinates_one_plate_batch_and_plate(dest_processed_images_dir, coordinate_obtention_dir_plate, sorted_image_names, automatic_coordinates, plate_batch, plate):
"""Generates a 'Colonyzer.txt' file in each dest_processed_images_dir with all the coordinates."""
# define the dir with the coordinates for one image
colonizer_coordinates_one_spot = "%s/Colonyzer.txt"%coordinate_obtention_dir_plate
colonizer_coordinates = "%s/Colonyzer.txt"%dest_processed_images_dir
# define the latest image to base the coordinates on
latest_image = sorted_image_names[-1]
if file_is_empty(colonizer_coordinates):
# clean
remove_file(colonizer_coordinates_one_spot)
remove_file("%s/%s"%(coordinate_obtention_dir_plate, latest_image))
# softlink one image into coordinate_obtention_dir to get coordinates
#soft_link_files("%s/%s"%(dest_processed_images_dir, latest_image), "%s/%s"%(coordinate_obtention_dir_plate, latest_image))
# move one downsampled image to coordinate_obtention_dir_plate
image_object = PIL_Image.open("%s/%s"%(dest_processed_images_dir, latest_image))
original_w, original_h = image_object.size
factor_resize = 0.2
image_object.resize((int(original_w*factor_resize), int(original_h*factor_resize))).save("%s/%s"%(coordinate_obtention_dir_plate, latest_image), quality=20, optimize=True) # optimize=True
downsized_w, donwsized_h = PIL_Image.open("%s/%s"%(coordinate_obtention_dir_plate, latest_image)).size
# get to the folder
initial_dir = os.getcwd()
os.chdir(coordinate_obtention_dir_plate)
# get coordinates automatically
if automatic_coordinates is True: get_automatic_coords(colonizer_coordinates_one_spot, coordinate_obtention_dir_plate, latest_image, plate_batch, plate)
# use parametryzer
else: get_manual_coords(colonizer_coordinates_one_spot, coordinate_obtention_dir_plate)
# go back to the initial dir
os.chdir(initial_dir)
# create a colonyzer file with all the info in dest_processed_images_dir/Colonyzer.txt
# get last line
last_line_split = open(colonizer_coordinates_one_spot, "r").readlines()[-1].strip().split(",")
# define the wells
wells = last_line_split[1]
if wells!="96": raise ValueError("You set the analysis for %s-well plates, which is incompatible. Make sure that you press 'g' to save the coordinates."%wells)
# get the coordinates
coordinates_str = ",".join(last_line_split[2:])
# modify the coordinates to take into account that the coordinates image was downsized by factor_resize
coordinates_str = ",".join([str(int(int(x)*(1/factor_resize))) for x in coordinates_str.split(",")])
# check that the coordinates make sense
expected_w, expected_h = PIL_Image.open("%s/%s"%(dest_processed_images_dir, latest_image)).size
top_left_x, top_left_y, bottom_right_x, bottom_right_y = [int(x) for x in coordinates_str.split(",")]
cropped_w = bottom_right_x - top_left_x
cropped_h = bottom_right_y - top_left_y
error_log = "Make sure that you are first selecting the upper-left spot and then the lower-right spot."
for dim, expected_val, cropped_val in [("width", expected_w, cropped_w), ("height", expected_h, cropped_h)]:
if cropped_val<=0: raise ValueError("The cropped image has <=0 %s. %s"%(dim, error_log))
if cropped_val>expected_val: raise ValueError("The %s of the cropped image is above the original one. %s"%(dim, error_log))
if cropped_val<=(expected_val*0.3): raise ValueError("The %s of the cropped image is %s, and the full image has %s. The cropped image should have a %s which is close to the original one. %s"%(dim, cropped_val, expected_val, dim, error_log))
if cropped_val<=(expected_val*0.5): print_with_runtime("WARNING: The %s of the cropped image is %s, and the full image has %s. The cropped image should have a %s which is close to the original one. %s. If you are sure of this you can skip this warning."%(dim, cropped_val, expected_val, dim, error_log))
# write
non_coordinates_lines = [l for l in open(colonizer_coordinates_one_spot, "r").readlines() if l.startswith("#") or not l.startswith(latest_image)]
coordinates_lines = ["%s,%s,%s\n"%(image, wells, coordinates_str) for image in sorted_image_names]
colonizer_coordinates_tmp = "%s.tmp"%colonizer_coordinates
open(colonizer_coordinates_tmp, "w").write("".join(non_coordinates_lines + coordinates_lines))
os.rename(colonizer_coordinates_tmp, colonizer_coordinates)
def run_colonyzer_one_set_of_parms(parms, outdir_all, image_names_withoutExtension, processed_images_dir_each_plate, reference_plate):
"""Runs colonyzer for a set of parms. This should be run from a directory where there are imnages"""
# get args
sorted_parms = sorted(parms)
parms_str = "_".join(sorted_parms)
if sorted_parms==["none"]: extra_cmds_parmCombination = ""
else: extra_cmds_parmCombination = "".join([" --%s "%x for x in sorted_parms])
# change the parms str if blank
#if parms_str=="": parms_str = "noExtraParms"
# define the outdirs
outdir = "%s/output_%s"%(outdir_all, parms_str)
outdir_tmp = "%s_tmp"%outdir
# if the outdir exists, return
if os.path.isdir(outdir): return
# define the cur_dir
cur_dir = os.getcwd()
# if you provided a reference plate, create a running folder
if not reference_plate is None:
# define the dest_cur_dir, where to place merged images
dest_cur_dir = "%s/working_w_ref_plate"%cur_dir
delete_folder(dest_cur_dir)
# Get the last timepoint image of the reference plate as the image to append
dir_ref = "%s/%s_plate%i"%(processed_images_dir_each_plate, reference_plate[0], reference_plate[1])
sorted_imgs = sorted({f for f in os.listdir(dir_ref) if not f.startswith(".") and f not in {"Colonyzer.txt.tmp", "Colonyzer.txt"}}, key=get_yyyymmddhhmm_tuple_one_image_name)
ref_image_file = "%s/%s"%(dir_ref, sorted_imgs[-1])
# make folder
make_folder(dest_cur_dir)
# Get colonyzer file
soft_link_files("%s/Colonyzer.txt"%(cur_dir), "%s/Colonyzer.txt"%(dest_cur_dir))
# generate figures with appended ref_image_file in the right
imgs_process = sorted({f for f in os.listdir(cur_dir) if not f.startswith(".") and f.split(".")[0] in image_names_withoutExtension})
img_name_to_size = {}
for img in imgs_process:
img_name_to_size[img.split(".")[0]] = PIL_Image.open("%s/%s"%(cur_dir, img)).size
generates_image_w_appended_image_on_the_right("%s/%s"%(cur_dir, img), "%s/%s"%(dest_cur_dir, img), ref_image_file, imgs_process[0].split(".")[-1])
# move to dest_cur_dir and work there
os.chdir(dest_cur_dir)
# check if all images have a data file (which means that they have been analyzed in outdir/Output_Data)
all_images_analized = False
Output_Data_dir = "%s/Output_Data"%outdir
if os.path.isdir(Output_Data_dir):
Output_Data_content = set(os.listdir(Output_Data_dir))
if all(["%s.out"%f in Output_Data_content for f in image_names_withoutExtension]): all_images_analized = True
# run colonyzer
if all_images_analized is False:
# delete and create the outdirs
delete_folder(outdir)
delete_folder(outdir_tmp); make_folder(outdir_tmp)
# run colonyzer, which will generate data under . (images_folder)
colonyzer_exec = "%s/envs/colonyzer_env/bin/colonyzer"%CondaDir
colonyzer_std = "%s.running_colonyzer.std"%outdir_tmp
colonyzer_cmd = "%s %s --plots --remove --initpos --fmt 96 > %s 2>&1"%(colonyzer_exec, extra_cmds_parmCombination, colonyzer_std) # --slopefill 0.9 is default, --slopefill 0.5 gave more simialr patterns of growth at high concentrations. slopefill 0.7 did not change
run_cmd(colonyzer_cmd, env="colonyzer_env")
remove_file(colonyzer_std)
# change to initial curdir
os.chdir(cur_dir)
# move to outdir_tmp
for folder in ["Output_Images", "Output_Data", "Output_Reports"]:
# move to tmp
if not reference_plate is None: source_folder = "%s/%s"%(dest_cur_dir, folder)
else: source_folder = "%s/%s"%(cur_dir, folder)
dest_folder = "%s/%s"%(outdir_tmp, folder)
os.rename(source_folder, dest_folder)
# edit the images
if not reference_plate is None and folder=="Output_Images":
for img in sorted(image_names_withoutExtension):
for suffix in {".png", "_AREA.png"}:
image_file = "%s/%s%s"%(dest_folder, img, suffix)
PIL_Image.open(image_file).crop((0, 0, img_name_to_size[img][0], img_name_to_size[img][1])).save(image_file)
# remove dest_cur_dir
if not reference_plate is None: delete_folder(dest_cur_dir)
# change the name, which marks that everything finished well
os.rename(outdir_tmp, outdir)
def get_barcode_from_filename(filename):
"""Gets a filename like img_0_2090716_1448 and returns the barcode."""
# get plateID
plateID = "".join(filename.split("_")[0:2])
# get timestamp
d = list(filename.split("_")[2])
t = list(filename.split("_")[3])
d_string = "%s%s%s%s-%s%s-%s%s"%(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7])
t_string = "%s%s-%s%s-00"%(t[0], t[1], t[2], t[3])
return "%s-%s_%s"%(plateID, d_string, t_string)
def get_barcode_for_filenames(filenames_series):
"""Takes a series of filenames and passes them to get_barcode_from_filename to get barcoded values. The barcode cannot exceed 11 chars, so that it is stripped accoringly by this function"""
# get barcoded items
barcoded_names = filenames_series.apply(get_barcode_from_filename)
# return
oldBar_to_newBar = {"img0":"img_fitness"}
return barcoded_names.apply(lambda x: "%s-%s"%(oldBar_to_newBar[x.split("-")[0]], "-".join(x.split("-")[1:])))
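# Illustrative usage (hypothetical series): get_barcode_for_filenames(pd.Series(["img_0_20190716_1448"]))
# returns a series containing "img_fitness-2019-07-16_14-48-00", i.e. the "img0" prefix renamed to "img_fitness".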
def save_df_as_tab(df, file):
"""Takes a df and saves it as tab"""
file_tmp = "%s.tmp"%file
df.to_csv(file_tmp, sep="\t", index=False, header=True)
os.rename(file_tmp, file)
def get_tab_as_df_or_empty_df(file):
"""Gets df from file or empty df"""
nlines = len([l for l in open(file, "r").readlines() if len(l)>1])
if nlines==0: return pd.DataFrame()
else: return pd.read_csv(file, sep="\t")
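# Illustrative round trip (hypothetical path): save_df_as_tab(pd.DataFrame({"strain": ["s1"]}), "/output/df.tab")
# followed by get_tab_as_df_or_empty_df("/output/df.tab") recovers the same table; an empty file yields an empty DataFrame.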
def get_df_fitness_measurements_one_parm_set(outdir_all, outdir_name, plate_batch, plate, df_plate_layout):
"""For one plate_batch and plate combination and one parm set, get the fitness measurements with qfa. The outdir_all should be where the colonyzer results are."""
##### RUN QFA AND SAVE #####
# TODO: rethink how growth is inferred; make sure that it is comparable across batches
# generate the plots with R
fitness_measurements_std = "%s/fitness_measurements.std"%data_path
run_cmd("/workdir_app/scripts/get_fitness_measurements.R %s > %s 2>&1"%("%s/%s"%(outdir_all, outdir_name), fitness_measurements_std), env="main_env")
remove_file(fitness_measurements_std)
# save the df
df_fitness_measurements_file = "%s/%s/df_fitness_measurements.tab"%(outdir_all, outdir_name)
os.rename("%s/%s/logRegression_fits.tbl"%(outdir_all, outdir_name), df_fitness_measurements_file)
############################
return get_tab_as_df_or_empty_df(df_fitness_measurements_file)
def get_growth_measurements_one_plate_batch_and_plate(Ibatch, nbatches, images_folder, outdir_all, plate_batch, plate, sorted_image_names, processed_images_dir_each_plate, reference_plate, df_plate_layout, hours_experiment):
"""For one plate batch and plate, runs colonyzer to get raw growth and fitness measurements."""
print_with_runtime("Getting fitness measurements for plate_batch-plate %i/%i: %s-plate%i"%(Ibatch, nbatches, plate_batch, plate))
# define final file
outdir_name = "output_%s"%("_".join(sorted(parms_colonyzer)))
integrated_growth_df_file = "%s/%s/all_images_data.tab"%(outdir_all, outdir_name)
if file_is_empty(integrated_growth_df_file):
########## RUN COLONYZER #############
# prepare dirs
make_folder(outdir_all)
# clean the hidden files from images_folder
for f in os.listdir(images_folder):
if f.startswith("."): remove_file("%s/%s"%(images_folder, f))
# move into the images dir
initial_dir = os.getcwd()
os.chdir(images_folder)
# check
if file_is_empty("./Colonyzer.txt"): raise ValueError("Colonyzer.txt should exist in %s"%images_folder)
# define the image names that you expect
image_names_withoutExtension = set({x.split(".")[0] for x in sorted_image_names})
# run colonyzer for all parameters
run_colonyzer_one_set_of_parms(parms_colonyzer, outdir_all, image_names_withoutExtension, processed_images_dir_each_plate, reference_plate)
# go back to the initial dir
os.chdir(initial_dir)
######################################
############ CREATE DAT FILE ##############
# get the data path
data_path = "%s/%s/Output_Data"%(outdir_all, outdir_name)
# generate a df with fitness info of all images
all_df = pd.DataFrame()
for f in [x for x in os.listdir(data_path) if x.endswith(".dat")]:
df = pd.read_csv("%s/%s"%(data_path, f), sep="\t", header=None)
all_df = all_df.append(df)
# add barcode in the first place, instead of the filename
all_df[0] = get_barcode_for_filenames(all_df[0])
# sort the values
all_df = all_df.sort_values(by=[0,1,2])
# change the NaN by "NA"
def change_NaN_to_str(cell):
if pd.isna(cell): return "NA"
else: return cell
all_df = all_df.applymap(change_NaN_to_str)
# use qfa to generate the df with growth
integrated_growth_df_dat_file = "%s/%s/all_images_data.dat"%(outdir_all, outdir_name)
all_df.to_csv(integrated_growth_df_dat_file, sep="\t", index=False, header=False)
###########################################
######### CREATE EXTRA FIELDS ########
# create the files that are necessary for the R qfa package to generate the output files
# keep the plate layout that is interesting here
df_plate_layout = df_plate_layout[(df_plate_layout.plate_batch==plate_batch) & (df_plate_layout.plate==plate)].set_index(["row", "column"])
# checks
if len(df_plate_layout[["drug", "concentration"]].drop_duplicates())!=1: raise ValueError("There should be only one plate and concentration")
drug = df_plate_layout.drug.iloc[0]
concentration = df_plate_layout.concentration.iloc[0]
# experiment description: file describing the inoculation times, library and plate number for unique plates.
exp_df = pd.DataFrame()
# get all plates
for I, plateBarcode in enumerate(set([x.split("-")[0] for x in all_df[0]])):
startTime = min(all_df[all_df[0].apply(lambda x: x.startswith(plateBarcode))][0].apply(lambda y: "-".join(y.split("-")[1:])))
dict_data = {"Barcode":plateBarcode, "Start.Time":startTime, "Treatment": plate_batch, "Medium":"[%s]=%s"%(drug, concentration) ,"Screen":"screen", "Library":"strain", "Plate": plate, "RepQuad":1}
exp_df = exp_df.append(pd.DataFrame({k: {I+1 : v} for k, v in dict_data.items()}))
# write
exp_df.to_csv("%s/%s/ExptDescription.txt"%(outdir_all, outdir_name), sep="\t", index=False, header=True)
# library description: states, for each plate (1, 2, 3 ... up to the number of plates defined in ExptDescription.Plate), the name and the ORF, if interesting
lib_df = pd.DataFrame()
# define the rows and cols
nWells_ro_NrowsNcols = {96:(8, 12)}
for barcode, plateID in exp_df[["Barcode", "Plate"]].values: