-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpreprocess_img.py
102 lines (79 loc) · 3.4 KB
/
preprocess_img.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
from skimage import io, transform
from PIL import Image
import numpy as np
import pandas as pd
import argparse
import skimage
from tqdm import tqdm
# Default location of the extracted PadChest PNG files.
PADCHEST_IMAGE_ROOT = '/raid/cl522/PadChest/BIMCV-PadChest-FULL/PadChestImage/'


def get_PadChest_img(img_id, root=PADCHEST_IMAGE_ROOT):
    """Load one PadChest image from disk as a NumPy array.

    Dataset link: https://bimcv.cipf.es/bimcv-projects/padchest/

    Notes carried over from the original author: the source PNGs have
    intensities of 6e4+ and a resolution of 3000+ pixels, so callers are
    expected to rescale and resize the returned array themselves.

    Args:
        img_id: PNG file name of the image (the ``img_name`` / ``ImageID``
            value from the PadChest database info csv).
        root: Directory that contains the PNG files. Defaults to the
            original hard-coded location, so existing callers are unaffected.

    Returns:
        The decoded image as a ``numpy.ndarray``.
    """
    img_path = os.path.join(root, img_id)
    # Decode inside the context manager so the underlying file handle is
    # released deterministically instead of whenever the image is collected.
    with Image.open(img_path) as img:
        # convert png file to numpy array
        return np.array(img)
def get_MIMIC_img(subject_id, study_id, dicom):
    """Load one MIMIC-CXR JPEG from disk as a NumPy array.

    Dataset link: https://physionet.org/content/mimic-cxr/2.0.0/

    The image is read from
    ``<path>p<first two characters of subject_id>/p<subject_id>/s<study_id>/<dicom>.jpg``.
    The matching report would sit next to the study directory as
    ``.../p<subject_id>/s<study_id>.txt`` under the report root; it is not
    read by this function.

    Args:
        subject_id: Subject identifier as a string (without the ``p`` prefix).
        study_id: Study identifier as a string (without the ``s`` prefix).
        dicom: DICOM identifier, used as the JPEG file name stem.

    Returns:
        The decoded image as a ``numpy.ndarray``.
    """
    # meta MIMIC path; it is concatenated as-is, so it must end with a
    # path separator once the placeholder is replaced.
    path = 'xx'
    patient_dir = 'p' + subject_id[0:2] + '/' + 'p' + subject_id + '/'
    jpg_path = path + patient_dir + 's' + study_id + '/' + dicom + '.jpg'
    # Decode inside the context manager so the file handle is released
    # deterministically.
    with Image.open(jpg_path) as img:
        return np.array(img)
def crop_to_nonzero(img):
    """Crop a 2-D image to the bounding box of its non-zero pixels.

    An image without any non-zero pixel is returned unchanged.
    """
    rows, cols = np.nonzero(img)
    # Test for *presence* of non-zero pixels. Checking the truthiness of the
    # index values themselves (np.any(rows)) wrongly reports "empty" when all
    # content lies in row 0.
    if rows.size == 0:
        return img
    return img[rows.min():rows.max() + 1, cols.min():cols.max() + 1]


def stretch_intensity(img):
    """Linearly map the image's [min, max] range onto [0, 256] as floats.

    A constant image (max == min) has no range to stretch and yields zeros
    instead of dividing by zero.
    """
    lo, hi = img.min(), img.max()
    if hi == lo:
        return np.zeros(img.shape, dtype=np.float64)
    return (img - lo) * (1 / (hi - lo) * 256)


def to_uint8(img):
    """Convert a float image to uint8, saturating at 255.

    The stretched range tops out at exactly 256, which does not fit in
    uint8; clipping keeps the brightest pixels white instead of letting the
    cast wrap them around.
    """
    return np.clip(img, 0, 255).astype(np.uint8)


def preprocess_image(img, resize):
    """Crop, intensity-stretch and resize one image to (resize, resize) uint8.

    Multi-channel input is reduced to its first channel before processing.
    """
    if img.ndim > 2:
        img = img[:, :, 0]
    img = stretch_intensity(crop_to_nonzero(img))
    img = skimage.transform.resize(img, (resize, resize),
                                   order=1, preserve_range=True, anti_aliasing=False)
    return to_uint8(img)


parser = argparse.ArgumentParser(description='extract_data')
parser.add_argument('--resize', type=int, required=True,
                    help='side length in pixels of the square output images')
parser.add_argument('--dataset', type=str, required=True,
                    choices=['PadChest', 'MIMIC'],
                    help='which dataset to preprocess')


if __name__ == "__main__":
    args = parser.parse_args()
    resize = args.resize

    if args.dataset == 'PadChest':
        # master csv from PadChest dataset
        # 'PADCHEST_chest_x_ray_images_labels_160K_01.02.19.csv'
        metacsv = pd.read_csv('xx')
        temp_npy = np.zeros((metacsv.shape[0], resize, resize), dtype=np.uint8)
        for i, image_idx in enumerate(tqdm(metacsv['ImageID'])):
            temp_npy[i, :, :] = preprocess_image(get_PadChest_img(image_idx), resize)
        np.save('xx', temp_npy)  # save to ext_data folder

    elif args.dataset == 'MIMIC':
        metacsv = pd.read_csv('xx')  # master csv from MGCA preprocessing stage
        temp_npy = np.zeros((metacsv.shape[0], resize, resize), dtype=np.uint8)
        print(metacsv.shape, temp_npy.shape)
        records = zip(metacsv['dicom_id'], metacsv['subject_id'], metacsv['study_id'])
        for i, (dicom_idx, subject_idx, study_idx) in enumerate(
                tqdm(records, total=metacsv.shape[0])):
            # ids may have been parsed as floats; normalise to plain integer strings
            img = get_MIMIC_img(subject_id=str(int(subject_idx)),
                                study_id=str(int(study_idx)),
                                dicom=dicom_idx)
            temp_npy[i, :, :] = preprocess_image(img, resize)
        np.save('xx', temp_npy)  # save to ext_data folder