forked from pedropro/TACO
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdownload.py
58 lines (45 loc) · 1.71 KB
/
download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
'''
This script downloads TACO's images from Flickr given an annotation json file
Code written by Pedro F. Proenza, 2019
'''
import os.path
import argparse
import json
from PIL import Image
import requests
from io import BytesIO
import sys
parser = argparse.ArgumentParser(description='')
parser.add_argument('--dataset_path', required=False, default='./data/annotations.json', help='Path to annotations')
args = parser.parse_args()

# Images are written relative to the directory that holds the annotation file.
dataset_dir = os.path.dirname(args.dataset_path)

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 60


def _download_image(url, file_path):
    '''
    Fetch the image at `url` and save it to `file_path`, keeping EXIF data if present.

    The image is first written to a temporary sibling file and then moved into
    place, so an interrupted run never leaves a truncated file at `file_path`
    (which the "skip existing files" check would otherwise treat as complete).

    Raises requests.HTTPError on a non-2xx response; other network and image
    decoding errors propagate unchanged.
    '''
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of trying to decode an error page as an image.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))

    # Keep the original extension at the end so Pillow still infers the output format.
    root, ext = os.path.splitext(file_path)
    tmp_path = root + '.part' + ext

    # Read the raw EXIF bytes from the public `info` dict rather than the
    # private `_getexif()` helper, which not every image class provides.
    exif = img.info.get('exif')
    try:
        if exif:
            img.save(tmp_path, exif=exif)
        else:
            img.save(tmp_path)
        os.replace(tmp_path, file_path)
    finally:
        # Only still present if saving or moving failed; do not leave debris behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


print('Note. If for any reason the connection is broken. Just call me again and I will start where I left.')

# Load annotations (the file is closed again before the long download loop starts)
with open(args.dataset_path, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

nr_images = len(annotations['images'])
bar_size = 30

# Count from 1 so the progress bar reports completed images and ends at n/n.
for i, image in enumerate(annotations['images'], start=1):

    file_name = image['file_name']
    url_original = image['flickr_url']

    file_path = os.path.join(dataset_dir, file_name)

    # Create subdir if necessary (including missing parents); `subdir` is empty
    # when the image lands directly in the current working directory.
    subdir = os.path.dirname(file_path)
    if subdir:
        os.makedirs(subdir, exist_ok=True)

    # Files that already exist are skipped, which is what makes re-running resume.
    if not os.path.isfile(file_path):
        _download_image(url_original, file_path)

    # Show loading bar
    x = int(bar_size * i / nr_images)
    sys.stdout.write("%s[%s%s] - %i/%i\r" % ('Loading: ', "=" * x, "." * (bar_size - x), i, nr_images))
    sys.stdout.flush()

sys.stdout.write('Finished\n')