Skip to content

Commit

Permalink
Merge pull request hunglc007#52 from wooruang/master
Browse files Browse the repository at this point in the history
Add converter for voc
  • Loading branch information
hunglc007 authored May 23, 2020
2 parents 7b30fc6 + 3bbf254 commit fbb7b42
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 0 deletions.
27 changes: 27 additions & 0 deletions scripts/voc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# VOC Dataset

### Download

```bash
$ bash get_voc2012.sh
```

### Make names for VOC.

```bash
$ python voc_make_names.py [--anno_dir {Annotation directory}] [--output {OUTPUT_NAME}]

# example
$ python voc_make_names.py

$ python voc_make_names.py --anno_dir ../../data/voc/anno --output ../../data/classes/voc.names
```

### Convert VOC Dataset.

```bash
$ python voc_convert.py [--image_dir {Image directory}] [--anno_dir {Annotation directory}] [--train_list_txt {Path of Train list file}] [--val_list_txt {Path of Validation list file}] [--classes {Path of Classes file}] [--train_output {Path of Output file For Train}] [--val_output {Path of Output file For Val}] [--no_val]

# example
$ python voc_convert.py
```
19 changes: 19 additions & 0 deletions scripts/voc/get_voc2012.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
#
# Download the PASCAL VOC 2012 train/val archive into <repo>/data and unpack it
# into <repo>/data/VOCtrainval_11-May-2012.
#
# Exits with status 1 if the unpack directory already exists.

# Stop at the first failing command so a failed download is never unpacked.
set -e

# Resolve the repository root starting from this script's own directory
# (scripts/voc) and climbing two levels. Stacking `dirname` calls on
# BASH_SOURCE[0] does not work when the script is started as
# `bash get_voc2012.sh`: the path has no directory part, every `dirname`
# yields ".", and the data would land under the current directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
PARENT_DIR="$( cd "$SCRIPT_DIR/../.." >/dev/null 2>&1 && pwd )"
DATA_DIR="$PARENT_DIR/data"

DATASET_NAME="VOCtrainval_11-May-2012"
ARCHIVE_NAME="$DATASET_NAME.tar"

# -c resumes a partially downloaded archive; -P selects the target directory.
wget -c -P "$DATA_DIR" "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/$ARCHIVE_NAME"

# Refuse to unpack over an existing extraction.
if [[ -d "$DATA_DIR/$DATASET_NAME" ]]; then
    echo "Already '$DATA_DIR/$DATASET_NAME' path exists."
    exit 1
fi

mkdir -p "$DATA_DIR/$DATASET_NAME"

tar xf "$DATA_DIR/$ARCHIVE_NAME" -C "$DATA_DIR/$DATASET_NAME"

78 changes: 78 additions & 0 deletions scripts/voc/voc_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import sys
import os

from absl import app, flags
from absl.flags import FLAGS
from lxml import etree


# Command-line flags for the VOC -> annotation-list converter.
# All defaults are relative paths pointing into
# ../../data/VOCtrainval_11-May-2012 (inputs) and ../../data/classes,
# ../../data/dataset (class list and outputs).

# Input locations inside the extracted VOC2012 tree.
flags.DEFINE_string('image_dir', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/JPEGImages', 'path to image dir')
flags.DEFINE_string('anno_dir', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/Annotations', 'path to anno dir')
# Text files listing the image ids that make up the train / val splits.
flags.DEFINE_string('train_list_txt', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/ImageSets/Main/train.txt', 'path to a set of train')
flags.DEFINE_string('val_list_txt', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/ImageSets/Main/val.txt', 'path to a set of val')
# Class-name list; a class's index in this file becomes its label id.
flags.DEFINE_string('classes', '../../data/classes/voc2012.names', 'path to a list of class names')
# Output annotation files, one per split.
flags.DEFINE_string('train_output', '../../data/dataset/voc2012_train.txt', 'path to a file for train')
flags.DEFINE_string('val_output', '../../data/dataset/voc2012_val.txt', 'path to a file for val')

# When set, only the training split is converted.
flags.DEFINE_boolean('no_val', False, 'if uses this flag, it does not convert a list of val')


def convert_annotation(list_txt, output_path, image_dir, anno_dir, class_names):
    """Convert the VOC annotations of one image list into a single text file.

    For every image id in ``list_txt`` the matching ``<id>.xml`` file in
    ``anno_dir`` is parsed and one line is written to ``output_path``::

        <absolute image path> xmin,ymin,xmax,ymax,class_idx [more boxes...]

    Args:
        list_txt: Path of a text file with one image id per line.
        output_path: Path of the annotation file to write (overwritten).
        image_dir: Directory holding the ``<id>.jpg`` images.
        anno_dir: Directory holding the ``<id>.xml`` annotations.
        class_names: Sequence of class names; a name's position is its index.

    Raises:
        ValueError: If an object's name is not in ``class_names`` (raised by
            ``class_names.index`` when ``class_names`` is a list).
    """
    IMAGE_EXT = '.jpg'
    ANNO_EXT = '.xml'

    with open(list_txt, 'r') as f, open(output_path, 'w') as wf:
        for raw_line in f:
            image_id = raw_line.strip()
            if not image_id:
                # A blank line is not the end of the list; skipping it keeps
                # the ids that follow from being silently dropped.
                continue

            im_p = os.path.join(image_dir, image_id + IMAGE_EXT)
            an_p = os.path.join(anno_dir, image_id + ANNO_EXT)

            # Get annotation.
            root = etree.parse(an_p).getroot()

            box_annotations = []
            # Read name and box from the same <object> element so that a
            # label can never be paired with another object's box.
            for obj in root.xpath('//object'):
                name_node = obj.find('name')
                box = obj.find('bndbox')
                if name_node is None or box is None:
                    # Without both a name and a box there is nothing to emit.
                    continue

                class_idx = class_names.index(name_node.text)
                coords = [box.find(tag).text
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
                box_annotations.append(
                    ','.join([str(c) for c in coords] + [str(class_idx)]))

            annotation = os.path.abspath(im_p) + ' ' + ' '.join(box_annotations) + '\n'

            wf.write(annotation)


def convert_voc(image_dir, anno_dir, train_list_txt, val_list_txt, classes, train_output, val_output, no_val):
    """Convert the VOC train split, and optionally the val split.

    Args:
        image_dir: Directory holding the images.
        anno_dir: Directory holding the XML annotations.
        train_list_txt: Path of the file listing the training image ids.
        val_list_txt: Path of the file listing the validation image ids.
        classes: Path of the class-names file, one name per line.
        train_output: Path of the annotation file written for training.
        val_output: Path of the annotation file written for validation.
        no_val: If true, the validation split is not converted.
    """
    # Use the `classes` argument rather than the global flag so the function
    # honours what the caller passed, and close the file once it is read.
    with open(classes) as f:
        class_names = [c.strip() for c in f]

    # Training set.
    convert_annotation(train_list_txt, train_output, image_dir, anno_dir, class_names)

    if no_val:
        return

    # Validation set.
    convert_annotation(val_list_txt, val_output, image_dir, anno_dir, class_names)


def main(_argv):
    """Run the VOC conversion using the parsed command-line flag values."""
    convert_voc(
        image_dir=FLAGS.image_dir,
        anno_dir=FLAGS.anno_dir,
        train_list_txt=FLAGS.train_list_txt,
        val_list_txt=FLAGS.val_list_txt,
        classes=FLAGS.classes,
        train_output=FLAGS.train_output,
        val_output=FLAGS.val_output,
        no_val=FLAGS.no_val,
    )
    print("Complete convert voc data!")


# Script entry point: hand control to absl, which calls main().
if __name__ == "__main__":
    try:
        app.run(main)
    except SystemExit:
        # Any SystemExit coming out of app.run is swallowed here.
        # NOTE(review): this also discards a non-zero exit status, so callers
        # of the script cannot detect failures — confirm that is intended.
        pass
46 changes: 46 additions & 0 deletions scripts/voc/voc_make_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import sys
import os

from absl import app, flags
from absl.flags import FLAGS
from lxml import etree


# Command-line flags: where to read the VOC XML annotations from and where to
# write the resulting class-names file.
flags.DEFINE_string('anno_dir', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/Annotations', 'path to anno dir')
# The help text used to be a copy of the anno_dir one; describe the real purpose.
flags.DEFINE_string('output', '../../data/classes/voc2012.names', 'path to the output names file')


def make_names(anno_dir, output):
    """Collect every object class name found in a VOC annotation directory.

    Each ``.xml`` file in ``anno_dir`` is parsed, the text of all
    ``object/name`` elements is gathered, and the distinct names are written
    to ``output`` in sorted order, one per line.

    Args:
        anno_dir: Directory holding the XML annotation files.
        output: Path of the names file to write (overwritten).
    """
    labels = set()

    for anno_file in os.listdir(anno_dir):
        # Only annotation files are parsed; stray entries in the directory
        # (notes, hidden files, sub-directories) would make the parser fail.
        if not anno_file.lower().endswith('.xml'):
            continue

        p = os.path.join(anno_dir, anno_file)

        # Get annotation.
        root = etree.parse(p).getroot()
        for n in root.xpath('//object/name'):
            # An empty <name/> has no text and cannot be a class label.
            if n.text:
                labels.add(n.text)

    with open(output, 'w') as f:
        f.writelines(label + '\n' for label in sorted(labels))

    print(f"Done making a names's file ({os.path.abspath(output)})")


def main(_argv):
    """Build the class-names file from the parsed command-line flag values."""
    anno_dir, output = FLAGS.anno_dir, FLAGS.output
    make_names(anno_dir, output)


# Script entry point: hand control to absl, which calls main().
if __name__ == "__main__":
    try:
        app.run(main)
    except SystemExit:
        # Any SystemExit coming out of app.run is swallowed here.
        # NOTE(review): this also discards a non-zero exit status, so callers
        # of the script cannot detect failures — confirm that is intended.
        pass

0 comments on commit fbb7b42

Please sign in to comment.