diff --git a/scripts/voc/README.md b/scripts/voc/README.md
new file mode 100644
index 00000000..123f8848
--- /dev/null
+++ b/scripts/voc/README.md
@@ -0,0 +1,27 @@
+# VOC Dataset
+
+### Download
+
+```bash
+$ bash get_voc2012.sh
+```
+
+### Make names for VOC.
+
+```bash
+$ python voc_make_names.py [--anno_dir {Annotation directory}] [--output {OUTPUT_NAME}]
+
+# example
+$ python voc_make_names.py
+
+$ python voc_make_names.py --anno_dir ../../data/voc/anno --output ../../data/classes/voc.names
+```
+
+### Convert VOC Dataset.
+
+```bash
+$ python voc_convert.py [--image_dir {Image directory}] [--anno_dir {Annotation directory}] [--train_list_txt {Path of Train list file}] [--val_list_txt {Path of Validation list file}] [--classes {Path of Classes file}] [--train_output {Path of Output file For Train}] [--val_output {Path of Output file For Val}] [--no_val]
+
+# example
+$ python voc_convert.py
+```
diff --git a/scripts/voc/get_voc2012.sh b/scripts/voc/get_voc2012.sh
new file mode 100644
index 00000000..91d748b1
--- /dev/null
+++ b/scripts/voc/get_voc2012.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Download the PASCAL VOC2012 train/val archive into <repo>/data and extract it.
+
+# Abort on the first failing command so a broken download is never extracted.
+set -euo pipefail
+
+# Resolve the script's own directory first: applying dirname three times to a
+# bare "get_voc2012.sh" (as in "bash get_voc2012.sh") just yields ".", which
+# would put the data under the current directory instead of the repo root.
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+PARENT_DIR="$( cd "$SCRIPT_DIR/../.." >/dev/null 2>&1 && pwd )"
+DATA_DIR="$PARENT_DIR/data"
+
+DATASET_NAME="VOCtrainval_11-May-2012"
+ARCHIVE_NAME="$DATASET_NAME.tar"
+
+# Check before downloading so an existing extraction does not cost a download.
+if [[ -d "$DATA_DIR/$DATASET_NAME" ]]; then
+    echo "Already '$DATA_DIR/$DATASET_NAME' path exists."
+    exit 1
+fi
+
+mkdir -p "$DATA_DIR"
+
+# -c resumes a partially downloaded archive instead of starting over.
+wget -c -P "$DATA_DIR" "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/$ARCHIVE_NAME"
+
+mkdir -p "$DATA_DIR/$DATASET_NAME"
+
+tar xf "$DATA_DIR/$ARCHIVE_NAME" -C "$DATA_DIR/$DATASET_NAME"
diff --git a/scripts/voc/voc_convert.py b/scripts/voc/voc_convert.py
new file mode 100644
index 00000000..d9c9bbdf
--- /dev/null
+++ b/scripts/voc/voc_convert.py
@@ -0,0 +1,114 @@
+"""Convert PASCAL VOC annotations into one-line-per-image text lists."""
+import sys
+import os
+
+from absl import app, flags
+from absl.flags import FLAGS
+from lxml import etree
+
+
+flags.DEFINE_string('image_dir', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/JPEGImages', 'path to image dir')
+flags.DEFINE_string('anno_dir', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/Annotations', 'path to anno dir')
+flags.DEFINE_string('train_list_txt', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/ImageSets/Main/train.txt', 'path to a set of train')
+flags.DEFINE_string('val_list_txt', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/ImageSets/Main/val.txt', 'path to a set of val')
+flags.DEFINE_string('classes', '../../data/classes/voc2012.names', 'path to a list of class names')
+flags.DEFINE_string('train_output', '../../data/dataset/voc2012_train.txt', 'path to a file for train')
+flags.DEFINE_string('val_output', '../../data/dataset/voc2012_val.txt', 'path to a file for val')
+
+flags.DEFINE_boolean('no_val', False, 'if uses this flag, it does not convert a list of val')
+
+
+def convert_annotation(list_txt, output_path, image_dir, anno_dir, class_names):
+    """Write one "<image path> <box> <box> ..." line per image id in list_txt.
+
+    Each box is "xmin,ymin,xmax,ymax,class_index": the coordinates are the text
+    of the object's bndbox fields and class_index is the position of the
+    object's name in class_names.
+
+    Args:
+        list_txt: text file with one image id per line; blank lines are skipped.
+        output_path: file to write; its parent directory is created if missing.
+        image_dir: directory holding "<image id>.jpg".
+        anno_dir: directory holding "<image id>.xml".
+        class_names: list of class names; list position is the class index.
+
+    Raises:
+        ValueError: if an object's name is not in class_names.
+    """
+    IMAGE_EXT = '.jpg'
+    ANNO_EXT = '.xml'
+
+    # Build the name -> index table once instead of scanning the list for every
+    # object. setdefault keeps the first index of a duplicated name, which is
+    # what list.index would return.
+    class_to_idx = {}
+    for idx, class_name in enumerate(class_names):
+        class_to_idx.setdefault(class_name, idx)
+
+    # Create the output directory if needed so open() below does not fail on it.
+    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+
+    with open(list_txt, 'r') as f, open(output_path, 'w') as wf:
+        for raw_line in f:
+            image_id = raw_line.strip()
+            if not image_id:
+                # A blank line is not the end of the list; keep reading.
+                continue
+            im_p = os.path.join(image_dir, image_id + IMAGE_EXT)
+            an_p = os.path.join(anno_dir, image_id + ANNO_EXT)
+
+            # Get annotation.
+            root = etree.parse(an_p).getroot()
+
+            box_annotations = []
+            # Walk the objects themselves so every box stays paired with the
+            # name of the same object; objects missing either one are skipped.
+            for obj in root.xpath('//object'):
+                name_node = obj.find('name')
+                box = obj.find('bndbox')
+                if name_node is None or box is None:
+                    continue
+
+                name = (name_node.text or '').strip()
+                if not name or name not in class_to_idx:
+                    raise ValueError(f"class '{name}' in {an_p} is not in the class list")
+
+                fields = [box.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
+                fields.append(str(class_to_idx[name]))
+                box_annotations.append(','.join(fields))
+
+            annotation = os.path.abspath(im_p) + ' ' + ' '.join(box_annotations) + '\n'
+
+            wf.write(annotation)
+
+
+def convert_voc(image_dir, anno_dir, train_list_txt, val_list_txt, classes, train_output, val_output, no_val):
+    """Convert the train list and, unless no_val is set, the val list.
+
+    classes is the path of a file with one class name per line; the position
+    of a line in that file is the class index written to the outputs.
+    """
+    # Read the class list from the argument rather than the global FLAGS so an
+    # explicitly passed path is honoured, and close the file when done.
+    with open(classes, 'r') as f:
+        class_names = [c.strip() for c in f]
+
+    # Training set.
+    convert_annotation(train_list_txt, train_output, image_dir, anno_dir, class_names)
+
+    if no_val:
+        return
+
+    # Validation set.
+    convert_annotation(val_list_txt, val_output, image_dir, anno_dir, class_names)
+
+
+def main(_argv):
+    convert_voc(FLAGS.image_dir, FLAGS.anno_dir, FLAGS.train_list_txt, FLAGS.val_list_txt, FLAGS.classes, FLAGS.train_output, FLAGS.val_output, FLAGS.no_val)
+    print("Complete convert voc data!")
+
+
+if __name__ == "__main__":
+    # Do not swallow SystemExit: absl reports flag and usage errors through it,
+    # and the exit status should reach the calling shell.
+    app.run(main)
diff --git a/scripts/voc/voc_make_names.py b/scripts/voc/voc_make_names.py
new file mode 100644
index 00000000..a4c73660
--- /dev/null
+++ b/scripts/voc/voc_make_names.py
@@ -0,0 +1,55 @@
+"""Collect the object class names used by a directory of VOC annotations."""
+import sys
+import os
+
+from absl import app, flags
+from absl.flags import FLAGS
+from lxml import etree
+
+
+flags.DEFINE_string('anno_dir', '../../data/VOCtrainval_11-May-2012/VOCdevkit/VOC2012/Annotations', 'path to anno dir')
+flags.DEFINE_string('output', '../../data/classes/voc2012.names', 'path to the output names file')
+
+
+def make_names(anno_dir, output):
+    """Write the sorted, de-duplicated object names found in anno_dir to output.
+
+    Args:
+        anno_dir: directory of XML annotation files; other entries are ignored.
+        output: names file to write, one class per line; its parent directory
+            is created if missing.
+    """
+    labels = set()
+
+    for anno_file in os.listdir(anno_dir):
+        if not anno_file.lower().endswith('.xml'):
+            # Skip anything in the directory that is not an XML annotation.
+            continue
+        p = os.path.join(anno_dir, anno_file)
+
+        # Get annotation.
+        root = etree.parse(p).getroot()
+
+        for n in root.xpath('//object/name'):
+            name = (n.text or '').strip()
+            if name:
+                labels.add(name)
+
+    # Create the output directory if needed so open() below does not fail on it.
+    os.makedirs(os.path.dirname(os.path.abspath(output)), exist_ok=True)
+
+    with open(output, 'w') as f:
+        for label in sorted(labels):
+            f.write(label + '\n')
+
+    print(f"Done making a names's file ({os.path.abspath(output)})")
+
+
+def main(_argv):
+    make_names(FLAGS.anno_dir, FLAGS.output)
+
+
+if __name__ == "__main__":
+    # Do not swallow SystemExit: absl reports flag and usage errors through it,
+    # and the exit status should reach the calling shell.
+    app.run(main)