最近的項目需要對行人進行檢測,但網上能蒐集到的行人數據集比較少,因此手動將COCO數據集裏面的行人類別單獨分離出來,並轉換爲VOC的xml標註格式。
1. 首先準備COCO數據集,只需要訓練數據集就夠用了。
圖片下載地址:http://images.cocodataset.org/zips/train2014.zip
標籤下載地址:http://images.cocodataset.org/annotations/annotations_trainval2014.zip
2. 將下載好的數據集解壓。
3. 運行python代碼將COCO格式轉成VOC。
coco2voc.py 代碼如下。運行後就可以把80個類別全部轉換成VOC格式了;如果只想單獨輸出某一個類別(不包含其他類別),請看步驟4。
import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re
def instance2xml_base(anno):
    """Build the per-image ``<annotation>`` skeleton of an instance XML file.

    Args:
        anno: Mapping that provides ``category_id``, ``file_name``,
            ``coco_url``, ``width`` and ``height``.

    Returns:
        An lxml ``objectify`` element rooted at ``<annotation>`` holding the
        ``folder``, ``filename``, ``source``, ``size`` and ``segmented``
        children. No ``<object>`` entries are added here.
    """
    maker = objectify.ElementMaker(annotate=False)

    # The folder text embeds whatever category the given record carries.
    folder_node = maker.folder('VOC2014_instance/{}'.format(anno['category_id']))
    filename_node = maker.filename(anno['file_name'])
    source_node = maker.source(
        maker.database('MS COCO 2014'),
        maker.annotation('MS COCO 2014'),
        maker.image('Flickr'),
        maker.url(anno['coco_url']),
    )
    size_node = maker.size(
        maker.width(anno['width']),
        maker.height(anno['height']),
        maker.depth(3),  # depth is always written as 3
    )
    segmented_node = maker.segmented(0)

    return maker.annotation(
        folder_node,
        filename_node,
        source_node,
        size_node,
        segmented_node,
    )
def instance2xml_bbox(anno, bbox_type='xyxy'):
    """Build one ``<object>`` element from a single annotation record.

    ``bbox_type`` selects what ends up in ``<xmax>``/``<ymax>``:

    * ``'xyxy'``: the 3rd and 4th values of ``anno['bbox']`` are treated as
      width and height and added to xmin/ymin.
    * ``'xywh'``: the four values of ``anno['bbox']`` are written unchanged.

    Args:
        anno: Mapping that provides ``bbox`` (four values), ``category_id``
            and ``iscrowd``.
        bbox_type: ``'xyxy'`` or ``'xywh'``; anything else fails the assert.

    Returns:
        An lxml ``objectify`` ``<object>`` element with ``name``, ``bndbox``
        and ``difficult`` children; ``difficult`` carries ``iscrowd``.
    """
    assert bbox_type in ['xyxy', 'xywh']

    xmin, ymin, third, fourth = anno['bbox']
    if bbox_type == 'xyxy':
        xmax, ymax = xmin + third, ymin + fourth
    else:
        xmax, ymax = third, fourth

    maker = objectify.ElementMaker(annotate=False)
    name_node = maker.name(anno['category_id'])
    box_node = maker.bndbox(
        maker.xmin(xmin),
        maker.ymin(ymin),
        maker.xmax(xmax),
        maker.ymax(ymax),
    )
    difficult_node = maker.difficult(anno['iscrowd'])
    return maker.object(name_node, box_node, difficult_node)
def parse_instance(content, outdir):
    """Write VOC-style instance XML files, one copy per category directory.

    Every image gets a single XML tree containing all of its annotations.
    That tree is written to ``<outdir>/<category>/<image stem>.xml`` for each
    category that occurs in the image (spaces in the category name become
    underscores). The category sub-directories are expected to exist already.

    Args:
        content: Parsed annotation JSON. Reads ``content['categories']``,
            ``content['images']`` and ``content['annotations']``.
        outdir: Root output directory.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id', content['annotations']),
    ))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # A dict is used as an insertion-ordered set: several objects of the
        # same category map to the same path, and the file only needs to be
        # written once per category directory, not once per object.
        filenames = {}
        for group in groups:
            category_dir = group['category_id'].replace(" ", "_")
            filenames[os.path.join(outdir, category_dir, xml_name)] = None
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        doc = etree.ElementTree(anno_tree)
        for filename in filenames:
            doc.write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
def keypoints2xml_base(anno):
    """Build the per-image ``<annotation>`` skeleton of a keypoint XML file.

    Args:
        anno: Mapping that provides ``file_name``, ``coco_url``, ``width``
            and ``height``.

    Returns:
        The ``<annotation>`` root element with ``folder``, ``filename``,
        ``source``, ``size`` and ``segmented`` children, in that order.
    """
    root = etree.Element("annotation")

    for tag, text in (
        ("folder", "VOC2014_keypoints"),
        ("filename", anno['file_name']),
    ):
        etree.SubElement(root, tag).text = text

    source_node = etree.SubElement(root, "source")
    for tag, text in (
        ("database", "MS COCO 2014"),
        ("annotation", "MS COCO 2014"),
        ("image", "Flickr"),
        ("url", anno['coco_url']),
    ):
        etree.SubElement(source_node, tag).text = text

    size_node = etree.SubElement(root, "size")
    for tag, text in (
        ("width", str(anno["width"])),
        ("height", str(anno["height"])),
        ("depth", '3'),  # depth is always written as 3
    ):
        etree.SubElement(size_node, tag).text = text

    etree.SubElement(root, "segmented").text = '0'
    return root
def keypoints2xml_object(anno, xmltree, keypoints_dict, bbox_type='xyxy'):
    """Append one ``<object>`` (box plus keypoints) to ``xmltree``.

    ``bbox_type`` selects what ends up in ``<xmax>``/``<ymax>``:
    ``'xyxy'`` adds the 3rd/4th bbox values to xmin/ymin, ``'xywh'`` writes
    the four bbox values unchanged.

    Args:
        anno: Mapping that provides ``bbox`` (four values), ``category_id``
            and ``keypoints`` (flat sequence read three values per keypoint,
            written as ``x``, ``y``, ``v``).
        xmltree: Parent element that receives the new ``<object>`` child.
        keypoints_dict: Mapping from 1-based index to the keypoint tag name.
        bbox_type: ``'xyxy'`` or ``'xywh'``; anything else fails the assert.

    Returns:
        ``xmltree`` itself, after the new child has been attached.
    """
    assert bbox_type in ['xyxy', 'xywh']

    left, top, third, fourth = anno['bbox']
    if bbox_type == 'xyxy':
        right, bottom = left + third, top + fourth
    else:
        right, bottom = third, fourth

    obj_node = etree.SubElement(xmltree, "object")
    etree.SubElement(obj_node, "name").text = anno['category_id']

    box_node = etree.SubElement(obj_node, "bndbox")
    for tag, value in (("xmin", left), ("ymin", top), ("xmax", right), ("ymax", bottom)):
        etree.SubElement(box_node, tag).text = str(value)

    # "difficult" is always written as 0 for keypoint objects.
    etree.SubElement(obj_node, "difficult").text = '0'

    kp_root = etree.SubElement(obj_node, "keypoints")
    for index in range(len(keypoints_dict)):
        # keypoints_dict is keyed from 1, the flat value list from 0.
        kp_node = etree.SubElement(kp_root, keypoints_dict[index + 1])
        for offset, tag in enumerate(("x", "y", "v")):
            etree.SubElement(kp_node, tag).text = str(anno['keypoints'][index * 3 + offset])
    return xmltree
def parse_keypoints(content, outdir):
    """Write one VOC-style keypoint XML file per image into ``outdir``.

    Every merged image/annotation record is relabelled as ``"person"``,
    records are grouped by image file name, and each group is written to
    ``<outdir>/<image stem>.xml``.

    Args:
        content: Parsed annotation JSON. Reads
            ``content['categories'][0]['keypoints']`` (keypoint names),
            ``content['images']`` and ``content['annotations']``.
        outdir: Directory that receives the XML files; expected to exist.
    """
    # 1-based index -> keypoint name, the layout keypoints2xml_object looks up.
    keypoints = dict(enumerate(content['categories'][0]['keypoints'], 1))
    # merge images and annotations: id in images vs image_id in annotations
    # The result must be a real list: a lazy map() would be exhausted by the
    # relabel loop below, leaving groupby with nothing and no files written.
    merged_info_list = list(map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id', content['annotations']),
    ))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
        # Pass the path so lxml opens and closes the file itself. A text-mode
        # handle from open(filename, "w") is never closed and cannot take the
        # bytes lxml serialises.
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating keypoints xml file {} done!".format(name))
def main(args):
    """Load the annotation JSON and dispatch to the requested converter.

    Args:
        args: Object with ``output_dir``, ``anno_file`` and ``type``
            attributes (the argparse namespace built by this script).
            ``type == 'instance'`` creates one sub-directory per category
            name (spaces replaced by underscores) and runs
            ``parse_instance``; ``type == 'keypoint'`` runs
            ``parse_keypoints``. Any other value only creates
            ``output_dir`` and loads the JSON.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.output_dir, exist_ok=True)
    # Context manager so the annotation file is closed once parsed; JSON
    # text is decoded explicitly as UTF-8 instead of the platform default.
    with open(args.anno_file, 'r', encoding='utf-8') as anno_fh:
        content = json.load(anno_fh)
    if args.type == 'instance':
        # make subdirectories
        for cate in content['categories']:
            sub_dir = os.path.join(args.output_dir, re.sub(" ", "_", cate['name']))
            os.makedirs(sub_dir, exist_ok=True)
        parse_instance(content, args.output_dir)
    elif args.type == 'keypoint':
        parse_keypoints(content, args.output_dir)
if __name__ == "__main__":
    # Command-line entry point. The defaults for --anno_file and --output_dir
    # are placeholder texts that are meant to be replaced with real paths
    # (or overridden on the command line).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--anno_file",
        default='這裏填寫COCO的標籤路徑例如:/Path/to/instances_train2014.json',
        help="annotation file for object instance/keypoint",
    )
    parser.add_argument(
        "--type",
        default='instance',
        type=str,
        help="object instance or keypoint",
        choices=['instance', 'keypoint'],
    )
    parser.add_argument(
        "--output_dir",
        default='這裏填寫生成VOC標籤的保存路徑',
        help="output directory for voc annotation xml file",
    )
    args = parser.parse_args()
    main(args)
4. 只將包含行人的檢測框輸出爲VOC格式。(想輸出其他類別時做法完全一樣,只需要把 parse_instance 函數裏的類別名 'person' 改成對應的COCO類別名就可以了。)
import argparse, json
import cytoolz
from lxml import etree, objectify
import os, re
def instance2xml_base(anno):
    """Build the per-image ``<annotation>`` skeleton of an instance XML file.

    Args:
        anno: Mapping that provides ``category_id``, ``file_name``,
            ``coco_url``, ``width`` and ``height``.

    Returns:
        An lxml ``objectify`` element rooted at ``<annotation>`` holding the
        ``folder``, ``filename``, ``source``, ``size`` and ``segmented``
        children. No ``<object>`` entries are added here.
    """
    maker = objectify.ElementMaker(annotate=False)

    # The folder text embeds whatever category the given record carries.
    folder_node = maker.folder('VOC2014_instance/{}'.format(anno['category_id']))
    filename_node = maker.filename(anno['file_name'])
    source_node = maker.source(
        maker.database('MS COCO 2014'),
        maker.annotation('MS COCO 2014'),
        maker.image('Flickr'),
        maker.url(anno['coco_url']),
    )
    size_node = maker.size(
        maker.width(anno['width']),
        maker.height(anno['height']),
        maker.depth(3),  # depth is always written as 3
    )
    segmented_node = maker.segmented(0)

    return maker.annotation(
        folder_node,
        filename_node,
        source_node,
        size_node,
        segmented_node,
    )
def instance2xml_bbox(anno, bbox_type='xyxy'):
    """Build one ``<object>`` element from a single annotation record.

    ``bbox_type`` selects what ends up in ``<xmax>``/``<ymax>``:

    * ``'xyxy'``: the 3rd and 4th values of ``anno['bbox']`` are treated as
      width and height and added to xmin/ymin.
    * ``'xywh'``: the four values of ``anno['bbox']`` are written unchanged.

    Args:
        anno: Mapping that provides ``bbox`` (four values), ``category_id``
            and ``iscrowd``.
        bbox_type: ``'xyxy'`` or ``'xywh'``; anything else fails the assert.

    Returns:
        An lxml ``objectify`` ``<object>`` element with ``name``, ``bndbox``
        and ``difficult`` children; ``difficult`` carries ``iscrowd``.
    """
    assert bbox_type in ['xyxy', 'xywh']

    xmin, ymin, third, fourth = anno['bbox']
    if bbox_type == 'xyxy':
        xmax, ymax = xmin + third, ymin + fourth
    else:
        xmax, ymax = third, fourth

    maker = objectify.ElementMaker(annotate=False)
    name_node = maker.name(anno['category_id'])
    box_node = maker.bndbox(
        maker.xmin(xmin),
        maker.ymin(ymin),
        maker.xmax(xmax),
        maker.ymax(ymax),
    )
    difficult_node = maker.difficult(anno['iscrowd'])
    return maker.object(name_node, box_node, difficult_node)
def parse_instance(content, outdir, category='person'):
    """Write VOC-style XML files that contain only one category's boxes.

    For every image with at least one annotation of ``category``, a single
    XML file holding just those annotations is written to
    ``<outdir>/<category>/<image stem>.xml`` (spaces in the category name
    become underscores). Images without such an annotation are skipped: no
    file is written and no progress line is printed for them. The category
    sub-directory is expected to exist already.

    Args:
        content: Parsed annotation JSON. Reads ``content['categories']``,
            ``content['images']`` and ``content['annotations']``.
        outdir: Root output directory.
        category: Category name to export. Defaults to ``'person'``; pass
            any other category name from ``content['categories']`` to
            export that class instead.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id', content['annotations']),
    ))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    category_dir = os.path.join(outdir, category.replace(" ", "_"))
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        matches = [group for group in groups if group['category_id'] == category]
        if not matches:
            continue
        # Build the header from a matching record so <folder> names the
        # exported category rather than whichever annotation came first.
        anno_tree = instance2xml_base(matches[0])
        for match in matches:
            anno_tree.append(instance2xml_bbox(match, bbox_type='xyxy'))
        # All matches share one target path, so the file is written once.
        filename = os.path.join(category_dir, os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
def keypoints2xml_base(anno):
    """Build the per-image ``<annotation>`` skeleton of a keypoint XML file.

    Args:
        anno: Mapping that provides ``file_name``, ``coco_url``, ``width``
            and ``height``.

    Returns:
        The ``<annotation>`` root element with ``folder``, ``filename``,
        ``source``, ``size`` and ``segmented`` children, in that order.
    """
    root = etree.Element("annotation")

    for tag, text in (
        ("folder", "VOC2014_keypoints"),
        ("filename", anno['file_name']),
    ):
        etree.SubElement(root, tag).text = text

    source_node = etree.SubElement(root, "source")
    for tag, text in (
        ("database", "MS COCO 2014"),
        ("annotation", "MS COCO 2014"),
        ("image", "Flickr"),
        ("url", anno['coco_url']),
    ):
        etree.SubElement(source_node, tag).text = text

    size_node = etree.SubElement(root, "size")
    for tag, text in (
        ("width", str(anno["width"])),
        ("height", str(anno["height"])),
        ("depth", '3'),  # depth is always written as 3
    ):
        etree.SubElement(size_node, tag).text = text

    etree.SubElement(root, "segmented").text = '0'
    return root
def keypoints2xml_object(anno, xmltree, keypoints_dict, bbox_type='xyxy'):
    """Append one ``<object>`` (box plus keypoints) to ``xmltree``.

    ``bbox_type`` selects what ends up in ``<xmax>``/``<ymax>``:
    ``'xyxy'`` adds the 3rd/4th bbox values to xmin/ymin, ``'xywh'`` writes
    the four bbox values unchanged.

    Args:
        anno: Mapping that provides ``bbox`` (four values), ``category_id``
            and ``keypoints`` (flat sequence read three values per keypoint,
            written as ``x``, ``y``, ``v``).
        xmltree: Parent element that receives the new ``<object>`` child.
        keypoints_dict: Mapping from 1-based index to the keypoint tag name.
        bbox_type: ``'xyxy'`` or ``'xywh'``; anything else fails the assert.

    Returns:
        ``xmltree`` itself, after the new child has been attached.
    """
    assert bbox_type in ['xyxy', 'xywh']

    left, top, third, fourth = anno['bbox']
    if bbox_type == 'xyxy':
        right, bottom = left + third, top + fourth
    else:
        right, bottom = third, fourth

    obj_node = etree.SubElement(xmltree, "object")
    etree.SubElement(obj_node, "name").text = anno['category_id']

    box_node = etree.SubElement(obj_node, "bndbox")
    for tag, value in (("xmin", left), ("ymin", top), ("xmax", right), ("ymax", bottom)):
        etree.SubElement(box_node, tag).text = str(value)

    # "difficult" is always written as 0 for keypoint objects.
    etree.SubElement(obj_node, "difficult").text = '0'

    kp_root = etree.SubElement(obj_node, "keypoints")
    for index in range(len(keypoints_dict)):
        # keypoints_dict is keyed from 1, the flat value list from 0.
        kp_node = etree.SubElement(kp_root, keypoints_dict[index + 1])
        for offset, tag in enumerate(("x", "y", "v")):
            etree.SubElement(kp_node, tag).text = str(anno['keypoints'][index * 3 + offset])
    return xmltree
def parse_keypoints(content, outdir):
    """Write one VOC-style keypoint XML file per image into ``outdir``.

    Every merged image/annotation record is relabelled as ``"person"``,
    records are grouped by image file name, and each group is written to
    ``<outdir>/<image stem>.xml``.

    Args:
        content: Parsed annotation JSON. Reads
            ``content['categories'][0]['keypoints']`` (keypoint names),
            ``content['images']`` and ``content['annotations']``.
        outdir: Directory that receives the XML files; expected to exist.
    """
    # 1-based index -> keypoint name, the layout keypoints2xml_object looks up.
    keypoints = dict(enumerate(content['categories'][0]['keypoints'], 1))
    # merge images and annotations: id in images vs image_id in annotations
    # The result must be a real list: a lazy map() would be exhausted by the
    # relabel loop below, leaving groupby with nothing and no files written.
    merged_info_list = list(map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id', content['annotations']),
    ))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
        # Pass the path so lxml opens and closes the file itself. A text-mode
        # handle from open(filename, "w") is never closed and cannot take the
        # bytes lxml serialises.
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating keypoints xml file {} done!".format(name))
def main(args):
    """Load the annotation JSON and dispatch to the requested converter.

    Args:
        args: Object with ``output_dir``, ``anno_file`` and ``type``
            attributes (the argparse namespace built by this script).
            ``type == 'instance'`` creates one sub-directory per category
            name (spaces replaced by underscores) and runs
            ``parse_instance``; ``type == 'keypoint'`` runs
            ``parse_keypoints``. Any other value only creates
            ``output_dir`` and loads the JSON.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.output_dir, exist_ok=True)
    # Context manager so the annotation file is closed once parsed; JSON
    # text is decoded explicitly as UTF-8 instead of the platform default.
    with open(args.anno_file, 'r', encoding='utf-8') as anno_fh:
        content = json.load(anno_fh)
    if args.type == 'instance':
        # make subdirectories
        for cate in content['categories']:
            sub_dir = os.path.join(args.output_dir, re.sub(" ", "_", cate['name']))
            os.makedirs(sub_dir, exist_ok=True)
        parse_instance(content, args.output_dir)
    elif args.type == 'keypoint':
        parse_keypoints(content, args.output_dir)
if __name__ == "__main__":
    # Command-line entry point. The defaults for --anno_file and --output_dir
    # are placeholder texts that are meant to be replaced with real paths
    # (or overridden on the command line).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--anno_file",
        default='這裏填寫COCO的標籤路徑例如:/Path/to/instances_train2014.json',
        help="annotation file for object instance/keypoint",
    )
    parser.add_argument(
        "--type",
        default='instance',
        type=str,
        help="object instance or keypoint",
        choices=['instance', 'keypoint'],
    )
    parser.add_argument(
        "--output_dir",
        default='這裏填寫生成VOC標籤的保存路徑',
        help="output directory for voc annotation xml file",
    )
    args = parser.parse_args()
    main(args)