【Caffe-Ubuntu】JSON 標籤生成自己的 Caffe-LMDB 數據文件

0:生成 LMDB 的流程

  1. 已有的 json 數據集,可以通過 labelme 等開源工具標註,或者自己寫腳本生成
  2. 將 json 文件轉成 voc2007 格式的文件(labelme 格式轉 VOC2007 數據集格式)
  3. 設置個人的 labelmap.prototxt
  4. 借用 ssd-caffe 的 create_list.sh 腳本,生成圖片路徑與標註文件路徑一一對應的列表文件,供下一步轉換使用
  5. 借用ssd-caffe的 create_data.sh 腳本生成 LMDB 格式文件

1:製作自己的數據集

這裏推薦幾款好用的標註工具
6. labelme:安裝簡單,支持標定關鍵點,分割等,非常好用,生成json格式的標籤文件。
格式:下面列出的標籤參數都是必須的,否則labelme無法正常識別

{
  "shapes": [
    {
      "shape_type": "polygon", 
      "line_color": null, 
      "points": [
        [ 634,  276  ], 
        [ 703,   275  ], 
        [ 705,   312  ], 
        [ 635,   313  ]
      ], 
      "fill_color": null, 
      "label": "traffic-4"
    }, 
    {
      "shape_type": "polygon", 
      "line_color": null, 
      "points": [
        [ 715,  275  ], 
        [ 785,  274  ], 
        [ 786,  313  ], 
        [ 716,  312  ]
      ], 
      "fill_color": null, 
      "label": "traffic-4-occ-largely"
    }
  ], 
  "lineColor": [ 0,  255,   0,  128 ], 
  "imagePath": "2012-3-23_20-23-25_0.jpg", 
  "fillColor": [ 255,   0,  0,  128  ], 
  "imageData": null
}
  1. labelImg:安裝簡單,非常方便的畫框的標定工具。
    支持PASCAL VOC格式的XML標籤。
  2. 其他:待續。。。

2:JSON 轉 VOC2007

# -*- coding: utf-8 -*-
import os, re
import json
import cv2
import numpy as np
import codecs
from glob import glob
import shutil
from sklearn.model_selection import train_test_split

def iter_files(data_root_path, saved_path):
    """Convert every labelme ``.json`` file found under ``data_root_path``.

    The directory tree is traversed with ``os.walk``, which already descends
    into every sub-directory, so no explicit recursion is performed here.
    For each file whose name ends in ``.json`` a running counter and the file
    path are printed, then ``json2voc2007`` is called with the file's base
    name (extension removed), the directory containing it, and ``saved_path``.

    Args:
        data_root_path: Root directory to search for ``.json`` files.
        saved_path: Output directory, passed through to ``json2voc2007``.

    Returns:
        None. A missing or empty ``data_root_path`` simply converts nothing.
    """
    count = 0

    for root, _dirs, files in os.walk(data_root_path):
        for json_file in files:
            # Compare the real extension: an unanchored regex such as ".json"
            # also matches names like "notes_json.txt".
            if not json_file.endswith(".json"):
                continue
            # Strip only the final extension so that json2voc2007 can rebuild
            # the exact file name by appending ".json" again.
            file_name = os.path.splitext(json_file)[0]
            file_path = os.path.join(root, json_file)
            count += 1
            print("====================================================================")
            print(count)
            print(file_path)
            # json -> voc2007
            json2voc2007(file_name, root, saved_path)

def json2voc2007(json_file_, labelme_path, saved_path,
                 keep_labels=("traffic-3", "traffic-3-occ-partially")):
    """Convert one labelme JSON annotation into a VOC2007-style sample.

    Three things happen on every call:

    1. ``<saved_path>/Annotations/<json_file_>.xml`` is written, with one
       ``<object>`` per kept shape. The bounding box is the min/max of the
       shape's ``points`` along x and y; boxes with zero or negative width or
       height are dropped.
    2. ``<labelme_path>/<json_file_>.jpg`` is copied to
       ``<saved_path>/JPEGImages/``.
    3. The split lists under ``<saved_path>/ImageSets/Main/`` are regenerated
       from all XML files currently present in ``<saved_path>/Annotations/``.

    Args:
        json_file_: Base name (no extension) shared by the ``.json`` and
            ``.jpg`` files.
        labelme_path: Directory containing the ``.json`` and ``.jpg`` files.
        saved_path: Output root; the ``Annotations``, ``JPEGImages`` and
            ``ImageSets/Main`` sub-directories must already exist.
        keep_labels: Labels to export. Shapes with any other label are
            skipped. Pass ``None`` to export every shape. The default keeps
            the filter that was previously hard-coded.

    Raises:
        IOError: If the ``.jpg`` image cannot be read by ``cv2.imread``.
    """
    json_filename = os.path.join(labelme_path, json_file_ + ".json")
    image_name = json_file_ + ".jpg"
    image_path = os.path.join(labelme_path, image_name)

    # Close the JSON file deterministically instead of leaking the handle.
    with codecs.open(json_filename, "r", "utf-8") as json_fp:
        json_file = json.load(json_fp)

    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        raise IOError("cannot read image: " + image_path)
    height, width, channels = image.shape

    xml_path = os.path.join(saved_path, "Annotations", json_file_ + ".xml")
    with codecs.open(xml_path, "w", "utf-8") as xml:
        xml.write('<annotation>\n')
        xml.write('\t<folder>' + 'TrafficSign' + '</folder>\n')
        xml.write('\t<filename>' + image_name + '</filename>\n')
        xml.write('\t<source>\n')
        xml.write('\t\t<database>The UAV autolanding</database>\n')
        xml.write('\t\t<annotation>UAV AutoLanding</annotation>\n')
        xml.write('\t\t<image>flickr</image>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t</source>\n')
        xml.write('\t<owner>\n')
        xml.write('\t\t<flickrid>NULL</flickrid>\n')
        xml.write('\t\t<name>TrafficSign</name>\n')
        xml.write('\t</owner>\n')
        xml.write('\t<size>\n')
        xml.write('\t\t<width>' + str(width) + '</width>\n')
        xml.write('\t\t<height>' + str(height) + '</height>\n')
        xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml.write('\t</size>\n')
        xml.write('\t\t<segmented>0</segmented>\n')
        for multi in json_file["shapes"]:
            label = multi["label"]
            # Label filter: only export the requested classes.
            if keep_labels is not None and label not in keep_labels:
                continue
            points = np.array(multi["points"])
            xmin = points[:, 0].min()
            xmax = points[:, 0].max()
            ymin = points[:, 1].min()
            ymax = points[:, 1].max()
            # Skip degenerate boxes (no area).
            if xmax <= xmin or ymax <= ymin:
                continue
            xml.write('\t<object>\n')
            xml.write('\t\t<name>' + label + '</name>\n')
            xml.write('\t\t<pose>Unspecified</pose>\n')
            xml.write('\t\t<truncated>1</truncated>\n')
            xml.write('\t\t<difficult>0</difficult>\n')
            xml.write('\t\t<bndbox>\n')
            xml.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
            xml.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
            xml.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
            xml.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
            xml.write('\t\t</bndbox>\n')
            xml.write('\t</object>\n')
            print(json_filename, xmin, ymin, xmax, ymax, label)
        xml.write('</annotation>')

    # Copy the image to VOC2007/JPEGImages/.
    print("copy image files to VOC007/JPEGImages/")
    shutil.copyfile(image_path,
                    os.path.join(saved_path, "JPEGImages", image_name))

    # Refresh the train/val split lists so they include this sample.
    _write_split_lists(saved_path)


def _write_split_lists(saved_path):
    """Rewrite trainval/train/val/test.txt from the XML files in saved_path."""
    list_dir = os.path.join(saved_path, "ImageSets", "Main")
    # Read the annotations from the directory that was actually written to
    # (not a hard-coded dataset path); sort them so the seeded split does not
    # depend on the order in which glob returns files.
    xml_files = glob(os.path.join(saved_path, "Annotations", "*.xml"))
    total_files = sorted(
        os.path.splitext(os.path.basename(path))[0] for path in xml_files
    )

    if len(total_files) < 2:
        # train_test_split raises ValueError when either side of the split
        # would be empty, which is the case for 0 or 1 samples.
        train_files, val_files = total_files, []
    else:
        # test_size sets the train:val ratio.
        train_files, val_files = train_test_split(
            total_files, test_size=0.10, random_state=42)

    # test.txt is still created, but left empty: no test set is defined here.
    split_lists = (
        ("trainval.txt", total_files),
        ("train.txt", train_files),
        ("val.txt", val_files),
        ("test.txt", []),
    )
    for list_name, names in split_lists:
        with open(os.path.join(list_dir, list_name), "w") as list_file:
            for name in names:
                list_file.write(name + "\n")

def main():
    """Create the VOC2007 output folders, then convert everything in ./data/.

    The output root is ``./VOC2007/``; its ``Annotations``, ``JPEGImages/``
    and ``ImageSets/Main/`` sub-directories are created when they do not
    exist yet. ``iter_files`` is then run on ``./data/``.
    """
    # Output root for the converted dataset.
    saved_path = "./VOC2007/"

    # Make sure each required sub-directory exists.
    for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
        target_dir = saved_path + sub_dir
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    # Walk the input tree and convert each annotation found.
    data_root_path = "./data/"
    iter_files(data_root_path, saved_path)


if __name__ == "__main__":
    main()

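3:labelmap.prototxt 設定(以背景,目標兩類爲例)

注意:每個 item 的 name 必須與 XML 標註中 <name> 的標籤名完全一致(例如上面腳本篩選出的 traffic-3),否則生成 LMDB 時會因找不到對應的標籤名而失敗;下面的 face 只是示例,請換成自己的類別名。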

item {
name: "none_of_the_above"
label: 0
display_name: "background"
}
item {
name: "face"
label: 1
display_name: "face"
}

4:create_list.sh

這裏主要注意,root_dir 要修改爲自己 VOC2007 的路徑

#!/bin/bash
# Build "<image path> <annotation path>" list files for each split.
#
# For every split name in the loop below, this reads
#   $root_dir/ImageSets/Main/<split>.txt   (one sample name per line)
# and writes <split>.txt next to this script, one line per sample:
#   /JPEGImages/<name>.jpg /Annotations/<name>.xml
# For "val" it also runs get_image_size to produce val_name_size.txt;
# for "train" the list is shuffled.

# Change root_dir to the location of your own VOC2007 directory.
root_dir="$HOME/data/VOC2007"
sub_dir=ImageSets/Main
echo "$(dirname "${BASH_SOURCE[0]}")"
bash_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Process both splits: create_data.sh reads train.txt, and the "train"
# shuffle branch below is unreachable when only "val" is listed.
for dataset in train val
do
  dst_file="$bash_dir/$dataset.txt"
  # rm -f is a no-op when the file does not exist.
  rm -f "$dst_file"

  echo "Create list for $dataset..."
  echo "$root_dir/$sub_dir/$dataset.txt"
  dataset_file="$root_dir/$sub_dir/$dataset.txt"
  if [ ! -f "$dataset_file" ]
  then
    echo "Missing split file: $dataset_file" >&2
    exit 1
  fi

  # Turn each sample name into /JPEGImages/<name>.jpg
  img_file="$bash_dir/${dataset}_img.txt"
  cp "$dataset_file" "$img_file"
  sed -i 's|^|/JPEGImages/|' "$img_file"
  sed -i 's|$|.jpg|' "$img_file"

  # Turn each sample name into /Annotations/<name>.xml
  label_file="$bash_dir/${dataset}_label.txt"
  cp "$dataset_file" "$label_file"
  sed -i 's|^|/Annotations/|' "$label_file"
  sed -i 's|$|.xml|' "$label_file"

  # Join the two columns line by line, separated by a single space.
  paste -d' ' "$img_file" "$label_file" > "$dst_file"

  rm -f "$label_file"
  rm -f "$img_file"

  # Generate image name and size information.
  if [ "$dataset" = "val" ]
  then
    "$bash_dir/../../build/tools/get_image_size" "$root_dir" "$dst_file" "$bash_dir/${dataset}_name_size.txt"
  fi

  # Shuffle the train list.
  if [ "$dataset" = "train" ]
  then
    rand_file="$dst_file.random"
    perl -MList::Util=shuffle -e 'print shuffle(<STDIN>);' < "$dst_file" > "$rand_file"
    mv "$rand_file" "$dst_file"
  fi
done

5:create_data.sh

# Convert the list files produced by create_list.sh into LMDB databases
# by calling scripts/create_annoset.py once per split.
cur_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "$cur_dir"
root_dir="$cur_dir/../.."
# Stop here if the directory cannot be entered: the relative paths used
# below (examples/...) would otherwise resolve against the wrong directory.
cd "$root_dir" || exit 1

# Set redo to 1 to pass --redo to create_annoset.py; any other value skips it.
redo=1
data_root_dir="$HOME/data/VOC2007"
dataset_name="DataName"
mapfile="$root_dir/data/$dataset_name/labelmap.prototxt"
anno_type="detection"
db="lmdb"
min_dim=0
max_dim=0
width=0
height=0

extra_cmd="--encode-type=jpg --encoded"
# Compare the value explicitly: [ $redo ] is true for ANY non-empty string,
# so redo=0 would still have added --redo.
if [ "$redo" = "1" ]
then
  extra_cmd="$extra_cmd --redo"
fi

# Build one LMDB per split; the list files are expected at
# $root_dir/data/$dataset_name/<split>.txt.
for subset in train val
do
  # $extra_cmd is intentionally unquoted so it splits into separate flags.
  sudo python2 "$root_dir/scripts/create_annoset.py" \
    --anno-type="$anno_type" \
    --label-map-file="$mapfile" \
    --min-dim="$min_dim" \
    --max-dim="$max_dim" \
    --resize-width="$width" \
    --resize-height="$height" \
    --check-label $extra_cmd \
    "$data_root_dir" \
    "$root_dir/data/$dataset_name/$subset.txt" \
    "$data_root_dir/$dataset_name/$db/${dataset_name}_${subset}_${db}" \
    "examples/$dataset_name"
done
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章