YOLOv3:用 k-means 聚類出 anchor 先驗框,並分析 anchor 的寬高比

一. YOLOv3 聚類出先驗框

# -*- coding: utf-8 -*-
import numpy as np
import random
import argparse
import os

# # 參數名稱
# parser = argparse.ArgumentParser(description='使用該腳本生成YOLO-V3的anchor boxes\n')
# parser.add_argument('--input_annotation_txt_dir', required=True, type=str, help='輸入存儲圖片的標註txt文件(注意不要有中文)')
# parser.add_argument('--output_anchors_txt', required=True, type=str, help='輸出的存儲Anchor boxes的文本文件')
# parser.add_argument('--input_num_anchors', required=True, default=6, type=int, help='輸入要計算的聚類(Anchor boxes的個數)')
# parser.add_argument('--input_cfg_width', required=True, type=int, help="配置文件中width")
# parser.add_argument('--input_cfg_height', required=True, type=int, help="配置文件中height")
# args = parser.parse_args()
# print('args:', args)
def IOU(annotation_array, centroids):
    """Compute the IoU between one annotated box and every centroid box.

    Only widths and heights are compared, i.e. the boxes are treated as if
    they shared a common corner.

    :param annotation_array: one annotated box as a (width, height) pair
    :param centroids: cluster centres, one (width, height) pair per row;
        callers in this file pass an ndarray of shape (num, 2)
    :return: 1-D float32 ndarray of shape (num,), the IoU with each centroid
    """
    box_w, box_h = annotation_array
    box_area = box_w * box_h

    def overlap_ratio(cen_w, cen_h):
        # Case 1: the centroid box fully contains the annotated box.
        if cen_w >= box_w and cen_h >= box_h:
            return box_area / (cen_w * cen_h)
        # Case 2: the centroid is wider but not taller.
        if cen_w >= box_w and cen_h <= box_h:
            return box_w * cen_h / (box_area + (cen_w - box_w) * cen_h)
        # Case 3: the centroid is taller but not wider.
        if cen_w <= box_w and cen_h >= box_h:
            return cen_w * box_h / (box_area + (cen_h - box_h) * cen_w)
        # Case 4: the annotated box fully contains the centroid box.
        return (cen_w * cen_h) / box_area

    ratios = [overlap_ratio(cen_w, cen_h) for cen_w, cen_h in centroids]
    return np.array(ratios, np.float32)

def k_means(annotations_array, centroids, eps=0.00005, iterations=200000):
    """Run k-means on box sizes, using ``1 - IOU`` as the distance.

    :param annotations_array: widths and heights of all N annotated boxes,
        an ndarray of shape (N, 2)
    :param centroids: initial cluster centres, an ndarray of shape (num, 2);
        it is overwritten in place with the refined centres
    :param eps: stop once the loss changes by less than this between two
        consecutive iterations
    :param iterations: stop once more than this many iterations have run
    :return: ``centroids`` (the same array object that was passed in)
    :raises ValueError: if there are no boxes or no centroids
    """
    N = annotations_array.shape[0]
    num = centroids.shape[0]
    if N == 0 or num == 0:
        raise ValueError("k_means needs at least one box and one centroid")

    # Loss of the previous iteration; -1 guarantees a large first diff.
    distance_sum_pre = -1
    # -1 is never a valid cluster index, so the first comparison cannot match.
    assignments_pre = -1 * np.ones(N, dtype=np.int64)
    iteration = 0

    while True:
        iteration += 1

        # Distance (1 - IoU) from every box to every centroid, shape (N, num).
        distances_array = np.array(
            [1 - IOU(annotations_array[i], centroids) for i in range(N)],
            np.float32)
        # Index of the closest centroid for each box.
        assignments = np.argmin(distances_array, axis=1)
        # The k-means loss only counts the distance of each box to the
        # centroid it is assigned to, not to every centroid.
        nearest_distances = np.min(distances_array, axis=1)
        distances_sum = np.sum(nearest_distances)

        # Recompute every centroid as the mean of the boxes assigned to it.
        centroid_sums = np.zeros(centroids.shape, np.float32)
        counts = np.zeros(num, np.int64)
        for i in range(N):
            centroid_sums[assignments[i]] += annotations_array[i]
            counts[assignments[i]] += 1
        for j in range(num):
            # A cluster that attracted no box keeps its previous centre;
            # dividing by zero here would turn it into NaN.
            if counts[j] > 0:
                centroids[j] = centroid_sums[j] / counts[j]

        # Change of the loss between two consecutive iterations.
        diff = abs(distances_sum - distance_sum_pre)
        print("iteration: {},distance: {}, diff: {}, avg_IOU: {}\n".format(iteration, distances_sum, diff,
                                                                           np.sum(1 - nearest_distances) / N))
        # Three ways out of the loop: the assignment stopped changing, the
        # loss stopped changing, or the iteration budget is exhausted.
        if (assignments == assignments_pre).all():
            print("按照前後兩次的得到的聚類結果是否相同結束循環\n")
            break
        if diff < eps:
            print("按照eps結束循環\n")
            break
        if iteration > iterations:
            print("按照迭代次數結束循環\n")
            break
        # Remember this iteration for the next comparison.
        distance_sum_pre = distances_sum
        assignments_pre = assignments.copy()

    return centroids


if __name__ == '__main__':
    # Number of clusters, i.e. the number of anchor boxes to generate.
    num_clusters = 9  # args.input_num_anchors
    # Directory holding one annotation .txt file per image.
    annotation_dir = 'train_images_tif_txt'  # args.input_annotation_txt_dir
    # Network input width / height used for training; the normalised anchors
    # are scaled by these before being written out.
    cfg_width, cfg_height = 200, 1800

    # Only real .txt files; a plain substring test would also match names
    # that merely contain "txt".
    names = sorted(i for i in os.listdir(annotation_dir) if i.endswith('.txt'))
    print('names:', names)

    # Width and height of every annotated box.
    annotations_w_h = []
    for name in names:
        txt_path = os.path.join(annotation_dir, name)
        with open(txt_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                # The last two of the five columns are width and height.
                w, h = fields[3:]
                # float() instead of eval(): file contents must never be
                # executed as code.
                annotations_w_h.append((float(w), float(h)))

    # Shape (N, 2), N being the number of boxes; built once after all files
    # have been read.
    annotations_array = np.array(annotations_w_h, dtype=np.float32).reshape(-1, 2)
    N = annotations_array.shape[0]
    if N < num_clusters:
        raise ValueError('need at least %d annotated boxes in %s, found %d'
                         % (num_clusters, annotation_dir, N))

    # Seed the clusters with distinct boxes so that two clusters do not start
    # from the very same annotation.
    random_indices = random.sample(range(N), num_clusters)
    centroids = annotations_array[random_indices]
    # k_means refines `centroids` in place.
    k_means(annotations_array, centroids, 0.00005, 200000)

    # Sort the anchors by width before writing them out.
    anchors = centroids[np.argsort(centroids[:, 0])]
    print('anchors:', anchors)
    with open('./anchors_txt.txt', 'w') as f_anchors:
        for anchor in anchors:
            f_anchors.write('%d,%d,' % (int(anchor[0] * cfg_width), int(anchor[1] * cfg_height)))

train_images_tif_txt 目錄下存放的是標註 txt 文件,每一行的格式為「類別 cx cy w h」,其中 cx、cy、w、h 都是歸一化後的比例。

二.寬高比分析

1.kmeans.py代碼

import numpy as np


def iou(box, clusters):
    """
    Intersection over Union (IoU) of one box against each of k clusters.

    Box and clusters are given by size only, as if anchored at the origin.
    :param box: tuple or array holding (width, height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) with one IoU value per cluster
    :raises ValueError: if any overlap width or height is exactly zero
    """
    cluster_w = clusters[:, 0]
    cluster_h = clusters[:, 1]
    overlap_w = np.minimum(cluster_w, box[0])
    overlap_h = np.minimum(cluster_h, box[1])
    if not (np.count_nonzero(overlap_w == 0) == 0 and np.count_nonzero(overlap_h == 0) == 0):
        raise ValueError("Box has no area")

    overlap = overlap_w * overlap_h
    union = box[0] * box[1] + cluster_w * cluster_h - overlap
    return overlap / union


def avg_iou(boxes, clusters):
    """
    Mean, over all boxes, of each box's best IoU with any cluster.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    best_overlaps = []
    for index in range(boxes.shape[0]):
        # Keep only the IoU with the best-matching cluster for this box.
        best_overlaps.append(np.max(iou(boxes[index], clusters)))
    return np.mean(best_overlaps)


def translate_boxes(boxes):
    """
    Reduce corner-style boxes to their sizes, i.e. move them to the origin.

    The input array is left untouched; the work is done on a copy.
    :param boxes: numpy array of shape (r, 4); columns 0/1 and 2/3 are the
        two corners of each box
    :return: numpy array of shape (r, 2) holding |col2 - col0| and |col3 - col1|
    """
    shifted = boxes.copy()
    # Overwrite the second corner with the absolute extent along each axis.
    shifted[:, 2] = np.abs(shifted[:, 2] - shifted[:, 0])
    shifted[:, 3] = np.abs(shifted[:, 3] - shifted[:, 1])
    # Drop the first corner, which is no longer needed.
    return np.delete(shifted, [0, 1], axis=1)


def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function used to reduce the boxes of one cluster to its new
        centre; called as ``dist(members, axis=0)``
    :return: numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # Start from -1, which is never a valid cluster index. Starting from
    # zeros would end the loop before the first update whenever every box is
    # nearest to cluster 0 (always the case for k == 1), returning the raw
    # random seeds instead of refined clusters.
    last_clusters = -np.ones((rows,))

    # NOTE(review): this reseeds NumPy's global generator on every call, so a
    # seed set by the caller does not make the result reproducible - confirm
    # that this is intended.
    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        # Converged once no box changes its cluster.
        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # A cluster without members keeps its previous centre; reducing
            # an empty selection would not give a usable box.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters

2.example.py代碼

import glob
import xml.etree.ElementTree as ET
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from kmeans import kmeans, avg_iou

# ANNOTATIONS_PATH = "./data/pascalvoc07-annotations"
ANNOTATIONS_PATH = "./data/widerface-annotations"
CLUSTERS = 9
# 相對原圖是否歸一化
BBOX_NORMALIZE = True


def show_cluster(data, cluster, max_points=2000):
    '''
    Scatter-plot the box sizes in `data` with the anchors in `cluster` on top.

    When `data` has more than `max_points` rows, `max_points` randomly chosen
    rows are plotted instead of all of them. The figure is saved to
    "cluster.png" and then shown.
    '''
    total = len(data)
    if total > max_points:
        data = data[np.random.choice(total, max_points)]

    box_w, box_h = data[:, 0], data[:, 1]
    anchor_w, anchor_h = cluster[:, 0], cluster[:, 1]

    # Boxes as small pale dots, anchors as large red triangles.
    plt.scatter(box_w, box_h, s=5, c='lavender')
    plt.scatter(anchor_w, anchor_h, c='red', s=100, marker="^")

    plt.title("Bounding and anchor distribution")
    plt.xlabel("Width")
    plt.ylabel("Height")

    plt.savefig("cluster.png")
    plt.show()


def show_width_height(data, cluster, bins=50):
    '''
    Draw three histograms side by side: box width, box height and the
    height / width ratio.

    `data` holds widths in column 0 and heights in column 1; `cluster` is
    accepted but not used. The figure is saved to "shape-distribution.png"
    and then shown.
    '''
    values = data if data.dtype == np.float32 else data.astype(np.float32)
    width = values[:, 0]
    height = values[:, 1]

    # One entry per panel: subplot position, samples, colour, x label,
    # y label, title.
    panels = (
        (131, width, 'green', 'width', 'number', 'Distribution of Width'),
        (132, height, 'blue', 'Height', 'Number', 'Distribution of Height'),
        (133, height / width, 'magenta', 'Height / Width', 'number',
         'Distribution of aspect ratio(Height / Width)'),
    )

    plt.figure(1, figsize=(20, 6))
    for position, samples, colour, x_label, y_label, heading in panels:
        plt.subplot(position)
        plt.hist(samples, bins=bins, color=colour)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.title(heading)

    plt.savefig("shape-distribution.png")
    plt.show()


def sort_cluster(cluster):
    '''
    Order the clusters by area, smallest first, and append each cluster's
    height / width ratio as a third column.

    The result is a float32 array with columns (width, height, height / width).
    '''
    as_float = cluster if cluster.dtype == np.float32 else cluster.astype(np.float32)
    order = np.argsort(as_float[:, 0] * as_float[:, 1])
    ordered = as_float[order]
    # Turn the ratio into a column so it can be attached to the right.
    aspect = (ordered[:, 1] / ordered[:, 0])[:, np.newaxis]
    return np.concatenate((ordered, aspect), axis=-1)


# def load_dataset(path, normalized=True):
#     '''
#     load dataset from pasvoc formatl xml files
#     return [[w,h],[w,h]]
#     '''
#     dataset = []
#     for xml_file in glob.glob("{}/*xml".format(path)):
#         tree = ET.parse(xml_file)
#
#         height = int(tree.findtext("./size/height"))
#         width = int(tree.findtext("./size/width"))
#
#         for obj in tree.iter("object"):
#             if normalized:
#                 xmin = int(obj.findtext("bndbox/xmin")) / float(width)
#                 ymin = int(obj.findtext("bndbox/ymin")) / float(height)
#                 xmax = int(obj.findtext("bndbox/xmax")) / float(width)
#                 ymax = int(obj.findtext("bndbox/ymax")) / float(height)
#             else:
#                 xmin = int(obj.findtext("bndbox/xmin"))
#                 ymin = int(obj.findtext("bndbox/ymin"))
#                 xmax = int(obj.findtext("bndbox/xmax"))
#                 ymax = int(obj.findtext("bndbox/ymax"))
#             if (xmax - xmin) == 0 or (ymax - ymin) == 0:
#                 continue  # to avoid divded by zero error.
#             dataset.append([xmax - xmin, ymax - ymin])
#
#     return np.array(dataset)

def load_dataset(path, normalized=True, scale_w=200, scale_h=1800):
    '''
    Load box sizes from the annotation .txt files found directly in `path`.

    Every non-blank line must hold five whitespace-separated columns; the
    last two are read as the box width and height.

    The matching .jpg is no longer opened: its size was read but never used,
    and a missing image made the whole call fail.

    :param path: directory containing the annotation .txt files
    :param normalized: if True return the values as stored in the files;
        otherwise multiply widths by `scale_w` and heights by `scale_h`
    :param scale_w: width factor applied when `normalized` is False
    :param scale_h: height factor applied when `normalized` is False
    :return: float numpy array of shape (N, 2), one [w, h] row per box
    :raises ValueError: if a non-blank line does not have five columns or
        its width / height is not a number
    '''
    dataset = []
    # Only real .txt files; a plain substring test would also match names
    # that merely contain "txt". Sorted so the row order is reproducible.
    names = sorted(i for i in os.listdir(path) if i.endswith('.txt'))
    for name in names:
        txt_path = os.path.join(path, name)
        with open(txt_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                if len(fields) != 5:
                    raise ValueError('%s line %d: expected 5 columns, got %d'
                                     % (txt_path, line_no, len(fields)))
                # float() instead of eval(): file contents must never be
                # executed as code.
                w, h = float(fields[3]), float(fields[4])
                if not normalized:
                    w, h = w * scale_w, h * scale_h
                dataset.append((w, h))

    # reshape keeps the (N, 2) layout even when no box was found.
    return np.array(dataset, dtype=np.float64).reshape(-1, 2)


# print("Start to load data annotations on: %s" % ANNOTATIONS_PATH)
# [[w, h], [w, h]]
# Box sizes as rows of [w, h].
data = load_dataset(path='./train_img', normalized=BBOX_NORMALIZE)
print(data[:3])
print("Start to do kmeans, please wait for a moment.")
out = kmeans(data, k=CLUSTERS)
print('==out', out)
# Clusters ordered by area, with height / width as a third column.
out_sorted = sort_cluster(out)
# Mean best IoU between every box and the clusters, as a percentage.
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))

show_cluster(data, out, max_points=2000)

if out.dtype != np.float32:
    out = out.astype(np.float32)

# The third column printed below is height / width, so the heading says so.
print("Recommended aspect ratios (height/width)")
print("Width    Height   Height/Width")
for anchor_w, anchor_h, anchor_ratio in out_sorted:
    print("%.3f      %.3f     %.1f" % (anchor_w, anchor_h, anchor_ratio))
show_width_height(data, out, bins=50)

txt 文件每一行的內容是:類別、cx、cy、w、h(其中 cx、cy、w 和 h 是歸一化後的比例)。下圖是其分佈,也就是說,輸入如果是方形,anchor 的寬高比就用這個。

下圖是乘以實際尺寸後的分佈,也就是說,輸入如果是按圖片原比例縮放,anchor 的寬高比就用這個。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章