將自己的數據集製作成形如CIFAR10形式

原創

2020-07-04 00:45

在深度學習中，CIFAR-10 圖像數據集經典又好用，因此萌發了將其他數據也製作成類似形式的想法。
ModelNet40 是普林斯頓大學提供的 3D 模型數據集，有學者爲其提出了多視角（multi-view）圖像的數據結構。

上圖爲圖片名字屬性，下方爲代碼。

"""
This script reads the multi-view image data of ModelNet40 and keeps
one view per model (the files whose names end in '001.jpg').
"""
'''
 40 classes = ['airplane', 'bathtub', 'bed', 'bench', 'bookshelf', 'bottle', 'bowl',
             'car', 'chair', 'cone', 'cup', 'curtain', 'desk', 'door', 'dresser',
             'flower_pot', 'glass_box', 'guitar', 'keyboard', 'lamp', 'laptop',
             'mantel', 'monitor', 'night_stand', 'person', 'piano', 'plant', 'radio',
             'range_hood', 'sink', 'sofa', 'stairs', 'stool', 'table', 'tent', 'toilet',
             'tv_stand', 'vase', 'wardrobe', 'xbox'];
'''

import os
import cv2
import numpy as np
import pickle

# Storage location for the generated files.
save_dir = "D:/dataset/ModelNet"
# Root folder of the dataset; the script walks its sub-folders.
main_dir = "D:/dataset/ModelNet/modelnet40v1"

# Every kept image is flattened to this many pixels (50176 = 224 * 224).
FLAT_PIXELS = 50176
# File-name ending that identifies the single view kept for each model.
FIRST_VIEW_SUFFIX = '_001.jpg'


def _first_view_paths(split_dir):
    """Return the sorted paths of the files in split_dir ending in '_001.jpg'.

    Matching on the ending handles file names that split into three
    '_'-separated parts as well as those whose class name itself contains an
    underscore (four parts), and skips files that do not follow that naming
    scheme instead of failing on them.
    """
    return [
        os.path.join(split_dir, file_name)
        for file_name in sorted(os.listdir(split_dir))
        if file_name.endswith(FIRST_VIEW_SUFFIX)
    ]


def _read_flat_gray(img_path):
    """Read img_path as a grayscale image and return it as a 1-D pixel vector.

    Raises:
        OSError: if OpenCV cannot read the file (cv2.imread returned None).
        ValueError: if the image does not contain exactly FLAT_PIXELS pixels.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise OSError("cannot read image: " + img_path)
    return img.reshape(FLAT_PIXELS)


# Samples are collected in plain lists and converted to arrays once at the
# end: this avoids comparing arrays with list sentinels and the quadratic
# cost of concatenating after every image.
train_rows = []
train_ids = []
test_rows = []
test_ids = []

# Index of the class currently being processed; it doubles as the label.
i = 0
print("the numer of class", i)
# Sorted so that the class -> label mapping is reproducible
# (os.listdir returns entries in arbitrary order).
for main_name in sorted(os.listdir(main_dir)):
    sub_dir = os.path.join(main_dir, main_name)
    if not os.path.isdir(sub_dir):
        # Stray files next to the class folders are not classes.
        continue
    # Each class folder holds the sub-folders 'train' and 'test'.
    for sub_name in sorted(os.listdir(sub_dir)):
        sub_sub_dir = os.path.join(sub_dir, sub_name)
        if sub_name == 'train':
            for img_dir in _first_view_paths(sub_sub_dir):
                print(img_dir)
                train_rows.append(_read_flat_gray(img_dir))
                train_ids.append(i)
        elif sub_name == 'test':
            for img_dir in _first_view_paths(sub_sub_dir):
                print("The dir of img", img_dir)
                test_rows.append(_read_flat_gray(img_dir))
                test_ids.append(i)

    i = i + 1

    print("the numer of class", i)

# Final containers: one row of FLAT_PIXELS pixels per image, and one integer
# label per row. The reshape keeps a well-formed (0, FLAT_PIXELS) array when
# no image was found.
traindata = np.asarray(train_rows, dtype=np.uint8).reshape(-1, FLAT_PIXELS)
trainlabel = np.asarray(train_ids, dtype=np.int32)
testdata = np.asarray(test_rows, dtype=np.uint8).reshape(-1, FLAT_PIXELS)
testlabel = np.asarray(test_ids, dtype=np.int32)
# Store the labels as 32-bit integers; np.asarray also accepts a plain list.
trainlabel = np.asarray(trainlabel).astype(np.int32)
testlabel = np.asarray(testlabel).astype(np.int32)

# Bundle each split into a dictionary with 'data' and 'label' entries.
train_dict = {'data': traindata, 'label': trainlabel}
test_dict = {'data': testdata, 'label': testlabel}

train_path = 'D:/dataset/ModelNet/data/train'
test_path = 'D:/dataset/ModelNet/data/test'

# Create the output folder up front so the dump cannot fail on a missing
# directory after all images have been loaded.
os.makedirs(os.path.dirname(train_path), exist_ok=True)
os.makedirs(os.path.dirname(test_path), exist_ok=True)

# The with-blocks flush and close the files even if pickling raises.
with open(train_path, 'wb') as f_train:
    pickle.dump(train_dict, f_train)
with open(test_path, 'wb') as f_test:
    pickle.dump(test_dict, f_test)

參考
深度學習，製作類似cifar10圖像數據集

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

將自己的數據集製作成形如CIFAR10形式

對使用keras代碼的模型中間層可視化

將自己的數據集製作成形如CIFAR10形式

數據格式轉換: mat文件轉換成py文件

使用python製作ImageNet數據集

解決ssh超時斷開問題

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結