cs231n-18/19-assignment2源碼閱讀筆記

最近想自己做一些簡單的比賽試一試,也算是學習的實踐,但是感覺網上關於用Python對圖像識別數據進行預處理的教程好像並不多。忽然想起cs231n的課程作業中有大量源碼,而且還是stanford的大佬所寫,不看簡直暴殄天物!所以想着通過閱讀這些代碼來強化Python的知識(Python還有好多不會的地方需要學習),並直接把自己閱讀過程中的理解註釋到了代碼中,也分享出來。這裏直接以cs231n-18-19-assignment2的代碼爲例。

一、 數據預處理

1.data_utils.py

from __future__ import print_function

from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform

def load_pickle(f):
    """Unpickle and return the object stored in the open binary file ``f``.

    The call is chosen from the running interpreter's major version:
    Python 3 passes ``encoding='latin1'`` to ``pickle.load`` (presumably
    because the files were pickled under Python 2 -- confirm against the
    data), while Python 2 calls ``pickle.load`` with no extra arguments.

    Raises:
        ValueError: if the major version is neither '2' nor '3'.
    """
    version = platform.python_version_tuple()
    major = version[0]
    if major == '3':
        # pickle.load is the inverse of pickle.dump: it rebuilds the Python
        # object that was serialized into the file.
        return pickle.load(f, encoding='latin1')
    if major == '2':
        return pickle.load(f)
    raise ValueError("invalid python version: {}".format(version))

# Read one batch file and return its processed X and Y arrays.
def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR batch file.

    Args:
        filename: path of the batch file; it is opened in binary mode and
            unpickled with ``load_pickle``.

    Returns:
        A tuple ``(X, Y)``:
        - X: float ndarray of shape (N, 32, 32, 3), built from the ``'data'``
          entry of the unpickled dictionary.
        - Y: ndarray built from the ``'labels'`` entry.
    """
    with open(filename, 'rb') as f:
        # The unpickled object is a dictionary; the values are read by key.
        datadict = load_pickle(f)
    X = datadict['data']
    Y = datadict['labels']
    # Restore the image layout: (N, 3, 32, 32), then move the channel axis
    # last so the result is (N, 32, 32, 3). Using -1 lets numpy infer N from
    # the data instead of assuming exactly 10000 images per batch.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y

def load_CIFAR10(ROOT):
    """Load the full CIFAR-10 dataset found under directory ``ROOT``.

    The five files ``data_batch_1`` .. ``data_batch_5`` are loaded with
    ``load_CIFAR_batch`` and concatenated along axis 0 to form the training
    set; ``test_batch`` is loaded as the test set.

    Returns:
        ``(Xtr, Ytr, Xte, Yte)``: training images, training labels,
        test images, test labels.
    """
    batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (idx, ))
                   for idx in range(1, 6)]
    batches = [load_CIFAR_batch(batch_path) for batch_path in batch_paths]

    # np.concatenate defaults to axis=0, so the five per-batch arrays are
    # stacked into one array each for images and labels.
    Xtr = np.concatenate([images for images, _ in batches])
    Ytr = np.concatenate([labels for _, labels in batches])

    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """Load CIFAR-10 from disk and preprocess it for the classifiers.

    Steps:
    1. Load the raw data from ``cs231n/datasets/cifar-10-batches-py``.
    2. Split the raw training set: the first ``num_training`` samples become
       the training split and the next ``num_validation`` samples become the
       validation split; the first ``num_test`` test samples are kept.
    3. If ``subtract_mean`` is true, subtract the mean image of the training
       split from all three splits (validation and test use the training
       mean, not their own).
    4. Move the channel axis first: (N, 32, 32, 3) -> (N, 3, 32, 32).

    Returns:
        A dictionary with keys 'X_train', 'y_train', 'X_val', 'y_val',
        'X_test', 'y_test'.
    """
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_all, y_all, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Index lists; indexing with a list yields new arrays, so the in-place
    # mean subtraction below does not touch the arrays loaded from disk.
    train_idx = list(range(num_training))
    val_idx = list(range(num_training, num_training + num_validation))
    test_idx = list(range(num_test))

    X_val, y_val = X_all[val_idx], y_all[val_idx]
    X_train, y_train = X_all[train_idx], y_all[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    if subtract_mean:
        # Mean over the sample axis: one mean value per pixel/channel.
        mean_image = X_train.mean(axis=0)
        for split in (X_train, X_val, X_test):
            split -= mean_image

    # transpose only reorders axes; copy() materializes the channels-first
    # layout as a fresh array.
    X_train, X_val, X_test = (
        arr.transpose(0, 3, 1, 2).copy() for arr in (X_train, X_val, X_test)
    )

    return {
      'X_train': X_train, 'y_train': y_train,
      'X_val': X_val, 'y_val': y_val,
      'X_test': X_test, 'y_test': y_test,
    }


def _read_image_chw(img_file):
    """Read one image file and return it in channels-first (C, 64, 64) layout."""
    img = imread(img_file)
    if img.ndim == 2:
        # Grayscale file: add a trailing channel axis of size 1. When the
        # result is assigned into a (3, 64, 64) slot it is broadcast across
        # the three channels.
        img = img.reshape(64, 64, 1)
    return img.transpose(2, 0, 1)


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels (position in wnids.txt)
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class: every line is
    # "<wnid>\t<comma separated names>".
    wnid_to_words = {}
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        for line in f:
            wnid, words = line.split('\t')
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data, one block of images per class.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
                        np.ones(num_images, dtype=np.int64)
        for j, img_name in enumerate(filenames):
            img_path = os.path.join(path, 'train', wnid, 'images', img_name)
            X_train_block[j] = _read_image_chw(img_path)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data; the first two tab-separated fields of each
    # annotation line are the image file name and its wnid.
    val_files = []
    val_wnids = []
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        for line in f:
            img_name, wnid = line.split('\t')[:2]
            val_files.append(img_name)
            val_wnids.append(wnid)
    y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
    X_val = np.zeros((len(val_files), 3, 64, 64), dtype=dtype)
    for i, img_name in enumerate(val_files):
        X_val[i] = _read_image_chw(
            os.path.join(path, 'val', 'images', img_name))

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    test_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(test_files), 3, 64, 64), dtype=dtype)
    for i, img_name in enumerate(test_files):
        X_test[i] = _read_image_chw(
            os.path.join(path, 'test', 'images', img_name))

    # Test labels are optional: only read them when the annotation file exists.
    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        img_file_to_wnid = {}
        with open(y_test_file, 'r') as f:
            for line in f:
                fields = line.split('\t')
                # strip() so that a wnid that is the last field on its line
                # (and therefore carries the newline) still matches the
                # stripped keys of wnid_to_label.
                img_file_to_wnid[fields[0]] = fields[1].strip()
        y_test = np.array([wnid_to_label[img_file_to_wnid[img_name]]
                           for img_name in test_files])

    # The mean image is always computed and returned; it is only subtracted
    # from the splits when subtract_mean is set.
    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
      'class_names': class_names,
      'X_train': X_train,
      'y_train': y_train,
      'X_val': X_val,
      'y_val': y_val,
      'X_test': X_test,
      'y_test': y_test,
      'mean_image': mean_image,
    }


def load_models(models_dir):
    """
    Load every saved model found in a directory.

    Each file in ``models_dir`` is opened in binary mode and unpickled with
    ``load_pickle``; files that raise ``pickle.UnpicklingError`` (such as a
    README.txt) are skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    models = {}
    for model_file in os.listdir(models_dir):
        model_path = os.path.join(models_dir, model_file)
        with open(model_path, 'rb') as f:
            try:
                payload = load_pickle(f)
            except pickle.UnpicklingError:
                # Not a pickle file: ignore it and move on.
                continue
            models[model_file] = payload['model']
    return models


def load_imagenet_val(num=None):
    """Load a handful of validation images from ImageNet.

    The data is read from ``cs231n/datasets/imagenet_val_25.npz`` (relative
    to the current working directory).

    Inputs:
    - num: Number of images to load (max of 25); None loads everything stored
      in the file.

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name

    Raises:
    - AssertionError: if the data file does not exist; download instructions
      are printed first.
    """
    imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz'
    if not os.path.isfile(imagenet_fn):
        print('file %s not found' % imagenet_fn)
        print('Run the following:')
        print('cd cs231n/datasets')
        print('bash get_imagenet_val.sh')
        # Raised explicitly (instead of `assert False`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError('Need to download imagenet_val_25.npz')

    # 'label_map' is read back with .item(), i.e. it is stored as a pickled
    # Python object. NumPy >= 1.16.3 refuses to load such entries unless
    # allow_pickle=True is passed.
    # NOTE: unpickling can execute arbitrary code -- only use this with the
    # data file obtained from the trusted course download script.
    with np.load(imagenet_fn, allow_pickle=True) as f:
        X = f['X']
        y = f['y']
        class_names = f['label_map'].item()

    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names

二、 自帶函數

1. 梯度檢驗(gradient_check.py)

from __future__ import print_function
from builtins import range
from past.builtins import xrange

import numpy as np
from random import randrange

def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    Naive numerical gradient of f at x using centered differences.

    - f: function of a single argument; its result is used as a number
      (e.g. a loss value). For functions that return arrays use
      eval_numerical_gradient_array instead.
    - x: the point (numpy array) to evaluate the gradient at. Each entry is
      perturbed in place by +h and -h and then restored.
    - verbose: when true, print every index together with its gradient entry.
    - h: step size.

    Returns an array shaped like x holding the estimated partial derivatives.
    """
    fx = f(x)  # evaluate the function once at the unperturbed point
    grad = np.zeros_like(x)

    # Walk over every index of x; multi_index gives tuples such as
    # (0, 0), (0, 1), ...
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in it:
        ix = it.multi_index
        center = x[ix]

        x[ix] = center + h
        f_plus = f(x)   # f(x + h)
        x[ix] = center - h
        f_minus = f(x)  # f(x - h)
        x[ix] = center  # restore the original entry

        # centered difference formula: the slope along this coordinate
        grad[ix] = (f_plus - f_minus) / (2 * h)
        if verbose:
            print(ix, grad[ix])

    return grad


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Numerical gradient for a function that maps a numpy array to a numpy
    array.

    - f: function taking x and returning an array.
    - x: input array; each entry is perturbed in place by +h and -h and then
      restored.
    - df: array multiplied elementwise with the change in f's output before
      summing (the upstream gradient).
    - h: step size.

    Returns an array shaped like x.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in it:
        ix = it.multi_index
        saved = x[ix]

        # Copy each output so that the second evaluation cannot overwrite
        # the first if f reuses its output buffer.
        x[ix] = saved + h
        out_plus = f(x).copy()
        x[ix] = saved - h
        out_minus = f(x).copy()
        x[ix] = saved

        grad[ix] = np.sum((out_plus - out_minus) * df) / (2 * h)
    return grad


def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    f is called with all input blobs followed by the output blob, e.g.

    f(x, w, out)

    and is expected to write its result into out.vals. Blobs are accessed
    through their ``vals`` and ``diffs`` attributes.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns a list with one array per input blob, each shaped like that
    blob's ``diffs``.
    """
    def forward_snapshot():
        # Run f on the current blob values and snapshot what it wrote.
        f(*(inputs + (output,)))
        return np.copy(output.vals)

    numeric_diffs = []
    for blob in inputs:
        blob_grad = np.zeros_like(blob.diffs)
        it = np.nditer(blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            saved = blob.vals[idx]

            blob.vals[idx] = saved + h
            out_plus = forward_snapshot()
            blob.vals[idx] = saved - h
            out_minus = forward_snapshot()
            blob.vals[idx] = saved  # restore the original entry

            # Centered difference, weighted by the output blob's diffs.
            weighted = (out_plus - out_minus) * output.diffs
            blob_grad[idx] = np.sum(weighted) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(blob_grad)
    return numeric_diffs


def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    """
    Numeric gradients of a network's forward pass with respect to its input
    blobs.

    Delegates to eval_numerical_gradient_blobs, using a function that ignores
    the blobs it is passed and simply calls ``net.forward()``.
    """
    def run_forward(*_ignored):
        return net.forward()

    return eval_numerical_gradient_blobs(run_forward, inputs, output, h=h)


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    Compare numerical and analytic gradients at a few randomly chosen
    coordinates of x and print the result of each comparison.

    - f: function of x whose result is used as a number.
    - x: numpy array; the sampled entries are perturbed in place by +h and -h
      and then restored.
    - analytic_grad: array indexed like x holding the gradient to check.
    - num_checks: number of random coordinates to sample.
    - h: step size of the centered difference.

    Nothing is returned; one line is printed per check.
    """
    for _ in range(num_checks):
        # Pick one random index along every dimension of x.
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        denom = abs(grad_numerical) + abs(grad_analytic)
        if denom != 0:
            rel_error = abs(grad_numerical - grad_analytic) / denom
        else:
            # Both gradients are exactly zero: they agree, so report zero
            # error instead of evaluating 0 / 0.
            rel_error = 0.0
        print('numerical: %f analytic: %f, relative error: %e'
              %(grad_numerical, grad_analytic, rel_error))

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章