caffe adaboost

caffe  voting  

voting   ensemble_accuracy_layer.cpp 代碼如下, 因爲這裏有根據概率求準確率的部分,所以弱分類器模型的可以不用配置Accuracy層,

Softmax後直接用ensemble layer。

Softmax 層和 accuracy 層的配置文件如下:
# Softmax layer: reads the blob "3_ip2" and writes the blob "3_prob".
layer {
  name: "3_prob"
  type: "Softmax"
  bottom: "3_ip2"
  top: "3_prob"
}

# Accuracy layer: compares the blob "1_prob" against "label" and writes
# "1_accuracy". The include rule restricts it to the TEST phase.
layer {
  name: "1_accuracy"
  type: "Accuracy"
  bottom: "1_prob"
	bottom: "label"
  top: "1_accuracy"
  include {
    phase: TEST
  }
}

caffe.proto文件中 層定義,因爲ensemble 層只需要 name,type,bottom,top  四個參數即可。所以不需要在 caffe.proto 文件中設置。

message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

ensemble 層的配置如下:

# Ensemble accuracy layer: takes the probability blobs of the individual
# classifiers followed by the label blob (the label must be the last bottom).
# The type string must match the name passed to REGISTER_LAYER_CLASS in
# ensemble_accuracy_layer.cpp, i.e. "EnsembleAccuracy".
layer {
  name: "ensemble_accuracy"
  type: "EnsembleAccuracy"
  bottom: "prob1"
  bottom: "prob2"
  bottom: "prob3"
  bottom: "label"
  top: "ensemble_accuracy"
  include {
    phase: TEST
  }
}




#include <algorithm>
#include <functional>
#include <utility>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

/**
 * @brief Shapes the output blob.
 *
 * The single top blob is reshaped with an empty (zero-axis) shape vector;
 * Forward_cpu writes the accuracy value into its element 0.
 *
 * @param bottom input blobs (not inspected here)
 * @param top    output blobs; only top[0] is reshaped
 */
template <typename Dtype>
void EnsembleAccuracyLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // No axes at all: the output carries one value only.
  const vector<int> scalar_shape;
  top[0]->Reshape(scalar_shape);
}

/**
 * @brief Computes the ensemble accuracy over one batch.
 *
 * The last bottom blob holds the labels; every preceding bottom blob holds
 * the per-class scores of one classifier. A sample counts as correct as soon
 * as ANY classifier's highest-scoring class equals the sample's label. The
 * fraction of correct samples is written to top[0][0].
 *
 * All prediction blobs are indexed with the per-sample dimension of
 * bottom[0]; they are assumed to share its shape (not checked here).
 *
 * @param bottom bottom[0 .. size-2]: classifier scores, bottom[size-1]: labels
 * @param top    top[0][0] receives the accuracy (0 for an empty batch)
 */
template <typename Dtype>
void EnsembleAccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int n_pred = static_cast<int>(bottom.size()) - 1;
  const Dtype* bottom_label = bottom[n_pred]->cpu_data();
  const int num = bottom[0]->num();  // number of samples in the batch

  // An empty batch would otherwise divide by zero below.
  if (num <= 0) {
    top[0]->mutable_cpu_data()[0] = Dtype(0);
    return;
  }
  const int dim = bottom[0]->count() / num;  // scores per sample

  // Fetch every classifier's data pointer once instead of once per sample.
  vector<const Dtype*> pred_data(n_pred);
  for (int j = 0; j < n_pred; ++j) {
    pred_data[j] = bottom[j]->cpu_data();
  }

  // Integer counter: exact for any batch size, unlike a floating-point one.
  int num_correct = 0;
  for (int i = 0; i < num; ++i) {
    const int label = static_cast<int>(bottom_label[i]);

    // Stop at the first classifier that predicts this sample correctly.
    for (int j = 0; j < n_pred; ++j) {
      const Dtype* scores = pred_data[j] + i * dim;

      // Reset per classifier so a stale index from a previous classifier or
      // sample can never be mistaken for this classifier's prediction.
      int arg_max = -1;
      Dtype max_prob = 0;
      for (int k = 0; k < dim; ++k) {
        const Dtype prob_pred = std::max(scores[k], Dtype(kLOG_THRESHOLD));
        if (prob_pred > max_prob) {
          max_prob = prob_pred;  // best score so far
          arg_max = k;           // class index of the best score
        }
      }

      // arg_max stays -1 when no score compared greater (e.g. NaN scores).
      if (arg_max >= 0 && arg_max == label) {
        ++num_correct;
        break;
      }
    }
  }

  top[0]->mutable_cpu_data()[0] =
      static_cast<Dtype>(static_cast<double>(num_correct) / num);
}

// Instantiate the layer template and register it under the name
// "EnsembleAccuracy"; a prototxt `type:` field has to use that same string.
INSTANTIATE_CLASS(EnsembleAccuracyLayer);
REGISTER_LAYER_CLASS(EnsembleAccuracy);

}  // namespace caffe


caffe adaboost實現方式

https://github.com/terrychenism/NeuralNetTests/blob/master/caffe_utils/cnn_adaboost.py  參考代碼,

今天 在caffe上實現adaboost算法,

先訓練弱分類器,用弱分類器的模型即可,如果把caffe訓練好的模型當弱分類器,只需要調用caffe,使用該模型即可,不需要重新訓練該弱分類器。



下面的代碼調用caffe訓練好的模型,把它作爲adaboost的弱分類器。這裏主要使用了sklearn庫。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: Tairui Chen


import numpy as np
import os
import sys
import argparse
import glob
import time
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier

import caffe

# Random integer in [0, 100000) drawn once per run; CNN.fit embeds it in the
# names of the model and info files it writes.
g_rnd = np.random.randint(100000)

def create_weighted_db(X, y, weights, name='boost'):
    """Write a weighted dataset plus a one-line listing file into DIR.

    X is reshaped to (-1, 3, 32, 32) and stored together with the float32
    labels and the per-sample weights in ``DIR/<name>.h5`` via ``dd.io.save``.
    ``DIR/<name>.txt`` then receives a single line holding the path of that
    data file.

    NOTE(review): ``DIR`` and ``dd`` are not defined in the visible part of
    this file; ``dd`` is presumably deepdish - confirm.
    """
    images = X.reshape(-1, 3, 32, 32)
    data_path = os.path.join(DIR, name + '.h5')
    payload = {
        'data': images,
        'label': y.astype(np.float32),
        'sample_weight': weights,
    }
    dd.io.save(data_path, payload, compress=False)

    listing_path = os.path.join(DIR, name + '.txt')
    with open(listing_path, 'w') as listing:
        listing.write(data_path + '\n')


class CNN(BaseEstimator, ClassifierMixin):
    """scikit-learn style estimator that trains a Caffe network.

    It is used as the weak learner of ``AdaBoostClassifier``: ``fit`` writes
    the weighted samples to disk and trains a network through
    ``train_model``; ``predict_proba`` runs that network and returns
    temperature-softened class probabilities.

    NOTE(review): ``g_seed``, ``g_loop``, ``train_model``, ``CONF_DIR``,
    ``DEVICE_ID`` and ``dd`` are used here but not defined in the visible
    part of this file - they must be provided elsewhere.
    """

    def __init__(self):
        pass

    def get_params(self, deep=False):
        """Return an empty dict: the estimator has no constructor parameters."""
        return {}

    def fit(self, X, y, sample_weight=None):
        """Train one network on the weighted samples.

        Parameters
        ----------
        X : ndarray
            Samples; reshaped to (-1, 3, 32, 32) when written to disk.
        y : ndarray
            Labels, one per sample.
        sample_weight : ndarray or None
            Per-sample weights. When given they are multiplied by the number
            of samples; when None every sample gets weight 1.

        Returns
        -------
        self
        """
        global g_seed
        global g_loop
        n_samples = X.shape[0]
        if sample_weight is None:
            sample_weight = np.ones(n_samples, np.float32)
            print('Calling fit with sample_weight None')
        else:
            # Scale a copy: the caller (AdaBoost) keeps using its own weight
            # array after fit returns, so it must not be modified in place.
            sample_weight = np.asarray(sample_weight) * n_samples
            print('Calling fit with sample_weight sum', sample_weight.sum())

        print('Weight max:', sample_weight.max())
        print('Weight min:', sample_weight.min())

        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)

        # Set up weighted database
        create_weighted_db(X, y, sample_weight)

        # Training schedule handed to train_model unchanged.
        # NOTE(review): the meaning of each tuple entry is defined by
        # train_model, which is not visible here.
        steps = [(0.001, 0.004, 60000), (0.0001, 0.004, 5000), (0.00001, 0.004, 5000)]

        name = os.path.join(CONF_DIR, 'adaboost_{}_loop{}'.format(g_rnd, g_loop))
        bare_conf_fn = os.path.join(CONF_DIR, 'boost_bare.prototxt')
        conf_fn = os.path.join(CONF_DIR, 'solver.prototxt.template')

        net, info = train_model(name, conf_fn, bare_conf_fn, steps,
                                seed=g_seed, device_id=DEVICE_ID)

        loss_fn = 'info/info_{}_loop{}.h5'.format(g_rnd, g_loop)
        dd.io.save(loss_fn, info)
        print('Saved to', loss_fn)

        # One fit call corresponds to one boosting round.
        g_loop += 1

        print('Classifier set up')

        self.net_ = net
        # scikit-learn convention: fit returns the estimator itself.
        return self

    def predict_proba(self, X):
        """Return softened class probabilities, shape (n_samples, n_classes_).

        The network is evaluated in batches of 2500 samples. Its output
        probabilities p are then softened with temperature T = 30:
        ``p ** (1 / T)``, renormalised so that every row sums to 1.
        """
        X = X.reshape(-1, 3, 32, 32)
        n_samples = X.shape[0]
        prob = np.zeros((n_samples, self.n_classes_))

        M = 2500
        # Integer ceiling division: also covers the last partial batch when
        # `/` truncates (Python 2).
        n_batches = (n_samples + M - 1) // M
        for k in range(n_batches):
            batch = slice(k * M, (k + 1) * M)
            outputs = self.net_.forward_all(data=X[batch])
            # dict views cannot be indexed on Python 3; take the first output
            # blob through an iterator instead.
            y = next(iter(outputs.values())).squeeze(axis=(2, 3))
            prob[batch] = y

        T = 30.0

        log_prob = np.log(prob)
        print('log_prob', log_prob.min(), log_prob.max())
        new_prob = np.exp(log_prob / T)
        # Row-wise normalisation; keepdims lets the sums broadcast over rows.
        new_prob /= new_prob.sum(axis=1, keepdims=True)

        return new_prob

    def predict(self, X):
        """Return, per sample, the column index of the largest probability."""
        prob = self.predict_proba(X)
        return prob.argmax(-1)




# Training samples and labels, loaded from .npy files at hard-coded local paths.
train_data = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/train_data.npy')
train_labels = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/train_labels.npy')

model_path = 'cifar10/' # substitute your path here
# Network definition and weights; going by the file names this is a VGG-style
# CIFAR-10 model.
net_fn   = model_path + 'VGG_mini_ABN.prototxt'
param_fn = model_path + 'cifar10_vgg_iter_120000.caffemodel'

caffe.set_mode_cpu()
net = caffe.Classifier(net_fn, param_fn,
                       mean = np.float32([104.0, 116.0, 122.0]), # per-channel mean; NOTE(review): these look like ImageNet values - confirm they suit this model
                       channel_swap = (2,1,0)) # reverses the channel order (RGB <-> BGR)


def preprocess(net, img):
    """Convert an image whose channels are on axis 2 into network input.

    Axis 2 is moved to the front, the channel order is reversed, the result
    is cast to float32 and the mean stored for the 'data' input in
    ``net.transformer`` is subtracted.
    """
    channels_first = np.rollaxis(img, 2)
    channels_reversed = channels_first[::-1]
    data_mean = net.transformer.mean['data']
    return np.float32(channels_reversed) - data_mean


# Run the pretrained network on the first ten training images.
# NOTE(review): `features` is not used afterwards, so this looks like a
# smoke test of the loaded model only. It has to run before the in-place
# rescaling of train_data below.
for i in range(10):
    # assumes train_data[i] is channel-first with values in [0, 1] - TODO confirm
    img = train_data[i].transpose((1, 2, 0)) * 255
    img = img.astype(np.uint8)[:, :, ::-1]
    end = 'prob'
    src, dst = net.blobs['data'], net.blobs[end]
    src.data[0] = preprocess(net, img)
    net.forward(end=end)
    features = dst.data[0].copy()


# Training set: scale by 255 and centre on the training mean.
# X aliases train_data, so train_data itself is modified here.
X = train_data
y = train_labels
X *= 255.0
mean_x = X.mean(0)
X -= mean_x

te_X = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/test_data.npy')
te_y = np.load('G:/EDU/_SOURCE_CODE/chainer/examples/cifar10/data/test_labels.npy')

# The test set must go through the same preprocessing as the training set
# (same scale, TRAINING mean); otherwise the test database and the test
# scores are computed on inputs the networks were never trained on.
te_X = te_X * 255.0 - mean_x

create_weighted_db(te_X, te_y, np.ones(te_X.shape[0], dtype=np.float32), name='test')

# The weak learner is passed positionally: the keyword for it was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases.
clf = AdaBoostClassifier(CNN(), algorithm='SAMME.R', n_estimators=10,
                         random_state=0)
clf.fit(X.reshape(X.shape[0], -1), y)

# Accuracy after each boosting round, on training and test data.
for i, score in enumerate(clf.staged_score(X.reshape(X.shape[0], -1), y)):
    print(i+1, 'train score', score)

for i, score in enumerate(clf.staged_score(te_X.reshape(te_X.shape[0], -1), te_y)):
    print(i+1, 'test score', score)


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章