矩陣乘法逐步加速演示

矩陣乘法

#include<iostream>
#include<ctime>
using namespace std;
// Baseline benchmark: naive triple-loop product c = a * b of two n-by-n
// matrices stored row-major in flat arrays. Prints the time spent in the
// multiplication, in milliseconds, as measured by clock().
int main() {
    const int n = 1000;
    double* a = new double[n*n];
    double* b = new double[n*n];
    double* c = new double[n*n];

    // Both operands get the same symmetric pattern: entry = row + column.
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            a[i*n + j] = i + j;
            b[i*n + j] = i + j;
        }

    const auto tic = std::clock();
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double s = 0.0;
            // Row i of a times column j of b. The second factor must be b
            // (the original read a twice, leaving b unused); b is walked with
            // stride n here, which is what the later variants improve on.
            for (int k = 0; k < n; ++k)
                s += a[i*n + k] * b[k*n + j];
            c[i*n + j] = s;
        }
    const auto toc = std::clock() - tic;

    // Consume the result outside the timed region: an optimizing build is
    // otherwise free to discard stores to memory that is never read, which
    // would make the measurement meaningless.
    double checksum = 0.0;
    for (int idx = 0; idx < n*n; ++idx)
        checksum += c[idx];
    volatile double sink = checksum;
    (void)sink;

    // clock() counts in units of 1/CLOCKS_PER_SEC seconds; convert to ms so
    // the printed number means the same thing on every platform.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << '\n';

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}

耗時 1600 ms

將後面的矩陣 b 轉置存放(內層循環對 a、b 都按行連續訪問)

#include<iostream>
#include<ctime>
using namespace std;
// Transposed-access benchmark: the inner loop reads b as b[j*n + k], i.e. it
// treats b as holding the transpose of the right-hand operand, so both a and
// b are traversed contiguously in k. Because b is filled symmetrically
// (entry = row + column), b equals its own transpose and the result is still
// a * b. Prints the multiplication time in milliseconds.
int main() {
    const int n = 1000;
    double* a = new double[n*n];
    double* b = new double[n*n];
    double* c = new double[n*n];

    // Same symmetric fill for both operands.
    for (int row = 0; row < n; ++row)
        for (int col = 0; col < n; ++col) {
            a[row*n + col] = row + col;
            b[row*n + col] = row + col;
        }

    const auto tic = std::clock();
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double s = 0.0;
            // Dot product of row i of a with row j of b (unit stride in both).
            for (int k = 0; k < n; ++k)
                s += a[i*n + k] * b[j*n + k];
            c[i*n + j] = s;
        }
    const auto toc = std::clock() - tic;

    // Read the result back (outside the timed region) so an optimizing build
    // cannot treat the whole computation as dead stores.
    double checksum = 0.0;
    for (int idx = 0; idx < n*n; ++idx)
        checksum += c[idx];
    volatile double sink = checksum;
    (void)sink;

    // Convert clock() ticks to milliseconds instead of printing raw ticks.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << '\n';

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}

耗時 960 ms

循環展開2個乘積

#include<iostream>
#include<ctime>
using namespace std;
// Unroll-by-2 benchmark: same transposed-access product as the previous
// variant, but each inner-loop iteration accumulates two products. Prints the
// multiplication time in milliseconds.
int main() {
    const int n = 1000;
    double* a = new double[n*n];
    double* b = new double[n*n];
    double* c = new double[n*n];

    // Symmetric fill (entry = row + column), so b equals its transpose.
    for (int row = 0; row < n; ++row)
        for (int col = 0; col < n; ++col) {
            a[row*n + col] = row + col;
            b[row*n + col] = row + col;
        }

    const auto tic = std::clock();
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double s = 0.0;
            int k = 0;
            // Two products per trip; the bound keeps index k + 1 inside the
            // row even when n is odd.
            for (; k + 1 < n; k += 2)
                s += a[i*n + k] * b[j*n + k] +
                     a[i*n + k + 1] * b[j*n + k + 1];
            // Remainder element for odd n (does not execute for n = 1000).
            for (; k < n; ++k)
                s += a[i*n + k] * b[j*n + k];
            c[i*n + j] = s;
        }
    const auto toc = std::clock() - tic;

    // Read the result back (outside the timed region) so an optimizing build
    // cannot treat the whole computation as dead stores.
    double checksum = 0.0;
    for (int idx = 0; idx < n*n; ++idx)
        checksum += c[idx];
    volatile double sink = checksum;
    (void)sink;

    // Convert clock() ticks to milliseconds instead of printing raw ticks.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << '\n';

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}

耗時 560 ms

循環展開4個乘積

#include<iostream>
#include<ctime>
using namespace std;
// Unroll-by-4 benchmark: transposed-access product with four products per
// inner-loop iteration and the row offsets i*n / j*n hoisted out of the inner
// loop. Prints the multiplication time in milliseconds.
int main() {
    const int n = 1000;
    double* a = new double[n*n];
    double* b = new double[n*n];
    double* c = new double[n*n];

    // Symmetric fill (entry = row + column), so b equals its transpose.
    for (int row = 0; row < n; ++row)
        for (int col = 0; col < n; ++col) {
            a[row*n + col] = row + col;
            b[row*n + col] = row + col;
        }

    const auto tic = std::clock();
    for (int i = 0; i < n; ++i) {
        const int i1 = i * n;      // start of row i in a and in c
        for (int j = 0; j < n; ++j) {
            const int j1 = j * n;  // start of row j in b
            double s = 0.0;
            int k = 0;
            // Four products per trip; the bound keeps index k + 3 inside the
            // row when n is not a multiple of 4.
            for (; k + 3 < n; k += 4)
                s += a[i1 + k] * b[j1 + k] +
                     a[i1 + k + 1] * b[j1 + k + 1] +
                     a[i1 + k + 2] * b[j1 + k + 2] +
                     a[i1 + k + 3] * b[j1 + k + 3];
            // Up to three leftover elements (none for n = 1000).
            for (; k < n; ++k)
                s += a[i1 + k] * b[j1 + k];
            c[i1 + j] = s;
        }
    }
    const auto toc = std::clock() - tic;

    // Read the result back (outside the timed region) so an optimizing build
    // cannot treat the whole computation as dead stores.
    double checksum = 0.0;
    for (int idx = 0; idx < n*n; ++idx)
        checksum += c[idx];
    volatile double sink = checksum;
    (void)sink;

    // Convert clock() ticks to milliseconds instead of printing raw ticks.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << '\n';

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}

耗時 480 ms

蛇形順序

#include<iostream>
#include<ctime>
using namespace std;
// Serpentine-order benchmark: unroll-by-4 transposed-access product in which
// even rows of c are filled with j ascending and odd rows with j descending,
// so each row of c starts with the rows of b that the previous row touched
// last. Prints the multiplication time in milliseconds.
int main() {
    const int n = 1000;
    double* a = new double[n*n];
    double* b = new double[n*n];
    double* c = new double[n*n];

    // Symmetric fill (entry = row + column), so b equals its transpose.
    for (int row = 0; row < n; ++row)
        for (int col = 0; col < n; ++col) {
            a[row*n + col] = row + col;
            b[row*n + col] = row + col;
        }

    const auto tic = std::clock();
    for (int i = 0; i < n; ++i) {
        const int i1 = i * n;               // start of row i in a and in c
        const bool ascending = (i % 2 == 0);
        for (int step = 0; step < n; ++step) {
            // Even rows visit columns 0..n-1, odd rows visit n-1..0; one loop
            // body serves both directions instead of two duplicated copies.
            const int j = ascending ? step : n - 1 - step;
            const int j1 = j * n;           // start of row j in b
            double s = 0.0;
            int k = 0;
            // Four products per trip; the bound keeps index k + 3 inside the
            // row when n is not a multiple of 4.
            for (; k + 3 < n; k += 4)
                s += a[i1 + k] * b[j1 + k] +
                     a[i1 + k + 1] * b[j1 + k + 1] +
                     a[i1 + k + 2] * b[j1 + k + 2] +
                     a[i1 + k + 3] * b[j1 + k + 3];
            // Up to three leftover elements (none for n = 1000).
            for (; k < n; ++k)
                s += a[i1 + k] * b[j1 + k];
            c[i1 + j] = s;
        }
    }
    const auto toc = std::clock() - tic;

    // Read the result back (outside the timed region) so an optimizing build
    // cannot treat the whole computation as dead stores.
    double checksum = 0.0;
    for (int idx = 0; idx < n*n; ++idx)
        checksum += c[idx];
    volatile double sink = checksum;
    (void)sink;

    // Convert clock() ticks to milliseconds instead of printing raw ticks.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << '\n';

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}

耗時 430 ms

注: Visual C++ 2017, Release, Intel(R) Core(TM) i7-8700K CPU 3.7 GHz

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章