說明1:Intel MKL官方參考手冊以及官方參考例程(位於安裝目錄下examples文件夾中)是最好的參考資料,本文程序大多根據官方例程改寫,均可正常運行,我用的編譯環境是VS2015+Intel MKL庫2019;
說明2:由於本人主要是使用Intel MKL庫做通信系統仿真,因此文章函數大多是複數單精度浮點形式,實數域函數參考官方手冊對參數類型進行相應修改即可,也可以參考以下文章:MKL學習——基本操作C++實現,該文章主要是針對實數域的操作。
1. 隨機數生成
1.1 等概率的01分佈 viRngUniform
/* 生成信源(等概率的01序列)—— 等概率的伯努利分佈 */
#include <stdio.h>
#include <math.h>
#include "mkl.h"
#include "errcheck.inc"
#define SEED 111
#define BRNG VSL_BRNG_MCG31
#define METHOD VSL_RNG_METHOD_UNIFORM_STD
#define N 1000 //隨機數的數量
#define NN 100
int main()
{
    /* Information source: N integers drawn uniformly from {lower, ..., upper - 1},
       which with lower = 0 and upper = 2 is an equiprobable 0/1 sequence. */
    const int lower = 0;
    const int upper = 2;
    int bits[N];
    VSLStreamStatePtr rng;
    int status;
    int k;

    /***** Create the random stream *****/
    status = vslNewStream(&rng, BRNG, SEED);
    CheckVslError(status); /* report any failure of the RNG call */

    /***** Generate the samples *****/
    status = viRngUniform(METHOD, rng, N, bits, lower, upper);
    CheckVslError(status);

    /***** Print the first NN samples *****/
    printf("Results (first 100 of 1000):\n");
    printf("---------------------------\n");
    k = 0;
    while (k < NN) {
        printf("r[%d]=%d\n", k, bits[k]);
        k++;
    }

    /***** Release the stream *****/
    status = vslDeleteStream(&rng);
    CheckVslError(status);
    return 0;
}
1.2 (復)高斯分佈 vsRngGaussian
/* 生成獨立同分布的準靜態瑞利衰落信道(還需要將元素儲存到矩陣中) */
#include <stdio.h>
#include <math.h>
#include "mkl.h"
#include "errcheck.inc"
#define SEED 777
#define BRNG VSL_BRNG_MCG31
#define METHOD VSL_RNG_METHOD_GAUSSIAN_ICDF
#define N 1000
#define NN 10
int main()
{
    /* Builds N i.i.d. complex Gaussian channel coefficients H[i] from two
       independent real Gaussian sequences and prints the first NN of each. */
    float real[N];
    float imag[N];
    MKL_Complex8 H[N];               /* sized by N so it always matches real[]/imag[] */
    VSLStreamStatePtr stream;
    int i, errcode;
    float a = 0.0f, sigma = 1.0f;    /* mean and standard deviation of each real part */
    const float root2 = (float)sqrt(2.0);

    /***** Initialize *****/
    errcode = vslNewStream(&stream, BRNG, SEED);
    CheckVslError(errcode);

    /***** Call RNG *****/
    errcode = vsRngGaussian(METHOD, stream, N, real, a, sigma);
    CheckVslError(errcode);
    errcode = vsRngGaussian(METHOD, stream, N, imag, a, sigma);
    CheckVslError(errcode);

    /* Scale by 1/sqrt(2) so that real and imaginary parts share unit total variance.
       All N coefficients are filled, not just the NN that get printed. */
    for (i = 0; i < N; i++) {
        H[i].real = real[i] / root2;
        H[i].imag = imag[i] / root2;
    }

    /***** Printing results *****/
    printf("Results (first 10 of 1000):\n");
    printf("---------------------------\n");
    for (i = 0; i < NN; i++) {
        printf("real[%d]=%.4f\n", i, real[i]);
    }
    printf("---------------------------\n");
    for (i = 0; i < NN; i++) {
        printf("imag[%d]=%.4f\n", i, imag[i]);
    }
    printf("---------------------------\n");
    for (i = 0; i < NN; i++) {
        printf("H[%d]=%.4f+(%.4fi)\n", i, H[i].real, H[i].imag);
    }

    /***** Deinitialize *****/
    errcode = vslDeleteStream(&stream);
    CheckVslError(errcode);
    return 0;
}
2. 向量-向量運算
2.1 向量數乘與加法 cblas_caxpby
/* 向量數乘與加法 y = alpha*x+beta*y */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main( )
{
    /* Demonstrates cblas_caxpby: y = alpha*x + beta*y on complex single-precision vectors. */
    MKL_INT n, incx, incy;
    MKL_Complex8 alpha, beta;
    MKL_Complex8 *x, *y;
    MKL_INT len_x, len_y;

    /***************** Parameter setup *****************/
    n = 5;
    incx = 1;
    incy = 1;
    alpha.imag = 0;
    alpha.real = 2;
    beta.imag = 0;
    beta.real = 1;
    /* Storage needed for n elements spaced inc apart. */
    len_x = 1 + (n - 1) * incx;
    len_y = 1 + (n - 1) * incy;

    x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
    y = (MKL_Complex8 *)mkl_calloc(len_y, sizeof(MKL_Complex8), 64);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Can't allocate memory for arrays\n");
        mkl_free(x);
        mkl_free(y);
        return 1;
    }

    /******************* Fill the complex vectors x and y *********************/
    for (int i = 0; i < n; i++) {
        x[i].real = (float)i;
        x[i].imag = (float)(i + 1);
        y[i].real = (float)(i + 2);
        y[i].imag = (float)(i + 3);
    }

    /********** Call CBLAS_CAXPBY subroutine ( C Interface )**********/
    cblas_caxpby(n, &alpha, x, incx, &beta, y, incy);

    /********************** Printing results *******************/
    printf("y = ax + by\n");
    for (int i = 0; i < n; i++) {
        printf("----------------------------------------\n");
        printf("y[%d] = %.0f+%.0fi\n", i, y[i].real, y[i].imag);
    }

    mkl_free(x);
    mkl_free(y);
    return 0;
}
2.2 向量內積 cblas_cdotu_sub
/* Vector dot product (unconjugated inner product): res = \sum_{i=0}^{n-1} x_i*y_i */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
    /* Demonstrates cblas_cdotu_sub: unconjugated dot product of two complex vectors. */
    MKL_INT n, incx, incy;
    MKL_Complex8 *x, *y;
    MKL_Complex8 res;
    MKL_INT len_x, len_y;

    /***************** Parameter setup *****************/
    n = 5;
    incx = 1;
    incy = 1;
    /* Storage needed for n elements spaced inc apart. */
    len_x = 1 + (n - 1) * incx;
    len_y = 1 + (n - 1) * incy;

    x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
    y = (MKL_Complex8 *)mkl_calloc(len_y, sizeof(MKL_Complex8), 64);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Can't allocate memory for arrays\n");
        mkl_free(x);
        mkl_free(y);
        return 1;
    }

    /******************* Fill the complex vectors x and y *********************/
    for (int i = 0; i < n; i++) {
        x[i].real = (float)i;
        x[i].imag = (float)(i + 1);
        y[i].real = (float)(i + 2);
        y[i].imag = (float)(i + 3);
    }

    /* Call CBLAS_CDOTU_SUB subroutine ( C Interface ); the result is written to res. */
    cblas_cdotu_sub(n, x, incx, y, incy, &res);

    /********************** Printing results *******************/
    printf("%.0f+%.0fi\n", res.real, res.imag);

    mkl_free(x);
    mkl_free(y);
    return 0;
}
2.3 向量範數 cblas_scnrm2
/* 向量範數 res = ||x|| */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
    /* Demonstrates cblas_scnrm2: Euclidean norm of a complex single-precision vector. */
    MKL_INT n, incx;
    float res;
    MKL_Complex8 *x;
    MKL_INT len_x;

    /***************** Parameter setup *****************/
    n = 5;
    incx = 1;
    /* Storage needed for n elements spaced incx apart. */
    len_x = 1 + (n - 1) * incx;

    x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
    if (x == NULL) {
        fprintf(stderr, "Can't allocate memory for arrays\n");
        return 1;
    }

    /******************* Fill the complex vector x *********************/
    for (int i = 0; i < n; i++) {
        x[i].real = (float)i;
        x[i].imag = (float)(i + 1);
    }

    /************* Call CBLAS_SCNRM2 subroutine ( C Interface ) *********/
    res = cblas_scnrm2(n, x, incx);

    /************************* Print output data ********************/
    printf("||x|| = %f\n", res);

    mkl_free(x);
    return 0;
}
3. 矩陣-向量運算
3.1 矩陣存儲方案
/* 以“實數矩陣-向量乘法”爲例說明矩陣儲存方案 y = alpha*A*x + beta*y
以下代碼以行優先 CblasRowMajor 爲例 */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
    /* Real matrix-vector product y = alpha*A*x + beta*y with cblas_sgemv,
       used to illustrate how the flat array a[] maps onto the m-by-n matrix A. */
    MKL_INT m, n, lda, incx, incy;
    float alpha, beta;
    float *a, *x, *y;
    CBLAS_LAYOUT layout = CblasRowMajor; // may be changed to CblasColMajor
    CBLAS_TRANSPOSE trans = CblasNoTrans;
    MKL_INT nx, ny, len_x, len_y;

    m = 2;
    n = 5;
    incx = incy = 1;
    alpha = 1;
    beta = 0;

    /* x has as many entries as op(A) has columns, y as many as it has rows. */
    if (trans == CblasNoTrans) {
        nx = n;
        ny = m;
    } else {
        nx = m;
        ny = n;
    }
    /* Leading dimension: row length for row-major, column length for column-major. */
    lda = (layout == CblasRowMajor) ? n : m;
    len_x = 1 + (nx - 1) * incx;
    len_y = 1 + (ny - 1) * incy;

    a = (float *)mkl_calloc(m * n, sizeof(float), 64);
    x = (float *)mkl_calloc(len_x, sizeof(float), 64);
    y = (float *)mkl_calloc(len_y, sizeof(float), 64);
    if (a == NULL || x == NULL || y == NULL) {
        fprintf(stderr, "Can't allocate memory for arrays\n");
        mkl_free(a);
        mkl_free(x);
        mkl_free(y);
        return 1;
    }

    /* Fill the storage array in memory order. */
    for (MKL_INT i = 0; i < m * n; i++) {
        a[i] = (float)i;
    }

    /* Print input data: A is printed as the logical m-by-n matrix, so the row
       breaks follow the matrix shape and layout rather than the length of x. */
    printf("矩陣爲\n");
    for (MKL_INT i = 0; i < m; i++) {
        for (MKL_INT j = 0; j < n; j++) {
            MKL_INT idx = (layout == CblasRowMajor) ? i * lda + j : i + j * lda;
            printf("%2.0f", a[idx]);
        }
        printf("\n");
    }
    printf("向量爲\n");
    for (MKL_INT i = 0; i < len_x; i++) {
        x[i] = (float)(i + 1);
        printf("%2.0f", x[i]);
        printf("\n");
    }

    /* Call CBLAS_SGEMV subroutine ( C Interface ) */
    cblas_sgemv(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy);

    /* Print output data */
    printf("矩陣-向量乘法結果\n");
    for (MKL_INT i = 0; i < len_y; i++) {
        printf("%2.0f ", y[i]);
        printf("\n");
    }

    mkl_free(a);
    mkl_free(x);
    mkl_free(y);
    return 0;
}
上述過程中,矩陣a由數組對其賦值,具體儲存過程如下:
行優先CblasRowMajor
:初始化時,將數組逐行存入矩陣,可表示如下(以上述程序 m=2, n=5, a[i]=i 爲例):A = [0 1 2 3 4; 5 6 7 8 9],此時 lda = n = 5。
列優先CblasColMajor
:初始化時,將數組逐列存入矩陣,可表示如下(以上述程序 m=2, n=5, a[i]=i 爲例):A = [0 2 4 6 8; 1 3 5 7 9],此時 lda = m = 2。
需注意,不管是行優先還是列優先,只要矩陣不轉置,矩陣維度都不會發生變化。
3.2 矩陣-向量乘法 cblas_cgemv
trans參數控制矩陣A以什麼形式參與運算,三個取值含義如下:
CblasNoTrans:A;CblasTrans:A的轉置;CblasConjTrans:A的共軛轉置
/* 複數矩陣-向量乘法 y = alpha*A*x + beta*y */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
    /* Complex matrix-vector product y = alpha*op(A)*x + beta*y with cblas_cgemv. */
    MKL_INT m, n, lda, incx, incy;
    MKL_Complex8 alpha, beta;
    MKL_Complex8 *a, *x, *y;
    CBLAS_LAYOUT layout = CblasRowMajor;
    CBLAS_TRANSPOSE trans = CblasNoTrans;
    MKL_INT nx, ny, len_x, len_y;

    /***************** Parameter setup *****************/
    m = 2;
    n = 5;
    incx = incy = 1;
    alpha.real = 1;
    alpha.imag = 0;
    beta.real = 0;
    beta.imag = 0;

    /* x has as many entries as op(A) has columns, y as many as it has rows. */
    if (trans == CblasNoTrans) {
        nx = n;
        ny = m;
    } else {
        nx = m;
        ny = n;
    }
    /* Leading dimension: row length for row-major, column length for column-major. */
    lda = (layout == CblasRowMajor) ? n : m;
    len_x = 1 + (nx - 1) * incx;
    len_y = 1 + (ny - 1) * incy;

    a = (MKL_Complex8 *)mkl_calloc(m * n, sizeof(MKL_Complex8), 64);
    x = (MKL_Complex8 *)mkl_calloc(len_x, sizeof(MKL_Complex8), 64);
    y = (MKL_Complex8 *)mkl_calloc(len_y, sizeof(MKL_Complex8), 64);
    if (a == NULL || x == NULL || y == NULL) {
        fprintf(stderr, "Can't allocate memory for arrays\n");
        mkl_free(a);
        mkl_free(x);
        mkl_free(y);
        return 1;
    }

    /************** Fill the matrix and the vector *******************/
    for (int i = 0; i < m * n; i++) {
        a[i].real = (float)(i + 1);
        a[i].imag = (float)i;
    }
    for (int i = 0; i < len_x; i++) {
        x[i].real = (float)i;
        x[i].imag = (float)(i + 1);
    }

    /******* Call CBLAS_CGEMV subroutine ( C Interface )*******/
    cblas_cgemv(layout, trans, m, n, &alpha, a, lda, x, incx, &beta, y, incy);

    /***************** Print output data *****************/
    for (int i = 0; i < len_y; i++) {
        printf("%f+%fi\n", y[i].real, y[i].imag);
    }

    mkl_free(a);
    mkl_free(x);
    mkl_free(y);
    return 0;
}
4. 矩陣-矩陣運算
4.1 矩陣乘法 cblas_cgemm
/* Matrix multiplication: C = alpha*A*B + beta*C */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
    /* Complex matrix product C = alpha*op(A)*op(B) + beta*C with cblas_cgemm.
       op(A) is m-by-k, op(B) is k-by-n and C is always m-by-n. */
    MKL_INT m, n, k;
    MKL_INT lda, ldb, ldc;
    MKL_Complex8 alpha, beta;
    MKL_Complex8 *a, *b, *c;
    CBLAS_LAYOUT layout = CblasRowMajor;
    CBLAS_TRANSPOSE transA = CblasNoTrans;
    CBLAS_TRANSPOSE transB = CblasNoTrans;
    MKL_INT ma, na, mb, nb;   /* stored (untransposed) shapes of A and B */

    /***************** Parameter setup *****************/
    m = 2;
    n = 5;
    k = 3;
    alpha.real = 1;
    alpha.imag = 0;
    beta.real = beta.imag = 0;

    if (transA == CblasNoTrans) {
        ma = m;
        na = k;
    } else {
        ma = k;
        na = m;
    }
    if (transB == CblasNoTrans) {
        mb = k;
        nb = n;
    } else {
        mb = n;
        nb = k;
    }

    /* Leading dimensions follow the stored shapes of A and B; for C they follow
       m-by-n directly, because C is never transposed. */
    if (layout == CblasRowMajor) {
        lda = na;
        ldb = nb;
        ldc = n;
    } else {
        lda = ma;
        ldb = mb;
        ldc = m;
    }

    a = (MKL_Complex8 *)mkl_calloc(ma * na, sizeof(MKL_Complex8), 64);
    b = (MKL_Complex8 *)mkl_calloc(mb * nb, sizeof(MKL_Complex8), 64);
    c = (MKL_Complex8 *)mkl_calloc(m * n, sizeof(MKL_Complex8), 64);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Can't allocate memory for arrays\n");
        mkl_free(a);
        mkl_free(b);
        mkl_free(c);
        return 1;
    }

    /************** Fill the input matrices *******************/
    for (int i = 0; i < ma * na; i++) {
        a[i].real = (float)(i + 1);
        a[i].imag = (float)i;
    }
    for (int i = 0; i < mb * nb; i++) {
        b[i].real = (float)i;
        b[i].imag = (float)(i + 1);
    }

    /* Call CGEMM subroutine ( C Interface ) */
    cblas_cgemm(layout, transA, transB, m, n, k, &alpha, a, lda, b, ldb,
                &beta, c, ldc);

    /* Print output data: all m*n entries of C in storage order. */
    for (int i = 0; i < m * n; i++) {
        printf("%f+%fi\n", c[i].real, c[i].imag);
    }

    mkl_free(a);
    mkl_free(b);
    mkl_free(c);
    return 0;
}
4.2 矩陣求逆 LAPACKE_cgetrf+LAPACKE_cgetri
/* 矩陣求逆 */
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
#define LAYOUT LAPACK_ROW_MAJOR
#define N 3
int main()
{
int layout = LAYOUT;
int m = N;
int n = N;
int info1, info2, i;
int lda = n;
int ipiv[3];
MKL_Complex8 *a;
a = (MKL_Complex8 *)mkl_calloc(m*n, sizeof(MKL_Complex8), 64);
/************* 矩陣賦值 ***************/
for (i=0; i<m*n; i++) {
a[i].real = (float)0;
a[i].imag = (float)0;
}
a[0].real = a[5].real = 1;
a[1].real = a[4].real = 2;
a[2].real = a[3].real = 3;
a[6].imag = 1;
a[7].imag = 3;
a[8].imag = 2;
/* LU分解 + 求逆 */
info1 = LAPACKE_cgetrf(layout, m, n, a, lda, ipiv);
info2 = LAPACKE_cgetri(layout, m, a, lda, ipiv);
/* print results*/
printf("%d\n", info2);
for (i = 0; i<N*N; i++) {
printf("%f+(%f)i ", a[i].real, a[i].imag);
}
printf("\n");
mkl_free(a);
return 0;
}
4.3 矩陣的QR分解 LAPACKE_cgeqrf+LAPACKE_cungqr
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mkl.h"
int main()
{
int nR = 4;
int nT = 3;
int iseed[4] = { 1,2,3,5 };
MKL_Complex8* H = NULL;
H = (MKL_Complex8 *)mkl_calloc((nR + nT)*nT, sizeof(MKL_Complex8), 64);
LAPACKE_clarnv(3, iseed, (nR + nT)*nT, H); // 復高斯分佈CN(0,2)
printf("------- Before QR decomposition -------\n");
for (int i = 0; i < (nR + nT)*nT; i++) {
if (i%nT == 0)
printf("\n");
printf("%f+(%f)i ", H[i].real, H[i].imag);
}
printf("\n");
MKL_Complex8 tau[3]; // 注意此處數組大小應隨nT變化
int info1 = LAPACKE_cgeqrf(LAPACK_ROW_MAJOR, nR + nT, nT, H, nT, tau); // QR分解
int info2 = LAPACKE_cungqr(LAPACK_ROW_MAJOR, nR + nT, nT, nT, H, nT, tau); // 僅輸出Q矩陣的前nT列
printf("------- After QR decomposition -------\n");
for (int i = 0; i < (nR + nT)*nT; i++) {
if (i%nT == 0)
printf("\n");
printf("%f+(%f)i ", H[i].real, H[i].imag);
}
printf("\n");
mkl_free(H);
return 0;
}
5. 常用的子函數模塊
5.1 計時函數
// Timing pattern: sample dsecnd() before and after the code under test and
// print the difference, which the message below reports in seconds.
double start = dsecnd();
// Placeholder for the code being timed.
....
double stop = dsecnd();
printf("Elapsed time = %f seconds\n", stop - start);
// The first call spends some time on initialization; for a more precise result, call it once before the real timing starts.
5.2 Print matrix
/*
 * Print an m-by-n complex matrix, one matrix row per output line, each entry
 * formatted as " (re,im)".
 *
 * layout  MKL_COL_MAJOR selects column-major indexing (a[i + j*lda]);
 *         any other value selects row-major indexing (a[j + i*lda]).
 * m, n    number of rows and columns to print.
 * a       matrix storage.
 * lda     leading dimension used in the index computation.
 */
void print_matrix(MKL_LAYOUT layout, MKL_INT m, MKL_INT n, MKL_Complex8* a, MKL_INT lda) {
    /* Distance in elements between vertically / horizontally adjacent entries. */
    const MKL_INT row_step = (layout == MKL_COL_MAJOR) ? 1 : lda;
    const MKL_INT col_step = (layout == MKL_COL_MAJOR) ? lda : 1;

    for (MKL_INT row = 0; row < m; ++row) {
        for (MKL_INT col = 0; col < n; ++col) {
            const MKL_Complex8 *entry = &a[row * row_step + col * col_step];
            printf(" (%6.3f,%6.3f)", entry->real, entry->imag);
        }
        printf("\n");
    }
}
5.3 生成單位矩陣
int n=500; // 矩陣維度
float *IdentityMatrix;
IdentityMatrix = (float *)mkl_calloc(n*n, sizeof(float), 64);
for (int i=0; i<n*n; i++) {
IdentityMatrix[i*(n+1)] = 1.0f;
}
mkl_free(IdentityMatrix);
博主不定期發佈『保研/推免、C/C++、5G移動通信、Linux、生活隨筆』系列文章,如果覺得本文對你有幫助,可以『點贊+關注』支持一下哦!