EasyRTSPLive高效轉碼之EasyVideoDecoder採用Intel集成顯卡高效硬件解碼解決方案(附源碼)

在我之前寫的一篇文章《EasyRTSPLive傳統視頻監控互聯網+實現利器解決方案》中提到RTSP轉RTMP的轉流過程,簡化流程就是通過EasyRTSPClient拉RTSP流,獲取音視頻編碼數據,然後再通過EasyRTMP推出去,流程非常簡單;然而在實際開發過程中,我們發現其實這個過程並沒有想象中那麼簡單;首先,RTSP協議支持多種音視頻編碼格式,如音頻支持AAC、G711、G726等,視頻支持H264、H265、MJPEG、MPEG等各種格式,而EasyRTMP推流只支持H264(已擴展支持H265)格式,這時,音頻我們可以通過EasyAACEncoder將音頻轉碼成AAC格式,而視頻我們可以通過EasyVideoDecoder解碼成原始數據,然後再通過EasyVideoEncoder將原始數據轉碼成RTMP推送指定的格式。本文,我們將重點講述EasyVideoDecoder基於Intel硬解碼庫的硬解碼流程。

EasyVideoDecoder基於Intel硬解碼庫EasyIntelHardDecoder

EasyIntelHardDecoder庫是基於Intel主板集成顯卡的硬件解碼程序,內部完成解碼後採用D3D進行顯示,其解碼效率比ffmpeg軟件解碼至少提高5-6倍;

1. 接口聲明如下:

// Intel Media Hardware Codec SDK Interface [8/17/2016 SwordTwelve]

#ifndef INTELHARDCODEC_INTERFACE_H
#define INTELHARDCODEC_INTERFACE_H

#ifdef __cplusplus

/* Packs four character codes into one int: A is the lowest byte, D the highest.
   Every argument is parenthesised before the cast so that an expression passed
   as an argument is converted as a whole instead of only its first operand. */
#define HARDCODEC_MAKEFOURCC(A,B,C,D)    ((((int)(A)))+(((int)(B))<<8)+(((int)(C))<<16)+(((int)(D))<<24))

/* Pixel formats (FourCC codes) reported by the hardware codec */
typedef enum tagINTELHARDCODEC_FORMAT{
	HARDCODEC_FOURCC_NV12         = HARDCODEC_MAKEFOURCC('N','V','1','2'),   /* Native Format */
	HARDCODEC_FOURCC_YV12         = HARDCODEC_MAKEFOURCC('Y','V','1','2'),
	HARDCODEC_FOURCC_YUY2         = HARDCODEC_MAKEFOURCC('Y','U','Y','2'),
	HARDCODEC_FOURCC_RGB3         = HARDCODEC_MAKEFOURCC('R','G','B','3'),   /* RGB24 */
	HARDCODEC_FOURCC_RGB4         = HARDCODEC_MAKEFOURCC('R','G','B','4'),   /* RGB32 */
	HARDCODEC_FOURCC_P8           = 41,         /*  D3DFMT_P8   */
	HARDCODEC_FOURCC_P8_TEXTURE   = HARDCODEC_MAKEFOURCC('P','8','M','B') 
}INTELHARDCODEC_FORMAT;


/**
 * Abstract interface of the EasyIntelHardDecoder library.
 *
 * Instances are obtained from Create_EasyIntelHardDecoder() and handed back
 * to Release_EasyIntelHardDecoder(). The order of the virtual functions is
 * part of the exported interface and must not be changed.
 */
class EasyIntelHardDecoder_Interface
{
public:	// exported DLL interface
	/**
	 * Prepares the decoder.
	 * @param hWnd          window handle passed to the decoder
	 * @param bDxv2Show     display flag
	 * @param bShowToScale  scaling flag
	 * @param mode          implementation selector, defaults to 1
	 * NOTE(review): the Init() implementation shown alongside this header
	 * tries the hardware implementations first when mode == 1 and uses the
	 * software implementation otherwise - confirm it is the one behind this
	 * interface.
	 */
	virtual int  WINAPI Init(HWND hWnd, bool bDxv2Show, bool bShowToScale, int mode = 1) = 0;

	/**
	 * Decodes one chunk of compressed data.
	 * @param pData      input buffer
	 * @param len        length of the input buffer
	 * @param outFormat  output parameter receiving a pixel format
	 * @param pYUVData   output buffer for the decoded picture
	 * NOTE(review): required size of pYUVData and the meaning of the return
	 * value are not visible here - confirm against the implementation.
	 */
	virtual int  WINAPI Decode(unsigned char * pData, int len, OUT INTELHARDCODEC_FORMAT& outFormat, OUT unsigned char * pYUVData) = 0;

	/** Shuts the decoder down. */
	virtual void WINAPI	Close() = 0;
};

// Pointer type through which callers use the decoder interface
typedef	EasyIntelHardDecoder_Interface*	LPIntelHardDecoder;

// Creates a decoder and returns its interface pointer
LPIntelHardDecoder	APIENTRY Create_EasyIntelHardDecoder();

// Destroys a decoder previously returned by Create_EasyIntelHardDecoder()
void APIENTRY Release_EasyIntelHardDecoder(LPIntelHardDecoder lpHardDecoder);

#endif//__cplusplus
#endif//INTELHARDCODEC_INTERFACE_H
2. EasyIntelHardDecoder解碼庫調用流程
  • 第一步,初始化解碼器
    首先,檢查設備是否支持DXVA2,代碼如下所示:
/**
 * Probes whether DXVA2 can be used with the given window.
 *
 * Creates a Direct3D 9 object, a HAL device bound to hwnd, resets and clears
 * that device, and finally creates a DXVA2 device manager. Everything created
 * during the probe is released again before returning, on success and on
 * every failure path.
 *
 * @param hwnd  window used as device window / focus window for the probe
 * @return true when every step succeeded, false otherwise
 */
bool isSupportDxva2(HWND hwnd)
{
	IDirect3D9* pD3D9 = Direct3DCreate9(D3D_SDK_VERSION);
	if (!pD3D9)
		return false;

	// Windowed, video-flagged swap chain sized to the primary screen
	D3DPRESENT_PARAMETERS d3dpp;
	ZeroMemory(&d3dpp, sizeof(d3dpp));
	d3dpp.Windowed = true;
	d3dpp.hDeviceWindow = hwnd;

	d3dpp.Flags                      = D3DPRESENTFLAG_VIDEO;
	d3dpp.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
	d3dpp.PresentationInterval       = D3DPRESENT_INTERVAL_ONE;
	d3dpp.BackBufferCount            = 1;
	d3dpp.BackBufferFormat           = D3DFMT_X8R8G8B8;

	d3dpp.BackBufferWidth  = GetSystemMetrics(SM_CXSCREEN);
	d3dpp.BackBufferHeight = GetSystemMetrics(SM_CYSCREEN);

	d3dpp.Flags |= D3DPRESENTFLAG_LOCKABLE_BACKBUFFER;

	d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; // D3DSWAPEFFECT_OVERLAY

	mfxU32                   adapterNum = 0;
	IDirect3DDevice9*        pDevice = NULL;
	IDirect3DDeviceManager9* pDeviceManager = NULL;

	// Each step runs only if the previous one succeeded; cleanup is shared below
	HRESULT hr = pD3D9->CreateDevice(
		adapterNum,
		D3DDEVTYPE_HAL,
		hwnd,
		D3DCREATE_SOFTWARE_VERTEXPROCESSING | D3DCREATE_MULTITHREADED | D3DCREATE_FPU_PRESERVE,
		&d3dpp,
		&pDevice);
	if (SUCCEEDED(hr))
		hr = pDevice->Reset(&d3dpp);
	if (SUCCEEDED(hr))
		hr = pDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
	if (SUCCEEDED(hr))
	{
		UINT resetToken = 0;
		hr = DXVA2CreateDirect3DDeviceManager9(&resetToken, &pDeviceManager);
	}

	const bool bSupported = SUCCEEDED(hr);

	// Single cleanup path: the original leaked the device and the D3D object
	// when Reset() failed
	if (pDeviceManager)
		pDeviceManager->Release();
	if (pDevice)
		pDevice->Release();
	pD3D9->Release();

	return bSupported;
}

然後,初始化解碼器,代碼如下:

/**
 * Sets up the decoder: DXVA2 probe, D3D renderer, Media SDK session,
 * decoder object and input bitstream buffer.
 *
 * @param hWnd          window stored in m_hWnd and handed to the renderer
 * @param bDxv2Show     stored in m_bDxv2Show
 * @param bShowToScale  stored in m_bShowToScale
 * @param mode          1: try MFX_IMPL_HARDWARE_ANY, then MFX_IMPL_HARDWARE,
 *                      then MFX_IMPL_SOFTWARE; any other value: software only
 * @return -1 when the DXVA2 probe fails, otherwise an mfxStatus value
 *         (MFX_ERR_NONE on success)
 */
int	Init(HWND hWnd, bool bDxv2Show, bool bShowToScale, int mode )
{
	// Nothing to do when the DXVA2 probe fails
	if (!isSupportDxva2(hWnd))
		return -1;

	// Remember display options and the current client area of the window
	m_hWnd = hWnd;
	m_bDxv2Show = bDxv2Show;
	m_bShowToScale = bShowToScale;
	::GetClientRect(m_hWnd, &m_WndRect);

	// NOTE(review): the result of the renderer's Init() is not checked
	m_pD3dRender = new CDecodeD3DRender();
	m_pD3dRender->Init(hWnd);

	//	mfxVersion version = {MFX_VERSION_MINOR, MFX_VERSION_MAJOR};
	mfxVersion version = {0, 1};

	// Choose the session implementation, falling back step by step
	mfxStatus sts = MFX_ERR_NONE;
	if (mode != 1)
	{
		sts = m_mfxSession.Init(MFX_IMPL_SOFTWARE, &version);
	}
	else
	{
		sts = m_mfxSession.Init(MFX_IMPL_HARDWARE_ANY, &version);
		if (MFX_ERR_NONE != sts)
			sts = m_mfxSession.Init(MFX_IMPL_HARDWARE, &version);
		if (MFX_ERR_NONE != sts)
			sts = m_mfxSession.Init(MFX_IMPL_SOFTWARE, &version);
	}
	MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

	// Decoder object bound to the session
	m_pmfxDEC = new MFXVideoDECODE(m_mfxSession);
	MSDK_CHECK_POINTER(m_pmfxDEC, MFX_ERR_MEMORY_ALLOC);

	// H.264 input, one operation in flight, NV12 output
	m_mfxVideoParams.mfx.CodecId = MFX_CODEC_AVC;
	m_mfxVideoParams.AsyncDepth = 1;
	m_mfxVideoParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;//MFX_FOURCC_YV12;

	// Memory type flag
	m_bd3dAlloc = true;

	// Input bitstream buffer of 2 MiB
	memset(&m_mfxBS, 0, sizeof(m_mfxBS));
	sts = InitMfxBitstream(&m_mfxBS, 1024*1024*2);
	return sts;
}

  • 第二步,調用解碼函數
    解碼器是異步工作的,所以在解碼函數調用後,我們需要從緩衝區內把數據取出進行D3D顯示或者回調到上層顯示,代碼如下所示:
/**
 * Appends one chunk of H.264 data to the internal bitstream buffer, submits
 * it to the decoder and, when a frame is ready, copies it out and/or renders it.
 *
 * @param p_data     compressed input data (may be NULL only when len <= 0)
 * @param len        number of bytes in p_data; values <= 0 append nothing
 * @param p_yuvData  optional output buffer. When non-NULL it receives the
 *                   frame as tightly packed NV12: m_nSrcWidth*m_nSrcHeight
 *                   luma bytes followed by half as many interleaved UV bytes.
 *                   The hardware outputs NV12; callers that need I420 must
 *                   de-interleave the UV plane themselves.
 * @return MFX_ERR_NONE when a frame was produced;
 *         MFX_ERR_MORE_DATA when fewer than 5 bytes are buffered;
 *         MFX_ERR_NULL_PTR when len > 0 but p_data is NULL;
 *         MFX_ERR_NOT_ENOUGH_BUFFER when the packet can never fit the buffer;
 *         MFX_ERR_MEMORY_ALLOC when no free working surface exists;
 *         otherwise the status reported by the decoder, the sync operation,
 *         the allocator or the renderer.
 */
mfxStatus CIntelMediaDecode::h264DecPacket(unsigned char * p_data, int len, unsigned char* p_yuvData)
{
	// Start from NULL: the value is tested below even when the decoder
	// produced no output and therefore may not have written it
	mfxSyncPoint	syncp = NULL;
	mfxStatus		sts = MFX_ERR_NONE;
	mfxU16 			nIndex = 0; // index of free surface

	if (len > 0)
	{
		if (NULL == p_data)
			return MFX_ERR_NULL_PTR;

		const mfxU32 nInLen = static_cast<mfxU32>(len);

		// A packet larger than the whole buffer would overrun it in memcpy
		if (nInLen > m_mfxBS.MaxLength)
			return MFX_ERR_NOT_ENOUGH_BUFFER;

		// Not enough room left: drop what is buffered instead of growing
		if (nInLen > (m_mfxBS.MaxLength - m_mfxBS.DataLength))
		{
			//	sts = ExtendMfxBitstream(&m_mfxBS, m_mfxBS.MaxLength * 2); 
			//	MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
			m_mfxBS.DataOffset = 0;
			m_mfxBS.DataLength = 0;
		}

		// Move unread bytes to the front, then append the new packet
		memmove(m_mfxBS.Data, m_mfxBS.Data+m_mfxBS.DataOffset, m_mfxBS.DataLength);
		m_mfxBS.DataOffset = 0;

		memcpy(m_mfxBS.Data+m_mfxBS.DataLength, p_data, nInLen);
		m_mfxBS.DataLength += nInLen;
	}

	if (m_mfxBS.DataLength < 5)
	{
		m_sPrevState = MFX_ERR_MORE_DATA;
		return MFX_ERR_MORE_DATA;
	}

	// NOTE(review): for any other previous state nIndex stays 0, so surface 0
	// is handed to the decoder - confirm this is intended.
	if (MFX_ERR_MORE_SURFACE == m_sPrevState || MFX_ERR_NONE == m_sPrevState)
	{
		nIndex = GetFreeSurfaceIndex(m_pmfxSurfaces, m_mfxResponse.NumFrameActual); // find new working surface 
		if (MSDK_INVALID_SURF_IDX == nIndex)
		{
			return MFX_ERR_MEMORY_ALLOC;
		}
	}

	sts = m_pmfxDEC->DecodeFrameAsync(&m_mfxBS, &(m_pmfxSurfaces[nIndex]), &m_pmfxOutSurface, &syncp);
	if (MFX_ERR_DEVICE_LOST == sts || MFX_ERR_DEVICE_FAILED == sts)
		return sts;
	m_sPrevState = sts;

	// ignore warnings if output is available
	if (MFX_ERR_NONE < sts && syncp)
	{
		sts = MFX_ERR_NONE;
	}

	// Wait for the asynchronous decode to finish
	if (MFX_ERR_NONE == sts)
	{
		sts = m_mfxSession.SyncOperation(syncp, MSDK_DEC_WAIT_INTERVAL);
	}

	if (MFX_ERR_NONE == sts)
	{
		// Surfaces from the external allocator must be locked before reading
		if (m_bExternalAlloc) 
		{
			sts = m_pMFXAllocator->Lock(m_pMFXAllocator->pthis, m_pmfxOutSurface->Data.MemId, &(m_pmfxOutSurface->Data));
			MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
		}

		++m_nFrameIndex;

		// Copy the NV12 frame out, honouring the surface pitch
		if (p_yuvData && m_pmfxOutSurface->Data.Y && m_pmfxOutSurface->Data.UV)
		{
			const int nPitch    = m_pmfxOutSurface->Data.Pitch;
			const int nLumaSize = m_nSrcWidth*m_nSrcHeight;

			if (nPitch == m_nSrcWidth)
			{
				// Rows are contiguous: each plane can be copied in one go
				memcpy(p_yuvData, m_pmfxOutSurface->Data.Y, nLumaSize);
				memcpy(p_yuvData+nLumaSize, m_pmfxOutSurface->Data.UV, nLumaSize>>1);
			}
			else
			{
				// Rows are padded to nPitch bytes: copy row by row so the
				// padding does not end up in the packed output
				unsigned char*       pDst = p_yuvData;
				const unsigned char* pSrc = m_pmfxOutSurface->Data.Y;
				for (int row = 0; row < m_nSrcHeight; ++row)
				{
					memcpy(pDst, pSrc, m_nSrcWidth);
					pDst += m_nSrcWidth;
					pSrc += nPitch;
				}

				pSrc = m_pmfxOutSurface->Data.UV;
				for (int row = 0; row < m_nSrcHeight/2; ++row)
				{
					memcpy(pDst, pSrc, m_nSrcWidth);
					pDst += m_nSrcWidth;
					pSrc += nPitch;
				}
			}
		}

		if (m_bExternalAlloc) 
		{
			sts = m_pMFXAllocator->Unlock(m_pMFXAllocator->pthis, m_pmfxOutSurface->Data.MemId, &(m_pmfxOutSurface->Data));
			MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
		}

		// Optional on-screen rendering through the D3D renderer
		if (m_bDxv2Show)
		{
			::GetClientRect(m_hWnd, &m_WndRect);
			sts = m_pD3dRender->RenderFrame(m_pmfxOutSurface, m_pMFXAllocator,
				m_WndRect, m_DisplayTitle, m_bEndedDrag, m_bShowToScale, &m_TargetRect);
			// A NULL-pointer result from the renderer is not treated as a decode error
			if (sts == MFX_ERR_NULL_PTR)
				sts = MFX_ERR_NONE;
		}
	}

	return sts;
}
  • 第三步,關閉解碼器
/**
 * Shuts the decoder down.
 *
 * The bitstream buffer, the decoder object and the renderer are always
 * released. Frames, allocator, session and snapshot buffer are only torn
 * down when m_bIntelSystemInitFinish is set; the flag is cleared first.
 */
void Close()
{
	// Always released, whatever the state of the init flag
	WipeMfxBitstream(&m_mfxBS);
	MSDK_SAFE_DELETE(m_pmfxDEC);  
	MSDK_SAFE_DELETE(m_pD3dRender);

	if (m_bIntelSystemInitFinish == true)
	{
		m_bIntelSystemInitFinish = false;

		DeleteFrames();

		// allocator if used as external for MediaSDK must be deleted after decoder
		DeleteAllocator();

		m_mfxSession.Close();

		// Snapshot buffer is released with free()
		if (m_pSnapShotBuf != NULL)
		{
			free(m_pSnapShotBuf);
			m_pSnapShotBuf = NULL;
		}
	}
}

至此,我們已經完成了對Intel集顯解碼器的封裝,通過閱讀EasyPlayer-RTSP的代碼可以對該庫的調用流程有一個系統的認識;此外,目前集成的Intel集顯解碼庫是比較舊的版本,目前尚不支持H265解碼,有興趣的朋友可以下載最新的Intel集顯編解碼庫Demo自行封裝解碼庫,相信現在的版本會更加的易用和高效。

有任何技術問題,歡迎大家和我技術交流:
[email protected]

大家也可以加入EasyPlayer流媒體播放器 QQ羣進行討論:
544917793

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章