CUDA學習——CUDA代碼常用編寫技巧(轉)

1. 聲明 __shared__ 變量或數組:

// Fixed-size array of 256 floats declared with the __shared__ qualifier, i.e.
// it lives in shared memory. Shared memory is not initialized automatically,
// so it must be written before it is read.
__shared__ float sh_farr[ 256];
// A single int declared with the __shared__ qualifier.
__shared__ int a;
2. 為結構體的指針成員分配設備內存:

// Example aggregate with one plain field and two pointer members. The pointer
// members are intended to hold addresses of device buffers obtained with
// cudaMalloc (see initMem).
typedef struct Teacher_t
{
    int a;
    unsigned int    *g_mem1;   // buffer of unsigned int elements
    float            *g_mem2;  // buffer of float elements
}Teacher;
// Allocate device memory for the pointer members of a Teacher.
//
// @param t         structure whose g_mem1 and g_mem2 members receive the
//                  addresses written by cudaMalloc
// @param mat_size  number of elements to allocate for each of the two buffers
//
// Both cudaMalloc calls are wrapped in CUDA_SAFE_CALL.
void initMem( Teacher& t, const unsigned int mat_size)
{
    // Keep the byte counts in size_t (the type cudaMalloc takes) so that
    // sizeof(...) * mat_size is not truncated back to 32 bits.
    const size_t mat_size_ui = sizeof(unsigned int) * mat_size;
    const size_t mat_size_f  = sizeof(float) * mat_size;

    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem1, mat_size_ui) );
    // The float buffer belongs in g_mem2. Writing it to g_mem1 a second time
    // would overwrite (and leak) the first allocation and leave g_mem2 unset.
    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem2, mat_size_f) );
    // ... (further initialization elided in the original article)
}
3.計時:

// Time a region of GPU work with the cutil timer helpers and print the
// elapsed time reported by cutGetTimerValue.
unsigned int timer = 0;
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
{
    // ... kernel launch(es) to be timed
}
// Kernel launches return to the host before the kernel has finished, so wait
// for the device here; otherwise only the launch overhead would be measured.
CUDA_SAFE_CALL( cudaThreadSynchronize());
CUT_SAFE_CALL( cutStopTimer( timer));
printf( "Total time: %f ms ", cutGetTimerValue( timer) );
CUT_SAFE_CALL( cutDeleteTimer( timer));
4. 獲取輸入命令行中包含的文件名:

////////////////////////////////////////////////////////////////////////////////
//! Check if a particular filename has to be used for the file where the result
//! is stored
//! @param argc number of command line arguments (from main(argc, argv)
//! @param argv pointers to command line arguments (from main(argc, argv)
//! @param filename filename of result file, updated if user specified
//!                   filename via the "filename-result" option; in that case
//!                   it is set to a copy allocated with malloc()
//! @note If the copy cannot be allocated, filename keeps its previous value.
//! @note filename is printed with "%s", so it is expected to be non-NULL on
//!       return.
////////////////////////////////////////////////////////////////////////////////
void
getResultFilename( int argc, char** argv, char*& filename)
{
    char* temp = NULL;
    cutGetCmdLineArgumentstr( argc, (const char**) argv, "filename-result", &temp);
    if( NULL != temp)
    {
        // strlen() does not count the terminating '\0' that strcpy() writes,
        // so one extra byte is required to avoid a one-byte heap overflow.
        char* copy = (char*) malloc( sizeof(char) * (strlen( temp) + 1));
        if( NULL != copy)
        {
            strcpy( copy, temp);
            filename = copy;
        }
        // temp is released with cutFree whether or not the copy succeeded.
        cutFree( temp);
    }
    printf( "Result filename: '%s' ", filename);
}
類似的:

////////////////////////////////////////////////////////////////////////////////
//! Check if a specific precision of the eigenvalue has to be obtained
//! @param argc number of command line arguments (from main(argc, argv)
//! @param argv pointers to command line arguments (from main(argc, argv)
//! @param precision precision to use, updated if a value greater than zero is
//!                  read from the "precision" command line option
////////////////////////////////////////////////////////////////////////////////
void
getPrecision( int argc, char** argv, float& precision)
{
    // Negative sentinel: temp is only accepted below if it ends up positive,
    // so precision is left untouched when the sentinel survives the lookup.
    float temp = -1.0f;
    cutGetCmdLineArgumentf( argc, (const char**) argv, "precision", &temp);
    if( temp > 0.0f)
    {
        precision = temp;
    }
    printf( "Precision: %f ", precision);
}
5. Host 端調用完 kernel 函數後,需要進行同步以等待設備端執行完成;而在 kernel(__global__)或 __device__ 函數內部,只需在必要的地方調用 __syncthreads(); 即可(它只同步同一個 block 內的線程):

// Host side: block until the device has finished the work issued so far
// (a kernel launch returns to the host before the kernel completes).
// NOTE(review): cudaThreadSynchronize() is deprecated in later CUDA toolkits
// in favor of cudaDeviceSynchronize() — confirm against the toolkit in use.
CUDA_SAFE_CALL( cudaThreadSynchronize());

本文來自CSDN博客,轉載請標明出處:http://blog.csdn.net/dvchn/archive/2008/02/25/2119590.aspx

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章