CUDA从入门到精通到精通_笔记4:GPU设备属性查询的代码

/*******************************************************************************************************************
 *文件说明:
 *        第二个CUDA程序------GPU设备性能参数的查询
 *开发环境:
 *        win7+OpenCV2.4.8+cudaToolkit5.0+CUDA SDK3.0+NVIDIA NVS 3100M
 *参考手册:
 *        CUDA_ToolKit_Reference_Manual.pdf
 *时间地点:
 *        陕西师范大学 2017.1.8
 *作    者:
 *        九月
 *模块说明:
 *        1--由于我们希望在【设备】上【分配内存】和【执行代码】,因此如果在程序中能够知道设备拥有多少【内存】以及具备
 *           哪些功能,那么将非常的有用
 *        2--而且,在一台计算机上拥有多个支持CUDA的设备也是非常常见的情形。在这些情况中,我们希望通过某种方式来确定使用
 *           的是哪一个GPU设备
 *        3--在深入研究如何编写【设备代码】之前,我们需要通过某种机制来判断计算机中当前有哪些设备,以及每个设备都支持哪
 *           些功能。
 *        4--幸运的是,我们可以通过一个非常简单的接口来获得这样的信息
 *        5--首先,我们希望知道在系统中有多少个设备是支持CUDA的,并且这些设备能够运行基于CUDA C编写的【核函数】
 *        6--要获得CUDA设备的数量,可以调用cudaGetDeviceCount()
 *        7--在调用cudaGetDeviceCount()后,可以对每个设备进行迭代、并查询各个【设备】的【相关信息】。CUDA运行时将返回一
 *           个cudaDeviceProp类型的结构,其中包含了设备的相关属性。
 ********************************************************************************************************************/
#include "cuda_runtime.h"               // [1] CUDA runtime header; brings in most of the runtime API
#include "device_launch_parameters.h"
#include <driver_types.h>               // [2] driver types header; declares cudaDeviceProp (device properties)
#include <cuda_runtime_api.h>           // [3] CUDA runtime API header
#include "stdio.h"
#include <cstdio>                       // std::printf / std::fprintf
#include <cstdlib>                      // std::system
#include <iostream>

/*******************************************************************************************************************
 * Console application entry point.
 *
 * Queries how many CUDA devices are present with cudaGetDeviceCount(), then calls
 * cudaGetDeviceProperties() for each device index and prints a selection of the
 * returned cudaDeviceProp fields.
 *
 * cudaGetDeviceCount prototype:
 *        extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaGetDeviceCount(int *count)
 * It returns in *count the number of devices with compute capability >= 1.0 that are
 * available for execution; if there is no such device it returns cudaErrorNoDevice.
 *
 * Returns 0 when every query succeeded, 1 if any CUDA runtime call reported an error.
 ********************************************************************************************************************/
int main()
{
    int exitCode = 0;

    // [1] Ask the runtime for the device count. The variable is initialised so that a
    //     failed call never leaves the loop bound undefined.
    int iCount = 0;
    const cudaError_t countStatus = cudaGetDeviceCount(&iCount);
    if (countStatus != cudaSuccess)
    {
        std::fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(countStatus));
        iCount   = 0;
        exitCode = 1;
    }
    std::printf("The number of GPU = %d\n", iCount);

    // [2] Walk every device index and dump its properties.
    for (int i = 0; i < iCount; i++)
    {
        cudaDeviceProp strProp;         // device property structure filled by the runtime
        const cudaError_t propStatus = cudaGetDeviceProperties(&strProp, i);
        if (propStatus != cudaSuccess)
        {
            // Do not print fields of a structure the runtime did not fill in.
            std::fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n", i, cudaGetErrorString(propStatus));
            exitCode = 1;
            continue;
        }

        std::printf("----General Information for device = %d-----\n", i);
        std::printf("The name of Device:%s\n", strProp.name);                        // [1] device name string

        // size_t fields are widened to unsigned long long and printed with %llu:
        // passing a size_t to %d is undefined behaviour and truncates on 64-bit builds.
        std::printf("The totalGlobalMem of GPU:%llu\n",
                    static_cast<unsigned long long>(strProp.totalGlobalMem));        // [2] total global memory, in bytes
        std::printf("The sharedMemPerBlock of GPU:%llu\n",
                    static_cast<unsigned long long>(strProp.sharedMemPerBlock));     // [3] shared memory available per block, in bytes
        std::printf("The regsPerBlock of GPU:%d\n", strProp.regsPerBlock);           // [4] 32-bit registers available per block
        std::printf("The warpSize:%d\n", strProp.warpSize);                          // [5] threads per warp
        std::printf("The memPitch:%llu\n",
                    static_cast<unsigned long long>(strProp.memPitch));              // [6] maximum pitch allowed by memory copies, in bytes
        std::printf("The maxThreadPerBlock:%d\n", strProp.maxThreadsPerBlock);       // [7] maximum threads per block
        std::printf("The totalConstMem:%llu\n",
                    static_cast<unsigned long long>(strProp.totalConstMem));         // [8] total constant memory, in bytes
        std::printf("The major:%d\n", strProp.major);
        std::printf("The minor:%d\n", strProp.minor);
        std::printf("The multiProcessCount:%d\n", strProp.multiProcessorCount);
    }

#ifdef _WIN32
    // Keep the console window open when launched from the IDE; "pause" is a
    // Windows shell built-in, so it is only invoked there.
    std::system("pause");
#endif
    return exitCode;
}

/**
 * Device property record. main() in this file passes a variable of this type to
 * cudaGetDeviceProperties() and prints several of its fields (name, totalGlobalMem,
 * sharedMemPerBlock, regsPerBlock, warpSize, memPitch, maxThreadsPerBlock,
 * totalConstMem, major, minor, multiProcessorCount).
 *
 * NOTE(review): the include comment at the top of this file says <driver_types.h>
 * already provides cudaDeviceProp, so this copy looks like a reference listing and
 * would presumably clash with the header's definition if compiled — confirm before
 * building. It presumably mirrors the toolkit's layout, so field order and types
 * should be left untouched.
 */
struct __device_builtin__ cudaDeviceProp {
    char name[256];
    /**< ASCII string identifying device */
    size_t totalGlobalMem;
    /**< Global memory available on device in bytes */
    size_t sharedMemPerBlock;
    /**< Shared memory available per block in bytes */
    int regsPerBlock;
    /**< 32-bit registers available per block */
    int warpSize;
    /**< Warp size in threads */
    size_t memPitch;
    /**< Maximum pitch in bytes allowed by memory copies */
    int maxThreadsPerBlock;
    /**< Maximum number of threads per block */
    int maxThreadsDim[3];
    /**< Maximum size of each dimension of a block */
    int maxGridSize[3];
    /**< Maximum size of each dimension of a grid */
    int clockRate;
    /**< Clock frequency in kilohertz */
    size_t totalConstMem;
    /**< Constant memory available on device in bytes */
    int major;
    /**< Major compute capability */
    int minor;
    /**< Minor compute capability */
    size_t textureAlignment;
    /**< Alignment requirement for textures */
    size_t texturePitchAlignment;
    /**< Pitch alignment requirement for texture references bound to pitched memory */
    int deviceOverlap;
    /**< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
    int multiProcessorCount;
    /**< Number of multiprocessors on device */
    int kernelExecTimeoutEnabled;
    /**< Specifies whether there is a run time limit on kernels */
    int integrated;
    /**< Device is integrated as opposed to discrete */
    int canMapHostMemory;
    /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
    int computeMode;
    /**< Compute mode (See ::cudaComputeMode) */
    int maxTexture1D;
    /**< Maximum 1D texture size */
    int maxTexture1DMipmap;
    /**< Maximum 1D mipmapped texture size */
    int maxTexture1DLinear;
    /**< Maximum size for 1D textures bound to linear memory */
    int maxTexture2D[2];
    /**< Maximum 2D texture dimensions */
    int maxTexture2DMipmap[2];
    /**< Maximum 2D mipmapped texture dimensions */
    int maxTexture2DLinear[3];
    /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
    int maxTexture2DGather[2];
    /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
    int maxTexture3D[3];
    /**< Maximum 3D texture dimensions */
    int maxTextureCubemap;
    /**< Maximum Cubemap texture dimensions */
    int maxTexture1DLayered[2];
    /**< Maximum 1D layered texture dimensions */
    int maxTexture2DLayered[3];
    /**< Maximum 2D layered texture dimensions */
    int maxTextureCubemapLayered[2];
    /**< Maximum Cubemap layered texture dimensions */
    int maxSurface1D;
    /**< Maximum 1D surface size */
    int maxSurface2D[2];
    /**< Maximum 2D surface dimensions */
    int maxSurface3D[3];
    /**< Maximum 3D surface dimensions */
    int maxSurface1DLayered[2];
    /**< Maximum 1D layered surface dimensions */
    int maxSurface2DLayered[3];
    /**< Maximum 2D layered surface dimensions */
    int maxSurfaceCubemap;
    /**< Maximum Cubemap surface dimensions */
    int maxSurfaceCubemapLayered[2];
    /**< Maximum Cubemap layered surface dimensions */
    size_t surfaceAlignment;
    /**< Alignment requirements for surfaces */
    int concurrentKernels;
    /**< Device can possibly execute multiple kernels concurrently */
    int ECCEnabled;
    /**< Device has ECC support enabled */
    int pciBusID;
    /**< PCI bus ID of the device */
    int pciDeviceID;
    /**< PCI device ID of the device */
    int pciDomainID;
    /**< PCI domain ID of the device */
    int tccDriver;
    /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
    int asyncEngineCount;
    /**< Number of asynchronous engines */
    int unifiedAddressing;
    /**< Device shares a unified address space with the host */
    int memoryClockRate;
    /**< Peak memory clock frequency in kilohertz */
    int memoryBusWidth;
    /**< Global memory bus width in bits */
    int l2CacheSize;
    /**< Size of L2 cache in bytes */
    int maxThreadsPerMultiProcessor;
    /**< Maximum resident threads per multiprocessor */
};

就爱阅读 www.92to.com 网友整理上传, 为您提供最全的知识大全, 期待您的分享，转载请注明出处。

来源:

与本文相关文章

暂无,快来抢沙发吧！