Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Stokhos_Cuda_DeviceProp.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_CUDA_DEVICE_PROP_HPP
43 #define STOKHOS_CUDA_DEVICE_PROP_HPP
44 
45 #include "Kokkos_Core.hpp"
46 
47 #include "Teuchos_TestForException.hpp"
48 
49 #include "cuda_runtime_api.h"
50 
51 namespace Stokhos {
52 
53  // Class encapsulating various device attributes
54  class DeviceProp {
55  public:
56 
57  typedef Kokkos::Cuda::size_type size_type;
58 
61 
74 
76  bool has_ldg;
77 
78  DeviceProp(int device_id = -1) :
88  warp_size(0),
90  max_regs_per_sm(0),
92  reg_bank_size(0),
93  has_shuffle(false),
94  has_ldg(false)
95  {
96  // If device_id is negative, use currently selected device
97  if (device_id < 0)
98  cudaGetDevice(&device_id);
99 
100  // Get compute capability
101  int major, minor;
102  cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor,
103  device_id);
104  cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor,
105  device_id);
106  compute_capability_major = major;
107  compute_capability_minor = minor;
108 
109  // Require compute capability >= 2
110  TEUCHOS_TEST_FOR_EXCEPTION(
111  compute_capability_major < 2, std::logic_error,
112  "Cuda compute capability >= 2 is required!");
113 
114  // These come from the CUDA occupancy calculator
115  if (compute_capability_major == 3) {
116  if (compute_capability_minor >= 7) {
117  shared_memory_capacity = 112 * 1024;
118  max_shmem_per_block = 48 * 1024;
119  max_regs_per_sm = 128 * 1024;
120  max_regs_per_block = 64 * 1024;
121  }
122  else {
123  shared_memory_capacity = 48 * 1024;
124  max_shmem_per_block = 48 * 1024;
125  max_regs_per_sm = 64 * 1024;
126  max_regs_per_block = 64 * 1024;
127  }
129  max_threads_per_block = 1024;
130  max_threads_per_sm = 2048;
131  max_blocks_per_sm = 16;
132  max_warps_per_sm = 64;
133  warp_size = 32;
134  warp_granularity = 4;
135  reg_bank_size = 256;
136  has_shuffle = true;
137  has_ldg = true;
138  }
139 
140  else if (compute_capability_major == 2) {
141  shared_memory_capacity = 48 * 1024;
143  max_shmem_per_block = 48 * 1024;
144  max_threads_per_block = 1024;
145  max_threads_per_sm = 1536;
146  max_blocks_per_sm = 8;
147  max_warps_per_sm = 48;
148  warp_size = 32;
149  warp_granularity = 2;
150  max_regs_per_sm = 32 * 1024;
151  max_regs_per_block = 32 * 1024;
152  reg_bank_size = 64;
153  has_shuffle = false;
154  has_ldg = false;
155  }
156  }
157 
158  // Returns number of registers per thread used by the given kernel
159  template <typename Kernel>
160  size_type
161  get_kernel_registers(Kernel kernel) {
162 #ifdef __CUDACC__
163  typedef void (*func_ptr_t)();
164  func_ptr_t func_ptr = reinterpret_cast<func_ptr_t>(kernel);
165  cudaFuncAttributes attrib;
166  cudaFuncGetAttributes(&attrib, func_ptr);
167  return attrib.numRegs;
168 #else
169  return 0;
170 #endif
171  }
172 
173  // Returns number of resident warps per sm for the given kernel
174  template <typename Kernel>
175  size_type
176  get_resident_warps_per_sm(Kernel kernel) {
177  const size_type regs_per_thread = get_kernel_registers(kernel);
178  const size_type regs_per_warp =
179  (warp_size*regs_per_thread + reg_bank_size-1) & ~(reg_bank_size-1);
180  const size_type warps_per_sm =
181  (max_regs_per_sm/regs_per_warp) & ~(warp_granularity-1);
182  return warps_per_sm;
183  }
184  };
185 
186 } // namespace Stokhos
187 
188 #endif /* #ifndef STOKHOS_CUDA_DEVICE_PROP_HPP */
Kokkos::Cuda::size_type size_type
Top-level namespace for Stokhos classes and functions.
size_type get_resident_warps_per_sm(Kernel kernel)
size_type get_kernel_registers(Kernel kernel)