ug4
cuda_manager.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015: G-CSC, Goethe University Frankfurt
3  * Author: Martin Rupp
4  *
5  * This file is part of UG4.
6  *
7  * UG4 is free software: you can redistribute it and/or modify it under the
8  * terms of the GNU Lesser General Public License version 3 (as published by the
9  * Free Software Foundation) with the following additional attribution
10  * requirements (according to LGPL/GPL v3 §7):
11  *
12  * (1) The following notice must be displayed in the Appropriate Legal Notices
13  * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14  *
15  * (2) The following notice must be displayed at a prominent place in the
16  * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17  *
18  * (3) The following bibliography is recommended for citation and must be
19  * preserved in all covered files:
20  * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21  * parallel geometric multigrid solver on hierarchically distributed grids.
22  * Computing and visualization in science 16, 4 (2013), 151-164"
23  * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24  * flexible software system for simulating pde based models on high performance
25  * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26  *
27  * This program is distributed in the hope that it will be useful,
28  * but WITHOUT ANY WARRANTY; without even the implied warranty of
29  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30  * GNU Lesser General Public License for more details.
31  */
32 
33 #ifndef CUDAManager_H
34 #define CUDAManager_H
35 
36 #define USE_CUSPARSE
37 
38 /* Using updated (v2) interfaces to cublas and cusparse */
39 #include <cuda_runtime.h>
40 
41 #ifdef USE_CUSPARSE
42 #include <cusparse_v2.h>
43 #endif
44 
45 #include <cublas_v2.h>
46 #include <vector>
47 
48 // Utilities and system includes
49 #include "common/error.h"
50 #include "common/log.h"
51 
52 #include <string>
53 
54 namespace ug{
56 
57 const char *CUDAError(int err);
58 
59 template<typename T>
60 inline void CudaCheckStatus(T status, const char * file, int line)
61 {
62  unsigned int s = static_cast<unsigned int>(status );
63  UG_COND_THROW(status != 0, "CUDA error at " << file << ":" << line << " " << s << " = " << ug::CUDAError(status) );
64 }
65 
66 
/**
 * Checks a status value with ug::CudaCheckStatus, reporting the file and line
 * of the place where the macro is used.
 *
 * The callee is fully qualified so the macro also works outside namespace ug,
 * and the argument is parenthesized so any expression can be passed safely.
 */
#define CUDA_CHECK_STATUS(status ) ug::CudaCheckStatus((status), __FILE__, __LINE__)
68 
/**
 * Evaluates \c err exactly once and throws a UG error if the result differs
 * from cudaSuccess.
 *
 * \param err   expression yielding the error code (typically the CUDA call itself)
 * \param desc  stream-style description appended to the error message; it is
 *              only reached when the check fails
 *
 * The result is cached in a local variable so that a failing call passed as
 * \c err is not executed again while the error message is assembled. The
 * expansion is a braced block, so it can be used with or without a trailing
 * semicolon.
 */
#define CUDA_CHECK_SUCCESS(err, desc) \
{\
    const int ugCudaCheckErrCode_ = static_cast<int>(err);\
    if(ugCudaCheckErrCode_ != static_cast<int>(cudaSuccess))\
    {\
        UG_THROW("Error in " << __FUNCTION__ << ": CUDA ERROR " << ugCudaCheckErrCode_ <<":\n" <<\
                ug::CUDAError(ugCudaCheckErrCode_) << "\n----------------------------\n" << desc << "\n");\
    }\
}
75 
76 
77 template<typename T>
78 T *MyCudaAlloc(size_t N)
79 {
80  UG_DLOG(DID_CUDA, 2, "CUDA: Allocating " << sizeof(T)*N << " bytes.\n");
81 
82  T *p;
83  cudaError_t err = cudaMalloc ((void**) &p, sizeof(T)*N);
84  if(err != cudaSuccess)
85  {
86  UG_THROW("Error in " << __FUNCTION__ << "when allocating " << sizeof(T)*N << " bytes. CUDA ERROR " << err <<": " <<
87  ug::CUDAError(err));
88  }
89  return p;
90 }
91 
92 
94 {
95 public:
96  virtual ~CUDAManager();
97  void init();
98  static CUDAManager &get_instance();
99 
100 #ifdef USE_CUSPARSE
101 public:
102  static inline cusparseHandle_t get_cusparseHandle() { return get_instance().cusparseHandle; }
103 private:
104  cusparseHandle_t cusparseHandle;
105 #endif
106 
107 public:
108  static inline cublasHandle_t get_cublasHandle() { return get_instance().cublasHandle; }
110 
111  template<typename T>
112  T *get_temp_buffer(size_t n)
113  {
114  size_t N = n*sizeof(T);
115  if(N < m_tempSize) return (T*)m_tempBuffer;
116 
117  UG_DLOG(DID_CUDA, 2, "CUDA: Allocating Temp Buffer " << N << " bytes.\n");
118  if(m_tempBuffer)
119  cudaFree(m_tempBuffer);
120 
121  m_tempBuffer = MyCudaAlloc<char>(n);
122 
123  return (T*)m_tempBuffer;
124  }
125 
126  template<typename T>
128  {
129  return (T*)m_tempRetBuffer;
130  }
131 
132  static void get_cuda_devices(std::vector<cudaDeviceProp> &devices);
134 
135 private:
136  cublasHandle_t cublasHandle;
139  size_t m_tempSize;
140 };
141 
142 
143 template<typename T>
144 inline void CudaCpyToDevice(typename T::value_type *dest, T &vec)
145 {
146  UG_DLOG(DID_CUDA, 2, "Copying " << vec.size() << " to device\n");
147  //std::cout << "copy!\n";
148  CUDA_CHECK_SUCCESS( cudaMemcpy(dest, &vec[0], vec.size()*sizeof(typename T::value_type), cudaMemcpyHostToDevice),
149  "cudaMemcpy vec size " << vec.size());
150 }
151 
152 template<typename T>
153 inline void CudaCpyToHost(T &dest, typename T::value_type *src)
154 {
155  UG_DLOG(DID_CUDA, 2, "Copying " << dest.size() << " to host\n");
156  //std::cout << "copy!\n";
157  CUDA_CHECK_SUCCESS( cudaMemcpy(&dest[0], src, dest.size()*sizeof(typename T::value_type), cudaMemcpyDeviceToHost),
158  "cudaMemcpy dest size " << dest.size())
159 }
160 
161 
162 template<typename T>
163 inline typename T::value_type *CudaCreateAndCopyToDevice(T &vec)
164 {
165  UG_DLOG(DID_CUDA, 2, "Create and Copying " << vec.size() << " to host\n");
166  typename T::value_type *dest;
167  int N = vec.size()*sizeof(typename T::value_type);
168  CUDA_CHECK_SUCCESS( cudaMalloc((void **)&dest, N),
169  "Error at cudaMalloc of " << N << " bytes");
170 
171  CudaCpyToDevice(dest, vec);
172  return dest;
173 }
174 
175 template<typename T>
176 T CUDA_GetElementFromDevice(T *p, size_t i=0)
177 {
178  T t;
179  cudaMemcpy(&t, p+i, sizeof(T), cudaMemcpyDeviceToHost);
180  return t;
181 }
182 }
183 #endif /* CUDAManager_H */
parameterString p
parameterString s
Definition: cuda_manager.h:94
static cublasHandle_t get_cublasHandle()
Definition: cuda_manager.h:108
cublasHandle_t cublasHandle
Definition: cuda_manager.h:136
T * get_temp_return_buffer()
Definition: cuda_manager.h:127
void init()
Definition: cuda_manager.cpp:83
static int get_max_multiprocessor_cuda_device()
Definition: cuda_manager.cpp:54
static cusparseHandle_t get_cusparseHandle()
Definition: cuda_manager.h:102
cusparseHandle_t cusparseHandle
Definition: cuda_manager.h:104
size_t m_maxThreadsPerBlock
Definition: cuda_manager.h:109
void * m_tempRetBuffer
Definition: cuda_manager.h:138
void * m_tempBuffer
Definition: cuda_manager.h:137
static CUDAManager & get_instance()
Definition: cuda_manager.cpp:153
size_t m_tempSize
Definition: cuda_manager.h:139
virtual ~CUDAManager()
Definition: cuda_manager.cpp:68
T * get_temp_buffer(size_t n)
Definition: cuda_manager.h:112
static void get_cuda_devices(std::vector< cudaDeviceProp > &devices)
Definition: cuda_manager.cpp:45
Definition: debug_id.h:94
#define CUDA_CHECK_SUCCESS(err, desc)
Definition: cuda_manager.h:69
#define UG_THROW(msg)
Definition: error.h:57
#define UG_DLOG(__debugID__, level, msg)
Definition: log.h:298
#define UG_COND_THROW(cond, msg)
UG_COND_THROW(cond, msg) : performs a UG_THROW(msg) if cond == true.
Definition: error.h:61
the ug namespace
T * MyCudaAlloc(size_t N)
Definition: cuda_manager.h:78
DebugID DID_CUDA("CUDA")
Definition: cuda_manager.h:55
void CudaCpyToHost(T &dest, typename T::value_type *src)
Definition: cuda_manager.h:153
void CudaCpyToDevice(typename T::value_type *dest, T &vec)
Definition: cuda_manager.h:144
T CUDA_GetElementFromDevice(T *p, size_t i=0)
Definition: cuda_manager.h:176
T::value_type * CudaCreateAndCopyToDevice(T &vec)
Definition: cuda_manager.h:163
const char * CUDAError(int err)
Definition: cuda_error.cpp:35
void CudaCheckStatus(T status, const char *file, int line)
Definition: cuda_manager.h:60
T value_type
Definition: sparsematrix_interface.h:2