ug4
gpuvector.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2015: G-CSC, Goethe University Frankfurt
3  * Author: Martin Rupp
4  *
5  * This file is part of UG4.
6  *
7  * UG4 is free software: you can redistribute it and/or modify it under the
8  * terms of the GNU Lesser General Public License version 3 (as published by the
9  * Free Software Foundation) with the following additional attribution
10  * requirements (according to LGPL/GPL v3 §7):
11  *
12  * (1) The following notice must be displayed in the Appropriate Legal Notices
13  * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14  *
15  * (2) The following notice must be displayed at a prominent place in the
16  * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17  *
18  * (3) The following bibliography is recommended for citation and must be
19  * preserved in all covered files:
20  * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21  * parallel geometric multigrid solver on hierarchically distributed grids.
22  * Computing and visualization in science 16, 4 (2013), 151-164"
23  * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24  * flexible software system for simulating pde based models on high performance
25  * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26  *
27  * This program is distributed in the hope that it will be useful,
28  * but WITHOUT ANY WARRANTY; without even the implied warranty of
29  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30  * GNU Lesser General Public License for more details.
31  */
32 
33 #ifndef __H__UG__CRS_ALGEBRA__VECTOR__
34 #define __H__UG__CRS_ALGEBRA__VECTOR__
35 
36 #include "../cpu_algebra/vector.h"
37 #include "cuda/cuda_manager.h"
38 #include "cuda/common_cuda.h"
39 
40 namespace ug{
42 // GPUVector
44 
47 
49 template <typename TValueType>
50 class GPUVector : public Vector<TValueType>
51 {
52 public:
53 
54  typedef TValueType value_type;
56 
58  using super::size;
59  using super::resize;
60  using super::reserve;
61 
62 
64  GPUVector() : Vector<TValueType>() {m_GPUState = ON_CPU; }
65 
67  GPUVector(size_t _length) : Vector<TValueType>(_length) {m_GPUState = ON_CPU; }
68 
71 
74 
75  void resize(size_t newSize, bool bCopyValues=true)
76  {
77  UG_LOG(this << "GPUVector::resize(" << newSize << ")\n");
78  assure_on_cpu();
80  super::resize(newSize, bCopyValues);
81  }
82  void reserve(size_t newCapacity, bool bCopyValues=true)
83  {
84  UG_LOG(this << "GPUVector::reserve(" << newCapacity << ")\n");
85  reserve(newCapacity, bCopyValues);
86  }
87 
88 
90  inline value_type &operator [] (size_t i)
91  {
92  assure_on_cpu();
94  return super::operator[](i);
95  }
96  inline const value_type &operator [] (size_t i) const
97  {
98  assure_on_cpu();
99  return super::operator[](i);
100  }
101 
102 
103 protected:
105  virtual vector_type* virtual_clone() const;
106 
108  virtual vector_type* virtual_clone_without_values() const;
109 
110 public:
112  {
113  if(on_gpu()) return;
114  if(m_sizeOnGPU != size())
115  {
116  cudaFree(m_devValues);
118  }
119  else
122  }
123 
124  void assure_on_gpu() const
125  {
126  const_cast<GPUVector<value_type> *>(this)->assure_on_gpu();
127  }
128 
130  {
131  if(on_cpu()) return;
132  // do this before so CudaCpyToHost can access [0] as dest without
133  // calling assure_on_cpu again.
135  CudaCpyToHost(*this, m_devValues);
136  }
137 
138  void assure_on_cpu() const
139  {
140  const_cast<GPUVector<value_type> *>(this)->assure_on_cpu();
141  }
142 
143  bool on_cpu()
144  {
145  return m_GPUState & ON_CPU;
146  }
147 
148  bool on_gpu()
149  {
150  return m_GPUState & ON_GPU;
151  }
152 
153 private:
155  {
156  ON_GPU = 1,
157  ON_CPU = 2,
158  ON_GPU_AND_CPU = 3
159  };
161 
162 public:
163  double *get_dev_ptr()
164  {
165  assure_on_gpu();
166  m_GPUState = ON_GPU; // not valid on CPU anymore
167  return m_devValues;
168  }
169  const double *get_dev_ptr() const
170  {
171  assure_on_gpu();
172  return m_devValues;
173  }
174 
175 public:
176  inline void operator = (const GPUVector<value_type> &v)
177  {
178  CUDA_VecAdd2(size(), 0.0, get_dev_ptr(), 1.0, v.get_dev_ptr());
179  }
180  inline void operator += (const GPUVector<value_type> &v)
181  {
182  CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), 1.0, v.get_dev_ptr());
183  }
184  inline void operator -= (const GPUVector<value_type> &v)
185  {
186  CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), -1.0, v.get_dev_ptr());
187  }
188 
189  inline void add(double alpha, const GPUVector<value_type> &v)
190  {
191  CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), alpha, v.get_dev_ptr());
192  }
193 
194  inline void operator *= (const number &a)
195  {
196  CUDA_VecAdd2(size(), 0.0, get_dev_ptr(), a, get_dev_ptr());
197  }
198 
200  inline double norm() const
201  {
202  double res=0;
203  cublasDnrm2(CUDAHelper::get_cublasHandle(), size(), get_dev_ptr(), 1, &res);
204  return res;
205  }
206 
207  double dotprod(const GPUVector<value_type> &w) const
208  {
209  assert(size() == w.size());
210  double res=0;
211  cublasDdot(CUDAHelper::get_cublasHandle(), size(), get_dev_ptr(), 1, w.get_dev_ptr(), 1, &res);
212  cudaThreadSynchronize();
213  return res;
214  }
215 
216 private:
217  double *m_devValues;
218  size_t m_sizeOnGPU;
219 };
220 
221 template<typename value_type>
223 {
224  return new GPUVector<value_type>(*this);
225 }
226 
227 template<typename value_type>
229 {
230  return SmartPtr<GPUVector<value_type> >(this->virtual_clone());
231 }
232 
233 template<typename value_type>
235 {
236  return new GPUVector<value_type>(this->size());
237 }
238 
239 template<typename value_type>
241 {
242  return SmartPtr<GPUVector<value_type> >(this->virtual_clone_without_values());
243 }
244 
245 
247 
248 
249 // templated
250 
251 // operations for vectors
252 //-----------------------------------------------------------------------------
253 // these functions execute vector operations by using the operations on the elements of the vector
254 
255 // todo: change vector_t to TE_VEC<vector_t>
256 
257 
258 // VecScale: These function calculate dest = sum_i alpha_i v_i
259 
261 template<typename T>
262 inline void VecScaleAssign(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1)
263 {
264  UG_LOG("VecScaleAssign\n");
265  for(size_t i=0; i<dest.size(); i++)
266  VecScaleAssign(dest[i], alpha1, v1[i]);
267 }
268 
270 template<typename T>
271 inline void VecAssign(GPUVector<T> &dest, const GPUVector<T> &v1)
272 {
273  UG_LOG("VecAssign\n");
274  for(size_t i=0; i<dest.size(); i++)
275  dest[i] = v1[i];
276 }
277 
279 template<typename T>
280 inline void VecScaleAdd(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1, double alpha2, const GPUVector<T> &v2)
281 {
282  CUDA_VecAdd_2(dest.get_dev_ptr(), alpha1, v1.get_dev_ptr(), alpha2, v2.get_dev_ptr(), dest.size());
283 }
284 
286 template<typename T>
287 inline void VecScaleAdd(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1, double alpha2, const GPUVector<T> &v2, double alpha3, const GPUVector<T> &v3)
288 {
289  CUDA_VecAdd_3(dest.get_dev_ptr(), alpha1, v1.get_dev_ptr(), alpha2, v2.get_dev_ptr(), alpha3, v3.get_dev_ptr(), dest.size());
290 }
291 
292 
293 // VecProd
294 
296 template<typename T>
297 inline void VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2, double &res)
298 {
299 // UG_LOG("VecProd\n");
300  assert(v1.size() == v2.size());
301  cublasDdot(CUDAHelper::get_cublasHandle(), v1.size(), v1.get_dev_ptr(), 1, v2.get_dev_ptr(), 1, &res);
302  cudaThreadSynchronize();
303 }
304 
306 template<typename T>
307 inline double VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2)
308 {
309 // UG_LOG("VecProd\n");
310  double res = 0;
311  VecProd(v1, v2, res);
312  return res;
313 }
314 
315 
317 template<typename T>
318 inline void VecNormSquaredAdd(const GPUVector<T> &a, const GPUVector<T> &b, double &sum)
319 {
320  UG_LOG("VecNormSA\n");
321  for(int i=0; i<a.size(); i++)
322  VecNormSquaredAdd(a[i], sum);
323 }
324 
326 template<typename T>
327 inline double VecNormSquared(const GPUVector<T> &a, const GPUVector<T> &b)
328 {
329  UG_LOG("VecNormS\n");
330  double sum=0;
331  VecNormSquaredAdd(a, sum);
332  return sum;
333 }
334 
335 
336 
337 
338 
339 
340 
341 
342 
343 
344 // end group crs_algebra
346 
347 } // namespace ug
348 
349 #endif
Definition: gpuvector.h:51
void assure_on_gpu() const
Definition: gpuvector.h:124
TValueType value_type
Definition: gpuvector.h:54
bool on_gpu()
Definition: gpuvector.h:148
GPUVector(size_t _length)
constructor with length
Definition: gpuvector.h:67
void operator-=(const GPUVector< value_type > &v)
Definition: gpuvector.h:184
size_t m_sizeOnGPU
Definition: gpuvector.h:218
const double * get_dev_ptr() const
Definition: gpuvector.h:169
double * m_devValues
Definition: gpuvector.h:217
void assure_on_cpu()
Definition: gpuvector.h:129
bool on_cpu()
Definition: gpuvector.h:143
GPUVector< TValueType > vector_type
Definition: gpuvector.h:55
int m_GPUState
Definition: gpuvector.h:160
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition: gpuvector.h:82
void assure_on_cpu() const
Definition: gpuvector.h:138
void operator+=(const GPUVector< value_type > &v)
Definition: gpuvector.h:180
double dotprod(const GPUVector< value_type > &w) const
Definition: gpuvector.h:207
void resize(size_t newSize, bool bCopyValues=true)
Definition: gpuvector.h:75
double norm() const
return sqrt(sum values[i]^2) (euclidian norm)
Definition: gpuvector.h:200
GPUVector()
constructor
Definition: gpuvector.h:64
value_type & operator[](size_t i)
access element i of the vector
Definition: gpuvector.h:90
void add(double alpha, const GPUVector< value_type > &v)
Definition: gpuvector.h:189
void operator*=(const number &a)
Definition: gpuvector.h:194
void operator=(const GPUVector< value_type > &v)
Definition: gpuvector.h:176
void assure_on_gpu()
Definition: gpuvector.h:111
size_t size() const
Definition: vector.h:181
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition: vector.h:113
Vector< TValueType > super
Definition: gpuvector.h:57
GPU_STATE
Definition: gpuvector.h:155
@ ON_GPU
Definition: gpuvector.h:156
@ ON_GPU_AND_CPU
Definition: gpuvector.h:158
@ ON_CPU
Definition: gpuvector.h:157
double * get_dev_ptr()
Definition: gpuvector.h:163
Definition: vector.h:55
TValueType value_type
Definition: vector.h:57
value_type & operator[](size_t i)
access element i of the vector
Definition: vector_impl.h:47
void resize(size_t newSize, bool bCopyValues=true)
Definition: vector.h:109
size_t size() const
Definition: vector.h:181
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition: vector.h:113
bool CUDA_VecAdd_2(FPTYPE *dest, FPTYPE alpha1, const FPTYPE *v1, FPTYPE alpha2, const FPTYPE *v2, const int N)
bool CUDA_VecAdd2(const int len, FPTYPE alpha, FPTYPE *x, FPTYPE beta, const FPTYPE *y)
bool CUDA_VecAdd_3(FPTYPE *dest, FPTYPE alpha1, const FPTYPE *v1, FPTYPE alpha2, const FPTYPE *v2, FPTYPE alpha3, const FPTYPE *v3, const int N)
SmartPtr< vector_type > clone() const
clones the vector (deep-copy) including values
Definition: gpuvector.h:228
SmartPtr< vector_type > clone_without_values() const
clones the vector (deep-copy) excluding values
Definition: gpuvector.h:240
virtual vector_type * virtual_clone_without_values() const
virtual clone using covariant return type excluding values
Definition: gpuvector.h:234
virtual vector_type * virtual_clone() const
virtual clone using covariant return type
Definition: gpuvector.h:222
number alpha
#define UG_LOG(msg)
Definition: log.h:367
double number
Definition: types.h:124
void VecScaleAdd(vector_t &vOut, typename vector_t::value_type s1, const vector_t &v1, typename vector_t::value_type s2, const vector_t &v2)
Scales two Vectors, adds them and returns the sum in a third vector.
Definition: math_vector_functions_common_impl.hpp:265
CPUAlgebra::vector_type vector_type
the ug namespace
void VecAssign(vector_t &dest, const vector_t &v1)
sets dest = v1 entrywise
Definition: operations_vec.h:154
double VecProd(const double &a, const double &b)
returns scal<a, b>
Definition: operations_vec.h:84
void CudaCpyToHost(T &dest, typename T::value_type *src)
Definition: cuda_manager.h:153
void CudaCpyToDevice(typename T::value_type *dest, T &vec)
Definition: cuda_manager.h:144
void VecNormSquaredAdd(const double &a, double &s)
calculates s += norm_2^2(a)
Definition: operations_vec.h:106
T::value_type * CudaCreateAndCopyToDevice(T &vec)
Definition: cuda_manager.h:163
double VecNormSquared(const double &a)
returns norm_2^2(a)
Definition: operations_vec.h:100
void VecScaleAssign(double &dest, double alpha1, const double &v1)
calculates dest = alpha1*v1. for doubles
Definition: operations_vec.h:49