Loading [MathJax]/extensions/tex2jax.js
ug4
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpuvector.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2009-2015: G-CSC, Goethe University Frankfurt
3 * Author: Martin Rupp
4 *
5 * This file is part of UG4.
6 *
7 * UG4 is free software: you can redistribute it and/or modify it under the
8 * terms of the GNU Lesser General Public License version 3 (as published by the
9 * Free Software Foundation) with the following additional attribution
10 * requirements (according to LGPL/GPL v3 §7):
11 *
12 * (1) The following notice must be displayed in the Appropriate Legal Notices
13 * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14 *
15 * (2) The following notice must be displayed at a prominent place in the
16 * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17 *
18 * (3) The following bibliography is recommended for citation and must be
19 * preserved in all covered files:
20 * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21 * parallel geometric multigrid solver on hierarchically distributed grids.
22 * Computing and visualization in science 16, 4 (2013), 151-164"
23 * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24 * flexible software system for simulating pde based models on high performance
25 * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU Lesser General Public License for more details.
31 */
32
33#ifndef __H__UG__CRS_ALGEBRA__VECTOR__
34#define __H__UG__CRS_ALGEBRA__VECTOR__
35
36#include "../cpu_algebra/vector.h"
37#include "cuda/cuda_manager.h"
38#include "cuda/common_cuda.h"
39
40namespace ug{
42// GPUVector
44
47
49template <typename TValueType>
50class GPUVector : public Vector<TValueType>
51{
52public:
53
54 typedef TValueType value_type;
56
58 using super::size;
59 using super::resize;
60 using super::reserve;
61
62
64 GPUVector() : Vector<TValueType>() {m_GPUState = ON_CPU; }
65
67 GPUVector(size_t _length) : Vector<TValueType>(_length) {m_GPUState = ON_CPU; }
68
71
74
75 void resize(size_t newSize, bool bCopyValues=true)
76 {
77 UG_LOG(this << "GPUVector::resize(" << newSize << ")\n");
80 super::resize(newSize, bCopyValues);
81 }
82 void reserve(size_t newCapacity, bool bCopyValues=true)
83 {
84 UG_LOG(this << "GPUVector::reserve(" << newCapacity << ")\n");
85 reserve(newCapacity, bCopyValues);
86 }
87
88
90 inline value_type &operator [] (size_t i)
91 {
94 return super::operator[](i);
95 }
96 inline const value_type &operator [] (size_t i) const
97 {
99 return super::operator[](i);
100 }
101
102
103protected:
105 virtual vector_type* virtual_clone() const;
106
109
110public:
112 {
113 if(on_gpu()) return;
114 if(m_sizeOnGPU != size())
115 {
116 cudaFree(m_devValues);
118 }
119 else
122 }
123
124 void assure_on_gpu() const
125 {
126 const_cast<GPUVector<value_type> *>(this)->assure_on_gpu();
127 }
128
130 {
131 if(on_cpu()) return;
132 // do this before so CudaCpyToHost can access [0] as dest without
133 // calling assure_on_cpu again.
136 }
137
138 void assure_on_cpu() const
139 {
140 const_cast<GPUVector<value_type> *>(this)->assure_on_cpu();
141 }
142
143 bool on_cpu()
144 {
145 return m_GPUState & ON_CPU;
146 }
147
148 bool on_gpu()
149 {
150 return m_GPUState & ON_GPU;
151 }
152
153private:
155 {
159 };
161
162public:
163 double *get_dev_ptr()
164 {
166 m_GPUState = ON_GPU; // not valid on CPU anymore
167 return m_devValues;
168 }
169 const double *get_dev_ptr() const
170 {
172 return m_devValues;
173 }
174
175public:
176 inline void operator = (const GPUVector<value_type> &v)
177 {
178 CUDA_VecAdd2(size(), 0.0, get_dev_ptr(), 1.0, v.get_dev_ptr());
179 }
181 {
182 CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), 1.0, v.get_dev_ptr());
183 }
185 {
186 CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), -1.0, v.get_dev_ptr());
187 }
188
189 inline void add(double alpha, const GPUVector<value_type> &v)
190 {
191 CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), alpha, v.get_dev_ptr());
192 }
193
194 inline void operator *= (const number &a)
195 {
196 CUDA_VecAdd2(size(), 0.0, get_dev_ptr(), a, get_dev_ptr());
197 }
198
200 inline double norm() const
201 {
202 double res=0;
203 cublasDnrm2(CUDAHelper::get_cublasHandle(), size(), get_dev_ptr(), 1, &res);
204 return res;
205 }
206
207 double dotprod(const GPUVector<value_type> &w) const
208 {
209 assert(size() == w.size());
210 double res=0;
211 cublasDdot(CUDAHelper::get_cublasHandle(), size(), get_dev_ptr(), 1, w.get_dev_ptr(), 1, &res);
212 cudaThreadSynchronize();
213 return res;
214 }
215
216private:
217 double *m_devValues;
219};
220
221template<typename value_type>
226
227template<typename value_type>
232
233template<typename value_type>
238
239template<typename value_type>
241{
242 return SmartPtr<GPUVector<value_type> >(this->virtual_clone_without_values());
243}
244
245
247
248
249// templated
250
251// operations for vectors
252//-----------------------------------------------------------------------------
253// these functions execute vector operations by using the operations on the elements of the vector
254
255// todo: change vector_t to TE_VEC<vector_t>
256
257
258// VecScale: These function calculate dest = sum_i alpha_i v_i
259
261template<typename T>
262inline void VecScaleAssign(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1)
263{
264 UG_LOG("VecScaleAssign\n");
265 for(size_t i=0; i<dest.size(); i++)
266 VecScaleAssign(dest[i], alpha1, v1[i]);
267}
268
270template<typename T>
271inline void VecAssign(GPUVector<T> &dest, const GPUVector<T> &v1)
272{
273 UG_LOG("VecAssign\n");
274 for(size_t i=0; i<dest.size(); i++)
275 dest[i] = v1[i];
276}
277
279template<typename T>
280inline void VecScaleAdd(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1, double alpha2, const GPUVector<T> &v2)
281{
282 CUDA_VecAdd_2(dest.get_dev_ptr(), alpha1, v1.get_dev_ptr(), alpha2, v2.get_dev_ptr(), dest.size());
283}
284
286template<typename T>
287inline void VecScaleAdd(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1, double alpha2, const GPUVector<T> &v2, double alpha3, const GPUVector<T> &v3)
288{
289 CUDA_VecAdd_3(dest.get_dev_ptr(), alpha1, v1.get_dev_ptr(), alpha2, v2.get_dev_ptr(), alpha3, v3.get_dev_ptr(), dest.size());
290}
291
292
293// VecProd
294
296template<typename T>
297inline void VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2, double &res)
298{
299// UG_LOG("VecProd\n");
300 assert(v1.size() == v2.size());
301 cublasDdot(CUDAHelper::get_cublasHandle(), v1.size(), v1.get_dev_ptr(), 1, v2.get_dev_ptr(), 1, &res);
302 cudaThreadSynchronize();
303}
304
306template<typename T>
307inline double VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2)
308{
309// UG_LOG("VecProd\n");
310 double res = 0;
311 VecProd(v1, v2, res);
312 return res;
313}
314
315
317template<typename T>
318inline void VecNormSquaredAdd(const GPUVector<T> &a, const GPUVector<T> &b, double &sum)
319{
320 UG_LOG("VecNormSA\n");
321 for(int i=0; i<a.size(); i++)
322 VecNormSquaredAdd(a[i], sum);
323}
324
326template<typename T>
327inline double VecNormSquared(const GPUVector<T> &a, const GPUVector<T> &b)
328{
329 UG_LOG("VecNormS\n");
330 double sum=0;
331 VecNormSquaredAdd(a, sum);
332 return sum;
333}
334
335
336
337
338
339
340
341
342
343
344// end group crs_algebra
346
347} // namespace ug
348
349#endif
Definition smart_pointer.h:108
Definition gpuvector.h:51
void assure_on_gpu() const
Definition gpuvector.h:124
TValueType value_type
Definition gpuvector.h:54
bool on_gpu()
Definition gpuvector.h:148
GPUVector(size_t _length)
constructor with length
Definition gpuvector.h:67
void operator-=(const GPUVector< value_type > &v)
Definition gpuvector.h:184
size_t m_sizeOnGPU
Definition gpuvector.h:218
double * get_dev_ptr()
Definition gpuvector.h:163
double * m_devValues
Definition gpuvector.h:217
void assure_on_cpu()
Definition gpuvector.h:129
bool on_cpu()
Definition gpuvector.h:143
GPUVector< TValueType > vector_type
Definition gpuvector.h:55
int m_GPUState
Definition gpuvector.h:160
const double * get_dev_ptr() const
Definition gpuvector.h:169
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition gpuvector.h:82
void assure_on_cpu() const
Definition gpuvector.h:138
void operator+=(const GPUVector< value_type > &v)
Definition gpuvector.h:180
double dotprod(const GPUVector< value_type > &w) const
Definition gpuvector.h:207
void resize(size_t newSize, bool bCopyValues=true)
Definition gpuvector.h:75
double norm() const
return sqrt(sum values[i]^2) (euclidian norm)
Definition gpuvector.h:200
GPUVector()
constructor
Definition gpuvector.h:64
void add(double alpha, const GPUVector< value_type > &v)
Definition gpuvector.h:189
void operator*=(const number &a)
Definition gpuvector.h:194
void operator=(const GPUVector< value_type > &v)
Definition gpuvector.h:176
void assure_on_gpu()
Definition gpuvector.h:111
size_t size() const
Definition vector.h:181
Vector< TValueType > super
Definition gpuvector.h:57
GPU_STATE
Definition gpuvector.h:155
@ ON_GPU
Definition gpuvector.h:156
@ ON_GPU_AND_CPU
Definition gpuvector.h:158
@ ON_CPU
Definition gpuvector.h:157
value_type & operator[](size_t i)
access element i of the vector
Definition gpuvector.h:90
Definition vector.h:55
value_type & operator[](size_t i)
access element i of the vector
Definition vector_impl.h:47
void resize(size_t newSize, bool bCopyValues=true)
Definition vector.h:109
size_t size() const
Definition vector.h:181
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition vector.h:113
bool CUDA_VecAdd_2(FPTYPE *dest, FPTYPE alpha1, const FPTYPE *v1, FPTYPE alpha2, const FPTYPE *v2, const int N)
bool CUDA_VecAdd2(const int len, FPTYPE alpha, FPTYPE *x, FPTYPE beta, const FPTYPE *y)
bool CUDA_VecAdd_3(FPTYPE *dest, FPTYPE alpha1, const FPTYPE *v1, FPTYPE alpha2, const FPTYPE *v2, FPTYPE alpha3, const FPTYPE *v3, const int N)
SmartPtr< vector_type > clone() const
clones the vector (deep-copy) including values
Definition gpuvector.h:228
SmartPtr< vector_type > clone_without_values() const
clones the vector (deep-copy) excluding values
Definition gpuvector.h:240
virtual vector_type * virtual_clone_without_values() const
virtual clone using covariant return type excluding values
Definition gpuvector.h:234
virtual vector_type * virtual_clone() const
virtual clone using covariant return type
Definition gpuvector.h:222
#define UG_LOG(msg)
Definition log.h:367
double number
Definition types.h:124
void VecScaleAdd(vector_t &vOut, typename vector_t::value_type s1, const vector_t &v1, typename vector_t::value_type s2, const vector_t &v2)
Scales two Vectors, adds them and returns the sum in a third vector.
Definition math_vector_functions_common_impl.hpp:265
the ug namespace
void VecAssign(vector_t &dest, const vector_t &v1)
sets dest = v1 entrywise
Definition operations_vec.h:154
T::value_type * CudaCreateAndCopyToDevice(T &vec)
Definition cuda_manager.h:163
double VecProd(const double &a, const double &b)
returns scal<a, b>
Definition operations_vec.h:84
void CudaCpyToHost(T &dest, typename T::value_type *src)
Definition cuda_manager.h:153
void CudaCpyToDevice(typename T::value_type *dest, T &vec)
Definition cuda_manager.h:144
void VecNormSquaredAdd(const double &a, double &s)
calculates s += norm_2^2(a)
Definition operations_vec.h:106
double VecNormSquared(const double &a)
returns norm_2^2(a)
Definition operations_vec.h:100
void VecScaleAssign(double &dest, double alpha1, const double &v1)
calculates dest = alpha1*v1. for doubles
Definition operations_vec.h:49