docs/gpuvector_8h_source.html

/*

 * Copyright (c) 2009-2015:  G-CSC, Goethe University Frankfurt

 * Author: Martin Rupp

 *

 * This file is part of UG4.

 *

 * UG4 is free software: you can redistribute it and/or modify it under the

 * terms of the GNU Lesser General Public License version 3 (as published by the

 * Free Software Foundation) with the following additional attribution

 * requirements (according to LGPL/GPL v3 §7):

 *

 * (1) The following notice must be displayed in the Appropriate Legal Notices

 * of covered and combined works: "Based on UG4 (www.ug4.org/license)".

 *

 * (2) The following notice must be displayed at a prominent place in the

 * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".

 *

 * (3) The following bibliography is recommended for citation and must be

 * preserved in all covered files:

 * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively

 *   parallel geometric multigrid solver on hierarchically distributed grids.

 *   Computing and visualization in science 16, 4 (2013), 151-164"

 * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel

 *   flexible software system for simulating pde based models on high performance

 *   computers. Computing and visualization in science 16, 4 (2013), 165-179"

 *

 * This program is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

 * GNU Lesser General Public License for more details.

 */


#ifndef __H__UG__CRS_ALGEBRA__VECTOR__

#define __H__UG__CRS_ALGEBRA__VECTOR__


#include "../cpu_algebra/vector.h"

#include "cuda/cuda_manager.h"

#include "cuda/common_cuda.h"


namespace ug{

//              GPUVector


/**
 * Vector whose values can live in host memory, in device (GPU) memory, or in
 * both at once.
 *
 * The host storage is inherited from Vector<TValueType>; the device storage is
 * the buffer m_devValues. m_GPUState is a bit mask of ON_CPU / ON_GPU that
 * records which of the two copies currently holds valid data. assure_on_cpu()
 * and assure_on_gpu() transfer the data on demand.
 *
 * The device buffer is typed as double*, so the GPU operations below work on
 * double data.
 */
template <typename TValueType>
class GPUVector : public Vector<TValueType>
{
public:
  typedef TValueType value_type;
  typedef GPUVector<TValueType> vector_type;

  typedef Vector<TValueType> super;
  using super::size;
  using super::resize;
  using super::reserve;

  /// constructor: data is valid on the host, no device buffer allocated yet
  GPUVector()
    : Vector<TValueType>(), m_GPUState(ON_CPU), m_devValues(NULL), m_sizeOnGPU(0) {}

  /// constructor with length: data is valid on the host, no device buffer allocated yet
  GPUVector(size_t _length)
    : Vector<TValueType>(_length), m_GPUState(ON_CPU), m_devValues(NULL), m_sizeOnGPU(0) {}

  /**
   * copy constructor.
   * The source is synchronised to the host first so that the base-class copy
   * sees current values. The copy starts as "valid on host only" and owns no
   * device buffer; it allocates its own on the first assure_on_gpu().
   * (A member-wise copy would share m_devValues between both objects.)
   */
  GPUVector(const GPUVector<value_type> &v)
    : Vector<TValueType>(host_synced(v)), m_GPUState(ON_CPU), m_devValues(NULL), m_sizeOnGPU(0) {}

  /// destructor: releases the device buffer, if one was allocated
  virtual ~GPUVector()
  {
    if(m_devValues != NULL)
      cudaFree(m_devValues);
  }

  /// clones the vector (deep-copy) including values
  SmartPtr<vector_type> clone() const;

  /// clones the vector (deep-copy) excluding values
  SmartPtr<vector_type> clone_without_values() const;

  /**
   * resizes the host storage.
   * The data is fetched to the host first; afterwards only the host copy is
   * marked valid, so the next assure_on_gpu() uploads again.
   * \param newSize      new number of entries
   * \param bCopyValues  forwarded to Vector::resize
   */
  void resize(size_t newSize, bool bCopyValues=true)
  {
    UG_LOG(this << "GPUVector::resize(" << newSize << ")\n");
    assure_on_cpu();
    m_GPUState = ON_CPU;
    super::resize(newSize, bCopyValues);
  }

  /**
   * reserves host storage.
   * \param newCapacity  forwarded to Vector::reserve
   * \param bCopyValues  forwarded to Vector::reserve
   */
  void reserve(size_t newCapacity, bool bCopyValues=true)
  {
    UG_LOG(this << "GPUVector::reserve(" << newCapacity << ")\n");
    // must be qualified: an unqualified call resolves to this very function
    super::reserve(newCapacity, bCopyValues);
  }

  /// access element i of the vector (host side); invalidates the device copy
  inline value_type &operator [] (size_t i)
  {
    assure_on_cpu();
    // the caller may write through the returned reference
    m_GPUState = ON_CPU;
    return super::operator[](i);
  }

  /// read-only access to element i of the vector (host side)
  inline const value_type &operator [] (size_t i) const
  {
    assure_on_cpu();
    return super::operator[](i);
  }

protected:
  /// virtual clone using covariant return type
  virtual vector_type* virtual_clone() const;

  /// virtual clone using covariant return type excluding values
  virtual vector_type* virtual_clone_without_values() const;

public:
  /**
   * makes sure the device copy is valid.
   * Allocates a new device buffer if none exists yet or if its size does not
   * match size(); otherwise copies the host values into the existing buffer.
   */
  void assure_on_gpu()
  {
    if(on_gpu()) return;
    if(m_devValues == NULL || m_sizeOnGPU != size())
    {
      if(m_devValues != NULL)
        cudaFree(m_devValues);
      // reset before re-allocating so the members never describe a freed buffer
      m_devValues = NULL;
      m_sizeOnGPU = 0;
      m_devValues = CudaCreateAndCopyToDevice(*this);
      m_sizeOnGPU = size();
    }
    else
      CudaCpyToDevice(m_devValues, *this);
    m_GPUState = m_GPUState | ON_GPU;
  }

  /// const overload: the transfer does not change the logical value of the vector
  void assure_on_gpu() const
  {
    const_cast<GPUVector<value_type> *>(this)->assure_on_gpu();
  }

  /// makes sure the host copy is valid, copying from the device if necessary
  void assure_on_cpu()
  {
    if(on_cpu()) return;
    // do this before so CudaCpyToHost can access [0] as dest without
    // calling assure_on_cpu again.
    m_GPUState = m_GPUState | ON_CPU;
    CudaCpyToHost(*this, m_devValues);
  }

  /// const overload: the transfer does not change the logical value of the vector
  void assure_on_cpu() const
  {
    const_cast<GPUVector<value_type> *>(this)->assure_on_cpu();
  }

  /// \return true if the host copy is valid
  bool on_cpu() const
  {
    return (m_GPUState & ON_CPU) != 0;
  }

  /// \return true if the device copy is valid
  bool on_gpu() const
  {
    return (m_GPUState & ON_GPU) != 0;
  }

private:
  /// bit flags stored in m_GPUState
  enum GPU_STATE
  {
    ON_GPU = 1,
    ON_CPU = 2,
    ON_GPU_AND_CPU = 3
  };

  /// combination of GPU_STATE flags: which copies are currently valid
  int m_GPUState;

  /// syncs v to the host and returns it as its base class (used by the copy constructor)
  static const super &host_synced(const GPUVector<value_type> &v)
  {
    v.assure_on_cpu();
    return v;
  }

public:
  /// \return writable device pointer; the host copy is marked invalid
  double *get_dev_ptr()
  {
    assure_on_gpu();
    m_GPUState = ON_GPU; // not valid on CPU anymore
    return m_devValues;
  }

  /// \return read-only device pointer; the host copy stays valid
  const double *get_dev_ptr() const
  {
    assure_on_gpu();
    return m_devValues;
  }

public:
  /**
   * assigns v to this vector on the device.
   * The vector is resized to v.size() if necessary. The values are copied with
   * a device-to-device memcpy rather than computed as 0*this + 1*v, so NaN/Inf
   * in the old or uninitialized destination cannot leak into the result.
   */
  inline void operator = (const GPUVector<value_type> &v)
  {
    if(this == &v) return;
    if(size() != v.size())
      resize(v.size(), false);

    const double *src = v.get_dev_ptr();
    double *dst = get_dev_ptr();
    if(cudaMemcpy(dst, src, size()*sizeof(double), cudaMemcpyDeviceToDevice) != cudaSuccess)
      UG_LOG("GPUVector::operator=: cudaMemcpy (device to device) failed\n");
  }

  /// this += v, computed on the device
  inline void operator += (const GPUVector<value_type> &v)
  {
    assert(size() == v.size());
    CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), 1.0, v.get_dev_ptr());
  }

  /// this -= v, computed on the device
  inline void operator -= (const GPUVector<value_type> &v)
  {
    assert(size() == v.size());
    CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), -1.0, v.get_dev_ptr());
  }

  /// this += alpha*v, computed on the device
  inline void add(double alpha, const GPUVector<value_type> &v)
  {
    assert(size() == v.size());
    CUDA_VecAdd2(size(), 1.0, get_dev_ptr(), alpha, v.get_dev_ptr());
  }

  /// this *= a, computed on the device (as 0*this + a*this)
  inline void operator *= (const number &a)
  {
    CUDA_VecAdd2(size(), 0.0, get_dev_ptr(), a, get_dev_ptr());
  }

  /// return sqrt(sum values[i]^2) (Euclidean norm), computed with cublasDnrm2
  inline double norm() const
  {
    double res=0;
    cublasDnrm2(CUDAHelper::get_cublasHandle(), size(), get_dev_ptr(), 1, &res);
    return res;
  }

  /// \return dot product of this vector and w, computed with cublasDdot
  double dotprod(const GPUVector<value_type> &w) const
  {
    assert(size() == w.size());
    double res=0;
    cublasDdot(CUDAHelper::get_cublasHandle(), size(), get_dev_ptr(), 1, w.get_dev_ptr(), 1, &res);
    // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement
    cudaDeviceSynchronize();
    return res;
  }

private:
  /// device buffer, NULL while nothing has been allocated
  double *m_devValues;
  /// number of entries m_devValues was allocated for
  size_t m_sizeOnGPU;
};


/// virtual clone using covariant return type: returns a heap-allocated copy of this vector
template<typename value_type>
GPUVector<value_type>* GPUVector<value_type>::virtual_clone() const
{
  typedef GPUVector<value_type> this_type;
  // copy-construct from *this
  this_type *pCopy = new this_type(*this);
  return pCopy;
}


/// clones the vector including values and hands the result to a SmartPtr
template<typename value_type>
SmartPtr<GPUVector<value_type> > GPUVector<value_type>::clone() const
{
  GPUVector<value_type> *pRaw = this->virtual_clone();
  return SmartPtr<GPUVector<value_type> >(pRaw);
}


/// virtual clone excluding values: returns a heap-allocated vector of the same size
template<typename value_type>
GPUVector<value_type>* GPUVector<value_type>::virtual_clone_without_values() const
{
  typedef GPUVector<value_type> this_type;
  // only the length is taken over, not the entries
  this_type *pEmpty = new this_type(this->size());
  return pEmpty;
}


/// clones the vector excluding values and hands the result to a SmartPtr
template<typename value_type>
SmartPtr<GPUVector<value_type> > GPUVector<value_type>::clone_without_values() const
{
  GPUVector<value_type> *pRaw = this->virtual_clone_without_values();
  return SmartPtr<GPUVector<value_type> >(pRaw);
}


// templated


// operations for vectors

//-----------------------------------------------------------------------------

// these functions execute vector operations by using the operations on the elements of the vector


// todo: change vector_t to TE_VEC<vector_t>


// VecScale: These functions calculate dest = sum_i alpha_i v_i


/// calculates dest = alpha1*v1 entry by entry through the host-side operator[]
template<typename T>
inline void VecScaleAssign(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1)
{
  UG_LOG("VecScaleAssign\n");

  size_t idx = 0;
  while(idx < dest.size())
  {
    // forwards to the VecScaleAssign overload for the entry type
    VecScaleAssign(dest[idx], alpha1, v1[idx]);
    ++idx;
  }
}


/// sets dest = v1 entrywise through the host-side operator[]
template<typename T>
inline void VecAssign(GPUVector<T> &dest, const GPUVector<T> &v1)
{
  UG_LOG("VecAssign\n");

  size_t idx = 0;
  while(idx < dest.size())
  {
    dest[idx] = v1[idx];
    ++idx;
  }
}


/// calculates dest = alpha1*v1 + alpha2*v2 on the device via CUDA_VecAdd_2
template<typename T>
inline void VecScaleAdd(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1, double alpha2, const GPUVector<T> &v2)
{
  // read-only device pointers of the operands
  const double *pV1 = v1.get_dev_ptr();
  const double *pV2 = v2.get_dev_ptr();
  // writable device pointer of the result (marks dest's host copy as stale)
  double *pDest = dest.get_dev_ptr();

  CUDA_VecAdd_2(pDest, alpha1, pV1, alpha2, pV2, dest.size());
}


/// calculates dest = alpha1*v1 + alpha2*v2 + alpha3*v3 on the device via CUDA_VecAdd_3
template<typename T>
inline void VecScaleAdd(GPUVector<T> &dest, double alpha1, const GPUVector<T> &v1, double alpha2, const GPUVector<T> &v2, double alpha3, const GPUVector<T> &v3)
{
  // read-only device pointers of the operands
  const double *pV1 = v1.get_dev_ptr();
  const double *pV2 = v2.get_dev_ptr();
  const double *pV3 = v3.get_dev_ptr();
  // writable device pointer of the result (marks dest's host copy as stale)
  double *pDest = dest.get_dev_ptr();

  CUDA_VecAdd_3(pDest, alpha1, pV1, alpha2, pV2, alpha3, pV3, dest.size());
}


// VecProd


template<typename T>


inline void VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2, double &res)

{

//  UG_LOG("VecProd\n");

  assert(v1.size() == v2.size());

  cublasDdot(CUDAHelper::get_cublasHandle(), v1.size(), v1.get_dev_ptr(), 1, v2.get_dev_ptr(), 1, &res);

  cudaThreadSynchronize();

}

inline void VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2, double &res) {…}


/// returns the dot product of v1 and v2; forwards to the out-parameter overload of VecProd
template<typename T>
inline double VecProd(const GPUVector<T> &v1, const GPUVector<T> &v2)
{
//  UG_LOG("VecProd\n");
  double dot = 0;
  VecProd(v1, v2, dot);
  return dot;
}


/**
 * calculates sum += norm_2^2(a) entry by entry through the host-side operator[].
 * \param a    vector whose squared norm is accumulated
 * \param b    not used by this function; kept for interface compatibility
 * \param sum  accumulator the squared norm is added to
 */
template<typename T>
inline void VecNormSquaredAdd(const GPUVector<T> &a, const GPUVector<T> &b, double &sum)
{
  UG_LOG("VecNormSA\n");
  // size_t index: matches the type of size(), no signed/unsigned comparison
  for(size_t i=0; i<a.size(); i++)
    VecNormSquaredAdd(a[i], sum);
}


template<typename T>


inline double VecNormSquared(const GPUVector<T> &a, const GPUVector<T> &b)

{

  UG_LOG("VecNormS\n");

  double sum=0;

  VecNormSquaredAdd(a, sum);

  return sum;

}

inline double VecNormSquared(const GPUVector<T> &a, const GPUVector<T> &b) {…}


// end group crs_algebra


} // namespace ug


#endif

SmartPtr
Definition smart_pointer.h:108

ug::GPUVector
Definition gpuvector.h:51

ug::GPUVector::assure_on_gpu
void assure_on_gpu() const
Definition gpuvector.h:124

ug::GPUVector::value_type
TValueType value_type
Definition gpuvector.h:54

ug::GPUVector::on_gpu
bool on_gpu()
Definition gpuvector.h:148

ug::GPUVector::GPUVector
GPUVector(size_t _length)
constructor with length
Definition gpuvector.h:67

ug::GPUVector::operator-=
void operator-=(const GPUVector< value_type > &v)
Definition gpuvector.h:184

ug::GPUVector::m_sizeOnGPU
size_t m_sizeOnGPU
Definition gpuvector.h:218

ug::GPUVector::get_dev_ptr
double * get_dev_ptr()
Definition gpuvector.h:163

ug::GPUVector::m_devValues
double * m_devValues
Definition gpuvector.h:217

ug::GPUVector::assure_on_cpu
void assure_on_cpu()
Definition gpuvector.h:129

ug::GPUVector::on_cpu
bool on_cpu()
Definition gpuvector.h:143

ug::GPUVector::vector_type
GPUVector< TValueType > vector_type
Definition gpuvector.h:55

ug::GPUVector::m_GPUState
int m_GPUState
Definition gpuvector.h:160

ug::GPUVector::get_dev_ptr
const double * get_dev_ptr() const
Definition gpuvector.h:169

ug::GPUVector::reserve
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition gpuvector.h:82

ug::GPUVector::assure_on_cpu
void assure_on_cpu() const
Definition gpuvector.h:138

ug::GPUVector::operator+=
void operator+=(const GPUVector< value_type > &v)
Definition gpuvector.h:180

ug::GPUVector::dotprod
double dotprod(const GPUVector< value_type > &w) const
Definition gpuvector.h:207

ug::GPUVector::resize
void resize(size_t newSize, bool bCopyValues=true)
Definition gpuvector.h:75

ug::GPUVector::norm
double norm() const
return sqrt(sum values[i]^2) (Euclidean norm)
Definition gpuvector.h:200

ug::GPUVector::GPUVector
GPUVector()
constructor
Definition gpuvector.h:64

ug::GPUVector::add
void add(double alpha, const GPUVector< value_type > &v)
Definition gpuvector.h:189

ug::GPUVector::operator*=
void operator*=(const number &a)
Definition gpuvector.h:194

ug::GPUVector::operator=
void operator=(const GPUVector< value_type > &v)
Definition gpuvector.h:176

ug::GPUVector::assure_on_gpu
void assure_on_gpu()
Definition gpuvector.h:111

ug::GPUVector::size
size_t size() const
Definition vector.h:181

ug::GPUVector::super
Vector< TValueType > super
Definition gpuvector.h:57

ug::GPUVector::GPU_STATE
GPU_STATE
Definition gpuvector.h:155

ug::GPUVector::ON_GPU
@ ON_GPU
Definition gpuvector.h:156

ug::GPUVector::ON_GPU_AND_CPU
@ ON_GPU_AND_CPU
Definition gpuvector.h:158

ug::GPUVector::ON_CPU
@ ON_CPU
Definition gpuvector.h:157

ug::GPUVector::operator[]
value_type & operator[](size_t i)
access element i of the vector
Definition gpuvector.h:90

ug::Vector
Definition vector.h:55

ug::Vector::operator[]
value_type & operator[](size_t i)
access element i of the vector
Definition vector_impl.h:47

ug::Vector::resize
void resize(size_t newSize, bool bCopyValues=true)
Definition vector.h:109

ug::Vector::size
size_t size() const
Definition vector.h:181

ug::Vector::reserve
void reserve(size_t newCapacity, bool bCopyValues=true)
Definition vector.h:113

common_cuda.h

CUDA_VecAdd_2
bool CUDA_VecAdd_2(FPTYPE *dest, FPTYPE alpha1, const FPTYPE *v1, FPTYPE alpha2, const FPTYPE *v2, const int N)

CUDA_VecAdd2
bool CUDA_VecAdd2(const int len, FPTYPE alpha, FPTYPE *x, FPTYPE beta, const FPTYPE *y)

CUDA_VecAdd_3
bool CUDA_VecAdd_3(FPTYPE *dest, FPTYPE alpha1, const FPTYPE *v1, FPTYPE alpha2, const FPTYPE *v2, FPTYPE alpha3, const FPTYPE *v3, const int N)

cuda_manager.h

ug::GPUVector::clone
SmartPtr< vector_type > clone() const
clones the vector (deep-copy) including values
Definition gpuvector.h:228

ug::GPUVector::clone_without_values
SmartPtr< vector_type > clone_without_values() const
clones the vector (deep-copy) excluding values
Definition gpuvector.h:240

ug::GPUVector::virtual_clone_without_values
virtual vector_type * virtual_clone_without_values() const
virtual clone using covariant return type excluding values
Definition gpuvector.h:234

ug::GPUVector::virtual_clone
virtual vector_type * virtual_clone() const
virtual clone using covariant return type
Definition gpuvector.h:222

UG_LOG
#define UG_LOG(msg)
Definition log.h:367

number
double number
Definition types.h:124

ug::VecScaleAdd
void VecScaleAdd(vector_t &vOut, typename vector_t::value_type s1, const vector_t &v1, typename vector_t::value_type s2, const vector_t &v2)
Scales two Vectors, adds them and returns the sum in a third vector.
Definition math_vector_functions_common_impl.hpp:265

ug
the ug namespace

ug::VecAssign
void VecAssign(vector_t &dest, const vector_t &v1)
sets dest = v1 entrywise
Definition operations_vec.h:154

ug::CudaCreateAndCopyToDevice
T::value_type * CudaCreateAndCopyToDevice(T &vec)
Definition cuda_manager.h:163

ug::VecProd
double VecProd(const double &a, const double &b)
returns scal<a, b>
Definition operations_vec.h:84

ug::CudaCpyToHost
void CudaCpyToHost(T &dest, typename T::value_type *src)
Definition cuda_manager.h:153

ug::CudaCpyToDevice
void CudaCpyToDevice(typename T::value_type *dest, T &vec)
Definition cuda_manager.h:144

ug::VecNormSquaredAdd
void VecNormSquaredAdd(const double &a, double &s)
calculates s += norm_2^2(a)
Definition operations_vec.h:106

ug::VecNormSquared
double VecNormSquared(const double &a)
returns norm_2^2(a)
Definition operations_vec.h:100

ug::VecScaleAssign
void VecScaleAssign(double &dest, double alpha1, const double &v1)
calculates dest = alpha1*v1. for doubles
Definition operations_vec.h:49