ug4
gpusparsematrix.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015: G-CSC, Goethe University Frankfurt
3  * Author: Martin Rupp
4  *
5  * This file is part of UG4.
6  *
7  * UG4 is free software: you can redistribute it and/or modify it under the
8  * terms of the GNU Lesser General Public License version 3 (as published by the
9  * Free Software Foundation) with the following additional attribution
10  * requirements (according to LGPL/GPL v3 §7):
11  *
12  * (1) The following notice must be displayed in the Appropriate Legal Notices
13  * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14  *
15  * (2) The following notice must be displayed at a prominent place in the
16  * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17  *
18  * (3) The following bibliography is recommended for citation and must be
19  * preserved in all covered files:
20  * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21  * parallel geometric multigrid solver on hierarchically distributed grids.
22  * Computing and visualization in science 16, 4 (2013), 151-164"
23  * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24  * flexible software system for simulating pde based models on high performance
25  * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26  *
27  * This program is distributed in the hope that it will be useful,
28  * but WITHOUT ANY WARRANTY; without even the implied warranty of
29  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30  * GNU Lesser General Public License for more details.
31  */
32 
33 #ifndef __H__UG__CPU_ALGEBRA__GPUSparseMatrix__
34 #define __H__UG__CPU_ALGEBRA__GPUSparseMatrix__
35 
36 
37 
38 #include "math.h"
39 #include "common/common.h"
40 #include "../algebra_common/sparsematrix_util.h"
41 #include <iostream>
42 #include <algorithm>
44 
45 #include "../algebra_common/connection.h"
46 #include "../algebra_common/matrixrow.h"
47 #include "../common/operations_mat/operations_mat.h"
48 
49 #include "cuda/cuda_manager.h"
50 #include "common/debug_print.h"
51 
52 #define PROFILE_GPUMATRIX(name) PROFILE_BEGIN_GROUP(name, "GPUSparseMatrix algebra")
53 
54 
55 namespace ug{
56 
59 
60 
61 // example for the variable CRS storage structure:
62 // say we have:
63 // rowStart = 0 3 8
64 // rowEnd = 3 6 11
65 // rowMax = 3 8 11
66 // cols ( | marking end of row): 2 5 6 | 2 6 7 x x| 8 9 10
67 
68 // now insert (0 3): row 0 is full (rowEnd[0]==rowMax[0]), copy it to the end, and insert index
69 // rowStart = 11 3 8
70 // rowEnd = 15 6 11
71 // rowMax = 17 8 11
72 // cols ( | marking end of row): x x x | 2 6 7 x x| 8 9 10 | 2 3 5 6 x x |
73 
74 // now insert (1 3): row 1 not full, we can add it
75 // rowStart 11 3 8
76 // rowEnd 15 7 11
77 // rowMax = 17 8 11
78 // cols : x x x | 2 3 6 7 x | 8 9 10 | 2 3 5 6 x x |
79 
80 // defragment:
81 // rowStart 0 4 8
82 // rowEnd 4 8 11
83 // rowMax = 4 8 11
84 // cols : 2 3 5 6 | 2 3 6 7 | 8 9 10
85 
86 
100 template<typename TValueType>
102 {
103 public:
104  typedef TValueType value_type;
105  enum {rows_sorted=true};
106 
108 
109 public:
113 
114 public:
115  // construction etc
116  //----------------------
117 
119  GPUSparseMatrix();
121  virtual ~GPUSparseMatrix ()
122  {
123  freeGPU();
124  }
125 
126 
133  bool resize_and_clear(size_t newRows, size_t newCols);
134  bool resize_and_keep_values(size_t newRows, size_t newCols);
135 
142  bool set_as_transpose_of(const GPUSparseMatrix<value_type> &B, double scale=1.0);
143 
150  bool set_as_copy_of(const GPUSparseMatrix<value_type> &B, double scale=1.0);
152  {
153  set_as_copy_of(B);
154  return *this;
155  }
156 
157 
158 public:
160  template<typename vector_t>
161  bool axpy(vector_t &dest,
162  const number &alpha1, const vector_t &v1,
163  const number &beta1, const vector_t &w1) const;
164 
166  template<typename vector_t>
167  void axpy(double alpha, vector_t &x, double beta, const vector_t &y) const;
168 
169 
171  template<typename vector_t>
172  bool axpy_transposed(vector_t &dest,
173  const number &alpha1, const vector_t &v1,
174  const number &beta1, const vector_t &w1) const;
175 
177  template<typename vector_t>
178  void apply_ignore_zero_rows(vector_t &dest,
179  const number &beta1, const vector_t &w1) const { assert(0); }
180 
182  template<typename vector_t>
184  const number &beta1, const vector_t &w1) const { assert(0); }
185 
186 
187 
188  // DEPRECATED!
190  // apply is deprecated because of axpy(res, 0.0, res, 1.0, beta, w1)
191  template<typename Vector_type>
192  bool apply(Vector_type &res, const Vector_type &x) const
193  {
194  return axpy(res, 0.0, res, 1.0, x);
195  }
196 
198  // apply is deprecated because of axpy(res, 0.0, res, 1.0, beta, w1)
199  template<typename Vector_type>
200  bool apply_transposed(Vector_type &res, const Vector_type &x) const
201  {
202  return axpy_transposed(res, 0.0, res, 1.0, x);
203  }
204 
205  // matmult_minus is deprecated because of axpy(res, 1.0, res, -1.0, x);
207  template<typename Vector_type>
208  bool matmul_minus(Vector_type &res, const Vector_type &x) const
209  {
210  return axpy(res, 1.0, res, -1.0, x);
211  }
212 
213 
214 
220  inline bool is_isolated(size_t i) const;
221 
222  bool scale(double d);
223  GPUSparseMatrix<value_type> &operator *= (double d) { scale(d); return *this; }
224 
225  // submatrix set/get functions
226  //-------------------------------
227 
239  template<typename M>
240  void add(const M &mat);
241  template<typename M>
243  void set(const M &mat);
245  template<typename M>
246  void get(M &mat) const;
247 
248  // finalizing functions
249  //----------------------
250 
251 
252 
253  inline void check_rc(size_t r, size_t c) const
254  {
255  UG_ASSERT(r < num_rows() && c < num_cols(), "tried to access element (" << r << ", " << c << ") of " << num_rows() << " x " << num_cols() << " matrix.");
256  }
257 
259  bool set(double a);
260 
268  const value_type &operator () (size_t r, size_t c) const
269  {
270  check_rc(r, c);
271  int j=get_index_const(r, c);
272  if(j == -1)
273  {
274  static value_type v(0.0);
275  return v;
276  }
277  UG_ASSERT(cols[j]==(int)c && j >= rowStart[r] && j < rowEnd[r], "");
278  return values[j];
279  }
280 
289  value_type &operator() (size_t r, size_t c)
290  {
291  check_rc(r, c);
292  int j=get_index(r, c);
293  UG_ASSERT(j != -1 && cols[j]==(int)c && j >= rowStart[r] && j < rowEnd[r], "");
294  return values[j];
295  }
296 
297 public:
298  // row functions
299 
307  void set_matrix_row(size_t row, connection *c, size_t nr);
308 
320  void add_matrix_row(size_t row, connection *c, size_t nr);
321 
323  inline size_t num_connections(size_t i) const
324  {
325  if(rowStart[i] == -1) return 0;
326  else return rowEnd[i]-rowStart[i];
327  }
328 
330  template<typename vector_t>
331  inline void mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const;
332 public:
333  // accessor functions
334  //----------------------
335 
337  size_t num_rows() const { return rowEnd.size(); }
338 
340  size_t num_cols() const { return m_numCols; }
341 
343  size_t total_num_connections() const { return nnz; }
344 
345 public:
346 
347  // Iterators
348  //---------------------------
349 
350  // const_row_iterator
351 
352 
353  //typedef const connection * const_row_iterator;
354  //typedef connection * const_row_iterator;
360  void add_iterator() const
361  {
362  iIterators++;
363  }
364  void remove_iterator() const
365  {
366  iIterators--;
367  }
368  // a row_iterator has to support
369  // operator ++, operator +=, index() const, const value_type &value() const, value_type &value()
370  // a const_row_iterator has to support
371  // operator ++, operator +=, index() const, const value_type &value() const
372 
373  inline void check_row(size_t row, int i) const
374  {
375  UG_ASSERT(i < rowEnd[row] && i >= rowStart[row], "row iterator row " << row << " pos " << i << " out of bounds [" << rowStart[row] << ", " << rowEnd[row] << "]");
376  }
377 
383  {
385  size_t row;
386  size_t i;
387  public:
388  inline void check() const {A.check_row(row, i); }
389  row_iterator(GPUSparseMatrix &_A, size_t _row, size_t _i) : A(_A), row(_row), i(_i) { A.add_iterator(); }
391  row_iterator *operator ->() { return this; }
392  value_type &value() { check(); return A.values[i]; }
393  size_t index() const { check(); return A.cols[i]; }
394  bool operator != (const row_iterator &o) const { return i != o.i; }
395  void operator ++ () { ++i; }
396  void operator += (int nr) { i+=nr; }
397  bool operator == (const row_iterator &other) const { return other.i == i; check(); }
398  };
400  {
402  size_t row;
403  size_t i;
404  public:
405  inline void check() const {A.check_row(row, i); }
406  const_row_iterator(const GPUSparseMatrix &_A, size_t _row, size_t _i) : A(_A), row(_row), i(_i) {A.add_iterator();}
408  const_row_iterator *operator ->() { return this; }
409  const value_type &value() const { check(); return A.values[i]; }
410  size_t index() const { check(); return A.cols[i]; }
411  bool operator != (const const_row_iterator &o) const { return i != o.i; }
412  void operator ++ () { ++i; }
413  void operator += (int nr) { i+=nr; }
414  bool operator == (const const_row_iterator &other) const { return other.i == i; }
415  };
416 
417 
418 
419 
420  row_iterator begin_row(size_t r) { return row_iterator(*this, r, rowStart[r]); }
421  row_iterator end_row(size_t r) { return row_iterator(*this, r, rowEnd[r]); }
422  const_row_iterator begin_row(size_t r) const { return const_row_iterator(*this, r, rowStart[r]); }
423  const_row_iterator end_row(size_t r) const { return const_row_iterator(*this, r, rowEnd[r]); }
424 
425  row_type get_row(size_t r) { return row_type(*this, r); }
426  const_row_type get_row(size_t r) const { return const_row_type(*this, r); }
427 
428 public:
429  // connectivity functions
430  //-------------------------
431 
432  bool has_connection(size_t r, size_t c) const
433  {
434  check_rc(r, c);
435  bool bFound;
436  get_connection(r, c, bFound);
437  return bFound;
438  }
439 
445  row_iterator get_iterator_or_next(size_t r, size_t c)
446  {
447  check_rc(r, c);
448  if(rowStart[r] == -1 || rowStart[r] == rowEnd[r])
449  return end_row(r);
450  else
451  {
452  int j=get_index_internal(r, c);
453  if(j > maxValues) return end_row(r);
454  else return row_iterator(*this, r, j);
455  }
456  }
457 
463  const_row_iterator get_connection(size_t r, size_t c, bool &bFound) const
464  {
465  check_rc(r, c);
466  int j=get_index_const(r, c);
467  if(j != -1)
468  {
469  bFound = true;
470  return const_row_iterator(*this, r, j);
471  }
472  else
473  {
474  bFound = false;
475  return end_row(r);
476  }
477  }
483  row_iterator get_connection(size_t r, size_t c, bool &bFound)
484  {
485  check_rc(r, c);
486  int j=get_index_const(r, c);
487  if(j != -1)
488  {
489  bFound = true;
490  return row_iterator(*this, r, j);
491  }
492  else
493  {
494  bFound = false;
495  return end_row(r);
496  }
497  }
498 
504  const_row_iterator get_connection(size_t r, size_t c) const
505  {
506  bool b;
507  return get_connection(r, c, b);
508  }
515  row_iterator get_connection(size_t r, size_t c)
516  {
517  check_rc(r, c);
518  assert(bNeedsValues);
519  int j=get_index(r, c);
520  return row_iterator(*this, r, j);
521  }
522 
523 
524  void defragment()
525  {
526  if(num_rows() != 0 && num_cols() != 0)
528  }
529 
530 public:
531  // output functions
532  //----------------------
533 
534  void print(const char * const name = NULL) const;
535  void printtype() const;
536 
537  void print_to_file(const char *filename) const;
538  void printrow(size_t row) const;
539 
540  friend std::ostream &operator<<(std::ostream &out, const GPUSparseMatrix &m)
541  {
542  out << "GPUSparseMatrix " //<< m.name
543  << " [ " << m.num_rows() << " x " << m.num_cols() << " ]";
544  return out;
545  }
546 
547 
548  void p() const { print(); } // for use in gdb
549  void pr(size_t row) const {printrow(row); } // for use in gdb
550 
551 private:
552  // disallowed operations (not defined):
553  //---------------------------------------
555 
556 
557  void assureValuesSize(size_t s);
558  size_t get_nnz() const { return nnz; }
559 
560 protected:
561  int get_index_internal(size_t row, int col) const;
562  int get_index_const(int r, int c) const;
563  int get_index(int r, int c);
564  void copyToNewSize(size_t newSize)
565  {
566  copyToNewSize(newSize, num_cols());
567  }
568  void copyToNewSize(size_t newSize, size_t maxCols);
569  void check_fragmentation() const;
570  int get_nnz_max_cols(size_t maxCols);
571 
572 
573 protected:
574  std::vector<int> rowStart;
575  std::vector<int> rowEnd;
576  std::vector<int> rowMax;
577  std::vector<int> cols;
578  size_t fragmented;
579  size_t nnz;
581 
582  std::vector<value_type> values;
585  mutable int iIterators;
586 
587 
588 
589 
590 
591 
592 
594 public:
595  void initGPU()
596  {
597  d_cols = d_rowStart = NULL;
598  d_values = NULL;
599  descr = 0;
600  bOnDevice = false;
601  }
602  void freeGPU()
603  {
604  cudaFree(d_cols);
605  cudaFree(d_rowStart);
606  cudaFree(d_values);
607  }
608 
609  const int *get_device_cols() const { check_device(); return d_cols; }
610  const int *get_device_rowStart() const { check_device(); return d_rowStart; }
611  const double *get_device_value_ptr() const { check_device(); return d_values; }
612  cusparseMatDescr_t get_matrix_descr() const { check_device(); return descr; }
613 
615  {
617  descr = 0;
618  cusparseStatus_t cusparseStatus = cusparseCreateMatDescr(&descr);
619 
620  if (checkCudaErrors(cusparseStatus))
621  {
622  exit(EXIT_FAILURE);
623  }
624 
625  cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
626  cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
627 
628  defragment();
629  d_values = &values[0];
630 
631  assert(cols.size() == values.size() && cols.size() == nnz);
632 
633  UG_LOG("cols.size = " << cols.size()*sizeof(int) << " values.size() == " << values.size()*sizeof(value_type) << " rowStart.size = " << rowStart.size()*sizeof(int) << "\n");
634 
635  UG_LOG("gpusparsematrix.h:"<<__LINE__ << "\n")
637  UG_LOG("gpusparsematrix.h:"<<__LINE__ << "\n")
639  UG_LOG("gpusparsematrix.h:"<<__LINE__ << "\n")
640 // PrintVector(values, "GPUVector::values");
642  }
643 
644 
645  void check_device() const
646  {
647  if(bOnDevice==true) return;
649  c->bOnDevice=true;
650  c->copy_to_device();
651  }
652 private:
653  //using CRSSparseMatrix::nnz;
654 
656  double *d_values;
657  cusparseMatDescr_t descr;
658 
659  bool bOnDevice;
660 };
661 
662 
663 template<typename T>
665 {
666  enum{
668  };
669 };
670 
672 template<typename vector_t, typename matrix_t>
673 inline void MatMultTransposedAdd(vector_t &dest,
674  const number &alpha1, const vector_t &v1,
675  const number &beta1, const GPUSparseMatrix<matrix_t> &A1, const vector_t &w1)
676 {
677  A1.axpy_transposed(dest, alpha1, v1, beta1, w1);
678 }
679 
680 // end group cpu_algebra
682 
683 } // namespace ug
684 
685 //#include "matrixrow.h"
686 #include "gpusparsematrix_impl.h"
687 #include "gpusparsematrix_print.h"
688 
689 #endif
location name
Definition: checkpoint_util.lua:128
Definition: connection.h:40
static CUDAManager & get_instance()
Definition: cuda_manager.cpp:153
Definition: matrixrow.h:118
Definition: gpusparsematrix.h:400
const_row_iterator(const GPUSparseMatrix &_A, size_t _row, size_t _i)
Definition: gpusparsematrix.h:406
size_t row
Definition: gpusparsematrix.h:402
void check() const
Definition: gpusparsematrix.h:405
const GPUSparseMatrix & A
Definition: gpusparsematrix.h:401
bool operator!=(const const_row_iterator &o) const
Definition: gpusparsematrix.h:411
bool operator==(const const_row_iterator &other) const
Definition: gpusparsematrix.h:414
size_t i
Definition: gpusparsematrix.h:403
void operator++()
Definition: gpusparsematrix.h:412
~const_row_iterator()
Definition: gpusparsematrix.h:407
const value_type & value() const
Definition: gpusparsematrix.h:409
size_t index() const
Definition: gpusparsematrix.h:410
void operator+=(int nr)
Definition: gpusparsematrix.h:413
const_row_iterator * operator->()
Definition: gpusparsematrix.h:408
Definition: gpusparsematrix.h:383
~row_iterator()
Definition: gpusparsematrix.h:390
void operator+=(int nr)
Definition: gpusparsematrix.h:396
size_t i
Definition: gpusparsematrix.h:386
GPUSparseMatrix & A
Definition: gpusparsematrix.h:384
void operator++()
Definition: gpusparsematrix.h:395
size_t row
Definition: gpusparsematrix.h:385
row_iterator(GPUSparseMatrix &_A, size_t _row, size_t _i)
Definition: gpusparsematrix.h:389
bool operator==(const row_iterator &other) const
Definition: gpusparsematrix.h:397
size_t index() const
Definition: gpusparsematrix.h:393
void check() const
Definition: gpusparsematrix.h:388
row_iterator * operator->()
Definition: gpusparsematrix.h:391
value_type & value()
Definition: gpusparsematrix.h:392
bool operator!=(const row_iterator &o) const
Definition: gpusparsematrix.h:394
sparse matrix for big, variable sparse matrices.
Definition: gpusparsematrix.h:102
row_iterator get_iterator_or_next(size_t r, size_t c)
Definition: gpusparsematrix.h:445
void check_device() const
Definition: gpusparsematrix.h:645
const value_type & operator()(size_t r, size_t c) const
Definition: gpusparsematrix.h:268
const_row_iterator begin_row(size_t r) const
Definition: gpusparsematrix.h:422
GPUSparseMatrix< value_type > & operator=(const GPUSparseMatrix< value_type > &B)
Definition: gpusparsematrix.h:151
bool is_isolated(size_t i) const
check for isolated condition of an index
Definition: gpusparsematrix_impl.h:218
void check_rc(size_t r, size_t c) const
Definition: gpusparsematrix.h:253
bool resize_and_keep_values(size_t newRows, size_t newCols)
Definition: gpusparsematrix_impl.h:84
int * d_rowStart
Definition: gpusparsematrix.h:655
bool matmul_minus(Vector_type &res, const Vector_type &x) const
calculate res -= A x
Definition: gpusparsematrix.h:208
void remove_iterator() const
Definition: gpusparsematrix.h:364
row_iterator end_row(size_t r)
Definition: gpusparsematrix.h:421
cusparseMatDescr_t get_matrix_descr() const
Definition: gpusparsematrix.h:612
size_t get_nnz() const
Definition: gpusparsematrix.h:558
double * d_values
Definition: gpusparsematrix.h:656
void set(const M &mat)
set local matrix
Definition: gpusparsematrix_impl.h:320
bool set_as_transpose_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
write in a empty GPUSparseMatrix (this) the transpose GPUSparseMatrix of B.
Definition: gpusparsematrix_impl.h:113
std::vector< int > rowStart
Definition: gpusparsematrix.h:574
void defragment()
Definition: gpusparsematrix.h:524
std::vector< int > rowMax
Definition: gpusparsematrix.h:576
const int * get_device_cols() const
Definition: gpusparsematrix.h:609
ConstMatrixRow< this_type > const_row_type
Definition: gpusparsematrix.h:112
void add_matrix_row(size_t row, connection *c, size_t nr)
Definition: gpusparsematrix_impl.h:264
row_iterator begin_row(size_t r)
Definition: gpusparsematrix.h:420
std::vector< int > cols
Definition: gpusparsematrix.h:577
std::vector< value_type > values
Definition: gpusparsematrix.h:582
size_t num_rows() const
returns number of rows
Definition: gpusparsematrix.h:337
size_t num_connections(size_t i) const
returns number of connections of row row.
Definition: gpusparsematrix.h:323
GPUSparseMatrix< value_type > & operator*=(double d)
Definition: gpusparsematrix.h:223
void assureValuesSize(size_t s)
Definition: gpusparsematrix_impl.h:555
int get_nnz_max_cols(size_t maxCols)
Definition: gpusparsematrix_impl.h:565
TValueType value_type
Definition: gpusparsematrix.h:104
int get_index_internal(size_t row, int col) const
Definition: gpusparsematrix_impl.h:350
virtual ~GPUSparseMatrix()
destructor
Definition: gpusparsematrix.h:121
bool apply_transposed(Vector_type &res, const Vector_type &x) const
calculate res = A.T x
Definition: gpusparsematrix.h:200
void apply_transposed_ignore_zero_rows(vector_t &dest, const number &beta1, const vector_t &w1) const
calculated dest = beta1*A*w1 . For empty cols of A (=empty rows of A^T), dest will not be changed
Definition: gpusparsematrix.h:183
bool set_as_copy_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
create/recreate this as a copy of GPUSparseMatrix B
Definition: gpusparsematrix_impl.h:273
void set_matrix_row(size_t row, connection *c, size_t nr)
Definition: gpusparsematrix_impl.h:230
const_row_iterator end_row(size_t r) const
Definition: gpusparsematrix.h:423
MatrixRow< this_type > row_type
Definition: gpusparsematrix.h:111
row_type get_row(size_t r)
Definition: gpusparsematrix.h:425
void pr(size_t row) const
Definition: gpusparsematrix.h:549
GPUSparseMatrix< value_type > this_type
Definition: gpusparsematrix.h:107
bool axpy(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A*w1 (A = this matrix)
Definition: gpusparsematrix_impl.h:170
std::vector< int > rowEnd
Definition: gpusparsematrix.h:575
bool scale(double d)
Definition: gpusparsematrix_impl.h:286
void apply_ignore_zero_rows(vector_t &dest, const number &beta1, const vector_t &w1) const
calculated dest = beta1*A*w1 . For empty rows, dest will not be changed
Definition: gpusparsematrix.h:178
int get_index_const(int r, int c) const
Definition: gpusparsematrix_impl.h:381
const_row_iterator get_connection(size_t r, size_t c, bool &bFound) const
Definition: gpusparsematrix.h:463
cusparseMatDescr_t descr
Definition: gpusparsematrix.h:657
const_row_iterator get_connection(size_t r, size_t c) const
Definition: gpusparsematrix.h:504
bool bNeedsValues
Definition: gpusparsematrix.h:580
void freeGPU()
Definition: gpusparsematrix.h:602
GPUSparseMatrix(GPUSparseMatrix &)
disallow copy operator
int iIterators
Definition: gpusparsematrix.h:585
void check_fragmentation() const
Definition: gpusparsematrix_impl.h:548
void add(const M &mat)
Definition: gpusparsematrix_impl.h:304
void mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const
calculates dest += alpha * A[row, .] v;
Definition: gpusparsematrix_impl.h:149
bool has_connection(size_t r, size_t c) const
Definition: gpusparsematrix.h:432
size_t nnz
Definition: gpusparsematrix.h:579
void copyToNewSize(size_t newSize)
Definition: gpusparsematrix.h:564
void print_to_file(const char *filename) const
friend std::ostream & operator<<(std::ostream &out, const GPUSparseMatrix &m)
Definition: gpusparsematrix.h:540
void get(M &mat) const
get local matrix
Definition: gpusparsematrix_impl.h:335
size_t total_num_connections() const
returns the total number of connections
Definition: gpusparsematrix.h:343
@ rows_sorted
Definition: gpusparsematrix.h:105
size_t fragmented
Definition: gpusparsematrix.h:578
const int * get_device_rowStart() const
Definition: gpusparsematrix.h:610
int maxValues
Definition: gpusparsematrix.h:583
void add_iterator() const
Definition: gpusparsematrix.h:360
bool axpy_transposed(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A^T*w1 (A = this matrix)
Definition: gpusparsematrix_impl.h:190
void initGPU()
Definition: gpusparsematrix.h:595
row_iterator get_connection(size_t r, size_t c, bool &bFound)
Definition: gpusparsematrix.h:483
GPUSparseMatrix()
constructor for empty GPUSparseMatrix
Definition: gpusparsematrix_impl.h:52
row_iterator get_connection(size_t r, size_t c)
Definition: gpusparsematrix.h:515
bool apply(Vector_type &res, const Vector_type &x) const
calculate res = A x
Definition: gpusparsematrix.h:192
int * d_cols
Definition: gpusparsematrix.h:655
void p() const
Definition: gpusparsematrix.h:548
const_row_type get_row(size_t r) const
Definition: gpusparsematrix.h:426
int m_numCols
Definition: gpusparsematrix.h:584
void copy_to_device()
Definition: gpusparsematrix.h:614
void check_row(size_t row, int i) const
Definition: gpusparsematrix.h:373
bool resize_and_clear(size_t newRows, size_t newCols)
resizes the GPUSparseMatrix
Definition: gpusparsematrix_impl.h:66
AlgebraicConnection< TValueType > connection
Definition: gpusparsematrix.h:110
bool bOnDevice
Definition: gpusparsematrix.h:659
const double * get_device_value_ptr() const
Definition: gpusparsematrix.h:611
size_t num_cols() const
returns the number of cols
Definition: gpusparsematrix.h:340
int get_index(int r, int c)
Definition: gpusparsematrix_impl.h:393
Definition: matrixrow.h:65
void printtype() const
Definition: gpusparsematrix_print.h:72
void print(const char *const name=NULL) const
Definition: gpusparsematrix_print.h:47
void printrow(size_t row) const
Definition: gpusparsematrix_print.h:58
number alpha
#define UG_ASSERT(expr, msg)
Definition: assert.h:70
#define UG_LOG(msg)
Definition: log.h:367
double number
Definition: types.h:124
the ug namespace
bool MatMultTransposedAdd(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const matrix_t &A1, const vector_t &w1)
calculates dest = alpha1*v1 + beta1 * A1 *w1;
Definition: operations_mat.h:121
@ MATRIX_USE_ROW_FUNCTIONS
Definition: matrix_algebra_types.h:68
T::value_type * CudaCreateAndCopyToDevice(T &vec)
Definition: cuda_manager.h:163
T value_type
Definition: sparsematrix_interface.h:2
Definition: matrix_algebra_types.h:79
static const int type
Definition: matrix_algebra_types.h:80