ug4
Loading...
Searching...
No Matches
gpusparsematrix.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2013-2015: G-CSC, Goethe University Frankfurt
3 * Author: Martin Rupp
4 *
5 * This file is part of UG4.
6 *
7 * UG4 is free software: you can redistribute it and/or modify it under the
8 * terms of the GNU Lesser General Public License version 3 (as published by the
9 * Free Software Foundation) with the following additional attribution
10 * requirements (according to LGPL/GPL v3 §7):
11 *
12 * (1) The following notice must be displayed in the Appropriate Legal Notices
13 * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14 *
15 * (2) The following notice must be displayed at a prominent place in the
16 * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17 *
18 * (3) The following bibliography is recommended for citation and must be
19 * preserved in all covered files:
20 * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21 * parallel geometric multigrid solver on hierarchically distributed grids.
22 * Computing and visualization in science 16, 4 (2013), 151-164"
23 * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24 * flexible software system for simulating pde based models on high performance
25 * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU Lesser General Public License for more details.
31 */
32
33#ifndef __H__UG__CPU_ALGEBRA__GPUSparseMatrix__
34#define __H__UG__CPU_ALGEBRA__GPUSparseMatrix__
35
36
37
38#include "math.h"
39#include "common/common.h"
40#include "../algebra_common/sparsematrix_util.h"
41#include <iostream>
42#include <algorithm>
44
45#include "../algebra_common/connection.h"
46#include "../algebra_common/matrixrow.h"
47#include "../common/operations_mat/operations_mat.h"
48
49#include "cuda/cuda_manager.h"
50#include "common/debug_print.h"
51
52#define PROFILE_GPUMATRIX(name) PROFILE_BEGIN_GROUP(name, "GPUSparseMatrix algebra")
53
54
55namespace ug{
56
59
60
61// example for the variable CRS storage structure:
62// say we have:
63// rowStart = 0 3 8
64// rowEnd = 3 6 11
65// rowMax = 3 8 11
66// cols ( | marking end of row): 2 5 6 | 2 6 7 x x| 8 9 10
67
68// now insert (0 3): row 0 is full (rowEnd[0]==rowMax[0]), copy it to the end, and insert index
69// rowStart = 11 3 8
70// rowEnd = 15 6 11
71// rowMax = 17 8 11
72// cols ( | marking end of row): x x x | 2 6 7 x x| 8 9 10 | 2 3 5 6 x x |
73
74// now insert (1 3): row 1 not full, we can add it
75// rowStart 11 3 8
76// rowEnd 15 7 11
77// rowMax = 17 8 11
78// cols : x x x | 2 3 6 7 x | 8 9 10 | 2 3 5 6 x x |
79
80// defragment:
81// rowStart 0 4 8
82// rowEnd 4 8 11
83// rowMax = 4 8 11
84// cols : 2 3 5 6 | 2 3 6 7 | 8 9 10
85
86
100template<typename TValueType>
102{
103public:
104 typedef TValueType value_type;
105 enum {rows_sorted=true};
106
108
109public:
113
114public:
115 // construction etc
116 //----------------------
117
122 {
123 freeGPU();
124 }
125
126
133 bool resize_and_clear(size_t newRows, size_t newCols);
134 bool resize_and_keep_values(size_t newRows, size_t newCols);
135
142 bool set_as_transpose_of(const GPUSparseMatrix<value_type> &B, double scale=1.0);
143
150 bool set_as_copy_of(const GPUSparseMatrix<value_type> &B, double scale=1.0);
156
157
158public:
160 template<typename vector_t>
161 bool axpy(vector_t &dest,
162 const number &alpha1, const vector_t &v1,
163 const number &beta1, const vector_t &w1) const;
164
166 template<typename vector_t>
167 void axpy(double alpha, vector_t &x, double beta, const vector_t &y) const;
168
169
171 template<typename vector_t>
172 bool axpy_transposed(vector_t &dest,
173 const number &alpha1, const vector_t &v1,
174 const number &beta1, const vector_t &w1) const;
175
177 template<typename vector_t>
178 void apply_ignore_zero_rows(vector_t &dest,
179 const number &beta1, const vector_t &w1) const { assert(0); }
180
182 template<typename vector_t>
184 const number &beta1, const vector_t &w1) const { assert(0); }
185
186
187
188 // DEPRECATED!
190 // apply is deprecated; use axpy(res, 0.0, res, 1.0, x) instead
191 template<typename Vector_type>
192 bool apply(Vector_type &res, const Vector_type &x) const
193 {
194 return axpy(res, 0.0, res, 1.0, x);
195 }
196
198 // apply_transposed is deprecated; use axpy_transposed(res, 0.0, res, 1.0, x) instead
199 template<typename Vector_type>
200 bool apply_transposed(Vector_type &res, const Vector_type &x) const
201 {
202 return axpy_transposed(res, 0.0, res, 1.0, x);
203 }
204
205 // matmul_minus is deprecated; use axpy(res, 1.0, res, -1.0, x) instead
207 template<typename Vector_type>
208 bool matmul_minus(Vector_type &res, const Vector_type &x) const
209 {
210 return axpy(res, 1.0, res, -1.0, x);
211 }
212
213
214
220 inline bool is_isolated(size_t i) const;
221
222 bool scale(double d);
223 GPUSparseMatrix<value_type> &operator *= (double d) { scale(d); return *this; }
224
225 // submatrix set/get functions
226 //-------------------------------
227
239 template<typename M>
240 void add(const M &mat);
241 template<typename M>
243 void set(const M &mat);
245 template<typename M>
246 void get(M &mat) const;
247
248 // finalizing functions
249 //----------------------
250
251
252
253 inline void check_rc(size_t r, size_t c) const
254 {
255 UG_ASSERT(r < num_rows() && c < num_cols(), "tried to access element (" << r << ", " << c << ") of " << num_rows() << " x " << num_cols() << " matrix.");
256 }
257
259 bool set(double a);
260
268 const value_type &operator () (size_t r, size_t c) const
269 {
270 check_rc(r, c);
271 int j=get_index_const(r, c);
272 if(j == -1)
273 {
274 static value_type v(0.0);
275 return v;
276 }
277 UG_ASSERT(cols[j]==(int)c && j >= rowStart[r] && j < rowEnd[r], "");
278 return values[j];
279 }
280
289 value_type &operator() (size_t r, size_t c)
290 {
291 check_rc(r, c);
292 int j=get_index(r, c);
293 UG_ASSERT(j != -1 && cols[j]==(int)c && j >= rowStart[r] && j < rowEnd[r], "");
294 return values[j];
295 }
296
297public:
298 // row functions
299
307 void set_matrix_row(size_t row, connection *c, size_t nr);
308
320 void add_matrix_row(size_t row, connection *c, size_t nr);
321
323 inline size_t num_connections(size_t i) const
324 {
325 if(rowStart[i] == -1) return 0;
326 else return rowEnd[i]-rowStart[i];
327 }
328
330 template<typename vector_t>
331 inline void mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const;
332public:
333 // accessor functions
334 //----------------------
335
337 size_t num_rows() const { return rowEnd.size(); }
338
340 size_t num_cols() const { return m_numCols; }
341
343 size_t total_num_connections() const { return nnz; }
344
345public:
346
347 // Iterators
348 //---------------------------
349
350 // const_row_iterator
351
352
353 //typedef const connection * const_row_iterator;
354 //typedef connection * const_row_iterator;
360 void add_iterator() const
361 {
362 iIterators++;
363 }
364 void remove_iterator() const
365 {
366 iIterators--;
367 }
368 // a row_iterator has to support
369 // operator ++, operator +=, index() const, const value_type &value() const, value_type &value()
370 // a const_row_iterator has to support
371 // operator ++, operator +=, index() const, const value_type &value() const
372
373 inline void check_row(size_t row, int i) const
374 {
375 UG_ASSERT(i < rowEnd[row] && i >= rowStart[row], "row iterator row " << row << " pos " << i << " out of bounds [" << rowStart[row] << ", " << rowEnd[row] << "]");
376 }
377
383 {
385 size_t row;
386 size_t i;
387 public:
388 inline void check() const {A.check_row(row, i); }
389 row_iterator(GPUSparseMatrix &_A, size_t _row, size_t _i) : A(_A), row(_row), i(_i) { A.add_iterator(); }
391 row_iterator *operator ->() { return this; }
392 value_type &value() { check(); return A.values[i]; }
393 size_t index() const { check(); return A.cols[i]; }
394 bool operator != (const row_iterator &o) const { return i != o.i; }
395 void operator ++ () { ++i; }
396 void operator += (int nr) { i+=nr; }
397 bool operator == (const row_iterator &other) const { return other.i == i; check(); }
398 };
400 {
402 size_t row;
403 size_t i;
404 public:
405 inline void check() const {A.check_row(row, i); }
406 const_row_iterator(const GPUSparseMatrix &_A, size_t _row, size_t _i) : A(_A), row(_row), i(_i) {A.add_iterator();}
408 const_row_iterator *operator ->() { return this; }
409 const value_type &value() const { check(); return A.values[i]; }
410 size_t index() const { check(); return A.cols[i]; }
411 bool operator != (const const_row_iterator &o) const { return i != o.i; }
412 void operator ++ () { ++i; }
413 void operator += (int nr) { i+=nr; }
414 bool operator == (const const_row_iterator &other) const { return other.i == i; }
415 };
416
417
418
419
420 row_iterator begin_row(size_t r) { return row_iterator(*this, r, rowStart[r]); }
421 row_iterator end_row(size_t r) { return row_iterator(*this, r, rowEnd[r]); }
422 const_row_iterator begin_row(size_t r) const { return const_row_iterator(*this, r, rowStart[r]); }
423 const_row_iterator end_row(size_t r) const { return const_row_iterator(*this, r, rowEnd[r]); }
424
425 row_type get_row(size_t r) { return row_type(*this, r); }
426 const_row_type get_row(size_t r) const { return const_row_type(*this, r); }
427
428public:
429 // connectivity functions
430 //-------------------------
431
432 bool has_connection(size_t r, size_t c) const
433 {
434 check_rc(r, c);
435 bool bFound;
436 get_connection(r, c, bFound);
437 return bFound;
438 }
439
446 {
447 check_rc(r, c);
448 if(rowStart[r] == -1 || rowStart[r] == rowEnd[r])
449 return end_row(r);
450 else
451 {
452 int j=get_index_internal(r, c);
453 if(j > maxValues) return end_row(r);
454 else return row_iterator(*this, r, j);
455 }
456 }
457
463 const_row_iterator get_connection(size_t r, size_t c, bool &bFound) const
464 {
465 check_rc(r, c);
466 int j=get_index_const(r, c);
467 if(j != -1)
468 {
469 bFound = true;
470 return const_row_iterator(*this, r, j);
471 }
472 else
473 {
474 bFound = false;
475 return end_row(r);
476 }
477 }
483 row_iterator get_connection(size_t r, size_t c, bool &bFound)
484 {
485 check_rc(r, c);
486 int j=get_index_const(r, c);
487 if(j != -1)
488 {
489 bFound = true;
490 return row_iterator(*this, r, j);
491 }
492 else
493 {
494 bFound = false;
495 return end_row(r);
496 }
497 }
498
504 const_row_iterator get_connection(size_t r, size_t c) const
505 {
506 bool b;
507 return get_connection(r, c, b);
508 }
515 row_iterator get_connection(size_t r, size_t c)
516 {
517 check_rc(r, c);
518 assert(bNeedsValues);
519 int j=get_index(r, c);
520 return row_iterator(*this, r, j);
521 }
522
523
525 {
526 if(num_rows() != 0 && num_cols() != 0)
528 }
529
530public:
531 // output functions
532 //----------------------
533
534 void print(const char * const name = NULL) const;
535 void printtype() const;
536
537 void print_to_file(const char *filename) const;
538 void printrow(size_t row) const;
539
540 friend std::ostream &operator<<(std::ostream &out, const GPUSparseMatrix &m)
541 {
542 out << "GPUSparseMatrix " //<< m.name
543 << " [ " << m.num_rows() << " x " << m.num_cols() << " ]";
544 return out;
545 }
546
547
548 void p() const { print(); } // for use in gdb
549 void pr(size_t row) const {printrow(row); } // for use in gdb
550
551private:
552 // disallowed operations (not defined):
553 //---------------------------------------
555
556
557 void assureValuesSize(size_t s);
558 size_t get_nnz() const { return nnz; }
559
560protected:
561 int get_index_internal(size_t row, int col) const;
562 int get_index_const(int r, int c) const;
563 int get_index(int r, int c);
564 void copyToNewSize(size_t newSize)
565 {
566 copyToNewSize(newSize, num_cols());
567 }
568 void copyToNewSize(size_t newSize, size_t maxCols);
569 void check_fragmentation() const;
570 int get_nnz_max_cols(size_t maxCols);
571
572
573protected:
574 std::vector<int> rowStart;
575 std::vector<int> rowEnd;
576 std::vector<int> rowMax;
577 std::vector<int> cols;
579 size_t nnz;
581
582 std::vector<value_type> values;
585 mutable int iIterators;
586
587
588
589
590
591
592
594public:
595 void initGPU()
596 {
597 d_cols = d_rowStart = NULL;
598 d_values = NULL;
599 descr = 0;
600 bOnDevice = false;
601 }
602 void freeGPU()
603 {
604 cudaFree(d_cols);
605 cudaFree(d_rowStart);
606 cudaFree(d_values);
607 }
608
609 const int *get_device_cols() const { check_device(); return d_cols; }
610 const int *get_device_rowStart() const { check_device(); return d_rowStart; }
611 const double *get_device_value_ptr() const { check_device(); return d_values; }
612 cusparseMatDescr_t get_matrix_descr() const { check_device(); return descr; }
613
615 {
617 descr = 0;
618 cusparseStatus_t cusparseStatus = cusparseCreateMatDescr(&descr);
619
620 if (checkCudaErrors(cusparseStatus))
621 {
622 exit(EXIT_FAILURE);
623 }
624
625 cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
626 cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
627
628 defragment();
629 d_values = &values[0];
630
631 assert(cols.size() == values.size() && cols.size() == nnz);
632
633 UG_LOG("cols.size = " << cols.size()*sizeof(int) << " values.size() == " << values.size()*sizeof(value_type) << " rowStart.size = " << rowStart.size()*sizeof(int) << "\n");
634
635 UG_LOG("gpusparsematrix.h:"<<__LINE__ << "\n")
637 UG_LOG("gpusparsematrix.h:"<<__LINE__ << "\n")
639 UG_LOG("gpusparsematrix.h:"<<__LINE__ << "\n")
640// PrintVector(values, "GPUVector::values");
642 }
643
644
645 void check_device() const
646 {
647 if(bOnDevice==true) return;
649 c->bOnDevice=true;
650 c->copy_to_device();
651 }
652private:
653 //using CRSSparseMatrix::nnz;
654
656 double *d_values;
657 cusparseMatDescr_t descr;
658
660};
661
662
663template<typename T>
670
672template<typename vector_t, typename matrix_t>
673inline void MatMultTransposedAdd(vector_t &dest,
674 const number &alpha1, const vector_t &v1,
675 const number &beta1, const GPUSparseMatrix<matrix_t> &A1, const vector_t &w1)
676{
677 A1.axpy_transposed(dest, alpha1, v1, beta1, w1);
678}
679
680// end group cpu_algebra
682
683} // namespace ug
684
685//#include "matrixrow.h"
686#include "gpusparsematrix_impl.h"
688
689#endif
location name
Definition checkpoint_util.lua:128
Definition connection.h:40
static CUDAManager & get_instance()
Definition cuda_manager.cpp:153
Definition matrixrow.h:118
Definition gpusparsematrix.h:400
const_row_iterator(const GPUSparseMatrix &_A, size_t _row, size_t _i)
Definition gpusparsematrix.h:406
size_t row
Definition gpusparsematrix.h:402
void check() const
Definition gpusparsematrix.h:405
const GPUSparseMatrix & A
Definition gpusparsematrix.h:401
bool operator!=(const const_row_iterator &o) const
Definition gpusparsematrix.h:411
bool operator==(const const_row_iterator &other) const
Definition gpusparsematrix.h:414
size_t i
Definition gpusparsematrix.h:403
const_row_iterator * operator->()
Definition gpusparsematrix.h:408
void operator++()
Definition gpusparsematrix.h:412
~const_row_iterator()
Definition gpusparsematrix.h:407
size_t index() const
Definition gpusparsematrix.h:410
void operator+=(int nr)
Definition gpusparsematrix.h:413
const value_type & value() const
Definition gpusparsematrix.h:409
Definition gpusparsematrix.h:383
~row_iterator()
Definition gpusparsematrix.h:390
void operator+=(int nr)
Definition gpusparsematrix.h:396
size_t i
Definition gpusparsematrix.h:386
GPUSparseMatrix & A
Definition gpusparsematrix.h:384
void operator++()
Definition gpusparsematrix.h:395
size_t row
Definition gpusparsematrix.h:385
row_iterator(GPUSparseMatrix &_A, size_t _row, size_t _i)
Definition gpusparsematrix.h:389
row_iterator * operator->()
Definition gpusparsematrix.h:391
bool operator==(const row_iterator &other) const
Definition gpusparsematrix.h:397
size_t index() const
Definition gpusparsematrix.h:393
void check() const
Definition gpusparsematrix.h:388
value_type & value()
Definition gpusparsematrix.h:392
bool operator!=(const row_iterator &o) const
Definition gpusparsematrix.h:394
sparse matrix for big, variable sparse matrices.
Definition gpusparsematrix.h:102
row_iterator get_iterator_or_next(size_t r, size_t c)
Definition gpusparsematrix.h:445
void check_device() const
Definition gpusparsematrix.h:645
const_row_iterator begin_row(size_t r) const
Definition gpusparsematrix.h:422
bool is_isolated(size_t i) const
check for isolated condition of an index
Definition gpusparsematrix_impl.h:218
void check_rc(size_t r, size_t c) const
Definition gpusparsematrix.h:253
bool resize_and_keep_values(size_t newRows, size_t newCols)
Definition gpusparsematrix_impl.h:84
int * d_rowStart
Definition gpusparsematrix.h:655
friend std::ostream & operator<<(std::ostream &out, const GPUSparseMatrix &m)
Definition gpusparsematrix.h:540
bool matmul_minus(Vector_type &res, const Vector_type &x) const
calculate res -= A x
Definition gpusparsematrix.h:208
void remove_iterator() const
Definition gpusparsematrix.h:364
row_iterator end_row(size_t r)
Definition gpusparsematrix.h:421
cusparseMatDescr_t get_matrix_descr() const
Definition gpusparsematrix.h:612
size_t get_nnz() const
Definition gpusparsematrix.h:558
double * d_values
Definition gpusparsematrix.h:656
void set(const M &mat)
set local matrix
Definition gpusparsematrix_impl.h:320
const int * get_device_rowStart() const
Definition gpusparsematrix.h:610
const int * get_device_cols() const
Definition gpusparsematrix.h:609
bool set_as_transpose_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
write in a empty GPUSparseMatrix (this) the transpose GPUSparseMatrix of B.
Definition gpusparsematrix_impl.h:113
std::vector< int > rowStart
Definition gpusparsematrix.h:574
void defragment()
Definition gpusparsematrix.h:524
std::vector< int > rowMax
Definition gpusparsematrix.h:576
ConstMatrixRow< this_type > const_row_type
Definition gpusparsematrix.h:112
void add_matrix_row(size_t row, connection *c, size_t nr)
Definition gpusparsematrix_impl.h:264
row_iterator begin_row(size_t r)
Definition gpusparsematrix.h:420
std::vector< int > cols
Definition gpusparsematrix.h:577
std::vector< value_type > values
Definition gpusparsematrix.h:582
size_t num_rows() const
returns number of rows
Definition gpusparsematrix.h:337
size_t num_connections(size_t i) const
returns number of connections of row row.
Definition gpusparsematrix.h:323
void assureValuesSize(size_t s)
Definition gpusparsematrix_impl.h:555
int get_nnz_max_cols(size_t maxCols)
Definition gpusparsematrix_impl.h:565
TValueType value_type
Definition gpusparsematrix.h:104
int get_index_internal(size_t row, int col) const
Definition gpusparsematrix_impl.h:350
const double * get_device_value_ptr() const
Definition gpusparsematrix.h:611
virtual ~GPUSparseMatrix()
destructor
Definition gpusparsematrix.h:121
GPUSparseMatrix< value_type > & operator*=(double d)
Definition gpusparsematrix.h:223
bool apply_transposed(Vector_type &res, const Vector_type &x) const
calculate res = A.T x
Definition gpusparsematrix.h:200
void apply_transposed_ignore_zero_rows(vector_t &dest, const number &beta1, const vector_t &w1) const
calculated dest = beta1*A*w1 . For empty cols of A (=empty rows of A^T), dest will not be changed
Definition gpusparsematrix.h:183
bool set_as_copy_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
create/recreate this as a copy of GPUSparseMatrix B
Definition gpusparsematrix_impl.h:273
void set_matrix_row(size_t row, connection *c, size_t nr)
Definition gpusparsematrix_impl.h:230
const_row_iterator end_row(size_t r) const
Definition gpusparsematrix.h:423
MatrixRow< this_type > row_type
Definition gpusparsematrix.h:111
row_type get_row(size_t r)
Definition gpusparsematrix.h:425
void pr(size_t row) const
Definition gpusparsematrix.h:549
GPUSparseMatrix< value_type > this_type
Definition gpusparsematrix.h:107
bool axpy(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A*w1 (A = this matrix)
Definition gpusparsematrix_impl.h:170
std::vector< int > rowEnd
Definition gpusparsematrix.h:575
bool scale(double d)
Definition gpusparsematrix_impl.h:286
void apply_ignore_zero_rows(vector_t &dest, const number &beta1, const vector_t &w1) const
calculated dest = beta1*A*w1 . For empty rows, dest will not be changed
Definition gpusparsematrix.h:178
int get_index_const(int r, int c) const
Definition gpusparsematrix_impl.h:381
const_row_iterator get_connection(size_t r, size_t c, bool &bFound) const
Definition gpusparsematrix.h:463
cusparseMatDescr_t descr
Definition gpusparsematrix.h:657
const_row_iterator get_connection(size_t r, size_t c) const
Definition gpusparsematrix.h:504
bool bNeedsValues
Definition gpusparsematrix.h:580
void freeGPU()
Definition gpusparsematrix.h:602
GPUSparseMatrix(GPUSparseMatrix &)
disallow copy operator
int iIterators
Definition gpusparsematrix.h:585
void check_fragmentation() const
Definition gpusparsematrix_impl.h:548
void add(const M &mat)
Definition gpusparsematrix_impl.h:304
void mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const
calculates dest += alpha * A[row, .] v;
Definition gpusparsematrix_impl.h:149
bool has_connection(size_t r, size_t c) const
Definition gpusparsematrix.h:432
size_t nnz
Definition gpusparsematrix.h:579
void copyToNewSize(size_t newSize)
Definition gpusparsematrix.h:564
void print_to_file(const char *filename) const
const value_type & operator()(size_t r, size_t c) const
Definition gpusparsematrix.h:268
void get(M &mat) const
get local matrix
Definition gpusparsematrix_impl.h:335
size_t total_num_connections() const
returns the total number of connections
Definition gpusparsematrix.h:343
@ rows_sorted
Definition gpusparsematrix.h:105
size_t fragmented
Definition gpusparsematrix.h:578
int maxValues
Definition gpusparsematrix.h:583
void add_iterator() const
Definition gpusparsematrix.h:360
bool axpy_transposed(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A^T*w1 (A = this matrix)
Definition gpusparsematrix_impl.h:190
void initGPU()
Definition gpusparsematrix.h:595
row_iterator get_connection(size_t r, size_t c, bool &bFound)
Definition gpusparsematrix.h:483
GPUSparseMatrix()
constructor for empty GPUSparseMatrix
Definition gpusparsematrix_impl.h:52
row_iterator get_connection(size_t r, size_t c)
Definition gpusparsematrix.h:515
bool apply(Vector_type &res, const Vector_type &x) const
calculate res = A x
Definition gpusparsematrix.h:192
int * d_cols
Definition gpusparsematrix.h:655
void p() const
Definition gpusparsematrix.h:548
const_row_type get_row(size_t r) const
Definition gpusparsematrix.h:426
GPUSparseMatrix< value_type > & operator=(const GPUSparseMatrix< value_type > &B)
Definition gpusparsematrix.h:151
int m_numCols
Definition gpusparsematrix.h:584
void copy_to_device()
Definition gpusparsematrix.h:614
void check_row(size_t row, int i) const
Definition gpusparsematrix.h:373
bool resize_and_clear(size_t newRows, size_t newCols)
resizes the GPUSparseMatrix
Definition gpusparsematrix_impl.h:66
AlgebraicConnection< TValueType > connection
Definition gpusparsematrix.h:110
bool bOnDevice
Definition gpusparsematrix.h:659
size_t num_cols() const
returns the number of cols
Definition gpusparsematrix.h:340
int get_index(int r, int c)
Definition gpusparsematrix_impl.h:393
Definition matrixrow.h:65
void printtype() const
Definition gpusparsematrix_print.h:72
void print(const char *const name=NULL) const
Definition gpusparsematrix_print.h:47
void printrow(size_t row) const
Definition gpusparsematrix_print.h:58
#define UG_ASSERT(expr, msg)
Definition assert.h:70
#define UG_LOG(msg)
Definition log.h:367
double number
Definition types.h:124
the ug namespace
T::value_type * CudaCreateAndCopyToDevice(T &vec)
Definition cuda_manager.h:163
bool MatMultTransposedAdd(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const matrix_t &A1, const vector_t &w1)
calculates dest = alpha1*v1 + beta1 * A1 *w1;
Definition operations_mat.h:121
@ MATRIX_USE_ROW_FUNCTIONS
Definition matrix_algebra_types.h:68
Definition matrix_algebra_types.h:79
static const int type
Definition matrix_algebra_types.h:80