ug4
Loading...
Searching...
No Matches
gpusparsematrix_impl.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 2013-2015: G-CSC, Goethe University Frankfurt
3 * Author: Martin Rupp
4 *
5 * This file is part of UG4.
6 *
7 * UG4 is free software: you can redistribute it and/or modify it under the
8 * terms of the GNU Lesser General Public License version 3 (as published by the
9 * Free Software Foundation) with the following additional attribution
10 * requirements (according to LGPL/GPL v3 §7):
11 *
12 * (1) The following notice must be displayed in the Appropriate Legal Notices
13 * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
14 *
15 * (2) The following notice must be displayed at a prominent place in the
16 * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
17 *
18 * (3) The following bibliography is recommended for citation and must be
19 * preserved in all covered files:
20 * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
21 * parallel geometric multigrid solver on hierarchically distributed grids.
22 * Computing and visualization in science 16, 4 (2013), 151-164"
23 * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
24 * flexible software system for simulating pde based models on high performance
25 * computers. Computing and visualization in science 16, 4 (2013), 165-179"
26 *
27 * This program is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU Lesser General Public License for more details.
31 */
32
33#ifndef __H__UG__CPU_ALGEBRA__GPUSparseMatrix_IMPL__
34#define __H__UG__CPU_ALGEBRA__GPUSparseMatrix_IMPL__
35
36// creation etc
37
38#include <fstream>
39#include <cstring>
40
43#include "gpusparsematrix.h"
44#include <vector>
45#include <algorithm>
46#include "cuda/cuda_manager.h"
47
48
49namespace ug{
50
template<typename T>
// NOTE(review): the signature line (`GPUSparseMatrix<T>::GPUSparseMatrix()` per the
// declaration docs, def. line 52) is missing from this extract — constructor for an
// empty 0x0 matrix.
{
	initGPU();	// device/library initialization (defined elsewhere) before profiling starts
	PROFILE_GPUMATRIX(GPUSparseMatrix_constructor);
	bNeedsValues = true;	// store numeric values, not only the sparsity pattern
	iIterators=0;	// no live row iterators yet
	nnz = 0;	// no stored entries
	m_numCols = 0;
	maxValues = 0;	// number of used slots in cols/values
	// small initial capacity; storage grows on demand (see assureValuesSize)
	cols.resize(32);
	if(bNeedsValues) values.resize(32);
}
64
65template<typename T>
66bool GPUSparseMatrix<T>::resize_and_clear(size_t newRows, size_t newCols)
67{
68 PROFILE_GPUMATRIX(GPUSparseMatrix_resize_and_clear);
69 UG_LOG(this << "GPUSparseMatrix::resize_and_clear(" << newRows << ", " << newCols << ")");
70 rowStart.clear(); rowStart.resize(newRows+1, -1);
71 rowMax.clear(); rowMax.resize(newRows);
72 rowEnd.clear(); rowEnd.resize(newRows, -1);
73 m_numCols = newCols;
74 nnz = 0;
75
76 cols.clear(); cols.resize(newRows);
77 values.clear();
78 if(bNeedsValues) values.resize(newRows);
79 maxValues = 0;
80 return true;
81}
82
83template<typename T>
84bool GPUSparseMatrix<T>::resize_and_keep_values(size_t newRows, size_t newCols)
85{
86 PROFILE_GPUMATRIX(GPUSparseMatrix_resize_and_keep_values);
87 UG_LOG(this << "GPUSparseMatrix_resize_and_keep_values(" << newRows << ", " << newCols << ")");
88 //UG_LOG("GPUSparseMatrix resize " << newRows << "x" << newCols << "\n");
89 if(newRows == 0 && newCols == 0)
90 return resize_and_clear(0,0);
91
92 if(newRows != num_rows())
93 {
94 size_t oldrows = num_rows();
95 rowStart.resize(newRows+1, -1);
96 rowMax.resize(newRows);
97 rowEnd.resize(newRows, -1);
98 for(size_t i=oldrows; i<newRows; i++)
99 {
100 rowStart[i] = -1;
101 rowEnd[i] = -1;
102 }
103 }
104 if((int)newCols < m_numCols)
105 copyToNewSize(get_nnz_max_cols(newCols), newCols);
106
107 m_numCols = newCols;
108 return true;
109}
110
111
template<typename T>
// NOTE(review): the signature line (`bool GPUSparseMatrix<T>::set_as_transpose_of(
// const GPUSparseMatrix<value_type> &B, double scale)` per the declaration docs,
// def. line 113) is missing from this extract.
{
	PROFILE_GPUMATRIX(GPUSparseMatrix_set_as_transpose_of);
	// this = scale * B^T: start from an empty matrix with swapped dimensions
	resize_and_clear(B.num_cols(), B.num_rows());

	// Disabled counting-based transpose (pre-computes per-row slot counts);
	// kept as-is from the original source:
	/*rowStart.resize(B.num_cols(), 0);
	rowMax.resize(B.num_cols(), 0);
	rowEnd.resize(B.num_cols(), 0);
	nnz = B.nnz;
	bNeedsValues = B.bNeedsValues;
	if(bNeedsValues) values.resize(nnz);
	cols.resize(nnz);
	maxValues = B.maxValues;
	fragmented = 0;
	m_numCols = B.num_rows();
	size_t r, c;
	for(r=0; r<num_rows(); r++)
		for(const_row_iterator it = B.begin_row(r); it != B.end_row(r); ++it)
			rowMax[it.index()]++;
	rowEnd[0] = rowMax[0];
	rowEnd[0] = 0;
	for(c=1; c<B.num_cols(); c++)
	{
		rowStart[c] = rowMax[c-1];
		rowMax[c] = rowStart[c]+rowMax[c];
		rowEnd[c] = rowStart[c];
	}*/

	// Simple insertion-based transpose: every entry (r, c) of B becomes
	// entry (c, r) of this matrix, scaled by `scale`. operator() creates the
	// entries on demand.
	for(size_t r=0; r<B.num_rows(); r++)
		for(const_row_iterator it = B.begin_row(r); it != B.end_row(r); ++it)
			operator()(it.index(), r) = scale*it.value();
	// todo: sort rows
	return true;
}
146
147template<typename T>
148template<typename vector_t>
149inline void GPUSparseMatrix<T>::mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const
150{
151 for(const_row_iterator conn = begin_row(row); conn != end_row(row); ++conn)
152 MatMultAdd(dest, 1.0, dest, alpha, conn.value(), v[conn.index()]);
153}
154
template<typename T>
template<typename vector_t>
// Computes x = alpha*x + beta*A*y entirely on the device.
// cusparseDcsrmv evaluates y' = a*op(A)*x' + b*y'; here the call maps
// a := beta, x' := y, b := alpha, y' := x, which yields the update above.
// NOTE(review): cusparseDcsrmv is the legacy (pre-CUDA 11) CSR mat-vec API.
void GPUSparseMatrix<T>::axpy(double alpha, vector_t &x, double beta, const vector_t &y) const
{
	check_device();	// make sure the matrix data is valid on the GPU
	cusparseDcsrmv(CUDAManager::get_cusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, num_rows(), num_cols(), get_nnz(),
		&beta, get_matrix_descr(), get_device_value_ptr(), get_device_rowStart(), get_device_cols(), y.get_dev_ptr(),
		&alpha, x.get_dev_ptr());
}
165
166
167// calculate dest = alpha1*v1 + beta1*A*w1 (A = this matrix)
168template<typename T>
169template<typename vector_t>
170bool GPUSparseMatrix<T>::axpy(vector_t &dest,
171 const number &alpha1, const vector_t &v1,
172 const number &beta1, const vector_t &w1) const
173{
174// UG_LOG("GPUSparseMatrix axpy\n");
175 PROFILE_GPUMATRIX(GPUSparseMatrix_axpy);
176
177 if(alpha1 == 0.0 || &dest == &v1)
178 axpy(alpha1, dest, beta1, w1);
179 else
180 {
181 axpy(0.0, dest, beta1, w1);
182 dest.add(alpha1, v1);
183 }
184 return true;
185}
186
187// calculate dest = alpha1*v1 + beta1*A^T*w1 (A = this matrix)
template<typename T>
template<typename vector_t>
// NOTE(review): the first signature line (`bool GPUSparseMatrix<T>::axpy_transposed(
// vector_t &dest,` per the declaration docs, def. line 190) is missing from this
// extract; the remaining parameter lines follow.
		const number &alpha1, const vector_t &v1,
		const number &beta1, const vector_t &w1) const
{
	PROFILE_GPUMATRIX(GPUSparseMatrix_axpy_transposed);
	// Transposed product is not implemented for the GPU matrix: this always
	// fires in debug builds. NOTE(review): release builds fall through and
	// return true without computing anything — callers get no result.
	UG_ASSERT(0, "not implemented");
	return true;
}
198
template<typename T>
// NOTE(review): the signature line (`bool GPUSparseMatrix<T>::set(double a)` per the
// declaration docs, def. line 200) is missing from this extract.
{
	PROFILE_GPUMATRIX(GPUSparseMatrix_set);
	// compact storage first if fragmentation is high, so iteration is cheap
	check_fragmentation();
	// Rewrite the existing pattern in place: stored diagonal entries become
	// `a`, every other stored entry becomes 0. No new entries are created.
	for(size_t row=0; row<num_rows(); row++)
		for(row_iterator it = begin_row(row); it != end_row(row); ++it)
		{
			if(it.index() == row)
				it.value() = a;
			else
				it.value() = 0.0;
		}

	return true;
}
215
216
217template<typename T>
218inline bool GPUSparseMatrix<T>::is_isolated(size_t i) const
219{
220 UG_ASSERT(i < num_rows() && i >= 0, *this << ": " << i << " out of bounds.");
221
222 for(const_row_iterator it = begin_row(i); it != end_row(i); ++it)
223 if(it.index() != i && it.value() != 0.0)
224 return false;
225 return true;
226}
227
228
229template<typename T>
230void GPUSparseMatrix<T>::set_matrix_row(size_t row, connection *c, size_t nr)
231{
232 /*PROFILE_GPUMATRIX(GPUSparseMatrix_set_matrix_row);
233 bool bSorted=false;
234 for(size_t i=0; bSorted && i+1 < nr; i++)
235 bSorted = c[i].iIndex < c[i+1].iIndex;
236 if(bSorted)
237 {
238 int start;
239 if(rowStart[row] == -1 || rowMax[row] - rowStart[row] < (int)nr)
240 {
241 assureValuesSize(maxValues+nr);
242 start = maxValues;
243 rowMax[row] = start+nr;
244 rowStart[row] = start;
245 maxValues+=nr;
246 }
247 else
248 start = rowStart[row];
249 rowEnd[row] = start+nr;
250 for(size_t i=0; i<nr; i++)
251 {
252 cols[start+i] = c[i].iIndex;
253 values[start+i] = c[i].dValue;
254 }
255 }
256 else*/
257 {
258 for(size_t i=0; i<nr; i++)
259 operator()(row, c[i].iIndex) = c[i].dValue;
260 }
261}
262
263template<typename T>
264void GPUSparseMatrix<T>::add_matrix_row(size_t row, connection *c, size_t nr)
265{
266 //PROFILE_GPUMATRIX(GPUSparseMatrix_add_matrix_row);
267 for(size_t i=0; i<nr; i++)
268 operator()(row, c[i].iIndex) += c[i].dValue;
269}
270
271
template<typename T>
// NOTE(review): the signature line (`bool GPUSparseMatrix<T>::set_as_copy_of(
// const GPUSparseMatrix<value_type> &B, double scale)` per the declaration docs,
// def. line 273) is missing from this extract.
{
	//PROFILE_GPUMATRIX(GPUSparseMatrix_set_as_copy_of);
	// this = scale * B: rebuild from scratch with B's dimensions, then copy
	// every stored entry of B, scaled.
	resize_and_clear(B.num_rows(), B.num_cols());
	for(size_t i=0; i < B.num_rows(); i++)
		for(const_row_iterator it = B.begin_row(i); it != B.end_row(i); ++it)
			operator()(i, it.index()) = scale*it.value();
	return true;
}
282
283
284
template<typename T>
// NOTE(review): the signature line (`bool GPUSparseMatrix<T>::scale(double d)` per
// the declaration docs, def. line 286) is missing from this extract.
{
	//PROFILE_GPUMATRIX(GPUSparseMatrix_scale);
	// multiply every stored entry by d, in place
	for(size_t i=0; i < num_rows(); i++)
		for(row_iterator it = begin_row(i); it != end_row(i); ++it)
			it.value() *= d;
	return true;
}
294
295
296
297
298
299//======================================================================================================
300// submatrix set/get
301
302template<typename T>
303template<typename M>
304void GPUSparseMatrix<T>::add(const M &mat)
305{
306 for(size_t i=0; i < mat.num_rows(); i++)
307 {
308 int r = mat.row_index(i);
309 for(size_t j=0; j < mat.num_cols(); j++)
310 {
311 int c = mat.col_index(j);
312 (*this)(r,c) += mat(i,j);
313 }
314 }
315}
316
317
318template<typename T>
319template<typename M>
320void GPUSparseMatrix<T>::set(const M &mat)
321{
322 for(size_t i=0; i < mat.num_rows(); i++)
323 {
324 int r = mat.row_index(i);
325 for(size_t j=0; j < mat.num_cols(); j++)
326 {
327 int c = mat.col_index(j);
328 (*this)(r,c) = mat(i,j);
329 }
330 }
331}
332
333template<typename T>
334template<typename M>
335void GPUSparseMatrix<T>::get(M &mat) const
336{
337 for(size_t i=0; i < mat.num_rows(); i++)
338 {
339 int r = mat.row_index(i);
340 for(size_t j=0; j < mat.num_cols(); j++)
341 {
342 int c = mat.col_index(j);
343 mat(i,j) = (*this)(r,c);
344 }
345 }
346}
347
348
template<typename T>
// Binary search over the sorted column indices of `row` inside cols[].
// Returns either the exact storage position of `col`, or the position where
// `col` would have to be inserted to keep the row sorted (callers distinguish
// the two cases by checking cols[ret]). Requires the row to have been started.
int GPUSparseMatrix<T>::get_index_internal(size_t row, int col) const
{
//	PROFILE_GPUMATRIX(SP_get_index_internal);
	assert(rowStart[row] != -1);
	int l = rowStart[row];
	int r = rowEnd[row];	// rows occupy [rowStart, rowEnd) in cols[]/values[]
	int mid=0;
	while(l < r)
	{
		mid = (l+r)/2;
		if(cols[mid] < col)
			l = mid+1;
		else if(cols[mid] > col)
			// NOTE(review): r starts as one-past-the-end but is narrowed as if
			// inclusive (mid-1); the fixup below compensates for this mix and the
			// UG_ASSERT re-establishes the [rowStart, rowEnd] bound.
			r = mid-1;
		else
			return mid;	// exact hit
	}
	// Not found: derive the insertion position from the final midpoint and
	// clamp it back into the row's storage range.
	mid = (l+r)/2;
	int ret;
	if(mid < rowStart[row])
		ret = rowStart[row];
	else if(mid == rowEnd[row] || col <= cols[mid])
		ret = mid;
	else ret = mid+1;
	UG_ASSERT(ret <= rowEnd[row] && ret >= rowStart[row], "row iterator row " << row << " pos " << ret << " out of bounds [" << rowStart[row] << ", " << rowEnd[row] << "]");
	return ret;
}
377
378
379
template<typename T>
// NOTE(review): the signature line (`int GPUSparseMatrix<T>::get_index_const(int r,
// int c) const` per the declaration docs, def. line 381) is missing from this extract.
// Returns the storage position of entry (r,c), or -1 if it is not stored.
{
	// a row that never started, or is empty, cannot contain the entry
	if(rowStart[r] == -1 || rowStart[r] == rowEnd[r]) return -1;
	// binary-search for the position where (r,c) would live
	int index=get_index_internal(r, c);
	// only an exact hit counts; an insertion position is reported as absent
	if(index >= rowStart[r] && index < rowEnd[r] && cols[index] == c)
		return index;
	else
		return -1;
}
390
391
template<typename T>
// NOTE(review): the signature line (`int GPUSparseMatrix<T>::get_index(int r, int c)`
// per the declaration docs, def. line 393) is missing from this extract.
// Returns the storage position of entry (r,c) inside cols[]/values[], inserting
// a zero-initialized entry — growing or relocating the row if necessary — when
// the entry does not yet exist.
{
//	UG_LOG("get_index " << r << ", " << c << "\n");
//	UG_LOG(rowStart[r] << " - " << rowMax[r] << " - " << rowEnd[r] << " - " << cols.size() << " - " << maxValues << "\n");
	if(rowStart[r] == -1 || rowStart[r] == rowEnd[r])
	{
//		UG_LOG("new row\n");
		// row did not start, start new row at the end of cols array
		assureValuesSize(maxValues+1);
		rowStart[r] = maxValues;
		rowEnd[r] = maxValues+1;
		rowMax[r] = maxValues+1;
		if(bNeedsValues) values[maxValues] = 0.0;
		cols[maxValues] = c;
		maxValues++;
		nnz++;
		return maxValues-1;
	}

	/* for(int i=rowStart[r]; i<rowEnd[r]; i++)
		if(cols[i] == c)
			return i;*/

	// get the index where (r,c) _should_ be
	int index=get_index_internal(r, c);

	if(index < rowEnd[r]
		&& cols[index] == c)
	{
//		UG_LOG("found\n");
		return index; // we found it
	}

	// we did not find it, so we have to add it

#ifndef NDEBUG
	// `index` must be a valid insertion point that keeps the row sorted
	assert(index == rowEnd[r] || cols[index] > c);
	assert(index == rowStart[r] || cols[index-1] < c);
	for(int i=rowStart[r]+1; i<rowEnd[r]; i++)
		assert(cols[i] > cols[i-1]);
#endif
	// If this row is the last one stored (its end coincides with maxValues)
	// and there is spare capacity, extend the row in place by one slot.
	if(rowEnd[r] == rowMax[r] && rowEnd[r] == maxValues
			&& maxValues < (int)cols.size())
	{
//		UG_LOG("at end\n");
		// this row is stored at the end, so we can easily make more room
		rowMax[r]++;
		maxValues++;
	}

	if(rowEnd[r] == rowMax[r])
	{
//		UG_LOG("renew\n");
		// row is full, we need to copy it to another place
		// reserve double the current row length at the end of the arrays
		int newSize = (rowEnd[r]-rowStart[r])*2;
		if(maxValues+newSize > (int)cols.size())
		{
//			UG_LOG("ass\n");
			assureValuesSize(maxValues+newSize);
			// assureValuesSize may defragment and move the row: re-locate c
			index=get_index_internal(r, c);
		}
		// copy it to the end and insert index
		// the abandoned old slots become fragmentation
		fragmented += rowEnd[r]-rowStart[r];
		index = index-rowStart[r]+maxValues;	// translate insertion point to new location
		int j=rowEnd[r]-rowStart[r]+maxValues;	// write cursor (one past new last slot)
		if(rowEnd[r] != 0)
			// copy the row back-to-front, skipping the slot reserved for the
			// new entry (the `j==index` step)
			for(int i=rowEnd[r]-1; i>=rowStart[r]; i--, j--)
			{
				if(j==index) j--;
				if(bNeedsValues) values[j] = values[i];
				cols[j] = cols[i];
				if(i==rowStart[r]) break;
			}
		rowEnd[r] = maxValues+rowEnd[r]-rowStart[r]+1;	// +1 for the inserted entry
		rowStart[r] = maxValues;
		rowMax[r] = maxValues+newSize;
		maxValues += newSize;
	}
	else
	{
//		UG_LOG("enlength\n");
		// move part > index so we can insert the index
		if(rowEnd[r] != 0)
			// shift entries [index, rowEnd) one slot right, back-to-front
			for(int i=rowEnd[r]-1; i>=index; i--)
			{
				if(bNeedsValues) values[i+1] = values[i];
				cols[i+1] = cols[i];
				if(i==index) break;
			}
		rowEnd[r]++;
	}
	// write the new, zero-initialized entry at the insertion point
	if(bNeedsValues) values[index] = 0.0;
	cols[index] = c;

	nnz++;
#ifndef NDEBUG
	// the row must still be sorted and contain the new position
	assert(index >= rowStart[r] && index < rowEnd[r]);
	for(int i=rowStart[r]+1; i<rowEnd[r]; i++)
		assert(cols[i] > cols[i-1]);
#endif
	return index;

}
496
template<typename T>
// Re-sizes the flat cols[]/values[] storage to newSize slots. Either grows the
// arrays in place (fast path) or compacts all rows back-to-back into fresh
// arrays, dropping every entry whose column index is >= maxCol.
void GPUSparseMatrix<T>::copyToNewSize(size_t newSize, size_t maxCol)
{
	PROFILE_GPUMATRIX(GPUSparseMatrix_copyToNewSize);
	/*UG_LOG("copyToNewSize: from " << values.size() << " to " << newSize << "\n");
	UG_LOG("sizes are " << cols.size() << " and " << values.size() << ", ");
	UG_LOG(reset_floats << "capacities are " << cols.capacity() << " and " << values.capacity() << ", NNZ = " << nnz << ", fragmentation = " <<
	(1-((double)nnz)/cols.size())*100.0 << "%\n");
	if(newSize == nnz) { UG_LOG("Defragmenting to NNZ."); }*/
	// Fast path: plain resize without moving entries — either because live row
	// iterators forbid relocation, or because occupancy is low (< 20%) and the
	// needed capacity already exists.
	if( (iIterators > 0)
			|| (newSize > values.size() && (100.0*nnz)/newSize < 20 && newSize <= cols.capacity()) )
	{
		// NOTE(review): this assert fires exactly when iIterators > 0 and
		// newSize <= values.size(), i.e. a shrink/defragment was requested while
		// iterators are live — confirm that is the intended diagnostic.
		UG_ASSERT(newSize > values.size(), "no nnz-defragmenting while using iterators.");
		//UG_LOG("copyToNew not defragmenting because of iterators or low fragmentation.\n");}
		cols.resize(newSize);
		cols.resize(cols.capacity());	// use all capacity that is already paid for
		if(bNeedsValues) { values.resize(newSize); values.resize(cols.size()); }
		return;
	}

	// Slow path: compact every row into fresh arrays.
	std::vector<value_type> v(newSize);
	std::vector<int> c(newSize);
	size_t j=0;	// write cursor into the new arrays
	for(size_t r=0; r<num_rows(); r++)
	{
		if(rowStart[r] == -1)
			// never-started row becomes an empty row located at j
			rowStart[r] = rowEnd[r] = rowMax[r] = j;
		else
		{
			size_t start=j;
			for(int k=rowStart[r]; k<rowEnd[r]; k++)
			{
				// keep only entries whose column survives the (possible) shrink
				if(cols[k] < (int)maxCol)
				{
					if(bNeedsValues) v[j] = values[k];
					c[j] = cols[k];
					j++;
				}
			}
			// rows are now stored back-to-back with no spare slots
			rowStart[r] = start;
			rowEnd[r] = rowMax[r] = j;
		}
	}
	// closing sentinel; NOTE(review): indexes rowEnd[num_rows()-1], so this
	// assumes num_rows() > 0 — confirm callers never compact an empty matrix.
	rowStart[num_rows()] = rowEnd[num_rows()-1];
	fragmented = 0;
	maxValues = j;	// all slots up to j are now in use
	if(bNeedsValues) std::swap(values, v);
	std::swap(cols, c);
}
546
template<typename T>
// NOTE(review): the signature line (`void GPUSparseMatrix<T>::check_fragmentation()
// const` per the declaration docs, def. line 548) is missing from this extract.
{
	// Defragment when more than 10% of the used slots are holes. The
	// const_cast works because defragmenting changes only the internal layout,
	// not the logical matrix. NOTE(review): maxValues == 0 makes this 0/0 —
	// presumably unreachable for a non-trivial matrix; confirm with callers.
	if((double)nnz/(double)maxValues < 0.9)
		(const_cast<this_type*>(this))->defragment();
}
553
template<typename T>
// NOTE(review): the signature line (`void GPUSparseMatrix<T>::assureValuesSize(
// size_t s)` per the declaration docs, def. line 555) is missing from this extract.
{
	// nothing to do if the storage already holds s slots
	if(s <= cols.size()) return;
	// grow geometrically (at least 2*nnz) to amortize repeated insertions
	size_t newSize = nnz*2;
	if(newSize < s) newSize = s;
	copyToNewSize(newSize);

}
563
template<typename T>
// NOTE(review): the signature line (`int GPUSparseMatrix<T>::get_nnz_max_cols(
// size_t maxCols)` per the declaration docs, def. line 565) is missing from this
// extract. Counts the stored entries whose column index is below maxCols.
{
	int j=0;	// running count of surviving entries
	for(size_t r=0; r<num_rows(); r++)
	{
		if(rowStart[r] == -1) continue;	// row never started: nothing stored
		for(int k=rowStart[r]; k<rowEnd[r]; k++)
			if(cols[k] < (int)maxCols)
				j++;
	}
	return j;
}
577
578} // namespace ug
579
580#endif
581
parameterString s
Definition connection.h:40
size_t iIndex
Definition connection.h:42
static cusparseHandle_t get_cusparseHandle()
Definition cuda_manager.h:102
Definition gpusparsematrix.h:400
Definition gpusparsematrix.h:383
sparse matrix for big, variable sparse matrices.
Definition gpusparsematrix.h:102
bool is_isolated(size_t i) const
check for isolated condition of an index
Definition gpusparsematrix_impl.h:218
bool resize_and_keep_values(size_t newRows, size_t newCols)
Definition gpusparsematrix_impl.h:84
row_iterator end_row(size_t r)
Definition gpusparsematrix.h:421
void set(const M &mat)
set local matrix
Definition gpusparsematrix_impl.h:320
bool set_as_transpose_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
writes into an empty GPUSparseMatrix (this) the transpose of the GPUSparseMatrix B.
Definition gpusparsematrix_impl.h:113
void add_matrix_row(size_t row, connection *c, size_t nr)
Definition gpusparsematrix_impl.h:264
row_iterator begin_row(size_t r)
Definition gpusparsematrix.h:420
size_t num_rows() const
returns number of rows
Definition gpusparsematrix.h:337
void assureValuesSize(size_t s)
Definition gpusparsematrix_impl.h:555
int get_nnz_max_cols(size_t maxCols)
Definition gpusparsematrix_impl.h:565
int get_index_internal(size_t row, int col) const
Definition gpusparsematrix_impl.h:350
bool set_as_copy_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
create/recreate this as a copy of GPUSparseMatrix B
Definition gpusparsematrix_impl.h:273
void set_matrix_row(size_t row, connection *c, size_t nr)
Definition gpusparsematrix_impl.h:230
GPUSparseMatrix< value_type > this_type
Definition gpusparsematrix.h:107
bool axpy(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A*w1 (A = this matrix)
Definition gpusparsematrix_impl.h:170
bool scale(double d)
Definition gpusparsematrix_impl.h:286
int get_index_const(int r, int c) const
Definition gpusparsematrix_impl.h:381
void check_fragmentation() const
Definition gpusparsematrix_impl.h:548
void add(const M &mat)
Definition gpusparsematrix_impl.h:304
void mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const
calculates dest += alpha * A[row, .] v;
Definition gpusparsematrix_impl.h:149
void copyToNewSize(size_t newSize)
Definition gpusparsematrix.h:564
void get(M &mat) const
get local matrix
Definition gpusparsematrix_impl.h:335
bool axpy_transposed(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A^T*w1 (A = this matrix)
Definition gpusparsematrix_impl.h:190
GPUSparseMatrix()
constructor for empty GPUSparseMatrix
Definition gpusparsematrix_impl.h:52
bool resize_and_clear(size_t newRows, size_t newCols)
resizes the GPUSparseMatrix
Definition gpusparsematrix_impl.h:66
size_t num_cols() const
returns the number of cols
Definition gpusparsematrix.h:340
int get_index(int r, int c)
Definition gpusparsematrix_impl.h:393
#define PROFILE_GPUMATRIX(name)
Definition gpusparsematrix.h:52
#define UG_ASSERT(expr, msg)
Definition assert.h:70
#define UG_LOG(msg)
Definition log.h:367
double number
Definition types.h:124
the ug namespace
bool MatMultAdd(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const matrix_t &A1, const vector_t &w1)
calculates dest = alpha1*v1 + beta1 * A1 *w1;
Definition operations_mat.h:68
size_t num_rows() const
Definition sparsematrix_interface.h:38
const_row_iterator end_row(size_t row) const
const_row_iterator begin_row(size_t row) const
size_t num_cols() const
Definition sparsematrix_interface.h:39