33 #ifndef __H__UG__CPU_ALGEBRA__GPUSparseMatrix_IMPL__
34 #define __H__UG__CPU_ALGEBRA__GPUSparseMatrix_IMPL__
62 if(bNeedsValues) values.resize(32);
69 UG_LOG(
this <<
"GPUSparseMatrix::resize_and_clear(" << newRows <<
", " << newCols <<
")");
70 rowStart.clear(); rowStart.resize(newRows+1, -1);
71 rowMax.clear(); rowMax.resize(newRows);
72 rowEnd.clear(); rowEnd.resize(newRows, -1);
76 cols.clear(); cols.resize(newRows);
78 if(bNeedsValues) values.resize(newRows);
87 UG_LOG(
this <<
"GPUSparseMatrix_resize_and_keep_values(" << newRows <<
", " << newCols <<
")");
89 if(newRows == 0 && newCols == 0)
90 return resize_and_clear(0,0);
95 rowStart.resize(newRows+1, -1);
96 rowMax.resize(newRows);
97 rowEnd.resize(newRows, -1);
98 for(
size_t i=oldrows; i<newRows; i++)
104 if((
int)newCols < m_numCols)
105 copyToNewSize(get_nnz_max_cols(newCols), newCols);
140 for(
size_t r=0; r<B.
num_rows(); r++)
148 template<
typename vector_t>
157 template<
typename vector_t>
162 &beta, get_matrix_descr(), get_device_value_ptr(), get_device_rowStart(), get_device_cols(), y.get_dev_ptr(),
163 &
alpha, x.get_dev_ptr());
169 template<
typename vector_t>
171 const number &alpha1,
const vector_t &v1,
172 const number &beta1,
const vector_t &w1)
const
177 if(alpha1 == 0.0 || &dest == &v1)
178 axpy(alpha1, dest, beta1, w1);
181 axpy(0.0, dest, beta1, w1);
182 dest.add(alpha1, v1);
189 template<
typename vector_t>
191 const number &alpha1,
const vector_t &v1,
192 const number &beta1,
const vector_t &w1)
const
203 check_fragmentation();
204 for(
size_t row=0; row<
num_rows(); row++)
207 if(it.index() == row)
223 if(it.index() != i && it.value() != 0.0)
258 for(
size_t i=0; i<nr; i++)
259 operator()(row, c[i].
iIndex) = c[i].dValue;
267 for(
size_t i=0; i<nr; i++)
268 operator()(row, c[i].
iIndex) += c[i].dValue;
277 for(
size_t i=0; i < B.
num_rows(); i++)
289 for(
size_t i=0; i <
num_rows(); i++)
306 for(
size_t i=0; i < mat.num_rows(); i++)
308 int r = mat.row_index(i);
309 for(
size_t j=0; j < mat.num_cols(); j++)
311 int c = mat.col_index(j);
312 (*this)(r,c) += mat(i,j);
322 for(
size_t i=0; i < mat.num_rows(); i++)
324 int r = mat.row_index(i);
325 for(
size_t j=0; j < mat.num_cols(); j++)
327 int c = mat.col_index(j);
328 (*this)(r,c) = mat(i,j);
337 for(
size_t i=0; i < mat.num_rows(); i++)
339 int r = mat.row_index(i);
340 for(
size_t j=0; j < mat.num_cols(); j++)
342 int c = mat.col_index(j);
343 mat(i,j) = (*this)(r,c);
353 assert(rowStart[row] != -1);
354 int l = rowStart[row];
362 else if(cols[mid] > col)
369 if(mid < rowStart[row])
371 else if(mid == rowEnd[row] || col <= cols[mid])
374 UG_ASSERT(ret <= rowEnd[row] && ret >= rowStart[row],
"row iterator row " << row <<
" pos " << ret <<
" out of bounds [" << rowStart[row] <<
", " << rowEnd[row] <<
"]");
383 if(rowStart[r] == -1 || rowStart[r] == rowEnd[r])
return -1;
384 int index=get_index_internal(r, c);
385 if(index >= rowStart[r] && index < rowEnd[r] && cols[index] == c)
397 if(rowStart[r] == -1 || rowStart[r] == rowEnd[r])
401 assureValuesSize(maxValues+1);
402 rowStart[r] = maxValues;
403 rowEnd[r] = maxValues+1;
404 rowMax[r] = maxValues+1;
405 if(bNeedsValues) values[maxValues] = 0.0;
417 int index=get_index_internal(r, c);
429 assert(index == rowEnd[r] || cols[index] > c);
430 assert(index == rowStart[r] || cols[index-1] < c);
431 for(
int i=rowStart[r]+1; i<rowEnd[r]; i++)
432 assert(cols[i] > cols[i-1]);
434 if(rowEnd[r] == rowMax[r] && rowEnd[r] == maxValues
435 && maxValues < (
int)cols.size())
443 if(rowEnd[r] == rowMax[r])
447 int newSize = (rowEnd[r]-rowStart[r])*2;
448 if(maxValues+newSize > (
int)cols.size())
451 assureValuesSize(maxValues+newSize);
452 index=get_index_internal(r, c);
455 fragmented += rowEnd[r]-rowStart[r];
456 index = index-rowStart[r]+maxValues;
457 int j=rowEnd[r]-rowStart[r]+maxValues;
459 for(
int i=rowEnd[r]-1; i>=rowStart[r]; i--, j--)
462 if(bNeedsValues) values[j] = values[i];
464 if(i==rowStart[r])
break;
466 rowEnd[r] = maxValues+rowEnd[r]-rowStart[r]+1;
467 rowStart[r] = maxValues;
468 rowMax[r] = maxValues+newSize;
469 maxValues += newSize;
476 for(
int i=rowEnd[r]-1; i>=index; i--)
478 if(bNeedsValues) values[i+1] = values[i];
484 if(bNeedsValues) values[index] = 0.0;
489 assert(index >= rowStart[r] && index < rowEnd[r]);
490 for(
int i=rowStart[r]+1; i<rowEnd[r]; i++)
491 assert(cols[i] > cols[i-1]);
507 || (newSize > values.size() && (100.0*nnz)/newSize < 20 && newSize <= cols.capacity()) )
509 UG_ASSERT(newSize > values.size(),
"no nnz-defragmenting while using iterators.");
511 cols.resize(newSize);
512 cols.resize(cols.capacity());
513 if(bNeedsValues) { values.resize(newSize); values.resize(cols.size()); }
517 std::vector<value_type> v(newSize);
518 std::vector<int> c(newSize);
522 if(rowStart[r] == -1)
523 rowStart[r] = rowEnd[r] = rowMax[r] = j;
527 for(
int k=rowStart[r]; k<rowEnd[r]; k++)
529 if(cols[k] < (
int)maxCol)
531 if(bNeedsValues) v[j] = values[k];
537 rowEnd[r] = rowMax[r] = j;
543 if(bNeedsValues) std::swap(values, v);
550 if((
double)nnz/(
double)maxValues < 0.9)
551 (
const_cast<this_type*
>(
this))->defragment();
557 if(
s <= cols.size())
return;
558 size_t newSize = nnz*2;
559 if(newSize <
s) newSize =
s;
560 copyToNewSize(newSize);
570 if(rowStart[r] == -1)
continue;
571 for(
int k=rowStart[r]; k<rowEnd[r]; k++)
572 if(cols[k] < (
int)maxCols)
Definition: connection.h:40
size_t iIndex
Definition: connection.h:42
static cusparseHandle_t get_cusparseHandle()
Definition: cuda_manager.h:102
Definition: gpusparsematrix.h:400
Definition: gpusparsematrix.h:383
sparse matrix for big, variable sparse matrices.
Definition: gpusparsematrix.h:102
bool is_isolated(size_t i) const
check for isolated condition of an index
Definition: gpusparsematrix_impl.h:218
bool resize_and_keep_values(size_t newRows, size_t newCols)
Definition: gpusparsematrix_impl.h:84
row_iterator end_row(size_t r)
Definition: gpusparsematrix.h:421
void set(const M &mat)
set local matrix
Definition: gpusparsematrix_impl.h:320
bool set_as_transpose_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
write in a empty GPUSparseMatrix (this) the transpose GPUSparseMatrix of B.
Definition: gpusparsematrix_impl.h:113
void add_matrix_row(size_t row, connection *c, size_t nr)
Definition: gpusparsematrix_impl.h:264
row_iterator begin_row(size_t r)
Definition: gpusparsematrix.h:420
size_t num_rows() const
returns number of rows
Definition: gpusparsematrix.h:337
void assureValuesSize(size_t s)
Definition: gpusparsematrix_impl.h:555
int get_nnz_max_cols(size_t maxCols)
Definition: gpusparsematrix_impl.h:565
int get_index_internal(size_t row, int col) const
Definition: gpusparsematrix_impl.h:350
bool set_as_copy_of(const GPUSparseMatrix< value_type > &B, double scale=1.0)
create/recreate this as a copy of GPUSparseMatrix B
Definition: gpusparsematrix_impl.h:273
void set_matrix_row(size_t row, connection *c, size_t nr)
Definition: gpusparsematrix_impl.h:230
bool axpy(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A*w1 (A = this matrix)
Definition: gpusparsematrix_impl.h:170
bool scale(double d)
Definition: gpusparsematrix_impl.h:286
int get_index_const(int r, int c) const
Definition: gpusparsematrix_impl.h:381
void check_fragmentation() const
Definition: gpusparsematrix_impl.h:548
void add(const M &mat)
Definition: gpusparsematrix_impl.h:304
void mat_mult_add_row(size_t row, typename vector_t::value_type &dest, double alpha, const vector_t &v) const
calculates dest += alpha * A[row, .] v;
Definition: gpusparsematrix_impl.h:149
void copyToNewSize(size_t newSize)
Definition: gpusparsematrix.h:564
void get(M &mat) const
get local matrix
Definition: gpusparsematrix_impl.h:335
bool axpy_transposed(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const vector_t &w1) const
calculate dest = alpha1*v1 + beta1*A^T*w1 (A = this matrix)
Definition: gpusparsematrix_impl.h:190
GPUSparseMatrix()
constructor for empty GPUSparseMatrix
Definition: gpusparsematrix_impl.h:52
bool resize_and_clear(size_t newRows, size_t newCols)
resizes the GPUSparseMatrix
Definition: gpusparsematrix_impl.h:66
size_t num_cols() const
returns the number of cols
Definition: gpusparsematrix.h:340
int get_index(int r, int c)
Definition: gpusparsematrix_impl.h:393
#define PROFILE_GPUMATRIX(name)
Definition: gpusparsematrix.h:52
#define UG_ASSERT(expr, msg)
Definition: assert.h:70
#define UG_LOG(msg)
Definition: log.h:367
double number
Definition: types.h:124
bool MatMultAdd(vector_t &dest, const number &alpha1, const vector_t &v1, const number &beta1, const matrix_t &A1, const vector_t &w1)
calculates dest = alpha1*v1 + beta1 * A1 *w1;
Definition: operations_mat.h:68
T value_type
Definition: sparsematrix_interface.h:2
size_t num_rows() const
Definition: sparsematrix_interface.h:38
const_row_iterator end_row(size_t row) const
value_type & operator()(size_t r, size_t c)
const_row_iterator begin_row(size_t row) const
size_t num_cols() const
Definition: sparsematrix_interface.h:39