pcl_multi_group_communicator_impl.hpp
/*
 * Copyright (c) 2017: G-CSC, Goethe University Frankfurt
 * Author: Sebastian Reiter
 *
 * This file is part of UG4.
 *
 * UG4 is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License version 3 (as published by the
 * Free Software Foundation) with the following additional attribution
 * requirements (according to LGPL/GPL v3 §7):
 *
 * (1) The following notice must be displayed in the Appropriate Legal Notices
 * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
 *
 * (2) The following notice must be displayed at a prominent place in the
 * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
 *
 * (3) The following bibliography is recommended for citation and must be
 * preserved in all covered files:
 * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
 * parallel geometric multigrid solver on hierarchically distributed grids.
 * Computing and visualization in science 16, 4 (2013), 151-164"
 * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
 * flexible software system for simulating pde based models on high performance
 * computers. Computing and visualization in science 16, 4 (2013), 165-179"
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 */

#ifndef __H__PCL_multi_group_communicator_impl
#define __H__PCL_multi_group_communicator_impl

#include "pcl_methods.h"
#include "pcl_datatype.h"
#include "pcl_reduce_traits.h"

namespace pcl {

template<typename T>
void MultiGroupCommunicator::
allreduce (const T *sendBuf, T *recvBuf,
           size_t countPerGroup, ReduceOperation op) const
{
// UG_LOG("<dbg> allreduce STARTS\n");
//todo: If groups are big (e.g. >= 0.25*m_com.size()), an MPI allreduce,
//      possibly on sub-communicators, could be beneficial.

//todo: Performance may be gained if multiple sends from one proc to another
//      were handled in just one send.
	using namespace std;

	if(m_memberships.empty()){
		// UG_LOG("<dbg> allreduce ENDS EARLY\n");
		return;
	}

	const int tag = 4378;
	const int procRank = m_com.get_local_proc_id ();

	vector<T> receivedVals;
	size_t numSends = 0;
	size_t numRecvs = 0;

// UG_LOG("<dbg> 1\n");
// (1) collect values of all procs on the root of each group (first proc in group);
//     determine the number of required sends/receives
	{
		for(size_t imem = 0; imem < m_memberships.size(); ++imem){
			const size_t o = m_groupOffsets [imem];
			const size_t s = m_groupOffsets [imem + 1] - o;

			if (s <= 1) continue;

			if (m_groupMembers [o] == procRank)
				numRecvs += (s - 1);
			else
				++ numSends;
		}

		receivedVals.resize (numRecvs * countPerGroup);

	//	send / receive
		std::vector<MPI_Request> sendRequests(numSends);
		std::vector<MPI_Request> recvRequests(numRecvs);
		size_t sendCount = 0;
		size_t recvCount = 0;

		for(size_t imem = 0; imem < m_memberships.size(); ++imem){
			const size_t o = m_groupOffsets [imem];
			const size_t s = m_groupOffsets [imem + 1] - o;

			if (s == 1) continue;

			if (m_groupMembers [o] == procRank){
				for(size_t i = 1; i < s; ++i){
					MPI_Irecv(&receivedVals.front() + recvCount * countPerGroup,
					          (int) countPerGroup * sizeof (T),
					          MPI_UNSIGNED_CHAR,
					          m_com.get_proc_id ((size_t)m_groupMembers [o + i]),
					          tag,
					          m_com.get_mpi_communicator (),
					          &recvRequests[recvCount]);
					++recvCount;
				}
			}
			else {
			//	note: const_cast required for some MPI implementations...
				MPI_Isend(const_cast<T*>(sendBuf) + imem * countPerGroup,
				          (int) countPerGroup * sizeof (T),
				          MPI_UNSIGNED_CHAR,
				          m_com.get_proc_id ((size_t)m_groupMembers [o]),
				          tag,
				          m_com.get_mpi_communicator (),
				          &sendRequests[sendCount]);
				++sendCount;
			}
		}

		UG_COND_THROW(sendCount != sendRequests.size(),
		              "sendCount != sendRequests.size()");

		UG_COND_THROW(recvCount != recvRequests.size(),
		              "recvCount != recvRequests.size()");

		Waitall (sendRequests);
		Waitall (recvRequests);
	}
// UG_LOG("<dbg> 2\n");
// (2) apply the reduce operation
	{
		Reducer<T> reducer (op);

		size_t recvHandled = 0;

		for(size_t imem = 0; imem < m_memberships.size(); ++imem){
			const size_t o = m_groupOffsets [imem];
			const size_t s = m_groupOffsets [imem + 1] - o;

			if (m_groupMembers [o] == procRank){
			//	copy local data to recvBuf
				const size_t targetBaseInd = imem * countPerGroup;

				for(size_t i = 0; i < countPerGroup; ++i)
					recvBuf[targetBaseInd + i] = sendBuf[targetBaseInd + i];

			//	perform the reduce operation on the data of each group member
				for(size_t iblock = 1; iblock < s; ++iblock){
					size_t srcBaseInd = recvHandled * countPerGroup;

					for(size_t i = 0; i < countPerGroup; ++i){
						recvBuf[targetBaseInd + i] =
								reducer (recvBuf[targetBaseInd + i],
								         receivedVals[srcBaseInd + i]);
					}
					++recvHandled;
				}
			}
		}
	}
// UG_LOG("<dbg> 3\n");
// (3) send values from group-roots to all group members
	{
	//	we have to send data to each process from which we received data and vice versa
		const size_t numSendBack = numRecvs;
		const size_t numRecvBack = numSends;

		std::vector<MPI_Request> sendRequests(numSendBack);
		std::vector<MPI_Request> recvRequests(numRecvBack);

		size_t sendCount = 0;
		size_t recvCount = 0;

		for(size_t imem = 0; imem < m_memberships.size(); ++imem){
			const size_t o = m_groupOffsets [imem];
			const size_t s = m_groupOffsets [imem + 1] - o;

			if (s == 1) continue;

			if (m_groupMembers [o] == procRank){
				for(size_t i = 1; i < s; ++i){
					MPI_Isend(recvBuf + imem * countPerGroup,
					          (int) countPerGroup * sizeof (T),
					          MPI_UNSIGNED_CHAR,
					          m_com.get_proc_id ((size_t)m_groupMembers [o + i]),
					          tag,
					          m_com.get_mpi_communicator (),
					          &sendRequests[sendCount]);
					++sendCount;
				}
			}
			else {
				MPI_Irecv(recvBuf + imem * countPerGroup,
				          (int) countPerGroup * sizeof (T),
				          MPI_UNSIGNED_CHAR,
				          m_com.get_proc_id ((size_t)m_groupMembers [o]),
				          tag,
				          m_com.get_mpi_communicator (),
				          &recvRequests[recvCount]);
				++recvCount;
			}
		}

		UG_COND_THROW(sendCount != sendRequests.size(),
		              "sendCount != sendRequests.size()");

		UG_COND_THROW(recvCount != recvRequests.size(),
		              "recvCount != recvRequests.size()");

		Waitall (sendRequests);
		Waitall (recvRequests);
	}
// UG_LOG("<dbg> allreduce ENDS\n");
}

}// end of namespace

#endif	//__H__PCL_multi_group_communicator_impl
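
For orientation, a short usage sketch follows. It is not part of the header above: the class name MultiGroupCommunicator, the helper function and the way the communicator is obtained are assumptions for illustration (the class and its setup are declared in pcl_multi_group_communicator.h); only the allreduce signature and the buffer layout are taken from the listing.

#include <vector>
#include <mpi.h>
#include "pcl_multi_group_communicator.h"

//	Hypothetical example: 'mgc' is assumed to already describe the groups in
//	which the calling process participates; 'numMemberships' must equal the
//	number of those groups.
void example_group_sums (const pcl::MultiGroupCommunicator& mgc,
                         size_t numMemberships)
{
	const size_t countPerGroup = 3;

//	one block of 'countPerGroup' entries per membership, ordered by membership
	std::vector<double> sendBuf (numMemberships * countPerGroup, 1.0);
	std::vector<double> recvBuf (numMemberships * countPerGroup, 0.0);

//	ReduceOperation is an MPI_Op (see pcl_methods.h), so MPI_SUM can be passed.
//	Afterwards recvBuf[g*countPerGroup + i] holds the sum of the corresponding
//	sendBuf entries over all members of the g-th group of this process.
	mgc.allreduce (sendBuf.data(), recvBuf.data(), countPerGroup, MPI_SUM);
}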
std::vector< int > m_groupMembers
size: m_groupOffsets.back(). Consecutively holds the proc-indices of each group in which the process participates.
Definition pcl_multi_group_communicator.h:75
std::vector< size_t > m_memberships
size: #memberships. Holds indices to groups in which the process participates.
Definition pcl_multi_group_communicator.h:74
ProcessCommunicator m_com
Definition pcl_multi_group_communicator.h:72
std::vector< size_t > m_groupOffsets
size: #memberships+1. Offset of each group in m_groupMembers. The last entry always holds m_groupMembers.size(). (See the layout sketch at the end of this reference list.)
Definition pcl_multi_group_communicator.h:76
void allreduce(const T *sendBuf, T *recvBuf, size_t countPerGroup, ReduceOperation op) const
performs an allreduce between all groups
Definition pcl_multi_group_communicator_impl.hpp:44
MPI_Comm get_mpi_communicator() const
returns the mpi-communicator, in case someone needs it
Definition pcl_process_communicator.h:102
int get_proc_id(size_t index) const
returns the i-th process in the communicator
Definition pcl_process_communicator.cpp:86
int get_local_proc_id(int globalProcID=pcl::ProcRank()) const
returns the proc-id relative to this communicator
Definition pcl_process_communicator.cpp:95
Reducer
Definition pcl_reduce_traits.h:88
void Waitall(std::vector< MPI_Request > &requests, std::vector< MPI_Status > &statuses)
Definition pcl_methods.h:136
MPI_Op ReduceOperation
Definition pcl_methods.h:74
#define UG_COND_THROW(cond, msg)
UG_COND_THROW(cond, msg) : performs a UG_THROW(msg) if cond == true.
Definition error.h:61
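
To make the container documentation above concrete, here is a small stand-alone sketch of the CSR-like layout that allreduce iterates over. All names and values are purely illustrative and not taken from the library; only the offset arithmetic mirrors the loops in the listing.

#include <cstddef>
#include <cstdio>
#include <vector>

int main ()
{
//	Example values for one process: it participates in two groups. For each
//	local membership imem, groupOffsets[imem] points into groupMembers, and
//	the first listed member of a group is its root.
	std::vector<size_t> memberships  = {0, 3};     // indices of the groups this proc is in
	std::vector<size_t> groupOffsets = {0, 3, 5};  // #memberships+1 entries; last == groupMembers.size()
	std::vector<int>    groupMembers = {2, 0, 5,   // group 0: root proc 2, further members 0 and 5
	                                    1, 4};     // group 3: root proc 1, further member 4

	for (size_t imem = 0; imem < memberships.size(); ++imem){
		const size_t o = groupOffsets[imem];
		const size_t s = groupOffsets[imem + 1] - o;	// group size, as computed in allreduce
		std::printf("group %zu: size %zu, root (local proc-id) %d\n",
		            memberships[imem], s, groupMembers[o]);
	}
	return 0;
}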