pcl_multi_group_communicator_impl.hpp
/*
 * Copyright (c) 2017: G-CSC, Goethe University Frankfurt
 * Author: Sebastian Reiter
 *
 * This file is part of UG4.
 *
 * UG4 is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License version 3 (as published by the
 * Free Software Foundation) with the following additional attribution
 * requirements (according to LGPL/GPL v3 §7):
 *
 * (1) The following notice must be displayed in the Appropriate Legal Notices
 * of covered and combined works: "Based on UG4 (www.ug4.org/license)".
 *
 * (2) The following notice must be displayed at a prominent place in the
 * terminal output of covered works: "Based on UG4 (www.ug4.org/license)".
 *
 * (3) The following bibliography is recommended for citation and must be
 * preserved in all covered files:
 * "Reiter, S., Vogel, A., Heppner, I., Rupp, M., and Wittum, G. A massively
 * parallel geometric multigrid solver on hierarchically distributed grids.
 * Computing and visualization in science 16, 4 (2013), 151-164"
 * "Vogel, A., Reiter, S., Rupp, M., Nägel, A., and Wittum, G. UG4 -- a novel
 * flexible software system for simulating pde based models on high performance
 * computers. Computing and visualization in science 16, 4 (2013), 165-179"
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 */

#ifndef __H__PCL_multi_group_communicator_impl
#define __H__PCL_multi_group_communicator_impl

#include "pcl_methods.h"
#include "pcl_datatype.h"
#include "pcl_reduce_traits.h"

namespace pcl {

template<typename T>
void MultiGroupCommunicator::
allreduce (const T *sendBuf, T *recvBuf,
           size_t countPerGroup, ReduceOperation op) const
{
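//  Data layout (cf. pcl_multi_group_communicator.h): m_memberships holds the
//  indices of the groups in which this process participates, m_groupOffsets
//  gives the offset of each of those groups in m_groupMembers, and
//  m_groupMembers consecutively holds the proc-indices (relative to m_com) of
//  each group. sendBuf and recvBuf are expected to hold one block of
//  'countPerGroup' entries per membership; block 'imem' corresponds to the
//  group m_memberships[imem].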
//todo: If groups are big (e.g. >= 0.25*m_com.size()), an MPI allreduce,
//      possibly on sub-communicators, could be beneficial.

//todo: Performance may be gained if multiple sends from one proc to another
//      were handled in just one send.
    using namespace std;

    if(m_memberships.empty()){
        return;
    }

    const int tag = 4378;
    const int procRank = m_com.get_local_proc_id ();

    vector<T> receivedVals;
    size_t numSends = 0;
    size_t numRecvs = 0;

//  (1) collect the values of all procs on the root of each group (the first proc in each group)
//  first determine the number of required sends/receives
    {
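    //  The first process listed for each group (m_groupMembers[o]) acts as the
    //  group's root: it receives one block from every other group member,
    //  while non-root members send their block to the root.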
        for(size_t imem = 0; imem < m_memberships.size(); ++imem){
            const size_t o = m_groupOffsets [imem];
            const size_t s = m_groupOffsets [imem + 1] - o;

            if (s <= 1) continue;

            if (m_groupMembers [o] == procRank)
                numRecvs += (s - 1);
            else
                ++ numSends;
        }

        receivedVals.resize (numRecvs * countPerGroup);

    //  send / receive
        std::vector<MPI_Request> sendRequests(numSends);
        std::vector<MPI_Request> recvRequests(numRecvs);
        size_t sendCount = 0;
        size_t recvCount = 0;

        for(size_t imem = 0; imem < m_memberships.size(); ++imem){
            const size_t o = m_groupOffsets [imem];
            const size_t s = m_groupOffsets [imem + 1] - o;

            if (s == 1) continue;

            if (m_groupMembers [o] == procRank){
                for(size_t i = 1; i < s; ++i){
                    MPI_Irecv(&receivedVals.front() + recvCount * countPerGroup,
                              (int) (countPerGroup * sizeof (T)),
                              MPI_UNSIGNED_CHAR,
                              m_com.get_proc_id ((size_t)m_groupMembers [o + i]),
                              tag,
                              m_com.get_mpi_communicator (),
                              &recvRequests[recvCount]);
                    ++recvCount;
                }
            }
            else {
            //  note: const_cast is required for some MPI implementations...
                MPI_Isend(const_cast<T*>(sendBuf) + imem * countPerGroup,
                          (int) (countPerGroup * sizeof (T)),
                          MPI_UNSIGNED_CHAR,
                          m_com.get_proc_id ((size_t)m_groupMembers [o]),
                          tag,
                          m_com.get_mpi_communicator (),
                          &sendRequests[sendCount]);
                ++sendCount;
            }
        }

        UG_COND_THROW(sendCount != sendRequests.size(),
                      "sendCount != sendRequests.size()");

        UG_COND_THROW(recvCount != recvRequests.size(),
                      "recvCount != recvRequests.size()");

        Waitall (sendRequests);
        Waitall (recvRequests);
    }

//  (2) apply the reduce operation
    {
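    //  Only group roots reduce: each root seeds recvBuf with its own block and
    //  then folds in every received block via the Reducer functor, consuming
    //  receivedVals in the order in which the receives were posted in step (1).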
        Reducer<T> reducer (op);

        size_t recvHandled = 0;

        for(size_t imem = 0; imem < m_memberships.size(); ++imem){
            const size_t o = m_groupOffsets [imem];
            const size_t s = m_groupOffsets [imem + 1] - o;

            if (m_groupMembers [o] == procRank){
            //  copy the local data to recvBuf
                const size_t targetBaseInd = imem * countPerGroup;

                for(size_t i = 0; i < countPerGroup; ++i)
                    recvBuf[targetBaseInd + i] = sendBuf[targetBaseInd + i];

            //  perform the reduce operation for the data of each group member
                for(size_t iblock = 1; iblock < s; ++iblock){
                    const size_t srcBaseInd = recvHandled * countPerGroup;

                    for(size_t i = 0; i < countPerGroup; ++i){
                        recvBuf[targetBaseInd + i] =
                                reducer (recvBuf[targetBaseInd + i],
                                         receivedVals[srcBaseInd + i]);
                    }
                    ++recvHandled;
                }
            }
        }
    }

//  (3) send the reduced values from each group root to all other group members
    {
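    //  The communication pattern of step (1) is reversed: each root sends its
    //  reduced block back to the remaining group members, which receive it
    //  directly into their slot of recvBuf.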
    //  we have to send data to each process from which we received data and vice versa
        const size_t numSendBack = numRecvs;
        const size_t numRecvBack = numSends;

        std::vector<MPI_Request> sendRequests(numSendBack);
        std::vector<MPI_Request> recvRequests(numRecvBack);

        size_t sendCount = 0;
        size_t recvCount = 0;

        for(size_t imem = 0; imem < m_memberships.size(); ++imem){
            const size_t o = m_groupOffsets [imem];
            const size_t s = m_groupOffsets [imem + 1] - o;

            if (s == 1) continue;

            if (m_groupMembers [o] == procRank){
                for(size_t i = 1; i < s; ++i){
                    MPI_Isend(recvBuf + imem * countPerGroup,
                              (int) (countPerGroup * sizeof (T)),
                              MPI_UNSIGNED_CHAR,
                              m_com.get_proc_id ((size_t)m_groupMembers [o + i]),
                              tag,
                              m_com.get_mpi_communicator (),
                              &sendRequests[sendCount]);
                    ++sendCount;
                }
            }
            else {
                MPI_Irecv(recvBuf + imem * countPerGroup,
                          (int) (countPerGroup * sizeof (T)),
                          MPI_UNSIGNED_CHAR,
                          m_com.get_proc_id ((size_t)m_groupMembers [o]),
                          tag,
                          m_com.get_mpi_communicator (),
                          &recvRequests[recvCount]);
                ++recvCount;
            }
        }

        UG_COND_THROW(sendCount != sendRequests.size(),
                      "sendCount != sendRequests.size()");

        UG_COND_THROW(recvCount != recvRequests.size(),
                      "recvCount != recvRequests.size()");

        Waitall (sendRequests);
        Waitall (recvRequests);
    }
}

}// end of namespace pcl

#endif  //__H__PCL_multi_group_communicator_impl
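
A minimal usage sketch (not part of the original file), assuming an already constructed multi-group communicator mgc of the type implemented above, a caller-known numMemberships equal to the number of groups this process participates in, and that MPI_SUM is a ReduceOperation handled by the Reducer traits; one block of countPerGroup values is reduced per membership:

    const size_t countPerGroup = 2;
    std::vector<double> send (numMemberships * countPerGroup, 1.0);
    std::vector<double> recv (numMemberships * countPerGroup, 0.0);
    mgc.allreduce (&send.front(), &recv.front(), countPerGroup, MPI_SUM);
    //  recv[imem * countPerGroup + i] now holds the sum of entry i over all
    //  members of the imem-th group in which this process participates.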