
Commit adbff4f

Sending cell counts to neighbouring procs, collecting cell counts from each process
1 parent 5c4d42b commit adbff4f

File tree

src/pmpo_MPMesh.hpp
src/pmpo_MPMesh_assembly.hpp

2 files changed: +105 -0 lines changed
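The core of this change is a per-rank cell-count exchange: each rank tallies how many of its cells are owned by every other process, then learns the reverse direction with a single MPI_Alltoall, as the diff of src/pmpo_MPMesh_assembly.hpp below shows. A minimal, self-contained sketch of just that pattern (the names sendCounts and recvCounts are illustrative and not taken from the repository):

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank, nProcs;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nProcs);

  // sendCounts[p]: how many cells this rank would send to rank p
  // (placeholder values; in pmpo the tally comes from the element owners)
  std::vector<int> sendCounts(nProcs, 0);
  for (int p = 0; p < nProcs; p++)
    if (p != rank) sendCounts[p] = 1;

  // recvCounts[p]: how many cells rank p will send to this rank
  std::vector<int> recvCounts(nProcs, 0);
  MPI_Alltoall(sendCounts.data(), 1, MPI_INT,
               recvCounts.data(), 1, MPI_INT, MPI_COMM_WORLD);

  for (int p = 0; p < nProcs; p++)
    printf("Rank %d expects %d cells from rank %d\n", rank, recvCounts[p], p);

  MPI_Finalize();
  return 0;
}

Each entry of recvCounts is then used to size the matching receive buffer before the point-to-point exchange of per-cell vertex data.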

src/pmpo_MPMesh.hpp

Lines changed: 1 addition & 0 deletions
@@ -83,6 +83,7 @@ class MPMesh{

   // Full assembly on GPU
   void assembleField(int vtxPerElm, int nCells, int nVerticesSolve, int nVertices, double* array_sub, double* array_full);
+  void startCommunication(int nVerticesSolve);

   void printVTP_mesh(int printVTPIndex);
 };

src/pmpo_MPMesh_assembly.hpp

Lines changed: 104 additions & 0 deletions
@@ -528,6 +528,110 @@ void MPMesh::assembleField(int vtxPerElm, int nCells, int nVerticesSolve, int nV
   kkDblViewHostU arrayHost(array_full, nVertices);
   Kokkos::deep_copy(arrayHost, array_full_d);
   pumipic::RecordTime("polyMPOgetAssemblyField", timer2.seconds());
+
+  MPMesh::startCommunication(nVerticesSolve);
+}
+
+//Start communication routine
+void MPMesh::startCommunication(int nVerticesSolve){
+
+  MPI_Comm comm = p_MPs->getMPIComm();
+  int comm_rank, nProcs;
+  MPI_Comm_rank(comm, &comm_rank);
+  MPI_Comm_size(comm, &nProcs);
+
+  int nCells = p_mesh->getNumElements();
+  int numVertices = p_mesh->getNumVertices();
+
+  //Owning process of each element: copy from GPU to CPU
+  auto elmOwners = p_mesh->getElm2Process();
+  auto elmOwners_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                            elmOwners);
+
+  //Global vertex IDs: copy from GPU to CPU
+  auto vtxGlobal = p_mesh->getVtxGlobal();
+  auto vtxGlobal_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                            vtxGlobal);
+
+  //Element-to-vertex connectivity: copy from GPU to CPU
+  auto elm2VtxConn = p_mesh->getElm2VtxConn();
+  auto elm2VtxConn_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                              elm2VtxConn);
+  //Debugging: print the owner of the first and last local element
+  for (size_t k=0; k<elmOwners_host.size(); k++)
+    if(k==0 || k==elmOwners_host.size()-1)
+      printf("Rank %d Owning Proc %d \n", comm_rank, elmOwners_host(k));
+
+  //Find adjacent processes and the number of adjacent cells for each process
+  std::map<int, int> adjProcsForCells;
+  std::vector<int> send_each_proc(nProcs);
+  for (int iCell=0; iCell<nCells; iCell++){
+    if(elmOwners_host(iCell) != comm_rank){
+      auto ownerProc = elmOwners_host(iCell);
+      adjProcsForCells[ownerProc] += 1;
+      send_each_proc[ownerProc] += 1;
+    }
+  }
+  //Debugging
+  printf("Size adjProcs %zu \n", adjProcsForCells.size());
+  for (const auto& [key, value] : adjProcsForCells){
+    printf("Process %d sends to %d size %d\n", comm_rank, key, value);
+  }
+
+  //Find the number of cells received from each process
+  std::vector<int> total_recv_each_proc(nProcs);
+  MPI_Alltoall(send_each_proc.data(), 1, MPI_INT, total_recv_each_proc.data(), 1, MPI_INT, comm);
+  //Debugging for receiving
+  for (int i=0; i<nProcs; i++)
+    printf("Rank %d receiving from %d size %d \n", comm_rank, i, total_recv_each_proc[i]);
+
+  //Create buffers of data to send: 4 ints per vertex slot, maxVtxsPerElm slots per cell
+  std::map<int, std::vector<int>> cellDataToSend;
+  std::map<int, int> counter;
+  for (const auto& [key, value] : adjProcsForCells){
+    cellDataToSend[key].resize(4 * value * maxVtxsPerElm);
+    counter[key] = 0;
+  }
+
+  for (int iCell=0; iCell<nCells; iCell++){
+    if(elmOwners_host(iCell) != comm_rank){
+      int ownerProc = elmOwners_host(iCell);
+      auto idx_start = counter[ownerProc]*4*maxVtxsPerElm;
+      int nVtxE = elm2VtxConn_host(iCell,0);
+      for (int v=0; v<nVtxE; v++){
+        int vID = elm2VtxConn_host(iCell, v+1)-1;
+        int idx = idx_start + v*4;
+        if (vID < nVerticesSolve)
+          cellDataToSend[ownerProc][idx+0] = 0; //TODO better way
+        else
+          cellDataToSend[ownerProc][idx+0] = 1;
+        cellDataToSend[ownerProc][idx+1] = comm_rank; //sending proc, TODO not needed
+        cellDataToSend[ownerProc][idx+2] = vID; //local ID
+        cellDataToSend[ownerProc][idx+3] = vtxGlobal_host(vID); //global ID
+      }
+      counter[ownerProc] += 1;
+      //assert(counter[ownerProc] <= adjProcsForCells[ownerProc]);
+    }
+  }
+
+  //Post non-blocking sends; MPI_ANY_TAG is only valid for receives, so use a fixed tag
+  std::vector<MPI_Request> s_requests(cellDataToSend.size());
+  int count_s_request = 0;
+  for (auto& [proc, vec] : cellDataToSend){
+    MPI_Isend(vec.data(), static_cast<int>(vec.size()), MPI_INT, proc, 0, comm, &s_requests[count_s_request]);
+    count_s_request += 1;
+  }
+
+  //Receive buffers sized to match the sends: 4*maxVtxsPerElm ints per incoming cell
+  std::vector<std::vector<int>> cellDataToReceive(nProcs);
+  for (int iProc=0; iProc<nProcs; iProc++)
+    cellDataToReceive[iProc].resize(4 * total_recv_each_proc[iProc] * maxVtxsPerElm);
+
+  //Post non-blocking receives only from ranks that actually send cells
+  std::vector<MPI_Request> r_requests(nProcs, MPI_REQUEST_NULL);
+  for (int iProc=0; iProc<nProcs; iProc++)
+    if (total_recv_each_proc[iProc] > 0)
+      MPI_Irecv(cellDataToReceive[iProc].data(), static_cast<int>(cellDataToReceive[iProc].size()), MPI_INT, iProc,
+                0, comm, &r_requests[iProc]);
+
 }

 template <MeshFieldIndex meshFieldIndex>
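The commit posts the non-blocking sends and receives but does not yet complete them. A hypothetical continuation, assuming the variables from startCommunication above (s_requests, r_requests, cellDataToReceive, total_recv_each_proc, nProcs, maxVtxsPerElm) are still in scope, would wait on the requests and unpack the four ints stored per vertex slot; this is a sketch of one possible follow-up, not code from the repository:

// Hypothetical continuation (not in this commit): complete the exchange and
// unpack the payload. Each incoming cell occupies 4*maxVtxsPerElm ints,
// laid out per vertex slot as [haloFlag, sendingRank, localID, globalID].
// Slots beyond a cell's actual vertex count are left zero-filled by the
// sender, and the valid slot count itself is not transmitted in this commit.
MPI_Waitall(static_cast<int>(s_requests.size()), s_requests.data(), MPI_STATUSES_IGNORE);
MPI_Waitall(static_cast<int>(r_requests.size()), r_requests.data(), MPI_STATUSES_IGNORE);

for (int iProc = 0; iProc < nProcs; iProc++) {
  const std::vector<int>& buf = cellDataToReceive[iProc];
  for (int iCell = 0; iCell < total_recv_each_proc[iProc]; iCell++) {
    const int cellStart = iCell * 4 * maxVtxsPerElm;
    for (int v = 0; v < maxVtxsPerElm; v++) {
      const int idx      = cellStart + v * 4;
      const int haloFlag = buf[idx + 0]; // 1 if the vertex lies outside the sender's solve range
      const int srcRank  = buf[idx + 1]; // rank that packed this cell
      const int localID  = buf[idx + 2]; // vertex local ID on the sender
      const int globalID = buf[idx + 3]; // vertex global ID
      (void)haloFlag; (void)srcRank; (void)localID; (void)globalID; // placeholder for real accumulation
    }
  }
}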

0 commit comments