
Commit 685349e

Working 4 proc 2562 mesh case

1 parent d7d466b · commit 685349e
7 files changed: 247 additions, 56 deletions

src/pmpo_MPMesh.hpp

Lines changed: 6 additions & 1 deletion

```diff
@@ -41,7 +41,9 @@ class MPMesh{
     std::vector<std::vector<int>> haloOwnerLocalIDs;
     std::vector<std::vector<std::pair<int,int>>> ownerToHalos;
     void startCommunication();
-    void communicateFields();
+    void communicateFields(const std::vector<std::vector<double>>& fieldData, const int numEntities, const int numEntries, int mode,
+                           std::vector<std::vector<int>>& recvIDVec, std::vector<std::vector<double>>& recvDataVec);
+
     MPMesh(Mesh* inMesh, MaterialPoints* inMPs):
       p_mesh(inMesh), p_MPs(inMPs) {
     };
@@ -86,6 +88,9 @@ class MPMesh{
                        double* m33, double* m34,
                        double* m44);
 
+    template <MeshFieldIndex meshFieldIndex>
+    void reconstruct_full();
+
     template<MeshFieldIndex meshFieldIndex>
     void setReconstructSlice(int order, MeshFieldType type);
     void reconstructSlices();
```
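For orientation, the sketch below shows how the reworked interface might be driven from calling code; the call order follows the diffs in this commit, while the field index `MeshF_Vel`, the namespace qualification, and the wrapper function are placeholders, not part of the change.

```cpp
// Hypothetical driver (only the MPMesh methods come from this commit):
// build the owner/halo exchange plan once, then reconstruct a field on the
// mesh vertices with halo contributions folded in.
void reconstructVelocity(polyMPO::MPMesh& mpMesh) {
  // Sets up haloOwnerProcs, haloOwnerLocalIDs, and ownerToHalos on each rank.
  mpMesh.startCommunication();

  // Reconstructs the chosen mesh field; internally it calls the new
  // communicateFields(fieldData, numEntities, numEntries, mode, recvIDVec,
  // recvDataVec) overload in gather mode (mode 0).
  mpMesh.reconstruct_full<polyMPO::MeshF_Vel>();
}
```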

src/pmpo_MPMesh_assembly.hpp

Lines changed: 192 additions & 41 deletions
```diff
@@ -445,6 +445,8 @@ void MPMesh::subAssemblyVtx1(int vtxPerElm, int nCells, int comp, double* array)
   Kokkos::Timer timer;
 
   auto VtxCoeffs=this->precomputedVtxCoeffs;
+  std::cout << "SubAssemblyExtent(0) = " << VtxCoeffs.extent(0) << std::endl;
+  std::cout << "SubAssemblyExtent(1) = " << VtxCoeffs.extent(1) << std::endl;
 
   // MPI Information
   MPI_Comm comm = p_MPs->getMPIComm();
```
```diff
@@ -540,25 +542,32 @@ void MPMesh::startCommunication(){
   MPI_Comm_size(comm, &numProcsTot);
 
   //Owning processes and global Numbering
-  auto elmOwners = p_mesh->getElm2Process();
-  auto elm2global = p_mesh->getElmGlobal();
-
+
+  //For Elements Checked
+  //auto entOwners = p_mesh->getElm2Process();
+  //auto ent2global = p_mesh->getElmGlobal();
+  //int numEntities = p_mesh->getNumElements();
+
+  //For Vertices not checked
+  auto entOwners = p_mesh->getVtx2Process();
+  auto ent2global = p_mesh->getVtxGlobal();
+  int numEntities = p_mesh->getNumVertices();
+
   //Loop over elements and find no of owners and halos
-  int numElements = p_mesh->getNumElements();
   Kokkos::View<int> owner_count("owner_count");
   Kokkos::View<int> halo_count("halo_count");
   Kokkos::deep_copy(owner_count, 0);
   Kokkos::deep_copy(halo_count, 0);
-  Kokkos::parallel_for("countOwnerHalo", numElements, KOKKOS_LAMBDA(const int elm){
-    if (elmOwners(elm)==self)
+  Kokkos::parallel_for("countOwnerHalo", numEntities, KOKKOS_LAMBDA(const int elm){
+    if (entOwners(elm)==self)
       Kokkos::atomic_add(&owner_count(), 1);
     else
       Kokkos::atomic_add(&halo_count(), 1);
   });
 
   Kokkos::deep_copy(numOwnersTot, owner_count);
   Kokkos::deep_copy(numHalosTot, halo_count);
-  assert(numHalosTot+numOwnersTot == numElements);
+  assert(numHalosTot+numOwnersTot == numEntities);
   printf("Rank %d owners %d halo %d\n", self, numOwnersTot, numHalosTot);
   int num_ints_per_copy = 2;
 
```
```diff
@@ -576,23 +585,23 @@ void MPMesh::startCommunication(){
   ownerToHalos.resize(numProcsTot);
 
   //Copy owning processes and globalIds to CPU
-  auto elmOwners_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
-                                                            elmOwners);
-  auto elm2global_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
-                                                             elm2global);
+  auto entOwners_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                            entOwners);
+  auto ent2global_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                             ent2global);
 
   //Do Map of Global To Local ID
   //TODO make ordered map; which faster?
   std::unordered_map<int, int> global2local;
-  for (int iEnt = 0; iEnt < numElements; iEnt++) {
-    int globalID = elm2global_host(iEnt);
+  for (int iEnt = 0; iEnt < numEntities; iEnt++) {
+    int globalID = ent2global_host(iEnt);
     global2local[globalID] = iEnt;
   }
 
   //Loop over all halo Entities and find the owning process
   for (auto iEnt=numOwnersTot; iEnt<numOwnersTot+numHalosTot; iEnt++){
-    auto ownerProc = elmOwners_host[iEnt];
-    assert(elmOwners_host(iEnt) != self);
+    auto ownerProc = entOwners_host[iEnt];
+    assert(entOwners_host(iEnt) != self);
     numOwnersOnOtherProcs[ownerProc] = numOwnersOnOtherProcs[ownerProc]+1;
     haloOwnerProcs.push_back(ownerProc);
   }
@@ -605,9 +614,9 @@ void MPMesh::startCommunication(){
     sendBufs[proc].reserve(num_ints_per_copy*numOwnersOnOtherProcs[proc]);
 
   for (int iEnt=numOwnersTot; iEnt<numOwnersTot+numHalosTot; iEnt++) {
-    auto ownerProc = elmOwners_host(iEnt);
+    auto ownerProc = entOwners_host(iEnt);
     assert(ownerProc != self);
-    sendBufs[ownerProc].push_back(elm2global_host(iEnt));
+    sendBufs[ownerProc].push_back(ent2global_host(iEnt));
     sendBufs[ownerProc].push_back(iEnt);
   }
 
```
```diff
@@ -684,12 +693,10 @@ void MPMesh::startCommunication(){
 
   MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
 
-  communicateFields();
-
-  bool debug = true;
+  bool debug = false;
   if(! debug) return;
 
-  printf("Rank %d Owners %d Halos %d Total %d \n", self, numOwnersTot, numHalosTot, numElements);
+  printf("Rank %d Owners %d Halos %d Total %d \n", self, numOwnersTot, numHalosTot, numEntities);
   for (int i=0; i<numProcsTot; i++){
     printf("Rank %d has %d halos which are owners in other rank %d \n", self, numOwnersOnOtherProcs[i], i);
     printf("Rank %d has %d owners wicch are halos in other rank %d \n", self, numHalosOnOtherProcs[i], i);
@@ -698,8 +705,8 @@ void MPMesh::startCommunication(){
   //Check rank 0 sending to rank 1
   if(self==0){
     for (int i=0; i<numHalosTot; i++)
-      if( elmOwners_host(numOwnersTot+i)==1 )
-        printf("Halo Element with lid %d, gid %d and owner %d \n", numOwnersTot+i, elm2global_host(numOwnersTot+i), elmOwners_host(numOwnersTot+i));
+      if( entOwners_host(numOwnersTot+i)==1 )
+        printf("Halo Element with lid %d, gid %d and owner %d \n", numOwnersTot+i, ent2global_host(numOwnersTot+i), entOwners_host(numOwnersTot+i));
   }
   MPI_Barrier(comm);
   if(self==0){
```
```diff
@@ -717,40 +724,182 @@ void MPMesh::startCommunication(){
   if(self==1){
     for (int i=0; i<localIDBufs[0].size(); i++)
       printf("LIDs in owned rank 1 %d \n", localIDBufs[0][i]);
-    printf("Rank %d local 0 13 Global %d %d\n", self, elm2global_host(0), elm2global_host(13));
+    //printf("Rank %d local 0 13 Global %d %d\n", self, ent2global_host(0), ent2global_host(13));
   }
   MPI_Barrier(comm);
   //Checking if they have received them back
   if(self==0){
     for (int i=0; i<haloOwnerLocalIDs[1].size(); i++)
       printf("Owner LID in rank 0 %d \n", haloOwnerLocalIDs[1][i]);
-    printf("Rank %d local 641 644 Global %d %d\n", self, elm2global_host(641), elm2global_host(644));
+    //printf("Rank %d local 641 644 Global %d %d\n", self, ent2global_host(641), ent2global_host(644));
   }
   MPI_Barrier(comm);
   //OwnerToHalos
   if(self==1){
-    for (const auto &p : ownerToHalos[0]) {
-      std::cout << "(" << p.first << ", " << p.second << ")\n";
+    for (const auto &p : ownerToHalos[0]) {
+      std::cout << "(" << p.first << ", " << p.second << ")\n";
     }
   }
 }
 
-void MPMesh::communicateFields(){
+template <MeshFieldIndex meshFieldIndex>
+void MPMesh::reconstruct_full() {
+  std::cout<<__FUNCTION__<<std::endl;
+  Kokkos::Timer timer;
+
+  auto VtxCoeffs=this->precomputedVtxCoeffs;
+  std::cout << "Extent(0) = " << VtxCoeffs.extent(0) << std::endl; // 10
+  std::cout << "Extent(1) = " << VtxCoeffs.extent(1) << std::endl; // 20
+  //Mesh Information
+  auto elm2VtxConn = p_mesh->getElm2VtxConn();
+  int numVtx = p_mesh->getNumVertices();
+  auto vtxCoords = p_mesh->getMeshField<polyMPO::MeshF_VtxCoords>();
+  int numVertices = p_mesh->getNumVertices();
+  //Mesh Field
+  constexpr MaterialPointSlice mpfIndex = meshFieldIndexToMPSlice<meshFieldIndex>;
+  const int numEntries = mpSliceToNumEntries<mpfIndex>();
+  p_mesh->fillMeshField<meshFieldIndex>(numVtx, numEntries, 0.0);
+  auto meshField = p_mesh->getMeshField<meshFieldIndex>();
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  //Material Points
+  calcBasis();
+  auto mpData = p_MPs->getData<mpfIndex>();
+  auto weight = p_MPs->getData<MPF_Basis_Vals>();
+  auto mpPositions = p_MPs->getData<MPF_Cur_Pos_XYZ>();
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  //Earth Radius
+  double radius = 1.0;
+  if(p_mesh->getGeomType() == geom_spherical_surf)
+    radius=p_mesh->getSphereRadius();
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  //Reconstructed values
+  Kokkos::View<double**> reconVals("meshField", numVertices, numEntries);
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  //Reconstruct
+  auto reconstruct = PS_LAMBDA(const int& elm, const int& mp, const int& mask) {
+    if(mask) { //if material point is 'active'/'enabled'
+      int nVtxE = elm2VtxConn(elm,0); //number of vertices bounding the element
+      for(int i=0; i<nVtxE; i++){
+        int vID = elm2VtxConn(elm,i+1)-1;
+        double w_vtx=weight(mp,i);
+        double CoordDiffs[vec4d_nEntries] = {1, (vtxCoords(vID,0) - mpPositions(mp,0))/radius,
+                                                (vtxCoords(vID,1) - mpPositions(mp,1))/radius,
+                                                (vtxCoords(vID,2) - mpPositions(mp,2))/radius};
+
+        auto factor = w_vtx*(VtxCoeffs(vID,0) + VtxCoeffs(vID,1)*CoordDiffs[1] +
+                                                VtxCoeffs(vID,2)*CoordDiffs[2] +
+                                                VtxCoeffs(vID,3)*CoordDiffs[3]);
+
+        for (int k=0; k<numEntries; k++){
+          auto val = factor*mpData(mp,k);
+          Kokkos::atomic_add(&reconVals(vID,k), val);
+        }
+      }
+    }
+  };
+  p_MPs->parallel_for(reconstruct, "reconstruct");
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // create host mirror and copy device -> host
+  auto reconVals_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), reconVals);
+  std::vector<std::vector<double>> fieldData(numVertices, std::vector<double>(numEntries, 0.0));
+  for (int i = 0; i < numVertices; ++i) {
+    for (int j = 0; j < numEntries; ++j) {
+      fieldData[i][j] = reconVals_host(i, j);
+    }
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  //Mode 0 is Gather: Halos Send to Owners
+  //Mode 1 is Scatter: Owners Send to Halos
+  int mode=0;
+  std::vector<std::vector<int>> recvIDVec;
+  std::vector<std::vector<double>> recvDataVec;
+  communicateFields(fieldData, numVertices, numEntries, mode, recvIDVec, recvDataVec);
+
+  int numProcsTot = recvIDVec.size();
+  //Copy recvData and recvIDVec in GPU so that contributions can be taken
+
+
+  int totalSize = 0;
+  std::vector<int> offsets(numProcsTot);
+  for(int i=0; i<numProcsTot; i++) {
+    offsets[i] = totalSize;
+    totalSize += recvDataVec[i].size();
+  }
+  std::vector<double> flatData(totalSize);
+  for(int i=0; i<numProcsTot; i++) {
+    std::copy(recvDataVec[i].begin(), recvDataVec[i].end(), flatData.begin() + offsets[i]);
+  }
+  Kokkos::View<double*> recvDataGPU("recvDataGPU", totalSize);
+  Kokkos::deep_copy(recvDataGPU, Kokkos::View<double*, Kokkos::HostSpace>(flatData.data(), totalSize));
+
+  //Kokkos::View<int*> offsetsGPU("offsetsGPU", numProcsTot);
+  //Kokkos::deep_copy(offsetsGPU, Kokkos::View<int*, Kokkos::HostSpace>(offsets.data(), numProcsTot));
+
+  totalSize = 0;
+  for(int i=0; i<numProcsTot; i++) {
+    offsets[i] = totalSize;
+    totalSize += recvIDVec[i].size();
+  }
+  std::vector<double> flatIDVec(totalSize);
+  for(int i=0; i<numProcsTot; i++) {
+    std::copy(recvIDVec[i].begin(), recvIDVec[i].end(), flatIDVec.begin() + offsets[i]);
+  }
+  Kokkos::View<double*> recvIDGPU("recvIDGPU", totalSize);
+  Kokkos::deep_copy(recvIDGPU, Kokkos::View<double*, Kokkos::HostSpace>(flatIDVec.data(), totalSize));
+  //Kokkos::View<int*> offsetsIDGPU("offsetsIDGPU", numProcsTot);
+  //Kokkos::deep_copy(offsetsIDGPU, Kokkos::View<int*, Kokkos::HostSpace>(offsets.data(), numProcsTot));
+
+
+  //Asssign the field
+  Kokkos::parallel_for("assigning", numVtx, KOKKOS_LAMBDA(const int vtx){
+    for(int k=0; k<numEntries; k++)
+      meshField(vtx, k) = reconVals(vtx,k);
+  });
+
+  Kokkos::parallel_for("assigning2", recvIDGPU.size(), KOKKOS_LAMBDA(const int i){
+    int vertex = recvIDGPU(i);
+    for(int k=0; k<numEntries; k++)
+      Kokkos::atomic_add(&meshField(vertex,k), recvDataGPU(i*numEntries+k));
+  });
+
+}
+
+void MPMesh::communicateFields(const std::vector<std::vector<double>>& fieldData, const int numEntities, const int numEntries, int mode,
+                               std::vector<std::vector<int>>& recvIDVec, std::vector<std::vector<double>>& recvDataVec){
 
   int self, numProcsTot;
   MPI_Comm comm = p_MPs->getMPIComm();
   MPI_Comm_rank(comm, &self);
   MPI_Comm_size(comm, &numProcsTot);
 
-  //Mode 0 is Gather, mode 1 is Scatter
-  int mode = 1; //TODO make it enum
-  int num_doubles_per_ent = 2; //This will come as input or vector size of the field
-
+  assert(numEntities == numOwnersTot + numHalosTot);
+
   std::vector<MPI_Request> recvRequests;
   std::vector<MPI_Request> sendRequests;
 
-  std::vector<std::vector<int>> sendIDVec(numProcsTot), recvIDVec(numProcsTot);
-  std::vector<std::vector<double>> sendDataVec(numProcsTot), recvDataVec(numProcsTot);
+  std::vector<std::vector<int>> sendIDVec(numProcsTot);
+  std::vector<std::vector<double>> sendDataVec(numProcsTot);
+
+  recvIDVec.resize(numProcsTot);
+  recvDataVec.resize(numProcsTot);
 
   for(int i = 0; i < numProcsTot; i++){
     if(i==self) continue;
```
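A side note on the flatten-and-copy step added above: the receive buffers come back from communicateFields as one std::vector per peer rank, which reconstruct_full flattens on the host and deep-copies into device Views before accumulating. A minimal standalone sketch of that pattern (the function and variable names here are illustrative, not from the commit):

```cpp
#include <Kokkos_Core.hpp>
#include <algorithm>
#include <vector>

// Flatten per-rank receive buffers into one contiguous host array and copy it
// to the device, mirroring what reconstruct_full does with recvDataVec.
Kokkos::View<double*> flattenToDevice(const std::vector<std::vector<double>>& perRank) {
  int total = 0;
  std::vector<int> offsets(perRank.size());
  for (std::size_t i = 0; i < perRank.size(); ++i) {
    offsets[i] = total;
    total += static_cast<int>(perRank[i].size());
  }
  std::vector<double> flat(total);
  for (std::size_t i = 0; i < perRank.size(); ++i)
    std::copy(perRank[i].begin(), perRank[i].end(), flat.begin() + offsets[i]);

  Kokkos::View<double*> device("flattened", total);
  // Unmanaged host view wrapping the std::vector's storage for the copy.
  Kokkos::View<const double*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>
      host(flat.data(), total);
  Kokkos::deep_copy(device, host);
  return device;
}
```

Note that the hunk routes the received vertex IDs through a std::vector<double> and a View<double*> as well, converting back to int inside the `assigning2` lambda; an int-typed flat buffer would avoid that round trip.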
```diff
@@ -768,27 +917,29 @@ void MPMesh::communicateFields(){
   }
 
   if(numToSend > 0){
-    sendDataVec[i].reserve(numToSend*num_doubles_per_ent);
+    sendDataVec[i].reserve(numToSend*numEntries);
     if (mode == 1) sendIDVec[i].reserve(numToSend);
 
   }
   if(numToRecv > 0){
-    recvDataVec[i].resize(numToRecv*num_doubles_per_ent);
+    recvDataVec[i].resize(numToRecv*numEntries);
     recvIDVec[i].resize(numToRecv);
   }
  }
 
  // Create dummy fieldData: first owners, then halos
- std::vector<std::vector<double>> fieldData(numOwnersTot + numHalosTot, std::vector<double>(num_doubles_per_ent));
+ /*
+ std::vector<std::vector<double>> fieldData(numOwnersTot + numHalosTot, std::vector<double>(numEntries));
  for (int i = 0; i < numOwnersTot + numHalosTot; ++i)
-   for (int j = 0; j < num_doubles_per_ent; ++j)
+   for (int j = 0; j < numEntries; ++j)
     fieldData[i][j] = numOwnersTot + i;
-
+ */
+
  if(mode == 0){
   // Halos sends to owners
   for (int iEnt = 0; iEnt < numHalosTot; iEnt++){
    auto ownerProc = haloOwnerProcs[iEnt];
-   for (int iDouble = 0; iDouble < num_doubles_per_ent; iDouble++)
+   for (int iDouble = 0; iDouble < numEntries; iDouble++)
     sendDataVec[ownerProc].push_back(fieldData[numOwnersTot+iEnt][iDouble]);
   }
  }
@@ -798,7 +949,7 @@ void MPMesh::communicateFields(){
   for (int iProc=0; iProc<ownerToHalos.size(); iProc++) {
    for (auto& [ownerID, haloID] : ownerToHalos[iProc]) {
     sendIDVec[iProc].push_back(haloID);
-    for (int iDouble = 0; iDouble < num_doubles_per_ent; iDouble++)
+    for (int iDouble = 0; iDouble < numEntries; iDouble++)
      sendDataVec[iProc].push_back(fieldData[ownerID][iDouble]);
    }
   }
```
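For reference, the exchange that the reworked communicateFields implements works in two modes: in gather mode (mode 0) every rank packs its halo entries and sends them to the owning ranks recorded in haloOwnerProcs; in scatter mode (mode 1) owners pack the entries listed in ownerToHalos and send them back to the ranks that hold them as halos. The sketch below is a simplified, self-contained illustration of that per-peer non-blocking exchange; the function name and the single request vector are assumptions, not the commit's exact implementation.

```cpp
#include <mpi.h>
#include <vector>

// Illustrative per-peer exchange: one Irecv and one Isend per peer rank.
// sendBuf[r]/recvBuf[r] are already packed and sized per peer, as
// communicateFields does with sendDataVec/recvDataVec; 'mode' is only
// used as the message tag here.
void exchangeFieldData(MPI_Comm comm, int mode,
                       std::vector<std::vector<double>>& sendBuf,
                       std::vector<std::vector<double>>& recvBuf) {
  int self, nRanks;
  MPI_Comm_rank(comm, &self);
  MPI_Comm_size(comm, &nRanks);

  std::vector<MPI_Request> requests;
  for (int r = 0; r < nRanks; ++r) {
    if (r == self) continue;
    if (!recvBuf[r].empty()) {
      requests.emplace_back();
      MPI_Irecv(recvBuf[r].data(), static_cast<int>(recvBuf[r].size()), MPI_DOUBLE,
                r, mode, comm, &requests.back());
    }
    if (!sendBuf[r].empty()) {
      requests.emplace_back();
      MPI_Isend(sendBuf[r].data(), static_cast<int>(sendBuf[r].size()), MPI_DOUBLE,
                r, mode, comm, &requests.back());
    }
  }
  MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE);
}
```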
