@@ -445,6 +445,8 @@ void MPMesh::subAssemblyVtx1(int vtxPerElm, int nCells, int comp, double* array)
   Kokkos::Timer timer;

   auto VtxCoeffs = this->precomputedVtxCoeffs;
+  std::cout << "SubAssemblyExtent(0) = " << VtxCoeffs.extent(0) << std::endl;
+  std::cout << "SubAssemblyExtent(1) = " << VtxCoeffs.extent(1) << std::endl;

   // MPI Information
   MPI_Comm comm = p_MPs->getMPIComm();
@@ -540,25 +542,32 @@ void MPMesh::startCommunication(){
   MPI_Comm_size(comm, &numProcsTot);

   // Owning processes and global Numbering
-  auto elmOwners = p_mesh->getElm2Process();
-  auto elm2global = p_mesh->getElmGlobal();
-
+
+  // For Elements Checked
+  // auto entOwners = p_mesh->getElm2Process();
+  // auto ent2global = p_mesh->getElmGlobal();
+  // int numEntities = p_mesh->getNumElements();
+
+  // For Vertices not checked
+  auto entOwners = p_mesh->getVtx2Process();
+  auto ent2global = p_mesh->getVtxGlobal();
+  int numEntities = p_mesh->getNumVertices();
+
   // Loop over elements and find no of owners and halos
-  int numElements = p_mesh->getNumElements();
   Kokkos::View<int> owner_count("owner_count");
   Kokkos::View<int> halo_count("halo_count");
   Kokkos::deep_copy(owner_count, 0);
   Kokkos::deep_copy(halo_count, 0);
-  Kokkos::parallel_for("countOwnerHalo", numElements, KOKKOS_LAMBDA(const int elm){
-    if (elmOwners(elm)==self)
+  Kokkos::parallel_for("countOwnerHalo", numEntities, KOKKOS_LAMBDA(const int elm){
+    if (entOwners(elm)==self)
       Kokkos::atomic_add(&owner_count(), 1);
     else
       Kokkos::atomic_add(&halo_count(), 1);
   });

   Kokkos::deep_copy(numOwnersTot, owner_count);
   Kokkos::deep_copy(numHalosTot, halo_count);
-  assert(numHalosTot+numOwnersTot == numElements);
+  assert(numHalosTot+numOwnersTot == numEntities);
   printf("Rank %d owners %d halo %d\n", self, numOwnersTot, numHalosTot);
   int num_ints_per_copy = 2;

@@ -576,23 +585,23 @@ void MPMesh::startCommunication(){
   ownerToHalos.resize(numProcsTot);

   // Copy owning processes and globalIds to CPU
-  auto elmOwners_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
-                                                            elmOwners);
-  auto elm2global_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
-                                                             elm2global);
+  auto entOwners_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                            entOwners);
+  auto ent2global_host = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace::memory_space(),
+                                                             ent2global);

   // Do Map of Global To Local ID
   // TODO make ordered map; which faster?
   std::unordered_map<int, int> global2local;
-  for (int iEnt = 0; iEnt < numElements; iEnt++) {
-    int globalID = elm2global_host(iEnt);
+  for (int iEnt = 0; iEnt < numEntities; iEnt++) {
+    int globalID = ent2global_host(iEnt);
     global2local[globalID] = iEnt;
   }

   // Loop over all halo Entities and find the owning process
   for (auto iEnt=numOwnersTot; iEnt<numOwnersTot+numHalosTot; iEnt++){
-    auto ownerProc = elmOwners_host[iEnt];
-    assert(elmOwners_host(iEnt) != self);
+    auto ownerProc = entOwners_host[iEnt];
+    assert(entOwners_host(iEnt) != self);
     numOwnersOnOtherProcs[ownerProc] = numOwnersOnOtherProcs[ownerProc]+1;
     haloOwnerProcs.push_back(ownerProc);
   }
@@ -605,9 +614,9 @@ void MPMesh::startCommunication(){
     sendBufs[proc].reserve(num_ints_per_copy*numOwnersOnOtherProcs[proc]);

   for (int iEnt=numOwnersTot; iEnt<numOwnersTot+numHalosTot; iEnt++) {
-    auto ownerProc = elmOwners_host(iEnt);
+    auto ownerProc = entOwners_host(iEnt);
     assert(ownerProc != self);
-    sendBufs[ownerProc].push_back(elm2global_host(iEnt));
+    sendBufs[ownerProc].push_back(ent2global_host(iEnt));
     sendBufs[ownerProc].push_back(iEnt);
   }

@@ -684,12 +693,10 @@ void MPMesh::startCommunication(){

   MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);

-  communicateFields();
-
-  bool debug = true;
+  bool debug = false;
   if (!debug) return;

-  printf("Rank %d Owners %d Halos %d Total %d \n", self, numOwnersTot, numHalosTot, numElements);
+  printf("Rank %d Owners %d Halos %d Total %d \n", self, numOwnersTot, numHalosTot, numEntities);
   for (int i=0; i<numProcsTot; i++){
     printf("Rank %d has %d halos which are owners in other rank %d \n", self, numOwnersOnOtherProcs[i], i);
     printf("Rank %d has %d owners which are halos in other rank %d \n", self, numHalosOnOtherProcs[i], i);
@@ -698,8 +705,8 @@ void MPMesh::startCommunication(){
   // Check rank 0 sending to rank 1
   if (self==0){
     for (int i=0; i<numHalosTot; i++)
-      if (elmOwners_host(numOwnersTot+i)==1)
-        printf("Halo Element with lid %d, gid %d and owner %d \n", numOwnersTot+i, elm2global_host(numOwnersTot+i), elmOwners_host(numOwnersTot+i));
+      if (entOwners_host(numOwnersTot+i)==1)
+        printf("Halo Element with lid %d, gid %d and owner %d \n", numOwnersTot+i, ent2global_host(numOwnersTot+i), entOwners_host(numOwnersTot+i));
   }
   MPI_Barrier(comm);
   if (self==0){
@@ -717,40 +724,182 @@ void MPMesh::startCommunication(){
   if (self==1){
     for (int i=0; i<localIDBufs[0].size(); i++)
       printf("LIDs in owned rank 1 %d \n", localIDBufs[0][i]);
-    printf("Rank %d local 0 13 Global %d %d\n", self, elm2global_host(0), elm2global_host(13));
+    // printf("Rank %d local 0 13 Global %d %d\n", self, ent2global_host(0), ent2global_host(13));
   }
   MPI_Barrier(comm);
   // Checking if they have received them back
   if (self==0){
     for (int i=0; i<haloOwnerLocalIDs[1].size(); i++)
       printf("Owner LID in rank 0 %d \n", haloOwnerLocalIDs[1][i]);
-    printf("Rank %d local 641 644 Global %d %d\n", self, elm2global_host(641), elm2global_host(644));
+    // printf("Rank %d local 641 644 Global %d %d\n", self, ent2global_host(641), ent2global_host(644));
   }
   MPI_Barrier(comm);
   // OwnerToHalos
   if (self==1){
-    for (const auto &p : ownerToHalos[0]) {
-      std::cout << "(" << p.first << ", " << p.second << ")\n";
+    for (const auto &p : ownerToHalos[0]) {
+      std::cout << "(" << p.first << ", " << p.second << ")\n";
     }
   }
 }

-void MPMesh::communicateFields(){
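+// Reconstruct a vertex-centered mesh field from material point (MP) data; the
+// template parameter selects which mesh field is filled. A hypothetical call
+// site (assuming MeshF_Vel is a valid MeshFieldIndex in this build) would be:
+//   mpMesh.reconstruct_full<MeshF_Vel>();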
+template <MeshFieldIndex meshFieldIndex>
+void MPMesh::reconstruct_full() {
+  std::cout<<__FUNCTION__<<std::endl;
+  Kokkos::Timer timer;
+
+  auto VtxCoeffs = this->precomputedVtxCoeffs;
+  std::cout << "Extent(0) = " << VtxCoeffs.extent(0) << std::endl; // 10
+  std::cout << "Extent(1) = " << VtxCoeffs.extent(1) << std::endl; // 20
+  // Mesh Information
+  auto elm2VtxConn = p_mesh->getElm2VtxConn();
+  int numVtx = p_mesh->getNumVertices();
+  auto vtxCoords = p_mesh->getMeshField<polyMPO::MeshF_VtxCoords>();
+  int numVertices = p_mesh->getNumVertices();
+  // Mesh Field
+  constexpr MaterialPointSlice mpfIndex = meshFieldIndexToMPSlice<meshFieldIndex>;
+  const int numEntries = mpSliceToNumEntries<mpfIndex>();
+  p_mesh->fillMeshField<meshFieldIndex>(numVtx, numEntries, 0.0);
+  auto meshField = p_mesh->getMeshField<meshFieldIndex>();
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // Material Points
+  calcBasis();
+  auto mpData = p_MPs->getData<mpfIndex>();
+  auto weight = p_MPs->getData<MPF_Basis_Vals>();
+  auto mpPositions = p_MPs->getData<MPF_Cur_Pos_XYZ>();
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // Earth Radius
+  double radius = 1.0;
+  if (p_mesh->getGeomType() == geom_spherical_surf)
+    radius = p_mesh->getSphereRadius();
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // Reconstructed values
+  Kokkos::View<double**> reconVals("meshField", numVertices, numEntries);
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // Reconstruct
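+  // Each active MP adds its data to every vertex of its element, weighted by the
+  // vertex basis value times a linear correction built from the precomputed vertex
+  // coefficients and the (vertex - MP) coordinate differences scaled by 1/radius.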
+  auto reconstruct = PS_LAMBDA(const int& elm, const int& mp, const int& mask) {
+    if (mask) { // if material point is 'active'/'enabled'
+      int nVtxE = elm2VtxConn(elm,0); // number of vertices bounding the element
+      for (int i=0; i<nVtxE; i++){
+        int vID = elm2VtxConn(elm,i+1)-1;
+        double w_vtx = weight(mp,i);
+        double CoordDiffs[vec4d_nEntries] = {1, (vtxCoords(vID,0) - mpPositions(mp,0))/radius,
+                                                (vtxCoords(vID,1) - mpPositions(mp,1))/radius,
+                                                (vtxCoords(vID,2) - mpPositions(mp,2))/radius};
+
+        auto factor = w_vtx*(VtxCoeffs(vID,0) + VtxCoeffs(vID,1)*CoordDiffs[1] +
+                             VtxCoeffs(vID,2)*CoordDiffs[2] +
+                             VtxCoeffs(vID,3)*CoordDiffs[3]);
+
+        for (int k=0; k<numEntries; k++){
+          auto val = factor*mpData(mp,k);
+          Kokkos::atomic_add(&reconVals(vID,k), val);
+        }
+      }
+    }
+  };
+  p_MPs->parallel_for(reconstruct, "reconstruct");
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // create host mirror and copy device -> host
+  auto reconVals_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), reconVals);
+  std::vector<std::vector<double>> fieldData(numVertices, std::vector<double>(numEntries, 0.0));
+  for (int i = 0; i < numVertices; ++i) {
+    for (int j = 0; j < numEntries; ++j) {
+      fieldData[i][j] = reconVals_host(i, j);
+    }
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+  assert(cudaDeviceSynchronize()==cudaSuccess);
+
+  // Mode 0 is Gather: Halos Send to Owners
+  // Mode 1 is Scatter: Owners Send to Halos
+  int mode = 0;
+  std::vector<std::vector<int>> recvIDVec;
+  std::vector<std::vector<double>> recvDataVec;
+  communicateFields(fieldData, numVertices, numEntries, mode, recvIDVec, recvDataVec);
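+  // Gather mode: each rank sends the partial sums of its halo vertices to their
+  // owning rank. On return, recvDataVec[p] holds numEntries values per received
+  // entity from rank p, and recvIDVec[p] is expected to hold the matching local
+  // vertex IDs on this rank.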
+
+  int numProcsTot = recvIDVec.size();
+  // Copy recvDataVec and recvIDVec to the GPU so the halo contributions can be accumulated
+
+
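+  // std::vector cannot be dereferenced inside device kernels, so the per-rank
+  // receive buffers are concatenated into flat host arrays (offsets records where
+  // each rank's block starts) and then deep-copied into device Views.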
+  int totalSize = 0;
+  std::vector<int> offsets(numProcsTot);
+  for (int i=0; i<numProcsTot; i++) {
+    offsets[i] = totalSize;
+    totalSize += recvDataVec[i].size();
+  }
+  std::vector<double> flatData(totalSize);
+  for (int i=0; i<numProcsTot; i++) {
+    std::copy(recvDataVec[i].begin(), recvDataVec[i].end(), flatData.begin() + offsets[i]);
+  }
+  Kokkos::View<double*> recvDataGPU("recvDataGPU", totalSize);
+  Kokkos::deep_copy(recvDataGPU, Kokkos::View<double*, Kokkos::HostSpace>(flatData.data(), totalSize));
+
+  // Kokkos::View<int*> offsetsGPU("offsetsGPU", numProcsTot);
+  // Kokkos::deep_copy(offsetsGPU, Kokkos::View<int*, Kokkos::HostSpace>(offsets.data(), numProcsTot));
+
+  totalSize = 0;
+  for (int i=0; i<numProcsTot; i++) {
+    offsets[i] = totalSize;
+    totalSize += recvIDVec[i].size();
+  }
+  std::vector<double> flatIDVec(totalSize);
+  for (int i=0; i<numProcsTot; i++) {
+    std::copy(recvIDVec[i].begin(), recvIDVec[i].end(), flatIDVec.begin() + offsets[i]);
+  }
+  Kokkos::View<double*> recvIDGPU("recvIDGPU", totalSize);
+  Kokkos::deep_copy(recvIDGPU, Kokkos::View<double*, Kokkos::HostSpace>(flatIDVec.data(), totalSize));
+  // Kokkos::View<int*> offsetsIDGPU("offsetsIDGPU", numProcsTot);
+  // Kokkos::deep_copy(offsetsIDGPU, Kokkos::View<int*, Kokkos::HostSpace>(offsets.data(), numProcsTot));
+
+
+  // Assign the field
+  Kokkos::parallel_for("assigning", numVtx, KOKKOS_LAMBDA(const int vtx){
+    for (int k=0; k<numEntries; k++)
+      meshField(vtx, k) = reconVals(vtx,k);
+  });
+
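+  // Add the received halo contributions: recvIDGPU(i) is the local vertex ID of the
+  // i-th received entity and its numEntries values sit contiguously starting at
+  // recvDataGPU(i*numEntries); atomics guard against the same vertex appearing twice.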
+  Kokkos::parallel_for("assigning2", recvIDGPU.size(), KOKKOS_LAMBDA(const int i){
+    int vertex = recvIDGPU(i);
+    for (int k=0; k<numEntries; k++)
+      Kokkos::atomic_add(&meshField(vertex,k), recvDataGPU(i*numEntries+k));
+  });
+
+}
+
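+// Exchange per-entity field data between owners and halos. fieldData is indexed by
+// local entity ID (owners first, then halos) with numEntries doubles per entity;
+// mode 0 gathers halo data to owners, mode 1 scatters owner data to halos. The
+// receive buffers are resized to one bucket per rank and hold the local IDs and
+// values of the incoming entities.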
+void MPMesh::communicateFields(const std::vector<std::vector<double>>& fieldData, const int numEntities, const int numEntries, int mode,
+                               std::vector<std::vector<int>>& recvIDVec, std::vector<std::vector<double>>& recvDataVec){

   int self, numProcsTot;
   MPI_Comm comm = p_MPs->getMPIComm();
   MPI_Comm_rank(comm, &self);
   MPI_Comm_size(comm, &numProcsTot);

-  // Mode 0 is Gather, mode 1 is Scatter
-  int mode = 1; // TODO make it enum
-  int num_doubles_per_ent = 2; // This will come as input or vector size of the field
-
+  assert(numEntities == numOwnersTot + numHalosTot);
+
   std::vector<MPI_Request> recvRequests;
   std::vector<MPI_Request> sendRequests;

-  std::vector<std::vector<int>> sendIDVec(numProcsTot), recvIDVec(numProcsTot);
-  std::vector<std::vector<double>> sendDataVec(numProcsTot), recvDataVec(numProcsTot);
+  std::vector<std::vector<int>> sendIDVec(numProcsTot);
+  std::vector<std::vector<double>> sendDataVec(numProcsTot);
+
+  recvIDVec.resize(numProcsTot);
+  recvDataVec.resize(numProcsTot);

   for (int i = 0; i < numProcsTot; i++){
     if (i==self) continue;
@@ -768,27 +917,29 @@ void MPMesh::communicateFields(){
     }

     if (numToSend > 0){
-      sendDataVec[i].reserve(numToSend*num_doubles_per_ent);
+      sendDataVec[i].reserve(numToSend*numEntries);
       if (mode == 1) sendIDVec[i].reserve(numToSend);

     }
     if (numToRecv > 0){
-      recvDataVec[i].resize(numToRecv*num_doubles_per_ent);
+      recvDataVec[i].resize(numToRecv*numEntries);
       recvIDVec[i].resize(numToRecv);
     }
   }

   // Create dummy fieldData: first owners, then halos
-  std::vector<std::vector<double>> fieldData(numOwnersTot + numHalosTot, std::vector<double>(num_doubles_per_ent));
+  /*
+  std::vector<std::vector<double>> fieldData(numOwnersTot + numHalosTot, std::vector<double>(numEntries));
   for (int i = 0; i < numOwnersTot + numHalosTot; ++i)
-    for (int j = 0; j < num_doubles_per_ent; ++j)
+    for (int j = 0; j < numEntries; ++j)
       fieldData[i][j] = numOwnersTot + i;
-
+  */
+
   if (mode == 0){
     // Halos send to owners
     for (int iEnt = 0; iEnt < numHalosTot; iEnt++){
       auto ownerProc = haloOwnerProcs[iEnt];
-      for (int iDouble = 0; iDouble < num_doubles_per_ent; iDouble++)
+      for (int iDouble = 0; iDouble < numEntries; iDouble++)
         sendDataVec[ownerProc].push_back(fieldData[numOwnersTot+iEnt][iDouble]);
     }
   }
@@ -798,7 +949,7 @@ void MPMesh::communicateFields(){
     for (int iProc=0; iProc<ownerToHalos.size(); iProc++) {
       for (auto& [ownerID, haloID] : ownerToHalos[iProc]) {
         sendIDVec[iProc].push_back(haloID);
-        for (int iDouble = 0; iDouble < num_doubles_per_ent; iDouble++)
+        for (int iDouble = 0; iDouble < numEntries; iDouble++)
           sendDataVec[iProc].push_back(fieldData[ownerID][iDouble]);
       }
     }