Skip to content

Commit 3fb4889

Browse files
authored
Merge pull request #75 from BconstantMMK/dev
Fast/FastS: bug fix for openMP
2 parents 0ce42cd + 4f152dd commit 3fb4889

File tree

8 files changed: 73 additions and 61 deletions.

Fast/Fast/Fast/bcs.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,16 @@
5858

5959

6060
//Reinitialisation verrou omp
61-
//
6261
E_Int l = ntask*mx_synchro*Nbre_thread_actif + (ithread_loc-1)*mx_synchro;
6362
for (E_Int i = 0; i < mx_synchro ; i++) { ipt_lok[ l + i ] = 0; }
6463

64+
E_Int type = 4;
65+
////Init verrou rhs pour chaque sous zone et chaque thread actif: init val to zero
66+
E_Int* verrou_lhs_thread = verrou_lhs + ntask*Nbre_thread_actif + ithread_loc -1;
67+
verrou_c_( verrou_lhs_thread, type);
68+
69+
//Reinitialisation verrou omp pour calcul residu avant lhs
70+
verrou_lhs_thread = verrou_lhs + (nbtask + ntask)*Nbre_thread_actif + ithread_loc -1;
71+
verrou_c_( verrou_lhs_thread, type );
72+
6573
}//loop zone

Fast/Fast/Fast/computePT.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,9 @@ PyObject* K_FAST::_computePT(PyObject* self, PyObject* args)
598598
FldArrayI shift_lu(mx_nidom*6*threadmax_sdm); E_Int* ipt_shift_lu = shift_lu.begin();
599599
FldArrayI ind_dm_omp( 12*threadmax_sdm); E_Int* ipt_ind_dm_omp = ind_dm_omp.begin();
600600

601-
FldArrayI tab_verrou_lhs(2*mx_nidom*threadmax_sdm); E_Int* verrou_lhs = tab_verrou_lhs.begin();
601+
FldArrayI tab_verrou_lhs(2*mx_nidom*threadmax_sdm);
602+
tab_verrou_lhs.setAllValuesAtNull(); //init value a zero
603+
E_Int* verrou_lhs = tab_verrou_lhs.begin();
602604

603605
FldArrayF cfl( nidom*3*threadmax_sdm); E_Float* ipt_cfl = cfl.begin();
604606

Fast/Fast/Fast/gsdr3.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ E_Int rank =0;
242242

243243
// calcul metric si maillage deformable
244244
//
245-
#include "../FastS/FastS/Metric/cp_metric.cpp"
245+
#include "FastS/Metric/cp_metric.cpp"
246246
}
247247

248248
//---------------------------------------------------------------------

Fast/FastS/FastS/Compute/bcs.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,4 @@
4040
if(nd>=0){ correct_coins_(nd, param_int[nd], ipt_inddm_omp , iptro_CL[nd]); }
4141
}//autorisation
4242

43-
44-
//Reinitialisation verrou omp
45-
//
46-
E_Int l = ntask*mx_synchro*Nbre_thread_actif + (ithread_loc-1)*mx_synchro;
47-
for (E_Int i = 0; i < mx_synchro ; i++) { ipt_lok[ l + i ] = 0; }
48-
4943
}//loop zone

Fast/FastS/FastS/Compute/computePT.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,9 @@ else
510510
FldArrayI shift_lu(mx_nidom*6*threadmax_sdm); E_Int* ipt_shift_lu = shift_lu.begin();
511511
FldArrayI ind_dm_omp( 12*threadmax_sdm); E_Int* ipt_ind_dm_omp = ind_dm_omp.begin();
512512

513-
FldArrayI tab_verrou_lhs(2*mx_nidom*threadmax_sdm); E_Int* verrou_lhs = tab_verrou_lhs.begin();
513+
FldArrayI tab_verrou_lhs(2*mx_nidom*threadmax_sdm);
514+
tab_verrou_lhs.setAllValuesAtNull(); //init value a zero
515+
E_Int* verrou_lhs = tab_verrou_lhs.begin();
514516

515517
FldArrayF cfl(nidom*3*threadmax_sdm); E_Float* ipt_cfl = cfl.begin();
516518

Fast/FastS/FastS/Compute/gsdr3.cpp

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -232,40 +232,36 @@ if(nitcfg==1){param_real[0][TEMPS] = 0.0;}
232232
E_Int* ipt_ind_dm_omp_thread = ipt_ind_dm_socket + 6;
233233

234234
E_Int* ipt_nidom_loc, nb_subzone;
235-
/****************************************************
235+
/****************************************************
236236
-----Boucle sous-iteration
237-
****************************************************/
238-
E_Int nbtask = ipt_omp[nitcfg-1];
239-
E_Int ptiter = ipt_omp[nssiter+ nitcfg-1];
240-
241-
if( nitcfg == 1)
242-
{
243-
//mise a jour metric et vent ale zone cart et 3dhom(3dfull et 2d a la volee)
244-
for (E_Int ntask = 0; ntask < nbtask; ntask++)
245-
{
246-
E_Int pttask = ptiter + ntask*(6+Nbre_thread_actif*7);
247-
E_Int nd = ipt_omp[ pttask ];
248-
if(param_int[nd][LALE]==1) //maillage indeformable
249-
{
250-
mjr_ale_3dhomocart_(nd, param_int[nd] , param_real[nd] ,
251-
socket , Nbre_socket , ithread_sock , thread_parsock,
252-
ipt_ind_dm_socket , ipt_topology_socket,
253-
iptx[nd] , ipty[nd] , iptz[nd] ,
254-
ipti[nd] , iptj[nd] , iptk[nd] ,
255-
ipti0[nd] , iptj0[nd] , iptk0[nd] , iptvol[nd] ,
256-
iptventi[nd] , iptventj[nd] , iptventk[nd] );
257-
//modifier mjr_ale_3dhomocart_ pour faire sauter barrier
258-
#pragma omp barrier
259-
}
260-
}//zone
261-
262-
263-
// calcul metric si maillage deformable
264-
//
265-
#include "FastS/Metric/cp_metric.cpp"
237+
****************************************************/
238+
E_Int nbtask = ipt_omp[nitcfg-1];
239+
E_Int ptiter = ipt_omp[nssiter+ nitcfg-1];
240+
241+
if( nitcfg == 1)
242+
{
243+
//mise a jour metric et vent ale zone cart et 3dhom(3dfull et 2d a la volee)
244+
for (E_Int ntask = 0; ntask < nbtask; ntask++)
245+
{
246+
E_Int pttask = ptiter + ntask*(6+Nbre_thread_actif*7);
247+
E_Int nd = ipt_omp[ pttask ];
248+
if(param_int[nd][LALE]==1) //maillage indeformable
249+
{
250+
mjr_ale_3dhomocart_(nd, param_int[nd] , param_real[nd] ,
251+
socket , Nbre_socket , ithread_sock , thread_parsock,
252+
ipt_ind_dm_socket , ipt_topology_socket,
253+
iptx[nd] , ipty[nd] , iptz[nd] ,
254+
ipti[nd] , iptj[nd] , iptk[nd] ,
255+
ipti0[nd] , iptj0[nd] , iptk0[nd] , iptvol[nd] ,
256+
iptventi[nd] , iptventj[nd] , iptventk[nd] );
257+
//modifier mjr_ale_3dhomocart_ pour faire sauter barrier
258+
#pragma omp barrier
266259
}
260+
}//zone
267261

268-
262+
// calcul metric si maillage deformable
263+
#include "FastS/Metric/cp_metric.cpp"
264+
}
269265
//---------------------------------------------------------------------
270266
// -----Boucle sur num.les domaines de la configuration
271267
// ---------------------------------------------------------------------
@@ -294,12 +290,9 @@ if(nitcfg==1){param_real[0][TEMPS] = 0.0;}
294290
}
295291
#include "FastS/Compute/rhs.cpp"
296292
}
293+
297294
#ifdef Conservatif
298295
#include "FastS/Compute/cp_debitIBM.cpp"
299-
#endif
300-
301-
#if defined __GNUC__ && defined _OPENMP
302-
#pragma omp barrier
303296
#endif
304297

305298
//
@@ -319,7 +312,6 @@ if(nitcfg==1){param_real[0][TEMPS] = 0.0;}
319312
#include "FastS/Compute/Linear_solver/lhs.cpp"
320313
}
321314

322-
323315
// LUSGS
324316
else
325317
{
@@ -439,13 +431,16 @@ E_Int lrhs=0; E_Int lcorner=0;
439431

440432
E_Int Nbre_thread_actif_loc, ithread_loc;
441433
//
442-
//Apply BC (parcour Zones) + reinitialisation verrou pour calcul rhs
434+
//Apply BC (parcour Zones)
443435
//
444436
if(lexit_lu ==0 && layer_mode>=1)
445437
{
446438
#include "FastS/Compute/bcs.cpp"
447439
}
448440

441+
//reinitialisation verrou pour calcul rhs et lhs
442+
#include "FastS/Compute/verrou_lhs_init.cpp"
443+
449444
}//fin zone omp
450445

451446
/*

Fast/FastS/FastS/Compute/rhs.cpp

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,6 @@
1919
//printf("topo %d %d %d %d %d \n",ipt_topo_omp[0], ipt_topo_omp[1], ipt_topo_omp[2], Nbre_thread_actif_loc, nd );
2020
//printf("shif %d %d %d \n", shift_zone,shift_coe, shift_wig );
2121

22-
//Init verrou rhs pour chaque sous zone et chaque thread actif: init val to zero
23-
E_Int type = 4;
24-
E_Int* verrou_lhs_thread= verrou_lhs + ntask*Nbre_thread_actif + ithread_loc -1;
25-
verrou_c_( verrou_lhs_thread, type);
26-
verrou_lhs_thread = verrou_lhs + (nbtask + ntask)*Nbre_thread_actif + ithread_loc -1; //pour calcul residu avant LU
27-
verrou_c_( verrou_lhs_thread, type );
28-
2922
// Revoir cet adressage si scater et socket>1 et ou nidom >1
3023
E_Int* ipt_lok_thread = ipt_lok + ntask*mx_synchro*Nbre_thread_actif;
3124

@@ -58,19 +51,17 @@
5851
iptdrodm + shift_zone , iptcoe + shift_coe , iptdelta[nd] , iptro_res[nd] , iptsrc[nd] );
5952

6053
//Flush Rhs
61-
E_Int size = param_int[nd][NEQ]*param_int[nd][NDIMDX];
54+
//E_Int size = param_int[nd][NEQ]*param_int[nd][NDIMDX];
6255
//flush_real_( size , iptdrodm + shift_zone);
6356
if(nitcfg==1)
6457
{
65-
size = param_int[nd][NEQ_COE]*param_int[nd][NDIMDX];
58+
E_Int size = param_int[nd][NEQ_COE]*param_int[nd][NDIMDX];
6659
flush_real_( size , iptcoe + shift_coe);
6760
}
68-
//size = param_int[nd][NDIMDX];
69-
//flush_real_( size , iptmut[nd]);
70-
//#pragma omp flush
61+
7162
//Go verrou rhs pour chaque sous zone et chaque thread actif: valeur mise a un
72-
type = 1;
73-
verrou_lhs_thread= verrou_lhs + ntask*Nbre_thread_actif + ithread_loc -1;
63+
E_Int type = 1;
64+
E_Int* verrou_lhs_thread= verrou_lhs + ntask*Nbre_thread_actif + ithread_loc -1;
7465
verrou_c_( verrou_lhs_thread, type );
7566

7667
if(ithread_loc==1 && lexit_lu==0 && nitcfg*nitrun >15 and (nitcfg < 3 or nitcfg == nssiter-1) ){ timer_omp[cpu_perzone]+=1; } //nbre echantillon
Fast/FastS/FastS/Compute/verrou_lhs_init.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
for (E_Int ntask = 0; ntask < nbtask; ntask++)
2+
{
3+
E_Int pttask = ptiter + ntask*(6+Nbre_thread_actif*7);
4+
ithread_loc = ipt_omp[ pttask + 2 + ithread -1 ] +1 ;
5+
6+
if (ithread_loc == -1) {continue;}
7+
8+
//Reinitialisation verrou omp rhs
9+
E_Int l = ntask*mx_synchro*Nbre_thread_actif + (ithread_loc-1)*mx_synchro;
10+
for (E_Int i = 0; i < mx_synchro ; i++) { ipt_lok[ l + i ] = 0; }
11+
12+
E_Int type = 4;
13+
//Init verrou rhs pour chaque sous zone et chaque thread actif: init val to zero
14+
E_Int* verrou_lhs_thread = verrou_lhs + ntask*Nbre_thread_actif + ithread_loc -1;
15+
verrou_c_( verrou_lhs_thread, type);
16+
17+
//Reinitialisation verrou omp pour calcul residu avant lhs
18+
verrou_lhs_thread = verrou_lhs + (nbtask + ntask)*Nbre_thread_actif + ithread_loc -1;
19+
verrou_c_( verrou_lhs_thread, type );
20+
}

0 commit comments

Comments
 (0)