HΦ  3.2.0
mltplyMPIBoost.c File Reference
#include "mpi.h"
#include "Common.h"
#include "common/setmemory.h"
#include "wrapperMPI.h"
+ Include dependency graph for mltplyMPIBoost.c:

Go to the source code of this file.

Functions

void zgemm_ (char *TRANSA, char *TRANSB, int *M, int *N, int *K, double complex *ALPHA, double complex *matJL, int *LDA, double complex *arrayz, int *LDB, double complex *BETA, double complex *arrayx, int *LDC)
 
void child_general_int_spin_MPIBoost (struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1, double complex *tmp_v2, double complex *tmp_v3)
 

Function Documentation

◆ child_general_int_spin_MPIBoost()

void child_general_int_spin_MPIBoost ( struct BindStruct *  X,
double complex *  tmp_v0,
double complex *  tmp_v1,
double complex *  tmp_v2,
double complex *  tmp_v3 
)

Exchange term in Spin model

Author
Mitsuaki Kawamura (The University of Tokyo)
Youhei Yamaji (The University of Tokyo)
Parameters
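[in,out] X  Pointer to the BindStruct; the function reads X->Check.idim_max and the X->Boost members listed under References
[out] tmp_v0  Result v0 = H v1
[in] tmp_v1  Input vector v1 in v0 = H v1
[in,out] tmp_v2  buffer
[in,out] tmp_v3  buffer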

Definition at line 36 of file mltplyMPIBoost.c.

References BoostList::arrayJ, BindStruct::Boost, BindStruct::Check, CheckList::idim_max, BoostList::ishift_nspin, BoostList::list_6spin_pair, BoostList::list_6spin_star, nproc, BoostList::num_pivot, BoostList::R0, BoostList::vecB, BoostList::W0, and zgemm_().

Referenced by mltplySpinGCBoost().

43 {
44 #ifdef MPI
45  // Purpose: for each of the R0 sweeps and each pivot, build a 64x64 six-spin operator (matJL) and apply it to the vector panels with zgemm_, then redistribute the vectors with MPI_Alltoall. The body is compiled only when MPI is defined.
46  //double complex dam_pr = 0;
47  // MPI_Status statusMPI;
48  // Every access to tmp_v0..tmp_v3 below adds an offset of 1 to the computed index.
49  // int ierr;
50  // int INFO;
51  char TRANSA, TRANSB; // zgemm_ arguments, assigned inside the parallel region
52  int M, N, K, LDA, LDB, LDC;
53  double complex ALPHA, BETA;
54  long unsigned int i_max;
55  long unsigned int j, k, ell, iloop;
56  long unsigned int i1, i2;
57  long unsigned int iomp;
58  long unsigned int ell4, ell5, ell6, m0, Ipart1;
59  long unsigned int mi, mj, mri, mrj, mrk, mrl;
60  int indj;
61  long unsigned int ellrl, ellrk, ellrj, ellri, elli1, elli2, ellj1, ellj2;
62  long unsigned int iSS1, iSS2, iSSL1, iSSL2;
63  double complex **vecJ;
64  double complex **matJ, **matJ2;
65  double complex *matJL;
66  double complex *matI; // filled with the 64x64 identity below; only referenced by the commented-out zgemm_ call
67  double complex **matB;
68  double complex *arrayz;
69  double complex *arrayx;
70  double complex *arrayw; // allocated and freed per thread, but only used inside commented-out code
71  long unsigned int ishift1, ishift2, ishift3, ishift4, ishift5, pivot_flag, num_J_star;
72  long unsigned int pow4, pow5, pow41, pow51;
73  //long unsigned int pow1, pow2, pow3, pow4, pow5, pow11, pow21, pow31, pow41, pow51;
74 
75  i_max = X->Check.idim_max; // dimension of the Hilbert space of this process
76 
77 /*
78 //zero clear
79  #pragma omp parallel for default(none) private(j) \
80  shared(i_max,tmp_v0)
81  for(j=0;j<i_max;j++){
82  tmp_v0[j+1]=0.0;
83  }
84 */
85  // Work matrices: 3x3 (vecJ), 4x4 (matJ, matJ2), 2x2 (matB) and two flat 64*64 arrays (matJL, matI).
86  vecJ = cd_2d_allocate( 3, 3);
87  matJ = cd_2d_allocate(4, 4);
88  matJ2 = cd_2d_allocate(4, 4);
89  matB = cd_2d_allocate(2,2);
90  matJL = cd_1d_allocate(64*64);
91  matI = cd_1d_allocate(64*64);
92 
93  //defmodelBoost(X->Boost.W0, X->Boost.R0, X->Boost.num_pivot, X->Boost.ishift_nspin, X->Boost.list_6spin_star, X->Boost.list_6spin_pair, 1, X->Boost.arrayJ, X->Boost.vecB);
94  // Outer sweep: R0 iterations, each processing num_pivot pivots and ending with an MPI redistribution.
95  for(iloop=0; iloop < X->Boost.R0; iloop++){
96 
97 
98  for(j=iloop*X->Boost.num_pivot; j < (iloop+1)*X->Boost.num_pivot; j++){ // pivots belonging to this iloop
99  // Per-pivot parameters: entry 0 is the number of pair terms, 1..5 the shifts, 6 the pivot flag.
100  num_J_star = (long unsigned int)X->Boost.list_6spin_star[j][0]; //(0,j)
101  ishift1 = (long unsigned int)X->Boost.list_6spin_star[j][1]; //(1,j)
102  ishift2 = (long unsigned int)X->Boost.list_6spin_star[j][2]; //(2,j)
103  ishift3 = (long unsigned int)X->Boost.list_6spin_star[j][3]; //(3,j)
104  ishift4 = (long unsigned int)X->Boost.list_6spin_star[j][4]; //(4,j)
105  ishift5 = (long unsigned int)X->Boost.list_6spin_star[j][5]; //(5,j)
106  pivot_flag = (long unsigned int)X->Boost.list_6spin_star[j][6]; //(6,j)
107  //pow1 = (int)pow(2.0,ishift1);
108  //pow2 = (int)pow(2.0,ishift1+ishift2);
109  //pow3 = (int)pow(2.0,ishift1+ishift2+ishift3);
110  pow4 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4); // NOTE(review): the (int) cast limits these powers to the range of int - confirm the shift sums stay small enough
111  pow5 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5);
112  //pow11= (int)pow(2.0,ishift1+1);
113  //pow21= (int)pow(2.0,ishift1+ishift2+1);
114  //pow31= (int)pow(2.0,ishift1+ishift2+ishift3+1);
115  pow41= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+1); // 2*pow4
116  pow51= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+1); // 2*pow5
117  // Reset matJL to zero and matI to the identity for this pivot.
118  for(k=0; k < (64*64); k++){
119  matJL[k] = 0.0 + 0.0*I;
120  matI[k] = 0.0 + 0.0*I;
121  }
122  for(k=0; k < 64; k++){
123  matI[k+64*k] = 1.0;
124  }
125  // Accumulate every pair term of this pivot into matJL (indexed as row + 64*column).
126  for(ell=0; ell < num_J_star; ell++){
127  mi = (long unsigned int)X->Boost.list_6spin_pair[j][0][ell]; //(1,ell,j)
128  mj = (long unsigned int)X->Boost.list_6spin_pair[j][1][ell]; //(2,ell,j)
129  mri = (long unsigned int)X->Boost.list_6spin_pair[j][2][ell]; //(3,ell,j)
130  mrj = (long unsigned int)X->Boost.list_6spin_pair[j][3][ell]; //(4,ell,j)
131  mrk = (long unsigned int)X->Boost.list_6spin_pair[j][4][ell]; //(5,ell,j)
132  mrl = (long unsigned int)X->Boost.list_6spin_pair[j][5][ell]; //(6,ell,j)
133  indj = X->Boost.list_6spin_pair[j][6][ell]; //(7,ell,j)
134  for(i1 = 0; i1 < 3; i1++){
135  for(i2 = 0; i2 < 3; i2++){
136  vecJ[i1][i2] = X->Boost.arrayJ[(indj-1)][i1][i2]; // the stored indj is offset by one relative to the arrayJ index
137  }
138  }
139  //matJSS(1,1) = vecJ(3,3)
140  matJ[0][0] = vecJ[2][2];
141  //matJSS(1,2)= vecJ(1,1)-vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1)
142  matJ[0][1] = vecJ[0][0]-vecJ[1][1]-I*vecJ[0][1]-I*vecJ[1][0];
143  //matJSS(1,3)= vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2)
144  matJ[0][2] = vecJ[2][0]-I*vecJ[2][1];
145  //matJSS(1,4)= vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3)
146  matJ[0][3] = vecJ[0][2]-I*vecJ[1][2];
147  //matJSS(2,1)= vecJ(1,1)-vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1)
148  matJ[1][0] = vecJ[0][0]-vecJ[1][1]+I*vecJ[0][1]+I*vecJ[1][0];
149  //matJSS(2,2)= vecJ(3,3)
150  matJ[1][1] = vecJ[2][2];
151  //matJSS(2,3)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3)
152  matJ[1][2] =(-1.0)*vecJ[0][2]-I*vecJ[1][2];
153  //matJSS(2,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2)
154  matJ[1][3] =(-1.0)*vecJ[2][0]-I*vecJ[2][1];
155  //matJSS(3,1)= vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2)
156  matJ[2][0] = vecJ[2][0]+I*vecJ[2][1];
157  //matJSS(3,2)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3)
158  matJ[2][1] =(-1.0)*vecJ[0][2]+I*vecJ[1][2];
159  //matJSS(3,3)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3)
160  matJ[2][2] =(-1.0)*vecJ[2][2];
161  //matJSS(3,4)= vecJ(1,1)+vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1)
162  matJ[2][3] = vecJ[0][0]+vecJ[1][1]+I*vecJ[0][1]-I*vecJ[1][0];
163  //matJSS(4,1)= vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3)
164  matJ[3][0] = vecJ[0][2]+I*vecJ[1][2];
165  //matJSS(4,2)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2)
166  matJ[3][1] =(-1.0)*vecJ[2][0]+I*vecJ[2][1];
167  //matJSS(4,3)= vecJ(1,1)+vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1)
168  matJ[3][2] = vecJ[0][0]+vecJ[1][1]-I*vecJ[0][1]+I*vecJ[1][0];
169  //matJSS(4,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3)
170  matJ[3][3] =(-1.0)*vecJ[2][2];
171  // matJ2 holds the same entries with both indices cyclically shifted: matJ2[(r+3)%4][(c+3)%4] = matJ[r][c].
172  matJ2[3][3] = matJ[0][0];
173  matJ2[3][0] = matJ[0][1];
174  matJ2[3][1] = matJ[0][2];
175  matJ2[3][2] = matJ[0][3];
176  matJ2[0][3] = matJ[1][0];
177  matJ2[0][0] = matJ[1][1];
178  matJ2[0][1] = matJ[1][2];
179  matJ2[0][2] = matJ[1][3];
180  matJ2[1][3] = matJ[2][0];
181  matJ2[1][0] = matJ[2][1];
182  matJ2[1][1] = matJ[2][2];
183  matJ2[1][2] = matJ[2][3];
184  matJ2[2][3] = matJ[3][0];
185  matJ2[2][0] = matJ[3][1];
186  matJ2[2][1] = matJ[3][2];
187  matJ2[2][2] = matJ[3][3];
188  // Embed matJ2 on bits mi and mj of the 6-bit index; bits mri, mrj, mrk, mrl are the same in the row and column index.
189  for(ellri=0; ellri<2; ellri++){
190  for(ellrj=0; ellrj<2; ellrj++){
191  for(ellrk=0; ellrk<2; ellrk++){
192  for(ellrl=0; ellrl<2; ellrl++){
193  for(elli1=0; elli1<2; elli1++){
194  for(ellj1=0; ellj1<2; ellj1++){
195  for(elli2=0; elli2<2; elli2++){
196  for(ellj2=0; ellj2<2; ellj2++){
197 
198  iSSL1 = elli1*(int)pow(2,mi) + ellj1*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl);
199  iSSL2 = elli2*(int)pow(2,mi) + ellj2*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl);
200  iSS1 = elli1 + 2*ellj1;
201  iSS2 = elli2 + 2*ellj2;
202  matJL[iSSL1+64*iSSL2] += matJ2[iSS1][iSS2];
203  }
204  }
205  }
206  }
207  }
208  }
209  }
210  }
211 
212 
213  }/* loop for ell */
214 
215  /* external magnetic field B */
216  if(pivot_flag==1){
217  matB[0][0] = + X->Boost.vecB[2]; // -BM
218  matB[1][1] = - X->Boost.vecB[2]; // -BM
219  //matB[0][1] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM
220  //matB[1][0] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM
221  matB[0][1] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM
222  matB[1][0] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM
223  for(ellri=0; ellri<2; ellri++){
224  for(ellrj=0; ellrj<2; ellrj++){
225  for(ellrk=0; ellrk<2; ellrk++){
226  for(ellrl=0; ellrl<2; ellrl++){
227  for(ellj1=0; ellj1<2; ellj1++){
228  for(elli1=0; elli1<2; elli1++){
229  for(elli2=0; elli2<2; elli2++){
230  for(ellj2=0; ellj2<X->Boost.ishift_nspin; ellj2++){ // here ellj2 is a bit position, not a spin value; NOTE(review): the indices stay below 64 only if ishift_nspin <= 6 - confirm
231  iSSL1 = elli1*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6));
232  iSSL2 = elli2*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6));
233  matJL[iSSL1+64*iSSL2] += matB[elli1][elli2]; // matB acts on bit ellj2; all other bits are equal in row and column
234  }
235  }
236  }
237  }
238  }
239  }
240  }
241  }
242  }
243  /* external magnetic field B */
244  // Number of chunks handled by the omp-for loop; each chunk covers 2*pow51 consecutive vector elements.
245  iomp=i_max/(int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+2);
246  // NOTE(review): the shared() list below names myrank, which is not referenced inside the region.
247  #pragma omp parallel default(none) private(arrayx,arrayz,arrayw,ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \
248  shared(matJL,matI,iomp,i_max,myrank,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,tmp_v0,tmp_v1,tmp_v3)
249  {
250  // Per-thread scratch panels: 64 rows by 2^(ishift4+ishift5-1) columns each.
251  arrayx = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1)));
252  arrayz = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1)));
253  arrayw = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1)));
254 
255 #pragma omp for
256  for(ell6 = 0; ell6 < iomp; ell6++){
257  Ipart1=pow51*2*ell6; // offset of this chunk inside the vectors
258  for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){ // first half of the chunk: gather tmp_v1 -> arrayz and tmp_v0 -> arrayx, and copy tmp_v1 -> tmp_v3
259  for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){
260  for(m0 = 0; m0 < 16; m0++){
261  arrayz[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)];
262  arrayz[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)];
263  arrayz[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)];
264  arrayz[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)];
265  tmp_v3[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)];
266  tmp_v3[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)];
267  tmp_v3[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)];
268  tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)];
269  arrayx[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4 +pow41*ell5+Ipart1)];
270  arrayx[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)];
271  arrayx[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)];
272  arrayx[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)];
273  }
274  }
275  }
276  // Second half of the chunk (vector offset pow51), stored after the first 2^(ishift4+ishift5-2) panel columns.
277 
278  for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){
279  for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){
280  for(m0 = 0; m0 < 16; m0++){
281  arrayz[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)];
282  arrayz[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)];
283  arrayz[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)];
284  arrayz[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)];
285  tmp_v3[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)];
286  tmp_v3[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)];
287  tmp_v3[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)];
288  tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)];
289  arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)];
290  arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)];
291  arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)];
292  arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)];
293  }
294 
295  }
296  }
297  // arrayx = matJL * arrayz + arrayx: no transposes, ALPHA = BETA = 1, all leading dimensions 64.
298  TRANSA = 'N';
299  TRANSB = 'N';
300  M = 64;
301  N = (int)pow(2.0, ishift4+ishift5-1);
302  K = 64;
303  ALPHA = 1.0;
304  LDA = 64;
305  LDB = 64;
306  BETA = 1.0;
307  LDC = 64;
308 
309  zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matJL,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC);
310  //zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matI,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC);
311 /*
312  for(ell5=0;ell5<(64*N);ell5++){
313  arrayw[ell5]=0.0;
314  }
315  for(ell5=0;ell5<64;ell5++){
316  for(ell4=0;ell4<64;ell4++){
317  for(m0=0;m0<N;m0++){
318  arrayw[(ell5+64*m0)] += matJL[(ell5+64*ell4)]*arrayz[(ell4+64*m0)];
319  }
320  }
321  }
322  for(ell5=0;ell5<64*N;ell5++){
323  arrayx[ell5] += arrayw[ell5];
324  }
325 */
326  // Scatter the updated panel back into the vector. The result is written to tmp_v1, not tmp_v0;
327  // the original tmp_v1 contents were saved in tmp_v3 during the gather above.
328 
329  for(ell5 = 0; ell5 < (int)pow(2.0,ishift5-1); ell5++){
330  for(ell4 = 0; ell4 < (int)pow(2.0,ishift4-1); ell4++){
331  for(m0 = 0; m0 < 16; m0++){
332  tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)] = arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
333  tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)] = arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
334  tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)] = arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
335  tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)] = arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)))];
336  }
337  }
338  }
339  for(ell5 = 0; ell5 < (int)pow(2.0,ishift5-1); ell5++){
340  for(ell4 = 0; ell4 < (int)pow(2.0,ishift4-1); ell4++){
341  for(m0 = 0; m0 < 16; m0++){
342  tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)] = arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
343  tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)] = arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
344  tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)] = arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
345  tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)] = arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))];
346  }
347  }
348  }
349 
350  }/* omp parallel for */
351  free_cd_1d_allocate(arrayz);
352  free_cd_1d_allocate(arrayx);
353  free_cd_1d_allocate(arrayw);
354  }/* omp parallel */
355  // Move the result (now in tmp_v1) into tmp_v0 and restore tmp_v1 from tmp_v3.
356  if(pivot_flag==1){ // both copies also swap the roles of the fast index (range 2^ishift_nspin) and the slow index (range i_max/2^ishift_nspin)
357  iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin);
358  #pragma omp parallel for default(none) private(ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \
359  firstprivate(iomp) shared(i_max,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,X,tmp_v0,tmp_v1)
360  for(ell5 = 0; ell5 < iomp; ell5++ ){
361  for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){
362  tmp_v0[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v1[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)];
363  }
364  }
365  iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin);
366  #pragma omp parallel for default(none) private(ell4,ell5) \
367  firstprivate(iomp) shared(i_max,X,tmp_v1,tmp_v3)
368  for(ell5 = 0; ell5 < iomp; ell5++ ){
369  for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){
370  tmp_v1[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v3[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)];
371  }
372  }
373  }
374  else{ // no index swap: element-wise copies
375  #pragma omp parallel for default(none) private(ell4) \
376  shared(i_max,tmp_v0,tmp_v1,tmp_v3)
377  for(ell4 = 0; ell4 < i_max; ell4++ ){
378  tmp_v0[1 + ell4] = tmp_v1[1 + ell4];
379  tmp_v1[1 + ell4] = tmp_v3[1 + ell4];
380  }
381  }/* if pivot_flag */
382 
383  }/* loop for j */
384  // Exchange i_max/nproc elements with every rank: tmp_v1 -> tmp_v3 and tmp_v0 -> tmp_v2. The MPI return codes are not checked.
385  /*
386  ierr = MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
387  ierr = MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
388  */
389  MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
390  MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD);
391  // Reorder the received blocks from tmp_v3/tmp_v2 back into tmp_v1/tmp_v0;
392  // blocks of length i_max/2^W0 are permuted between the (ell4, ell5) and (ell5, ell4) orderings.
393  iomp=(int)pow(2.0,X->Boost.W0)/nproc;
394  #pragma omp parallel for default(none) private(ell4,ell5,ell6) \
395  firstprivate(iomp) shared(i_max,X,nproc,tmp_v0,tmp_v1,tmp_v2,tmp_v3)
396  //for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.W0)/nproc; ell4++ ){
397  for(ell4 = 0; ell4 < iomp; ell4++ ){
398  for(ell5 = 0; ell5 < nproc; ell5++ ){
399  for(ell6 = 0; ell6 < (int)(i_max/(int)pow(2.0,X->Boost.W0)); ell6++ ){
400  tmp_v1[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v3[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)];
401  tmp_v0[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v2[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)];
402  }
403  }
404  }
405 
406 
407  }/* loop for iloop */
408 
409 /*
410  dam_pr= X_child_general_int_spin_MPIBoost
411  (
412  matJ, X, tmp_v0, tmp_v1);
413 
414  X->Large.prdct += dam_pr;
415 */
416 // c_free1(arrayz, (int)pow(2.0, 16));
417 // c_free1(arrayx, (int)pow(2.0, 16));
418 // c_free1(arrayw, (int)pow(2.0, 16));
419  // Release the work matrices allocated at the top of the function.
420  free_cd_2d_allocate(vecJ);
421  free_cd_2d_allocate(matJ);
422  free_cd_2d_allocate(matJ2);
423  free_cd_2d_allocate(matB);
424  free_cd_1d_allocate(matJL);
425  free_cd_1d_allocate(matI);
426 #endif
427 
428 }/*void child_general_int_spin_MPIBoost*/
long unsigned int W0
Definition: struct.h:397
int ** list_6spin_star
Definition: struct.h:403
unsigned long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.h:303
long unsigned int num_pivot
Definition: struct.h:398
int *** list_6spin_pair
Definition: struct.h:404
int nproc
Number of processors, defined in InitializeMPI()
Definition: global.h:162
struct BoostList Boost
For Boost.
Definition: struct.h:414
double complex *** arrayJ
Definition: struct.h:401
void zgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double complex *ALPHA, double complex *matJL, int *LDA, double complex *arrayz, int *LDB, double complex *BETA, double complex *arrayx, int *LDC)
long unsigned int R0
Definition: struct.h:396
long unsigned int ishift_nspin
Definition: struct.h:399
double complex vecB[3]
Definition: struct.h:402
struct CheckList Check
Size of the Hilbert space.
Definition: struct.h:411
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ zgemm_()

void zgemm_ ( char *  TRANSA,
char *  TRANSB,
int *  M,
int *  N,
int *  K,
double complex *  ALPHA,
double complex *  matJL,
int *  LDA,
double complex *  arrayz,
int *  LDB,
double complex *  BETA,
double complex *  arrayx,
int *  LDC 
)

Referenced by child_general_int_spin_MPIBoost().

+ Here is the caller graph for this function: