HΦ  3.2.0
mltplyMPIHubbardCore.c
Go to the documentation of this file.
1 /* HPhi - Quantum Lattice Model Simulator */
2 /* Copyright (C) 2015 The University of Tokyo */
3 
4 /* This program is free software: you can redistribute it and/or modify */
5 /* it under the terms of the GNU General Public License as published by */
6 /* the Free Software Foundation, either version 3 of the License, or */
7 /* (at your option) any later version. */
8 
9 /* This program is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU General Public License for more details. */
13 
14 /* You should have received a copy of the GNU General Public License */
15 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #ifdef MPI
20 #include "mpi.h"
21 #endif
22 #include "Common.h"
23 #include "mltplyCommon.h"
24 #include "mltplyHubbardCore.h"
25 #include "mltplyMPIHubbard.h"
26 #include "mltplyMPIHubbardCore.h"
27 #include "bitcalc.h"
28 #include "wrapperMPI.h"
33 int CheckPE(
34  int org_isite,
35  struct BindStruct *X
36 ){
37  if (org_isite + 1 > X->Def.Nsite) {
38  return TRUE;
39  }
40  else {
41  return FALSE;
42  }
43 }/*int CheckPE*/
51  long unsigned int is1_spin,
52  long unsigned int orgbit,
53  long unsigned int *offbit
54 ) {
55  long unsigned int ibit_tmp;
56  ibit_tmp = orgbit & is1_spin;
57  if (ibit_tmp == 0) {
58  *offbit = orgbit + is1_spin;
59  return TRUE;
60  }
61  *offbit = 0;
62  return FALSE;
63 }/*int CheckBit_Cis*/
71  long unsigned int is1_spin,
72  long unsigned int orgbit,
73  long unsigned int *offbit
74 ) {
75  long unsigned int ibit_tmp;
76  ibit_tmp = orgbit & is1_spin;
77  if (ibit_tmp != 0) {
78  *offbit = orgbit - is1_spin;
79  return TRUE;
80  }
81  *offbit = 0;
82  return FALSE;
83 }/*int CheckBit_Ajt*/
91  int org_isite1,
92  int org_isigma1,
93  int org_isite2,
94  int org_isigma2,
95  int org_isite3,
96  int org_isigma3,
97  int org_isite4,
98  int org_isigma4,
99  struct BindStruct *X,
100  long unsigned int orgbit,
101  long unsigned int *offbit
102 ){
103  long unsigned int tmp_ispin;
104  long unsigned int tmp_org, tmp_off;
105  int iflgBitExist = TRUE;
106  tmp_org=orgbit;
107  tmp_off=0;
108 
109  if (CheckPE(org_isite1, X) == TRUE) {
110  tmp_ispin = X->Def.Tpow[2 * org_isite1 + org_isigma1];
111  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
112  iflgBitExist = FALSE;
113  }
114  tmp_org = tmp_off;
115  }
116 
117  if (CheckPE(org_isite2, X) == TRUE ) {
118  tmp_ispin = X->Def.Tpow[2 * org_isite2 + org_isigma2];
119  if (CheckBit_Cis(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
120  iflgBitExist = FALSE;
121  }
122  tmp_org = tmp_off;
123  }
124 
125  if (CheckPE(org_isite3, X) == TRUE) {
126  tmp_ispin = X->Def.Tpow[2 * org_isite3 + org_isigma3];
127  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
128  iflgBitExist = FALSE;
129  }
130  tmp_org = tmp_off;
131  }
132 
133  if (CheckPE(org_isite4, X) == TRUE) {
134  tmp_ispin = X->Def.Tpow[2 * org_isite4 + org_isigma4];
135  if (CheckBit_Cis(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
136  iflgBitExist = FALSE;
137  }
138  tmp_org = tmp_off;
139  }
140 
141  if(iflgBitExist != TRUE){
142  *offbit=0;
143  return FALSE;
144  }
145 
146  *offbit=tmp_org;
147  return TRUE;
148 }/*int CheckBit_InterAllPE*/
154  int org_isite1,
155  int org_isigma1,
156  int org_isite3,
157  int org_isigma3,
158  struct BindStruct *X,
159  long unsigned int orgbit
160 ){
161  long unsigned int tmp_ispin;
162  long unsigned int tmp_org, tmp_off;
163  int iflgBitExist = TRUE;
164  tmp_org=orgbit;
165 
166  if(CheckPE(org_isite1, X)==TRUE){
167  tmp_ispin = X->Def.Tpow[2 * org_isite1 + org_isigma1];
168  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
169  iflgBitExist=FALSE;
170  }
171  }
172 
173  if (CheckPE(org_isite3, X) == TRUE) {
174  tmp_ispin = X->Def.Tpow[2 * org_isite3 + org_isigma3];
175  if (CheckBit_Ajt(tmp_ispin, tmp_org, &tmp_off) != TRUE) {
176  iflgBitExist = FALSE;
177  }
178  }
179 
180  if(iflgBitExist != TRUE){
181  return FALSE;
182  }
183 
184  return TRUE;
185 }/*int CheckBit_PairPE*/
193  unsigned long int isite1,
194  unsigned long int isite2,
195  unsigned long int isite3,
196  unsigned long int isite4,
197  int *Fsgn,
198  struct BindStruct *X,
199  unsigned long int orgbit,
200  unsigned long int *offbit
201 ){
202  long unsigned int diffA;
203  long unsigned int tmp_off;
204  long unsigned int tmp_ispin1, tmp_ispin2;
205  int tmp_sgn=0;
206 
207  tmp_ispin1=isite2;
208  tmp_ispin2=isite1;
209 
210  if (tmp_ispin1 == tmp_ispin2) {
211  if ((orgbit & tmp_ispin1) == 0) {
212  *offbit = 0;
213  *Fsgn = tmp_sgn;
214  return FALSE;
215  }
216  tmp_sgn=1;
217  tmp_off = orgbit;
218  }
219  else {
220  if (tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1 * 2;
221  else diffA = tmp_ispin1-tmp_ispin2*2;
222 
223  tmp_sgn=X_GC_CisAjt(orgbit, X, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, &tmp_off);
224  if(tmp_sgn ==0){
225  *offbit =0;
226  *Fsgn = 0;
227  return FALSE;
228  }
229  }
230 
231  tmp_ispin1 = isite4;
232  tmp_ispin2 = isite3;
233 
234  if(tmp_ispin1 == tmp_ispin2){
235  if( (tmp_off & tmp_ispin1) == 0){
236  *offbit =0;
237  *Fsgn = 0;
238  return FALSE;
239  }
240  *offbit=tmp_off;
241  }
242  else{
243  if(tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1*2;
244  else diffA = tmp_ispin1-tmp_ispin2*2;
245  tmp_sgn *=X_GC_CisAjt(tmp_off, X, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, offbit);
246 
247  if(tmp_sgn ==0){
248  *offbit =0;
249  *Fsgn = 0;
250  return FALSE;
251  }
252  }
253 
254  *Fsgn =tmp_sgn;
255  *offbit = *offbit%X->Def.OrgTpow[2*X->Def.Nsite];
256  return TRUE;
257 }/*int GetSgnInterAll*/
264  int org_isite1,
265  int org_ispin1,
266  int org_isite3,
267  int org_ispin3,
268  double complex tmp_V,
269  struct BindStruct *X,
270  double complex *tmp_v0,
271  double complex *tmp_v1
272 ) {
273 #ifdef MPI
274  double complex dam_pr = 0.0;
275  int iCheck;
276  unsigned long int tmp_ispin1;
277  unsigned long int i_max = X->Check.idim_max;
278  unsigned long int tmp_off, j;
279  double complex dmv;
280 // MPI_Status statusMPI;
281 
282  iCheck=CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long unsigned int) myrank);
283  if(iCheck != TRUE){
284  return 0.0;
285  }
286 
287 #pragma omp parallel reduction(+:dam_pr) default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_v0, tmp_v1) \
288  firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off, tmp_ispin1)
289  {
290 
291  if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) {
292  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
293 #pragma omp for
294  for (j = 1; j <= i_max; j++) {
295  dmv = tmp_v1[j] * tmp_V;
296  tmp_v0[j] += dmv;
297  dam_pr += conj(tmp_v1[j]) * dmv;
298  }/*for (j = 1; j <= i_max; j++)*/
299  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
300  else {
301 #pragma omp for
302  for (j = 1; j <= i_max; j++) {
303  dmv = tmp_v1[j] * tmp_V;
304  dam_pr += conj(tmp_v1[j]) * dmv;
305  }/*for (j = 1; j <= i_max; j++)*/
306  }/*if (!(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
307  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/
308  else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) {
309  if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
310  else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
311 
312  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply
313 #pragma omp for
314  for (j = 1; j <= i_max; j++) {
315  if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) {
316  dmv = tmp_v1[j] * tmp_V;
317  tmp_v0[j] += dmv;
318  dam_pr += conj(tmp_v1[j]) * dmv;
319  }
320  }/*for (j = 1; j <= i_max; j++)*/
321  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
322  else {
323 #pragma omp for
324  for (j = 1; j <= i_max; j++) {
325  if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) {
326  dmv = tmp_v1[j] * tmp_V;
327  dam_pr += conj(tmp_v1[j]) * dmv;
328  }
329  }/*for (j = 1; j <= i_max; j++)*/
330  }/*if (!(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
331  }
332  }/*End of parallel region*/
333  return dam_pr;
334 #else
335  return 0.0;
336 #endif
337 }/*double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI*/
344  int org_isite1,
345  int org_ispin1,
346  int org_isite2,
347  int org_ispin2,
348  int org_isite3,
349  int org_ispin3,
350  double complex tmp_V,
351  struct BindStruct *X,
352  double complex *tmp_v0,
353  double complex *tmp_v1
354 ) {
355 #ifdef MPI
356  double complex dam_pr = 0.0;
357  unsigned long int i_max = X->Check.idim_max;
358  unsigned long int idim_max_buf;
359  int iCheck, ierr, Fsgn;
360  unsigned long int isite1, isite2, isite3;
361  unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
362  unsigned long int j, Asum, Adiff;
363  double complex dmv;
364  unsigned long int origin, tmp_off;
365  unsigned long int org_rankbit;
366  MPI_Status statusMPI;
367 
368  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long unsigned int) myrank, &origin);
369  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
370  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
371  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
372 
373  if (iCheck == TRUE) {
374  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
375  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
376  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
377  tmp_isite4 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
378  Asum = tmp_isite1 + tmp_isite2;
379  if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
380  else Adiff = tmp_isite1 - tmp_isite2 * 2;
381  }
382  else {
383  iCheck = CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1, X, (long unsigned int) myrank, &origin);
384  if (iCheck == TRUE) {
385  tmp_V = conj(tmp_V);
386  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
387  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
388  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
389  tmp_isite1 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
390  Asum = tmp_isite3 + tmp_isite4;
391  if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
392  else Adiff = tmp_isite3 - tmp_isite4 * 2;
393  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
394  tmp_V = 0;
395  }
396  }
397  else {
398  return 0.0;
399  }
400  }
401 
402  if (myrank == origin) {// only k is in PE
403 
404  if (CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE) return 0.0;
405 
406 #pragma omp parallel default(none) reduction(+:dam_pr) \
407 firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared(tmp_v0, tmp_v1)
408  {
409 #pragma omp for
410  for (j = 1; j <= i_max; j++)
411  dam_pr += GC_CisAjt(j, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off);
412 
413  if (X->Large.mode != M_CORR) {
414 #pragma omp for
415  for (j = 1; j <= i_max; j++)
416  dam_pr += GC_CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off);
417  }/*if (X->Large.mode != M_CORR)*/
418  }/*End of paralle region*/
419  return dam_pr;
420  }//myrank =origin
421  else {
422  ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
423  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
424  MPI_COMM_WORLD, &statusMPI);
425  if (ierr != 0) exitMPI(-1);
426  ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
427  v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
428  MPI_COMM_WORLD, &statusMPI);
429  if (ierr != 0) exitMPI(-1);
430 
431 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \
432 shared(v1buf, tmp_v1, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \
433 firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4)
434  {
435  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) {
436  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
437  else Adiff = isite1 - isite2 * 2;
438  SgnBit(((long unsigned int) myrank & Adiff), &Fsgn);
439  tmp_V *= Fsgn;
440 
441  if (org_isite3 + 1 > X->Def.Nsite) {
442  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
443 #pragma omp for
444  for (j = 1; j <= idim_max_buf; j++) {
445  dmv = tmp_V * v1buf[j];
446  tmp_v0[j] += dmv;
447  dam_pr += conj(tmp_v1[j]) * dmv;
448  }/*for (j = 1; j <= idim_max_buf; j++)*/
449  }
450  else {
451 #pragma omp for
452  for (j = 1; j <= idim_max_buf; j++) {
453  dmv = tmp_V * v1buf[j];
454  dam_pr += conj(tmp_v1[j]) * dmv;
455  }/*for (j = 1; j <= idim_max_buf; j++)*/
456  }
457  }
458  else { //org_isite3 <= X->Def.Nsite
459  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
460 #pragma omp for
461  for (j = 1; j <= idim_max_buf; j++) {
462  if (CheckBit_Ajt(isite3, j - 1, &tmp_off) == TRUE) {
463  dmv = tmp_V * v1buf[j];
464  tmp_v0[j] += dmv;
465  dam_pr += conj(tmp_v1[j]) * dmv;
466  }
467  }/*for (j = 1; j <= idim_max_buf; j++)*/
468  }
469  else {
470 #pragma omp for
471  for (j = 1; j <= idim_max_buf; j++) {
472  if (CheckBit_Ajt(isite3, j - 1, &tmp_off) == TRUE) {
473  dmv = tmp_V * v1buf[j];
474  dam_pr += conj(tmp_v1[j]) * dmv;
475  }
476  }/*for (j = 1; j <= idim_max_buf; j++)*/
477  }
478  }
479  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/
480  else {
481  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
482  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
483 #pragma omp for
484  for (j = 1; j <= idim_max_buf; j++) {
485  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
486  dmv = tmp_V * v1buf[j] * Fsgn;
487  tmp_v0[tmp_off + 1] += dmv;
488  dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
489  }
490  }/*for (j = 1; j <= idim_max_buf; j++)*/
491  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
492  else {
493 #pragma omp for
494  for (j = 1; j <= idim_max_buf; j++) {
495  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
496  dmv = tmp_V * v1buf[j] * Fsgn;
497  dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
498  }
499  }/*for (j = 1; j <= idim_max_buf; j++)*/
500  }
501  }
502  }/*End of parallel region*/
503  }/*myrank != origin*/
504  return dam_pr;
505 #else
506  return 0.0;
507 #endif
508 }/*double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI*/
515  int org_isite1,
516  int org_ispin1,
517  int org_isite3,
518  int org_ispin3,
519  int org_isite4,
520  int org_ispin4,
521  double complex tmp_V,
522  struct BindStruct *X,
523  double complex *tmp_v0,
524  double complex *tmp_v1
525 ) {
526 #ifdef MPI
527  double complex dam_pr = 0;
529  org_isite4, org_ispin4, org_isite3, org_ispin3,
530  org_isite1, org_ispin1, conj(tmp_V), X, tmp_v0, tmp_v1);
531  return conj(dam_pr);
532 #else
533  return 0.0;
534 #endif
535 }/*double complex X_GC_child_CisAisCjtAku_Hubbard_MPI*/
542  int org_isite1,
543  int org_ispin1,
544  int org_isite2,
545  int org_ispin2,
546  int org_isite3,
547  int org_ispin3,
548  int org_isite4,
549  int org_ispin4,
550  double complex tmp_V,
551  struct BindStruct *X,
552  double complex *tmp_v0,
553  double complex *tmp_v1
554 ) {
555 #ifdef MPI
556  double complex dam_pr = 0;
557  unsigned long int i_max = X->Check.idim_max;
558  unsigned long int idim_max_buf;
559  int iCheck, ierr, Fsgn;
560  unsigned long int isite1, isite2, isite3, isite4;
561  unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
562  unsigned long int j, Adiff, Bdiff;
563  double complex dmv;
564  unsigned long int origin, tmp_off, tmp_off2;
565  unsigned long int org_rankbit;
566  int iFlgHermite = FALSE;
567  MPI_Status statusMPI;
568 
569  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2,
570  org_isite3, org_ispin3, org_isite4, org_ispin4,
571  X, (long unsigned int) myrank, &origin);
572  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
573  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
574  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
575  isite4 = X->Def.Tpow[2 * org_isite4 + org_ispin4];
576 
577  if (iCheck == TRUE) {
578  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
579  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
580  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
581  tmp_isite4 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
582  }
583  else {
584  iCheck = CheckBit_InterAllPE(org_isite4, org_ispin4, org_isite3, org_ispin3,
585  org_isite2, org_ispin2, org_isite1, org_ispin1,
586  X, (long unsigned int) myrank, &origin);
587  if (iCheck == TRUE) {
588  tmp_V = conj(tmp_V);
589  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
590  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
591  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
592  tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
593  iFlgHermite = TRUE;
594  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
595  tmp_V = 0;
596  }
597  }
598  else {
599  return 0.0;
600  }
601  }
602 
603  if (myrank == origin) {
604  if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv
605  //calc nis
606  dam_pr = X_GC_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1);
607  //calc -nisniv
608  dam_pr -= X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1);
609  }/*if (isite1 == isite4 && isite2 == isite3)*/
610  else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE
611  //calc CisAku
612  if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
613  else Adiff = isite1 - isite4 * 2;
614 
615 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \
616 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0)
617  for (j = 1; j <= i_max; j++)
618  dam_pr += GC_CisAjt(j - 1, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
619 
620  //calc -CisAku njv
621  dam_pr -= X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4,
622  org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1);
623  if (X->Large.mode != M_CORR) { //for hermite
624 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \
625 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0)
626  for (j = 1; j <= i_max; j++)
627  dam_pr += GC_CisAjt(j - 1, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off);
628 
629  //calc -njvCkuAis
630  dam_pr -= X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4,
631  org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1);
632  }/*if (X->Large.mode != M_CORR)*/
633  }/*if (isite2 == isite3)*/
634  else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE
635  dam_pr = -X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3,
636  org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1);
637  if (X->Large.mode != M_CORR) { //for hermite
638  dam_pr += -X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2,
639  org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1);
640  }/*if (X->Large.mode != M_CORR)*/
641  }/*if (isite2 != isite3)*/
642  return dam_pr;
643  }//myrank =origin
644  else {
645  ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
646  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
647  MPI_COMM_WORLD, &statusMPI);
648  if (ierr != 0) exitMPI(-1);
649  ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
650  v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
651  MPI_COMM_WORLD, &statusMPI);
652  if (ierr != 0) exitMPI(-1);
653 
654  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
655  && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite) {
656 
657  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
658  else Adiff = isite1 - isite2 * 2;
659  if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
660  else Bdiff = isite3 - isite4 * 2;
661 
662  if (iFlgHermite == FALSE) {
663  Fsgn = X_GC_CisAjt((long unsigned int) myrank, X, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
664  Fsgn *= X_GC_CisAjt(tmp_off2, X, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
665  tmp_V *= Fsgn;
666  }/*if (iFlgHermite == FALSE)*/
667  else {
668  Fsgn = X_GC_CisAjt((long unsigned int) myrank, X, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
669  Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
670  tmp_V *= Fsgn;
671  }/*if (iFlgHermite == TRUE)*/
672  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
673 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0)
674  for (j = 1; j <= idim_max_buf; j++) {
675  dmv = tmp_V * v1buf[j];
676  tmp_v0[j] += dmv;
677  dam_pr += conj(tmp_v1[j]) * dmv;
678  }/*for (j = 1; j <= idim_max_buf; j++)*/
679  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
680  else {
681 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0)
682  for (j = 1; j <= idim_max_buf; j++) {
683  dmv = tmp_V * v1buf[j];
684  dam_pr += conj(tmp_v1[j]) * dmv;
685  }/*for (j = 1; j <= idim_max_buf; j++)*/
686  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
687  }
688  else {
689  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
690  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
691 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0)
692  for (j = 1; j <= idim_max_buf; j++) {
693  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
694  dmv = tmp_V * v1buf[j] * Fsgn;
695  tmp_v0[tmp_off + 1] += dmv;
696  dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
697  }
698  }/*for (j = 1; j <= idim_max_buf; j++)*/
699  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
700  else {
701 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0)
702  for (j = 1; j <= idim_max_buf; j++) {
703  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) {
704  dmv = tmp_V * v1buf[j] * Fsgn;
705  dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv;
706  }
707  }/*for (j = 1; j <= idim_max_buf; j++)*/
708  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
709  }
710  }/*myrank != origin*/
711  return dam_pr;
712 #else
713  return 0.0;
714 #endif
715 }/*double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI*/
722  int org_isite1,
723  int org_ispin1,
724  double complex tmp_V,
725  struct BindStruct *X,
726  double complex *tmp_v0,
727  double complex *tmp_v1
728 ) {
729 #ifdef MPI
730  double complex dam_pr = 0.0;
731  unsigned long int i_max = X->Check.idim_max;
732  unsigned long int j, isite1, tmp_off;
733  double complex dmv;
734 // MPI_Status statusMPI;
735 
736  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
737  if (org_isite1 + 1 > X->Def.Nsite) {
738  if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE) return 0.0;
739 
740 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \
741  firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off)
742  {
743  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply
744 #pragma omp for
745  for (j = 1; j <= i_max; j++) {
746  dmv = tmp_v1[j] * tmp_V;
747  tmp_v0[j] += dmv;
748  dam_pr += conj(tmp_v1[j]) * dmv;
749  }/*for (j = 1; j <= i_max; j++)*/
750  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
751  else {
752 #pragma omp for
753  for (j = 1; j <= i_max; j++) {
754  dmv = tmp_v1[j] * tmp_V;
755  dam_pr += conj(tmp_v1[j]) * dmv;
756  }/*for (j = 1; j <= i_max; j++)*/
757  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
758  }/*End of parallel region*/
759  }/*if (org_isite1 + 1 > X->Def.Nsite)*/
760  else {
761 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \
762  firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off)
763  {
764  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply
765 #pragma omp for
766  for (j = 1; j <= i_max; j++) {
767  if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE) {
768  dmv = tmp_v1[j] * tmp_V;
769  tmp_v0[j] += dmv;
770  dam_pr += conj(tmp_v1[j]) * dmv;
771  }/*if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE)*/
772  }/*for (j = 1; j <= i_max; j++)*/
773  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
774  else {
775 #pragma omp for
776  for (j = 1; j <= i_max; j++) {
777  if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE) {
778  dmv = tmp_v1[j] * tmp_V;
779  dam_pr += conj(tmp_v1[j]) * dmv;
780  }/*if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE)*/
781  }/*for (j = 1; j <= i_max; j++)*/
782  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
783  }/*End of parallel region*/
784  }/*if (org_isite1 + 1 <= X->Def.Nsite)*/
785  return dam_pr;
786 #else
787  return 0.0;
788 #endif
789 }/*double complex X_GC_child_CisAis_Hubbard_MPI*/
796  int org_isite1,
797  int org_ispin1,
798  int org_isite2,
799  int org_ispin2,
800  double complex tmp_trans,
801  struct BindStruct *X,
802  double complex *tmp_v0,
803  double complex *tmp_v1
804 ) {
805 #ifdef MPI
806  double complex dam_pr = 0.0;
807 // MPI_Status statusMPI;
808 
809  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) {
810  dam_pr = X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, tmp_v0, tmp_v1);
811  }
812  else if (org_isite1 + 1 > X->Def.Nsite || org_isite2 + 1 > X->Def.Nsite) {
813  dam_pr = X_GC_child_general_hopp_MPIsingle(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, tmp_v0, tmp_v1);
814  }
815  else {
816  //error message will be added.
817  exitMPI(-1);
818  }
819  return dam_pr;
820 #else
821  return 0.0;
822 #endif
823 }/*double complex X_GC_child_CisAjt_Hubbard_MPI*/
830  int org_isite1,
831  int org_ispin1,
832  int org_isite3,
833  int org_ispin3,
834  double complex tmp_V,
835  struct BindStruct *X,
836  double complex *tmp_v0,
837  double complex *tmp_v1
838 ) {
839 #ifdef MPI
840  double complex dam_pr = 0.0;
841  int iCheck;
842  unsigned long int tmp_ispin1;
843  unsigned long int i_max = X->Check.idim_max;
844  unsigned long int tmp_off, j;
845  double complex dmv;
846 // MPI_Status statusMPI;
847 
848  iCheck = CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long unsigned int) myrank);
849  if (iCheck != TRUE) return 0.0;
850 
851 #pragma omp parallel reduction(+:dam_pr) default(none) \
852 shared(tmp_v0, tmp_v1, list_1, org_isite1, org_ispin1, org_isite3, org_ispin3) \
853  firstprivate(i_max, tmp_V, X, tmp_ispin1) private(dmv, j, tmp_off)
854  {
855  if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) {
856  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply
857 #pragma omp for
858  for (j = 1; j <= i_max; j++) {
859  dmv = tmp_v1[j] * tmp_V;
860  tmp_v0[j] += dmv;
861  dam_pr += conj(tmp_v1[j]) * dmv;
862  }/*for (j = 1; j <= i_max; j++)*/
863  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
864  else {
865 #pragma omp for
866  for (j = 1; j <= i_max; j++) {
867  dmv = tmp_v1[j] * tmp_V;
868  dam_pr += conj(tmp_v1[j]) * dmv;
869  }/*for (j = 1; j <= i_max; j++)*/
870  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
871  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/
872  else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) {
873  if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
874  else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
875 
876  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
877 #pragma omp for
878  for (j = 1; j <= i_max; j++) {
879  if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) {
880  dmv = tmp_v1[j] * tmp_V;
881  tmp_v0[j] += dmv;
882  dam_pr += conj(tmp_v1[j]) * dmv;
883  }
884  }/*for (j = 1; j <= i_max; j++)*/
885  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
886  else {
887 #pragma omp for
888  for (j = 1; j <= i_max; j++) {
889  if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) {
890  dmv = tmp_v1[j] * tmp_V;
891  dam_pr += conj(tmp_v1[j]) * dmv;
892  }
893  }/*for (j = 1; j <= i_max; j++)*/
894  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
895  }/*if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite)*/
896  }/*End of parallel region*/
897  return dam_pr;
898 #else
899  return 0.0;
900 #endif
901 }/*double complex X_child_CisAisCjtAjt_Hubbard_MPI*/
908  int org_isite1,
909  int org_ispin1,
910  int org_isite2,
911  int org_ispin2,
912  int org_isite3,
913  int org_ispin3,
914  int org_isite4,
915  int org_ispin4,
916  double complex tmp_V,
917  struct BindStruct *X,
918  double complex *tmp_v0,
919  double complex *tmp_v1
920 ) {
921 #ifdef MPI
922  double complex dam_pr = 0;
923  unsigned long int i_max = X->Check.idim_max;
924  unsigned long int idim_max_buf;
925  int iCheck, ierr, Fsgn;
926  unsigned long int isite1, isite2, isite3, isite4;
927  unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
928  unsigned long int j, Adiff, Bdiff;
929  double complex dmv;
930  unsigned long int origin, tmp_off, tmp_off2;
931  unsigned long int org_rankbit, ioff;
932  int iFlgHermite = FALSE;
933  MPI_Status statusMPI;
934 
935  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2,
936  org_isite3, org_ispin3, org_isite4, org_ispin4,
937  X, (long unsigned int) myrank, &origin);
938  //printf("iCheck=%d, myrank=%d, origin=%d\n", iCheck, myrank, origin);
939  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
940  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
941  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
942  isite4 = X->Def.Tpow[2 * org_isite4 + org_ispin4];
943 
944  if (iCheck == TRUE) {
945  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
946  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
947  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
948  tmp_isite4 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
949  }/*if (iCheck == TRUE)*/
950  else {
951  iCheck = CheckBit_InterAllPE(org_isite4, org_ispin4, org_isite3, org_ispin3,
952  org_isite2, org_ispin2, org_isite1, org_ispin1,
953  X, (long unsigned int) myrank, &origin);
954  if (iCheck == TRUE) {
955  tmp_V = conj(tmp_V);
956  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
957  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
958  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
959  tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
960  iFlgHermite = TRUE;
961  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
962  }/*if (iCheck == TRUE)*/
963  else return 0.0;
964  }/*if (iCheck == FALSE)*/
965 
966  if (myrank == origin) {
967  if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv
968  //calc nis
969  dam_pr = X_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1);
970  //calc -nisniv
971  dam_pr -= X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1);
972  }/* if (isite1 == isite4 && isite2 == isite3)*/
973  else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE
974  if (isite4 > isite1) Adiff = isite4 - isite1 * 2;
975  else Adiff = isite1 - isite4 * 2;
976 
977  //calc CisAku
978 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \
979 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0, list_1)
980  for (j = 1; j <= i_max; j++)
981  dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V);
982 
983  //calc -CisAku njv
984  dam_pr -= X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4,
985  org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1);
986 
987  if (X->Large.mode != M_CORR) { //for hermite
988 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \
989 firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0)
990  for (j = 1; j <= i_max; j++)
991  dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V);
992 
993  //calc -njvCkuAis
994  dam_pr -= X_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1);
995  }/*if (X->Large.mode != M_CORR)*/
996  }/*if (isite2 == isite3)*/
997  else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE
998  dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1);
999 
1000  if (X->Large.mode != M_CORR) //for hermite: CisAkuCjtAis=-CisAisCjtAku
1001  dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2,
1002  org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1);
1003  }/*if (isite2 != isite3)*/
1004  return dam_pr;
1005  }//myrank =origin
1006  else {
1007  ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1008  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1009  MPI_COMM_WORLD, &statusMPI);
1010  if (ierr != 0) exitMPI(-1);
1011 
1012  ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1013  list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1014  MPI_COMM_WORLD, &statusMPI);
1015  if (ierr != 0) exitMPI(-1);
1016 
1017  ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1018  v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1019  MPI_COMM_WORLD, &statusMPI);
1020  if (ierr != 0) exitMPI(-1);
1021  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
1022  && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite)
1023  {
1024  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
1025  else Adiff = isite1 - isite2 * 2;
1026  if (isite4 > isite3) Bdiff = isite4 - isite3 * 2;
1027  else Bdiff = isite3 - isite4 * 2;
1028 
1029  if (iFlgHermite == FALSE) {
1030  Fsgn = X_GC_CisAjt((long unsigned int) myrank, X, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2);
1031  Fsgn *= X_GC_CisAjt(tmp_off2, X, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off);
1032  tmp_V *= Fsgn;
1033  }/*if (iFlgHermite == FALSE)*/
1034  else {
1035  Fsgn = X_GC_CisAjt((long unsigned int) myrank, X, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2);
1036  Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off);
1037  tmp_V *= Fsgn;
1038  }/*if (iFlgHermite == TRUE)*/
1039  dam_pr = 0;
1040 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff) \
1041 firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0, list_2_1, list_2_2, list_1buf)
1042  {
1043  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
1044 #pragma omp for
1045  for (j = 1; j <= idim_max_buf; j++) {
1047  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE)
1048  {
1049  dmv = tmp_V * v1buf[j];
1050  tmp_v0[ioff] += dmv;
1051  dam_pr += conj(tmp_v1[ioff]) * dmv;
1052  }
1053  }/*for (j = 1; j <= idim_max_buf; j++)*/
1054  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
1055  else {
1056 #pragma omp for
1057  for (j = 1; j <= idim_max_buf; j++) {
1059  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE)
1060  {
1061  dmv = tmp_V * v1buf[j];
1062  dam_pr += conj(tmp_v1[ioff]) * dmv;
1063  }
1064  }/*for (j = 1; j <= idim_max_buf; j++)*/
1065  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
1066  }/*End of parallel region*/
1067  }//org_isite1+1 > X->Def.Nsite && org_isite2+1 > X->Def.Nsite
1068  // && org_isite3+1 > X->Def.Nsite && org_isite4+1 > X->Def.Nsite
1069  else {
1070  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
1071  dam_pr = 0;
1072 
1073 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, ioff) \
1074 firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \
1075 org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \
1076 shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2)
1077  {
1078  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
1079 #pragma omp for
1080  for (j = 1; j <= idim_max_buf; j++) {
1081  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
1082  list_1buf[j] + org_rankbit, &tmp_off) == TRUE)
1083  {
1084  if (GetOffComp(list_2_1, list_2_2, tmp_off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE)
1085  {
1086  dmv = tmp_V * v1buf[j] * Fsgn;
1087  tmp_v0[ioff] += dmv;
1088  dam_pr += conj(tmp_v1[ioff]) * dmv;
1089  }
1090  }
1091  }/*for (j = 1; j <= idim_max_buf; j++)*/
1092  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
1093  else {
1094 #pragma omp for
1095  for (j = 1; j <= idim_max_buf; j++) {
1096  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
1097  list_1buf[j] + org_rankbit, &tmp_off) == TRUE)
1098  {
1099  if (GetOffComp(list_2_1, list_2_2, tmp_off,
1100  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE)
1101  {
1102  dmv = tmp_V * v1buf[j] * Fsgn;
1103  dam_pr += conj(tmp_v1[ioff]) * dmv;
1104  }
1105  }
1106  }/*for (j = 1; j <= idim_max_buf; j++)*/
1107  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
1108  }/*End of parallel region*/
1109  }
1110  }/*if (myrank != origin)*/
1111  return dam_pr;
1112 #else
1113  return 0.0;
1114 #endif
1115 }/*double complex X_child_CisAjtCkuAlv_Hubbard_MPI*/
1122  int org_isite1,
1123  int org_ispin1,
1124  int org_isite2,
1125  int org_ispin2,
1126  int org_isite3,
1127  int org_ispin3,
1128  double complex tmp_V,
1129  struct BindStruct *X,
1130  double complex *tmp_v0,
1131  double complex *tmp_v1
1132 ) {
1133 #ifdef MPI
1134  double complex dam_pr = 0.0;
1135  unsigned long int i_max = X->Check.idim_max;
1136  unsigned long int idim_max_buf, ioff;
1137  int iCheck, ierr, Fsgn;
1138  unsigned long int isite1, isite2, isite3;
1139  unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
1140  unsigned long int j, Asum, Adiff;
1141  double complex dmv;
1142  unsigned long int origin, tmp_off;
1143  unsigned long int org_rankbit;
1144  MPI_Status statusMPI;
1145  //printf("Deubg0-0: org_isite1=%d, org_ispin1=%d, org_isite2=%d, org_ispin2=%d, org_isite3=%d, org_ispin3=%d\n", org_isite1, org_ispin1,org_isite2, org_ispin2,org_isite3, org_ispin3);
1146  iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long unsigned int) myrank, &origin);
1147  //printf("iCheck=%d, myrank=%d, origin=%d\n", iCheck, myrank, origin);
1148 
1149  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
1150  isite2 = X->Def.Tpow[2 * org_isite2 + org_ispin2];
1151  isite3 = X->Def.Tpow[2 * org_isite3 + org_ispin3];
1152 
1153  if (iCheck == TRUE) {
1154  tmp_isite1 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
1155  tmp_isite2 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
1156  tmp_isite3 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1157  tmp_isite4 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1158  Asum = tmp_isite1 + tmp_isite2;
1159  if (tmp_isite2 > tmp_isite1) Adiff = tmp_isite2 - tmp_isite1 * 2;
1160  else Adiff = tmp_isite1 - tmp_isite2 * 2;
1161  }/*if (iCheck == TRUE)*/
1162  else {
1163  iCheck = CheckBit_InterAllPE(org_isite3, org_ispin3, org_isite3, org_ispin3, org_isite2, org_ispin2, org_isite1, org_ispin1, X, (long unsigned int) myrank, &origin);
1164  if (iCheck == TRUE) {
1165  tmp_V = conj(tmp_V);
1166  tmp_isite4 = X->Def.OrgTpow[2 * org_isite1 + org_ispin1];
1167  tmp_isite3 = X->Def.OrgTpow[2 * org_isite2 + org_ispin2];
1168  tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1169  tmp_isite1 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
1170  Asum = tmp_isite3 + tmp_isite4;
1171  if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
1172  else Adiff = tmp_isite3 - tmp_isite4 * 2;
1173  if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
1174  //printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff);
1175  }/*if (iCheck == TRUE)*/
1176  else return 0.0;
1177  }/*if (iCheck == FALSE)*/
1178 
1179  if (myrank == origin) {// only k is in PE
1180  //for hermite
1181 #pragma omp parallel default(none) reduction(+:dam_pr) \
1182 firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1)
1183  {
1184 #pragma omp for
1185  for (j = 1; j <= i_max; j++)
1186  dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V);
1187 
1188  if (X->Large.mode != M_CORR) {
1189 #pragma omp for
1190  for (j = 1; j <= i_max; j++)
1191  dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V);
1192  }/*if (X->Large.mode != M_CORR)*/
1193  }/*End of parallel region*/
1194  return dam_pr;
1195  }//myrank =origin
1196  else {
1197  ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1198  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1199  MPI_COMM_WORLD, &statusMPI);
1200  if (ierr != 0) exitMPI(-1);
1201  ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1202  list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1203  MPI_COMM_WORLD, &statusMPI);
1204  if (ierr != 0) exitMPI(-1);
1205 
1206  ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1207  v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1208  MPI_COMM_WORLD, &statusMPI);
1209  if (ierr != 0) exitMPI(-1);
1210 
1211 #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \
1212 firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \
1213 shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2)
1214  {
1215 
1216  if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) {
1217  if (isite2 > isite1) Adiff = isite2 - isite1 * 2;
1218  else Adiff = isite1 - isite2 * 2;
1219  SgnBit(((long unsigned int) myrank & Adiff), &Fsgn);
1220  tmp_V *= Fsgn;
1221 
1222  if (org_isite3 + 1 > X->Def.Nsite) {
1223  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
1224 #pragma omp for
1225  for (j = 1; j <= idim_max_buf; j++) {
1226  dmv = tmp_V * v1buf[j];
1228  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
1229  tmp_v0[ioff] += dmv;
1230  dam_pr += conj(tmp_v1[ioff]) * dmv;
1231  }/*for (j = 1; j <= idim_max_buf; j++)*/
1232  }
1233  else {
1234  #pragma omp for
1235  for (j = 1; j <= idim_max_buf; j++) {
1236  dmv = tmp_V * v1buf[j];
1238  dam_pr += conj(tmp_v1[ioff]) * dmv;
1239  }/*for (j = 1; j <= idim_max_buf; j++)*/
1240  }
1241  }/*if (org_isite3 + 1 > X->Def.Nsite)*/
1242  else { //org_isite3 <= X->Def.Nsite
1243  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
1244 #pragma omp for
1245  for (j = 1; j <= idim_max_buf; j++) {
1246  if (CheckBit_Ajt(isite3, list_1buf[j], &tmp_off) == TRUE) {
1247  dmv = tmp_V * v1buf[j];
1249  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
1250  tmp_v0[ioff] += dmv;
1251  dam_pr += conj(tmp_v1[ioff]) * dmv;
1252  }
1253  }/*for (j = 1; j <= idim_max_buf; j++)*/
1254  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
1255  else {
1256  #pragma omp for
1257  for (j = 1; j <= idim_max_buf; j++) {
1258  if (CheckBit_Ajt(isite3, list_1buf[j], &tmp_off) == TRUE) {
1259  //printf("calc\n");
1260  dmv = tmp_V * v1buf[j];
1262  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
1263  dam_pr += conj(tmp_v1[ioff]) * dmv;
1264  }
1265  }/*for (j = 1; j <= idim_max_buf; j++)*/
1266  }
1267  }/*if (org_isite3 + 1 <= X->Def.Nsite)*/
1268  }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/
1269  else {
1270  org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin;
1271  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
1272 #pragma omp for
1273  for (j = 1; j <= idim_max_buf; j++) {
1274  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
1275  list_1buf[j] + org_rankbit, &tmp_off) == TRUE) {
1276  dmv = tmp_V * v1buf[j] * Fsgn;
1277  GetOffComp(list_2_1, list_2_2, tmp_off,
1278  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
1279  tmp_v0[ioff] += dmv;
1280  dam_pr += conj(tmp_v1[ioff]) * dmv;
1281  }
1282  }/*for (j = 1; j <= idim_max_buf; j++)*/
1283  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
1284  else {
1285 #pragma omp for
1286  for (j = 1; j <= idim_max_buf; j++) {
1287  if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X,
1288  list_1buf[j] + org_rankbit, &tmp_off) == TRUE)
1289  {
1290  dmv = tmp_V * v1buf[j] * Fsgn;
1291  GetOffComp(list_2_1, list_2_2, tmp_off,
1292  X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff);
1293  dam_pr += conj(tmp_v1[ioff]) * dmv;
1294  }
1295  }/*for (j = 1; j <= idim_max_buf; j++)*/
1296  }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/
1297  }
1298  }/*End of parallel region*/
1299  }/*if (myrank != origin)*/
1300  return dam_pr;
1301 #else
1302  return 0.0;
1303 #endif
1304 }/*double complex X_child_CisAjtCkuAku_Hubbard_MPI*/
1311  int org_isite1,
1312  int org_ispin1,
1313  int org_isite3,
1314  int org_ispin3,
1315  int org_isite4,
1316  int org_ispin4,
1317  double complex tmp_V,
1318  struct BindStruct *X,
1319  double complex *tmp_v0,
1320  double complex *tmp_v1
1321 ) {
1322 #ifdef MPI
1323  double complex dam_pr = 0;
1324 
1326  org_isite4, org_ispin4, org_isite3, org_ispin3,
1327  org_isite1, org_ispin1, conj(tmp_V), X, tmp_v0, tmp_v1);
1328 
1329  return conj(dam_pr);
1330 #else
1331  return 0.0;
1332 #endif
1333 }/*double complex X_child_CisAisCjtAku_Hubbard_MPI*/
1334 
1336  int org_isite1,
1337  int org_ispin1,
1338  double complex tmp_V,
1339  struct BindStruct *X,
1340  double complex *tmp_v0,
1341  double complex *tmp_v1
1342 ) {
1343 #ifdef MPI
1344  double complex dam_pr = 0.0;
1345  unsigned long int i_max = X->Check.idim_max;
1346  unsigned long int j, isite1, tmp_off;
1347  double complex dmv;
1348 // MPI_Status statusMPI;
1349 
1350  isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1];
1351  if (org_isite1 + 1 > X->Def.Nsite) {
1352  if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE)
1353  return 0.0;
1354 
1355 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \
1356  firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off)
1357  {
1358  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply
1359 #pragma omp for
1360  for (j = 1; j <= i_max; j++) {
1361  dmv = tmp_v1[j] * tmp_V;
1362  tmp_v0[j] += dmv;
1363  dam_pr += conj(tmp_v1[j]) * dmv;
1364  }/*for (j = 1; j <= i_max; j++)*/
1365  }
1366  else {
1367 #pragma omp for
1368  for (j = 1; j <= i_max; j++) {
1369  dmv = tmp_v1[j] * tmp_V;
1370  dam_pr += conj(tmp_v1[j]) * dmv;
1371  }/*for (j = 1; j <= i_max; j++)*/
1372  }
1373  }/*End of parallel*/
1374  }/*if (org_isite1 + 1 > X->Def.Nsite)*/
1375  else {
1376 #pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1, list_1) \
1377  firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off)
1378  {
1379  if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply
1380 #pragma omp for
1381  for (j = 1; j <= i_max; j++) {
1382  if (X_CisAis(list_1[j], X, isite1) != 0) {
1383  dmv = tmp_v1[j] * tmp_V;
1384  tmp_v0[j] += dmv;
1385  dam_pr += conj(tmp_v1[j]) * dmv;
1386  }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/
1387  }/*for (j = 1; j <= i_max; j++)*/
1388  }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/
1389  else {
1390 #pragma omp for
1391  for (j = 1; j <= i_max; j++) {
1392  if (X_CisAis(list_1[j], X, isite1) != 0) {
1393  dmv = tmp_v1[j] * tmp_V;
1394  dam_pr += conj(tmp_v1[j]) * dmv;
1395  }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/
1396  }/*for (j = 1; j <= i_max; j++)*/
1397  }
1398  }/*End of parallel region*/
1399  }/*if (org_isite1 + 1 <= X->Def.Nsite)*/
1400  return dam_pr;
1401 #else
1402  return 0.0;
1403 #endif
1404 }/*double complex X_child_CisAis_Hubbard_MPI*/
1412 double complex X_GC_Cis_MPI(
1413  int org_isite,
1414  int org_ispin,
1415  double complex tmp_trans,
1416  double complex *tmp_v0,
1417  double complex *tmp_v1,
1418  unsigned long int idim_max,
1419  double complex *tmp_v1buf,
1420  unsigned long int *Tpow
1421 ) {
1422 #ifdef MPI
1423  int mask2, state2, ierr, origin, bit2diff, Fsgn;
1424  unsigned long int idim_max_buf, j;
1425  MPI_Status statusMPI;
1426  double complex trans, dmv, dam_pr;
1427 
1428  // org_isite >= Nsite
1429  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1430 
1431  origin = myrank ^ mask2; // XOR
1432  state2 = origin & mask2;
1433 
1434  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1435  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1436 
1437  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1438 
1439  //SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign
1440  SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign
1441 
1442  ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1443  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1444  MPI_COMM_WORLD, &statusMPI);
1445  if (ierr != 0) exitMPI(-1);
1446 
1447  ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1448  tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1449  MPI_COMM_WORLD, &statusMPI);
1450  if (ierr != 0) exitMPI(-1);
1451 
1452  if (state2 == mask2) {
1453  trans = 0;
1454  }
1455  else if (state2 == 0) {
1456  trans = (double)Fsgn * tmp_trans;
1457  }
1458  else return 0;
1459 
1460  dam_pr = 0.0;
1461 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \
1462  firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0)
1463  for (j = 0; j < idim_max_buf; j++) {
1464  dmv = trans * tmp_v1buf[j + 1];
1465  tmp_v0[j + 1] += dmv;
1466  dam_pr += conj(tmp_v1[j + 1]) * dmv;
1467  }
1468  return (dam_pr);
1469 #else
1470  return 0.0;
1471 #endif
1472 }/*double complex X_GC_Cis_MPI*/
1480 double complex X_GC_Ajt_MPI(
1481  int org_isite,
1482  int org_ispin,
1483  double complex tmp_trans,
1484  double complex *tmp_v0,
1485  double complex *tmp_v1,
1486  unsigned long int idim_max,
1487  double complex *tmp_v1buf,
1488  unsigned long int *Tpow
1489 ) {
1490 #ifdef MPI
1491  int mask2, state2, ierr, origin, bit2diff, Fsgn;
1492  unsigned long int idim_max_buf, j;
1493  MPI_Status statusMPI;
1494  double complex trans, dmv, dam_pr;
1495 
1496  // org_isite >= Nsite
1497  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1498 
1499  origin = myrank ^ mask2; // XOR
1500  state2 = origin & mask2;
1501 
1502  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1503  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1504 
1505  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1506 
1507  //SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign
1508  SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign
1509 
1510  ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1511  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1512  MPI_COMM_WORLD, &statusMPI);
1513  if (ierr != 0) exitMPI(-1);
1514 
1515  ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1516  tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1517  MPI_COMM_WORLD, &statusMPI);
1518  if (ierr != 0) exitMPI(-1);
1519 
1520  if ( state2 == 0 ) trans = 0;
1521  else if (state2 == mask2) trans = (double)Fsgn * tmp_trans;
1522  else return 0;
1523 
1524  dam_pr = 0.0;
1525 #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \
1526 firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0)
1527  for (j = 0; j < idim_max_buf; j++) {
1528  dmv = trans * tmp_v1buf[j + 1];
1529  tmp_v0[j + 1] += dmv;
1530  dam_pr += conj(tmp_v1[j + 1]) * dmv;
1531  }
1532  return (dam_pr);
1533 #else
1534  return 0.0;
1535 #endif
1536 }/*double complex X_GC_Ajt_MPI*/
1542 double complex X_Cis_MPI(
1543  int org_isite,
1544  unsigned int org_ispin,
1545  double complex tmp_trans,
1546  double complex *tmp_v0,
1547  double complex *tmp_v1,
1548  double complex *tmp_v1buf,
1549  unsigned long int idim_max,
1550  long unsigned int *Tpow,
1551  long unsigned int *list_1_org,
1552  long unsigned int *list_1buf_org,
1553  long unsigned int *list_2_1_target,
1554  long unsigned int *list_2_2_target,
1555  long unsigned int _irght,
1556  long unsigned int _ilft,
1557  long unsigned int _ihfbit
1558 ) {
1559 #ifdef MPI
1560  int mask2, state2, ierr, origin, bit2diff, Fsgn;
1561  unsigned long int idim_max_buf, j, ioff;
1562  MPI_Status statusMPI;
1563  double complex trans, dmv, dam_pr;
1564 
1565  // org_isite >= Nsite
1566  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1567 
1568  origin = myrank ^ mask2; // XOR
1569  state2 = origin & mask2;
1570 
1571  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1572  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1573 
1574  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1575 
1576  SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign
1577 
1578  ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1579  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1580  MPI_COMM_WORLD, &statusMPI);
1581  if (ierr != 0) exitMPI(-1);
1582 
1583  ierr = MPI_Sendrecv(list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1584  list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1585  MPI_COMM_WORLD, &statusMPI);
1586  if (ierr != 0) exitMPI(-1);
1587 
1588  ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1589  tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1590  MPI_COMM_WORLD, &statusMPI);
1591  if (ierr != 0) exitMPI(-1);
1592 
1593  if (state2 == mask2) {
1594  trans = 0;
1595  }
1596  else if (state2 == 0) {
1597  trans = (double)Fsgn * tmp_trans;
1598  }
1599  else return 0;
1600 
1601  dam_pr = 0.0;
1602 #pragma omp parallel for default(none) private(j, dmv) \
1603 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \
1604 shared(tmp_v1buf, tmp_v1, tmp_v0, list_1buf_org)
1605  for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original
1606  GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j],
1607  _irght, _ilft, _ihfbit, &ioff);
1608  dmv = trans * tmp_v1buf[j];
1609  tmp_v0[ioff] += dmv;
1610  }/*for (j = 1; j <= idim_max_buf; j++)*/
1611  return (dam_pr);
1612 #else
1613  return 0.0;
1614 #endif
1615 }/*double complex X_GC_Cis_MPI*/
1621 double complex X_Ajt_MPI(
1622  int org_isite,
1623  unsigned int org_ispin,
1624  double complex tmp_trans,
1625  double complex *tmp_v0,
1626  double complex *tmp_v1,
1627  double complex *tmp_v1buf,
1628  unsigned long int idim_max,
1629  long unsigned int *Tpow,
1630  long unsigned int *list_1_org,
1631  long unsigned int *list_1buf_org,
1632  long unsigned int *list_2_1_target,
1633  long unsigned int *list_2_2_target,
1634  long unsigned int _irght,
1635  long unsigned int _ilft,
1636  long unsigned int _ihfbit
1637 ){
1638 #ifdef MPI
1639  int mask2, state2, ierr, origin, bit2diff, Fsgn;
1640  unsigned long int idim_max_buf, j, ioff;
1641  MPI_Status statusMPI;
1642  double complex trans, dmv, dam_pr;
1643 
1644  // org_isite >= Nsite
1645  mask2 = (int)Tpow[2 * org_isite + org_ispin];
1646 
1647  origin = myrank ^ mask2; // XOR
1648  state2 = origin & mask2;
1649 
1650  //if state2 = mask2, the state (org_isite, org_ispin) is not occupied in myrank
1651  //origin: if the state (org_isite, org_ispin) is occupied in myrank, the state is not occupied in origin.
1652 
1653  bit2diff = myrank - ((2 * mask2 - 1) & myrank);
1654 
1655  SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign
1656  ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0,
1657  &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0,
1658  MPI_COMM_WORLD, &statusMPI);
1659  if (ierr != 0) exitMPI(-1);
1660 
1661  ierr = MPI_Sendrecv(list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0,
1662  list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0,
1663  MPI_COMM_WORLD, &statusMPI);
1664  if (ierr != 0) exitMPI(-1);
1665 
1666  ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1667  tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
1668  MPI_COMM_WORLD, &statusMPI);
1669  if (ierr != 0) exitMPI(-1);
1670 
1671  if (state2 == 0) {
1672  trans = 0;
1673  }
1674  else if (state2 == mask2) {
1675  trans = (double)Fsgn * tmp_trans;
1676  }
1677  else return 0;
1678 
1679  dam_pr = 0.0;
1680 #pragma omp parallel for default(none) private(j, dmv) \
1681 firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \
1682 shared(tmp_v1buf, tmp_v1, tmp_v0, list_1buf_org)
1683  for (j = 1; j <= idim_max_buf; j++) {
1684  GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j],
1685  _irght, _ilft, _ihfbit, &ioff);
1686  dmv = trans * tmp_v1buf[j];
1687  tmp_v0[ioff] += dmv;
1688  }
1689  return (dam_pr);
1690 #else
1691  return 0.0;
1692 #endif
1693 }/*double complex X_Ajt_MPI*/
int CheckBit_Cis(long unsigned int is1_spin, long unsigned int orgbit, long unsigned int *offbit)
Check the occupation of state, and compute the index of final wavefunction associated to ...
void exitMPI(int errorcode)
MPI Abortation wrapper.
Definition: wrapperMPI.c:86
struct DefineList Def
Definision of system (Hamiltonian) etc.
Definition: struct.h:410
double complex X_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
int X_CisAis(long unsigned int list_1_j, struct BindStruct *X, long unsigned int is1_spin)
term in Hubbard (canonical)
double complex X_GC_child_general_hopp_MPIdouble(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, double complex tmp_trans, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Hopping term in Hubbard + GC When both site1 and site2 are in the inter process region.
int CheckPE(int org_isite, struct BindStruct *X)
Check whether this site is in the inter process region or not.
long unsigned int ihfbit
Used for Ogata-Lin ???
Definition: struct.h:345
unsigned long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.h:303
double complex X_GC_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of grandcanonical Hubbard system.
int CheckBit_InterAllPE(int org_isite1, int org_isigma1, int org_isite2, int org_isigma2, int org_isite3, int org_isigma3, int org_isite4, int org_isigma4, struct BindStruct *X, long unsigned int orgbit, long unsigned int *offbit)
Compute the index of final wavefunction associated to , and check whether this operator is relevant o...
long unsigned int * list_1buf
Definition: global.h:48
struct LargeList Large
Variables for Matrix-Vector product.
Definition: struct.h:412
double complex X_child_CisAisCjtAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of canonical Hubbard system.
int CheckBit_PairPE(int org_isite1, int org_isigma1, int org_isite3, int org_isigma3, struct BindStruct *X, long unsigned int orgbit)
Check the occupation of both site 1 and site 3.
double complex CisAjt(long unsigned int j, double complex *tmp_v0, double complex *tmp_v1, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, long unsigned int diff_spin, double complex tmp_V)
term for canonical Hubbard
double complex X_GC_Cis_MPI(int org_isite, int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, unsigned long int idim_max, double complex *tmp_v1buf, unsigned long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
double complex X_Cis_MPI(int org_isite, unsigned int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, double complex *tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, long unsigned int *list_2_2_target, long unsigned int _irght, long unsigned int _ilft, long unsigned int _ihfbit)
Compute term of canonical Hubbard system.
double complex X_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of canonical Hubbard system.
int mode
multiply or expectation value.
Definition: struct.h:330
#define TRUE
Definition: global.h:26
int CheckBit_Ajt(long unsigned int is1_spin, long unsigned int orgbit, long unsigned int *offbit)
Check the occupation of state, and compute the index of final wavefunction associated to ...
long unsigned int * list_1buf_org
Definition: global.h:54
double complex X_Ajt_MPI(int org_isite, unsigned int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, double complex *tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, long unsigned int *list_2_2_target, long unsigned int _irght, long unsigned int _ilft, long unsigned int _ihfbit)
Compute term of canonical Hubbard system.
double complex X_GC_child_general_hopp_MPIsingle(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, double complex tmp_trans, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Hopping term in Hubbard + GC When only site2 is in the inter process region.
double complex GC_CisAjt(long unsigned int j, double complex *tmp_v0, double complex *tmp_v1, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, long unsigned int diff_spin, double complex tmp_V, long unsigned int *tmp_off)
term for grandcanonical Hubbard
long unsigned int * OrgTpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
Definition: struct.h:92
double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of grandcanonical Hubbard system.
Bind.
Definition: struct.h:409
unsigned int Nsite
Number of sites in the INTRA process region.
Definition: struct.h:56
long unsigned int * Tpow
[2 * DefineList::NsiteMPI] malloc in setmem_def().
Definition: struct.h:90
long unsigned int ilft
Used for Ogata-Lin ???
Definition: struct.h:344
double complex X_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of canonical Hubbard system.
double complex X_GC_child_CisAjt_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, double complex tmp_trans, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of grandcanonical Hubbard system.
long unsigned int * list_1_org
Definition: global.h:53
double complex X_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of canonical Hubbard system.
long unsigned int * list_2_1
Definition: global.h:49
int GetOffComp(long unsigned int *_list_2_1, long unsigned int *_list_2_2, long unsigned int _ibit, const long unsigned int _irght, const long unsigned int _ilft, const long unsigned int _ihfbit, long unsigned int *_ioffComp)
function of getting off-diagonal component
Definition: bitcalc.c:195
int GetSgnInterAll(unsigned long int isite1, unsigned long int isite2, unsigned long int isite3, unsigned long int isite4, int *Fsgn, struct BindStruct *X, unsigned long int orgbit, unsigned long int *offbit)
Compute the index of final wavefunction associated to , and Fermion sign.
double complex X_GC_Ajt_MPI(int org_isite, int org_ispin, double complex tmp_trans, double complex *tmp_v0, double complex *tmp_v1, unsigned long int idim_max, double complex *tmp_v1buf, unsigned long int *Tpow)
Single creation/annihilation operator in the inter process region for HubbardGC.
double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of grandcanonical Hubbard system.
double complex X_GC_child_CisAis_Hubbard_MPI(int org_isite1, int org_ispin1, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of grandcanonical Hubbard system.
#define FALSE
Definition: global.h:25
long unsigned int * list_1
Definition: global.h:47
int X_GC_CisAjt(long unsigned int list_1_j, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, long unsigned int diff_spin, long unsigned int *tmp_off)
Compute index of wavefunction of final state.
long unsigned int * list_2_2
Definition: global.h:50
double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI(int org_isite1, int org_ispin1, int org_isite2, int org_ispin2, int org_isite3, int org_ispin3, int org_isite4, int org_ispin4, double complex tmp_V, struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Compute term of grandcanonical Hubbard system.
struct EDMainCalStruct X
Definition: struct.h:432
long unsigned int irght
Used for Ogata-Lin ???
Definition: struct.h:343
int myrank
Process ID, defined in InitializeMPI()
Definition: global.h:163
double complex * v1buf
Definition: global.h:37
struct CheckList Check
Size of the Hilbert space.
Definition: struct.h:411
void SgnBit(const long unsigned int org_bit, int *sgn)
function of getting fermion sign (64 bit)
Definition: bitcalc.c:339