24 #include "wrapperMPI.h"    25 #include "expec_cisajs.h"    26 #include "expec_cisajscktaltdc.h"    27 #include "expec_totalspin.h"    28 #include "expec_energy_flct.h"    31 #include "./common/setmemory.h"    33 void zheevd_(
char *jobz, 
char *uplo, 
int *n, 
double complex *a, 
int *lda, 
double *w, 
double complex *work, 
int *lwork, 
double *rwork, 
int * lrwork, 
int *iwork, 
int *liwork, 
int *info);
    34 void zgemm_(
char *transa, 
char *transb, 
int *m, 
int *n, 
int *k, 
double complex *
alpha, 
double complex *a, 
int *lda, 
double complex *b, 
int *ldb, 
double complex *
beta, 
double complex *c, 
int *ldc);
    50   int *iwork, info, isub, jsub, nsub2;
    51   char jobz = 
'V', uplo = 
'U', transa = 
'N', transb = 
'N';
    53   double complex *work, *mat;
    54   int liwork, lwork, lrwork;
    55   double complex one = 1.0, zero = 0.0;
    57   liwork = 5 * nsub + 3;
    58   lwork = nsub*nsub + 2 * nsub;
    59   lrwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1;
    61   iwork = (
int*)malloc(liwork * 
sizeof(
int));
    62   rwork = (
double*)malloc(lrwork * 
sizeof(
double));
    63   work = (
double complex*)malloc(lwork * 
sizeof(
double complex));
    64   mat = (
double complex*)malloc(nsub*nsub * 
sizeof(
double complex));
    65   for (isub = 0; isub < nsub*nsub; isub++)mat[isub] = 0.0;
    69   zheevd_(&jobz, &uplo, &nsub, ovlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
    81   for (isub = 0; isub < nsub; isub++) {
    82     if (eig[isub] > 1.0e-14) {
    83       for (jsub = 0; jsub < nsub; jsub++)
    84         ovlp[jsub + nsub*nsub2] = ovlp[jsub + nsub*isub] / sqrt(eig[isub]);
    88   for (isub = nsub2; isub < nsub; isub++) 
    89     for (jsub = 0; jsub < nsub; jsub++)
    90       ovlp[jsub + nsub*isub] = 0.0;
    96   zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovlp, &nsub, &zero, mat, &nsub);
    98   zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, mat, &nsub, &zero, hsub, &nsub);
   105   zheevd_(&jobz, &uplo, &nsub2, hsub, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
   113   zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, hsub, &nsub, &zero, mat, &nsub);
   115   for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub];
   138   if (fabs(eig) > 10.0) k = trunc(log10(fabs(eig)));
   142     if (eps_LOBPCG > res) i = ceil(log10(eps_LOBPCG));
   143     else i = ceil(log10(res));
   145     preshift = trunc(eig / pow(10.0, k + i))*pow(10.0, k + i);
   157   double complex **wave
   165   long int idim, iv, i_max;
   166   unsigned long int i_max_tmp, sum_i_max;
   172   long unsigned int u_long_i;
   188         fprintf(stdout, 
"Restart file is not found.\n");
   189         fprintf(stdout, 
"Start from scratch.\n");
   194         byte_size = fread(&iproc, 
sizeof(
int), 1, fp);
   195         byte_size = fread(&i_max, 
sizeof(
unsigned long int), 1, fp);
   198           fprintf(stderr, 
"Error: Invalid restart file.\n");
   201         byte_size = fread(wave[ie], 
sizeof(complex 
double), X->
Check.
idim_max + 1, fp);
   210       if (byte_size == 0) printf(
"byte_size: %d \n", (
int)byte_size);
   229       fprintf(
stdoutMPI, 
"  initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n", 
   231 #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie)   232       for (idim = 1; idim <= i_max; idim++) wave[ie][idim] = 0.0;
   235       for (iproc = 0; iproc < 
nproc; iproc++) {
   238         if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
   241             wave[ie][iv - sum_i_max + 1] = 1.0;
   243               wave[ie][iv - sum_i_max + 1] += 1.0*I;
   244               wave[ie][iv - sum_i_max + 1] /= sqrt(2.0);
   249         sum_i_max += i_max_tmp;
   256     fprintf(
stdoutMPI, 
"  initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d\n\n",
   258 #pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt, ie) \   259               shared(wave, iv, X, nthreads, myrank, i_max)   265       mythread = omp_get_thread_num();
   270       dsfmt_init_gen_rand(&dsfmt, u_long_i);
   275           for (idim = 1; idim <= i_max; idim++)
   276             wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
   280           for (idim = 1; idim <= i_max; idim++)
   281             wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
   288       dnorm = sqrt(creal(
VecProdMPI(i_max, wave[ie], wave[ie])));
   289 #pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie) private(idim)   290       for (idim = 1; idim <= i_max; idim++) wave[ie][idim] /= dnorm;
   299   double complex **wave
   315     byte_size = fwrite(wave[ie], 
sizeof(complex 
double), X->
Check.
idim_max + 1, fp);
   320   if(byte_size == 0) printf(
"byte_size : %d\n", (
int)byte_size);
   337   long int idim, i_max;
   338   int ii, jj, ie, je, nsub, stp, mythread, nsub_cut;
   339   double complex ***wxp, 
   343   double *eig, dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub;
   349   eigsub = d_1d_allocate(nsub);
   350   hsub = cd_1d_allocate(nsub*nsub);
   351   ovlp = cd_1d_allocate(nsub*nsub);
   352   work = cd_2d_allocate(
nthreads, nsub);
   371     for (idim = 1; idim <= i_max; idim++) hwxp[1][ie][idim] = 0.0;
   373     mltply(X, hwxp[1][ie], wxp[1][ie]);
   378     for (idim = 1; idim <= i_max; idim++) wxp[2][ie][idim] = 0.0;
   379     for (idim = 1; idim <= i_max; idim++) hwxp[2][ie][idim] = 0.0;
   381     eig[ie] = creal(
VecProdMPI(i_max, wxp[1][ie], hwxp[1][ie]));
   386   fprintf(
stdoutMPI, 
"    Step   Residual-2-norm     Threshold      Energy\n");
   387   fprintf(fp, 
"    Step   Residual-2-norm     Threshold      Energy\n");
   402       if (fabs(eig[ie]) > eigabs_max) eigabs_max = fabs(eig[ie]);
   404     if (eigabs_max > 1.0) eps_LOBPCG *= eigabs_max;
   414 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,ie) private(idim)   415       for (idim = 1; idim <= i_max; idim++)
   416         wxp[0][ie][idim] = hwxp[1][ie][idim] - eig[ie] * wxp[1][ie][idim];
   417       dnorm = sqrt(creal(
VecProdMPI(i_max, wxp[0][ie], wxp[0][ie])));
   418       if (dnorm > dnormmax) dnormmax = dnorm;
   424         if (do_precon == 1) {
   426 #pragma omp parallel for default(none) shared(wxp,ie,list_Diagonal,preshift,i_max,eps_LOBPCG) private(idim,precon)   427           for (idim = 1; idim <= i_max; idim++) {
   429             if(fabs(precon) > eps_LOBPCG) wxp[0][ie][idim] /= precon;
   435         dnorm = sqrt(creal(
VecProdMPI(i_max, wxp[0][ie], wxp[0][ie])));
   436 #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie) private(idim)   437         for (idim = 1; idim <= i_max; idim++) wxp[0][ie][idim] /= dnorm;
   447     fprintf(
stdoutMPI, 
"%9d %15.5e %15.5e      ", stp, dnormmax, eps_LOBPCG);
   448     fprintf(fp, 
"%9d %15.5e %15.5e      ", stp, dnormmax, eps_LOBPCG);
   451       fprintf(fp, 
" %15.5e", eig[ie]);
   453     if(nsub_cut == 0) printf(
"nsub_cut : %d", nsub_cut);
   458     if (dnormmax < eps_LOBPCG) {
   465 #pragma omp parallel default(none) shared(hwxp,i_max,X) private(idim,ie)   467 #pragma omp for nowait   469         for (idim = 1; idim <= i_max; idim++)  hwxp[0][ie][idim] = 0.0;
   472       mltply(X, hwxp[0][ie], wxp[0][ie]);
   481     for (ii = 0; ii < 3; ii++) {
   483         for (jj = 0; jj < 3; jj++) {
   486               = 
VecProdMPI(i_max, wxp[jj][je], hwxp[ii][ie]);
   488               = 
VecProdMPI(i_max, wxp[jj][je], wxp[ii][ie]);
   501     nsub_cut = 
diag_ovrp(nsub, hsub, ovlp, eigsub);
   506       eig[ie] = 0.5 * (eig[ie] + eigsub[ie]);
   508 #pragma omp parallel default(none) shared(i_max,X,wxp,hwxp,hsub,nsub,work) private(idim,ie,je,jj,mythread)   511       mythread = omp_get_thread_num();
   517       for (idim = 1; idim <= i_max; idim++) {
   523           work[mythread][ie] = 0.0;
   524           for (jj = 0; jj < 3; jj++)
   526               work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
   528         for (ie = 0; ie < X->
Def.
k_exct; ie++) wxp[1][ie][idim] = work[mythread][ie];
   534           work[mythread][ie] = 0.0;
   535           for (jj = 0; jj < 3; jj++)
   537               work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
   539         for (ie = 0; ie < X->
Def.
k_exct; ie++) hwxp[1][ie][idim] = work[mythread][ie];
   545           work[mythread][ie] = 0.0;
   546           for (jj = 0; jj < 3; jj += 2) {
   548               work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
   551         for (ie = 0; ie < X->
Def.
k_exct; ie++) wxp[2][ie][idim] = work[mythread][ie];
   557           work[mythread][ie] = 0.0;
   558           for (jj = 0; jj < 3; jj += 2)
   560               work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
   562         for (ie = 0; ie < X->
Def.
k_exct; ie++) hwxp[2][ie][idim] = work[mythread][ie];
   569     for (ii = 1; ii < 3; ii++) {
   571         dnorm = sqrt(creal(
VecProdMPI(i_max, wxp[ii][ie], wxp[ii][ie])));
   572 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ie,ii) private(idim)   573         for (idim = 1; idim <= i_max; idim++) {
   574           wxp[ii][ie][idim] /= dnorm;
   575           hwxp[ii][ie][idim] /= dnorm;
   593   free_d_1d_allocate(eig);
   594   free_d_1d_allocate(eigsub);
   595   free_cd_1d_allocate(hsub);
   596   free_cd_1d_allocate(ovlp);
   597   free_cd_2d_allocate(work);
   598   free_cd_3d_allocate(hwxp);
   614 #pragma omp parallel default(none) shared(i_max,wxp,L_vec,X) private(idim,ie)   617 #pragma omp for nowait   618       for (idim = 0; idim < i_max; idim++)
   619         L_vec[ie][idim] = wxp[1][ie][idim + 1];
   622   free_cd_3d_allocate(wxp);
   644   long int i_max = 0, ie, idim;
   647   fprintf(
stdoutMPI, 
"######  Eigenvalue with LOBPCG  #######\n\n");
   656     case SpinlessFermionGC:
   662     case SpinlessFermion:
   683       if(iret ==1) 
return (
TRUE);
   685           fprintf(
stdoutMPI, 
"  LOBPCG is not converged in this process.\n");
   695     fprintf(
stdoutMPI, 
"An Eigenvector is inputted.\n");
   702         fprintf(stderr, 
"Error: Inputvector file is not found.\n");
   705       byte_size = fread(&
step_i, 
sizeof(
int), 1, fp);
   706       byte_size = fread(&i_max, 
sizeof(
long int), 1, fp);
   708         fprintf(stderr, 
"Error: Invalid Inputvector file.\n");
   712 #pragma omp parallel for default(none) shared(L_vec, v1) firstprivate(i_max, ie), private(idim)   713       for (idim = 0; idim < i_max; idim++) {
   714         L_vec[ie][idim] = 
v1[idim + 1];
   720     if(byte_size == 0) printf(
"byte_size : %d\n", (
int)byte_size);
   743     fprintf(fp, 
"State %ld\n", ie);
   761 #pragma omp parallel for default(none) shared(X,v1,L_vec,ie) private(idim)   763         v1[idim + 1] = 
L_vec[ie][idim];
 const char * cFileNameEnergy_CG
 
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory. 
 
int LanczosEps
log(10 base) of the convergence threshold. Read from Calcmod in readdef.h 
 
void exitMPI(int errorcode)
MPI Abortation wrapper. 
 
struct DefineList Def
Definision of system (Hamiltonian) etc. 
 
int mltply(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
 
int St
0 or 1, but it affects nothing. 
 
const char * cOutputEigenVecStart
 
void zgemm_(char *transa, char *transb, int *m, int *n, int *k, double complex *alpha, double complex *a, int *lda, double complex *b, int *ldb, double complex *beta, double complex *c, int *ldc)
 
unsigned long int idim_max
The dimension of the Hilbert space of this process. 
 
const char * cFileNameInputEigen
 
const char * cLogLanczos_EigenValueNotConverged
 
const char * cLogLanczos_EigenVecEnd
 
unsigned long int BcastMPI_li(int root, unsigned long int idim)
MPI wrapper function to broadcast unsigned long integer across processes. 
 
static double calc_preshift(double eig, double res, double eps_LOBPCG)
Compute adaptively shifted preconditionar written in S. Yamada, et al., Transactions of JSCES...
 
int iOutputEigenVec
ASwitch for outputting an eigenvector. 0: no output, 1:output. 
 
void phys(struct BindStruct *X, unsigned long int neig)
A main function to calculate physical quantities by full diagonalization method. 
 
struct LargeList Large
Variables for Matrix-Vector product. 
 
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory. 
 
const char * cFileNameOutputEigen
 
const char * cLogInputVecFinish
 
const char * cLanczos_EigenValueFinish
 
struct PhysList Phys
Physical quantities. 
 
static void Output_restart(struct BindStruct *X, double complex **wave)
Output eigenvectors for restart LOBPCG method. 
 
const char * cFileNameLanczosStep
 
int CalcByLOBPCG(struct EDMainCalStruct *X)
Driver routine for LOB(P)CG method. 
 
int nthreads
Number of Threads, defined in InitializeMPI() 
 
const char * cLogInputVecStart
 
long int iv
Used for initializing vector. 
 
int nproc
Number of processors, defined in InitializeMPI() 
 
void zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info)
 
const char * cFileNameInputVector
 
const char * cLogLanczos_EigenValueEnd
 
const char * cReadEigenVecFinish
 
const char * cLanczos_EigenValueStart
 
double * all_doublon
[CheckList::idim_max+1] Doublon for FullDiag and LOBPCG. malloc in setmem_large(). 
 
const char * cLogOutputVecStart
 
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin. 
 
static int diag_ovrp(int nsub, double complex *hsub, double complex *ovlp, double *eig)
Solve the generalized eigenvalue problem  with the Lowdin's orthogonalization. 
 
const char * cFileNameTimeKeep
 
const char * cReadEigenVecStart
 
double complex VecProdMPI(long unsigned int ndim, double complex *v1, double complex *v2)
Compute conjugate scaler product of process-distributed vector . 
 
const char * cLanczos_EigenValueStep
 
const char * cLogOutputVecFinish
 
double * all_sz
[CheckList::idim_max+1]  for FullDiag and LOBPCG. malloc in setmem_large(). 
 
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved. 
 
long int initial_iv
Seed of random number for initial guesss of wavefunctions. 
 
int myrank
Process ID, defined in InitializeMPI() 
 
const char * cFileNameOutputVector
 
int iInitialVecType
Switch for type of inital vectors. 0:complex type, 1: real type. default value is set as 0 in readdef...
 
int LOBPCG_Main(struct BindStruct *X)
Core routine for the LOBPCG method This method is introduced inS. Yamada, et al., Transactions of JSC...
 
int iInputEigenVec
Switch for reading an eigenvector. 0: no input, 1:input. 
 
const char * cFileNameEnergy_Lanczos
 
struct CheckList Check
Size of the Hilbert space. 
 
unsigned long int SumMPI_li(unsigned long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes. 
 
char * CDataFileHead
Read from Calcmod in readdef.h. Header of output file such as Green's function. 
 
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log. 
 
struct BindStruct Bind
Binded struct. 
 
static void Initialize_wave(struct BindStruct *X, double complex **wave)
 
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log. 
 
unsigned int Lanczos_max
Maximum number of iterations. 
 
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI() 
 
double * all_energy
[CheckList::idim_max+1] Energy for FullDiag and LOBPCG. malloc in setmem_large(). ...
 
unsigned int k_exct
Read from Calcmod in readdef.h.