24 #include "wrapperMPI.h" 25 #include "expec_cisajs.h" 26 #include "expec_cisajscktaltdc.h" 27 #include "expec_totalspin.h" 28 #include "expec_energy_flct.h" 31 #include "./common/setmemory.h" 33 void zheevd_(
char *jobz,
char *uplo,
int *n,
double complex *a,
int *lda,
double *w,
double complex *work,
int *lwork,
double *rwork,
int * lrwork,
int *iwork,
int *liwork,
int *info);
34 void zgemm_(
char *transa,
char *transb,
int *m,
int *n,
int *k,
double complex *
alpha,
double complex *a,
int *lda,
double complex *b,
int *ldb,
double complex *
beta,
double complex *c,
int *ldc);
50 int *iwork, info, isub, jsub, nsub2;
51 char jobz =
'V', uplo =
'U', transa =
'N', transb =
'N';
53 double complex *work, *mat;
54 int liwork, lwork, lrwork;
55 double complex one = 1.0, zero = 0.0;
57 liwork = 5 * nsub + 3;
58 lwork = nsub*nsub + 2 * nsub;
59 lrwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1;
61 iwork = (
int*)malloc(liwork *
sizeof(
int));
62 rwork = (
double*)malloc(lrwork *
sizeof(
double));
63 work = (
double complex*)malloc(lwork *
sizeof(
double complex));
64 mat = (
double complex*)malloc(nsub*nsub *
sizeof(
double complex));
65 for (isub = 0; isub < nsub*nsub; isub++)mat[isub] = 0.0;
69 zheevd_(&jobz, &uplo, &nsub, ovlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
81 for (isub = 0; isub < nsub; isub++) {
82 if (eig[isub] > 1.0e-14) {
83 for (jsub = 0; jsub < nsub; jsub++)
84 ovlp[jsub + nsub*nsub2] = ovlp[jsub + nsub*isub] / sqrt(eig[isub]);
88 for (isub = nsub2; isub < nsub; isub++)
89 for (jsub = 0; jsub < nsub; jsub++)
90 ovlp[jsub + nsub*isub] = 0.0;
96 zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovlp, &nsub, &zero, mat, &nsub);
98 zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, mat, &nsub, &zero, hsub, &nsub);
105 zheevd_(&jobz, &uplo, &nsub2, hsub, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
113 zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, hsub, &nsub, &zero, mat, &nsub);
115 for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub];
138 if (fabs(eig) > 10.0) k = trunc(log10(fabs(eig)));
142 if (eps_LOBPCG > res) i = ceil(log10(eps_LOBPCG));
143 else i = ceil(log10(res));
145 preshift = trunc(eig / pow(10.0, k + i))*pow(10.0, k + i);
157 double complex **wave
165 long int idim, iv, i_max;
166 unsigned long int i_max_tmp, sum_i_max;
172 long unsigned int u_long_i;
188 fprintf(stdout,
"Restart file is not found.\n");
189 fprintf(stdout,
"Start from scratch.\n");
194 byte_size = fread(&iproc,
sizeof(
int), 1, fp);
195 byte_size = fread(&i_max,
sizeof(
unsigned long int), 1, fp);
198 fprintf(stderr,
"Error: Invalid restart file.\n");
201 byte_size = fread(wave[ie],
sizeof(complex
double), X->
Check.
idim_max + 1, fp);
210 if (byte_size == 0) printf(
"byte_size: %d \n", (
int)byte_size);
229 fprintf(
stdoutMPI,
" initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n",
231 #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie) 232 for (idim = 1; idim <= i_max; idim++) wave[ie][idim] = 0.0;
235 for (iproc = 0; iproc <
nproc; iproc++) {
238 if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
241 wave[ie][iv - sum_i_max + 1] = 1.0;
243 wave[ie][iv - sum_i_max + 1] += 1.0*I;
244 wave[ie][iv - sum_i_max + 1] /= sqrt(2.0);
249 sum_i_max += i_max_tmp;
256 fprintf(
stdoutMPI,
" initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d\n\n",
258 #pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt, ie) \ 259 shared(wave, iv, X, nthreads, myrank, i_max) 265 mythread = omp_get_thread_num();
270 dsfmt_init_gen_rand(&dsfmt, u_long_i);
275 for (idim = 1; idim <= i_max; idim++)
276 wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
280 for (idim = 1; idim <= i_max; idim++)
281 wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
288 dnorm = sqrt(creal(
VecProdMPI(i_max, wave[ie], wave[ie])));
289 #pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie) private(idim) 290 for (idim = 1; idim <= i_max; idim++) wave[ie][idim] /= dnorm;
299 double complex **wave
315 byte_size = fwrite(wave[ie],
sizeof(complex
double), X->
Check.
idim_max + 1, fp);
320 if(byte_size == 0) printf(
"byte_size : %d\n", (
int)byte_size);
337 long int idim, i_max;
338 int ii, jj, ie, je, nsub, stp, mythread, nsub_cut;
339 double complex ***wxp,
343 double *eig, dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub;
349 eigsub = d_1d_allocate(nsub);
350 hsub = cd_1d_allocate(nsub*nsub);
351 ovlp = cd_1d_allocate(nsub*nsub);
352 work = cd_2d_allocate(
nthreads, nsub);
371 for (idim = 1; idim <= i_max; idim++) hwxp[1][ie][idim] = 0.0;
373 mltply(X, hwxp[1][ie], wxp[1][ie]);
378 for (idim = 1; idim <= i_max; idim++) wxp[2][ie][idim] = 0.0;
379 for (idim = 1; idim <= i_max; idim++) hwxp[2][ie][idim] = 0.0;
381 eig[ie] = creal(
VecProdMPI(i_max, wxp[1][ie], hwxp[1][ie]));
386 fprintf(
stdoutMPI,
" Step Residual-2-norm Threshold Energy\n");
387 fprintf(fp,
" Step Residual-2-norm Threshold Energy\n");
402 if (fabs(eig[ie]) > eigabs_max) eigabs_max = fabs(eig[ie]);
404 if (eigabs_max > 1.0) eps_LOBPCG *= eigabs_max;
414 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,ie) private(idim) 415 for (idim = 1; idim <= i_max; idim++)
416 wxp[0][ie][idim] = hwxp[1][ie][idim] - eig[ie] * wxp[1][ie][idim];
417 dnorm = sqrt(creal(
VecProdMPI(i_max, wxp[0][ie], wxp[0][ie])));
418 if (dnorm > dnormmax) dnormmax = dnorm;
424 if (do_precon == 1) {
426 #pragma omp parallel for default(none) shared(wxp,ie,list_Diagonal,preshift,i_max,eps_LOBPCG) private(idim,precon) 427 for (idim = 1; idim <= i_max; idim++) {
429 if(fabs(precon) > eps_LOBPCG) wxp[0][ie][idim] /= precon;
435 dnorm = sqrt(creal(
VecProdMPI(i_max, wxp[0][ie], wxp[0][ie])));
436 #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie) private(idim) 437 for (idim = 1; idim <= i_max; idim++) wxp[0][ie][idim] /= dnorm;
447 fprintf(
stdoutMPI,
"%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
448 fprintf(fp,
"%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
451 fprintf(fp,
" %15.5e", eig[ie]);
453 if(nsub_cut == 0) printf(
"nsub_cut : %d", nsub_cut);
458 if (dnormmax < eps_LOBPCG) {
465 #pragma omp parallel default(none) shared(hwxp,i_max,X) private(idim,ie) 467 #pragma omp for nowait 469 for (idim = 1; idim <= i_max; idim++) hwxp[0][ie][idim] = 0.0;
472 mltply(X, hwxp[0][ie], wxp[0][ie]);
481 for (ii = 0; ii < 3; ii++) {
483 for (jj = 0; jj < 3; jj++) {
486 =
VecProdMPI(i_max, wxp[jj][je], hwxp[ii][ie]);
488 =
VecProdMPI(i_max, wxp[jj][je], wxp[ii][ie]);
501 nsub_cut =
diag_ovrp(nsub, hsub, ovlp, eigsub);
506 eig[ie] = 0.5 * (eig[ie] + eigsub[ie]);
508 #pragma omp parallel default(none) shared(i_max,X,wxp,hwxp,hsub,nsub,work) private(idim,ie,je,jj,mythread) 511 mythread = omp_get_thread_num();
517 for (idim = 1; idim <= i_max; idim++) {
523 work[mythread][ie] = 0.0;
524 for (jj = 0; jj < 3; jj++)
526 work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
528 for (ie = 0; ie < X->
Def.
k_exct; ie++) wxp[1][ie][idim] = work[mythread][ie];
534 work[mythread][ie] = 0.0;
535 for (jj = 0; jj < 3; jj++)
537 work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
539 for (ie = 0; ie < X->
Def.
k_exct; ie++) hwxp[1][ie][idim] = work[mythread][ie];
545 work[mythread][ie] = 0.0;
546 for (jj = 0; jj < 3; jj += 2) {
548 work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
551 for (ie = 0; ie < X->
Def.
k_exct; ie++) wxp[2][ie][idim] = work[mythread][ie];
557 work[mythread][ie] = 0.0;
558 for (jj = 0; jj < 3; jj += 2)
560 work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->
Def.
k_exct + nsub*ie];
562 for (ie = 0; ie < X->
Def.
k_exct; ie++) hwxp[2][ie][idim] = work[mythread][ie];
569 for (ii = 1; ii < 3; ii++) {
571 dnorm = sqrt(creal(
VecProdMPI(i_max, wxp[ii][ie], wxp[ii][ie])));
572 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ie,ii) private(idim) 573 for (idim = 1; idim <= i_max; idim++) {
574 wxp[ii][ie][idim] /= dnorm;
575 hwxp[ii][ie][idim] /= dnorm;
593 free_d_1d_allocate(eig);
594 free_d_1d_allocate(eigsub);
595 free_cd_1d_allocate(hsub);
596 free_cd_1d_allocate(ovlp);
597 free_cd_2d_allocate(work);
598 free_cd_3d_allocate(hwxp);
614 #pragma omp parallel default(none) shared(i_max,wxp,L_vec,X) private(idim,ie) 617 #pragma omp for nowait 618 for (idim = 0; idim < i_max; idim++)
619 L_vec[ie][idim] = wxp[1][ie][idim + 1];
622 free_cd_3d_allocate(wxp);
644 long int i_max = 0, ie, idim;
647 fprintf(
stdoutMPI,
"###### Eigenvalue with LOBPCG #######\n\n");
656 case SpinlessFermionGC:
662 case SpinlessFermion:
683 if(iret ==1)
return (
TRUE);
685 fprintf(
stdoutMPI,
" LOBPCG is not converged in this process.\n");
695 fprintf(
stdoutMPI,
"An Eigenvector is inputted.\n");
702 fprintf(stderr,
"Error: Inputvector file is not found.\n");
705 byte_size = fread(&
step_i,
sizeof(
int), 1, fp);
706 byte_size = fread(&i_max,
sizeof(
long int), 1, fp);
708 fprintf(stderr,
"Error: Invalid Inputvector file.\n");
712 #pragma omp parallel for default(none) shared(L_vec, v1) firstprivate(i_max, ie), private(idim) 713 for (idim = 0; idim < i_max; idim++) {
714 L_vec[ie][idim] =
v1[idim + 1];
720 if(byte_size == 0) printf(
"byte_size : %d\n", (
int)byte_size);
743 fprintf(fp,
"State %ld\n", ie);
761 #pragma omp parallel for default(none) shared(X,v1,L_vec,ie) private(idim) 763 v1[idim + 1] =
L_vec[ie][idim];
const char * cFileNameEnergy_CG
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory.
int LanczosEps
log(10 base) of the convergence threshold. Read from Calcmod in readdef.h
void exitMPI(int errorcode)
MPI Abortation wrapper.
struct DefineList Def
Definision of system (Hamiltonian) etc.
int mltply(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
int St
0 or 1, but it affects nothing.
const char * cOutputEigenVecStart
void zgemm_(char *transa, char *transb, int *m, int *n, int *k, double complex *alpha, double complex *a, int *lda, double complex *b, int *ldb, double complex *beta, double complex *c, int *ldc)
unsigned long int idim_max
The dimension of the Hilbert space of this process.
const char * cFileNameInputEigen
const char * cLogLanczos_EigenValueNotConverged
const char * cLogLanczos_EigenVecEnd
unsigned long int BcastMPI_li(int root, unsigned long int idim)
MPI wrapper function to broadcast unsigned long integer across processes.
static double calc_preshift(double eig, double res, double eps_LOBPCG)
Compute adaptively shifted preconditionar written in S. Yamada, et al., Transactions of JSCES...
int iOutputEigenVec
ASwitch for outputting an eigenvector. 0: no output, 1:output.
void phys(struct BindStruct *X, unsigned long int neig)
A main function to calculate physical quantities by full diagonalization method.
struct LargeList Large
Variables for Matrix-Vector product.
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory.
const char * cFileNameOutputEigen
const char * cLogInputVecFinish
const char * cLanczos_EigenValueFinish
struct PhysList Phys
Physical quantities.
static void Output_restart(struct BindStruct *X, double complex **wave)
Output eigenvectors for restart LOBPCG method.
const char * cFileNameLanczosStep
int CalcByLOBPCG(struct EDMainCalStruct *X)
Driver routine for LOB(P)CG method.
int nthreads
Number of Threads, defined in InitializeMPI()
const char * cLogInputVecStart
long int iv
Used for initializing vector.
int nproc
Number of processors, defined in InitializeMPI()
void zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info)
const char * cFileNameInputVector
const char * cLogLanczos_EigenValueEnd
const char * cReadEigenVecFinish
const char * cLanczos_EigenValueStart
double * all_doublon
[CheckList::idim_max+1] Doublon for FullDiag and LOBPCG. malloc in setmem_large().
const char * cLogOutputVecStart
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
static int diag_ovrp(int nsub, double complex *hsub, double complex *ovlp, double *eig)
Solve the generalized eigenvalue problem with the Lowdin's orthogonalization.
const char * cFileNameTimeKeep
const char * cReadEigenVecStart
double complex VecProdMPI(long unsigned int ndim, double complex *v1, double complex *v2)
Compute conjugate scaler product of process-distributed vector .
const char * cLanczos_EigenValueStep
const char * cLogOutputVecFinish
double * all_sz
[CheckList::idim_max+1] for FullDiag and LOBPCG. malloc in setmem_large().
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
long int initial_iv
Seed of random number for initial guesss of wavefunctions.
int myrank
Process ID, defined in InitializeMPI()
const char * cFileNameOutputVector
int iInitialVecType
Switch for type of inital vectors. 0:complex type, 1: real type. default value is set as 0 in readdef...
int LOBPCG_Main(struct BindStruct *X)
Core routine for the LOBPCG method This method is introduced inS. Yamada, et al., Transactions of JSC...
int iInputEigenVec
Switch for reading an eigenvector. 0: no input, 1:input.
const char * cFileNameEnergy_Lanczos
struct CheckList Check
Size of the Hilbert space.
unsigned long int SumMPI_li(unsigned long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes.
char * CDataFileHead
Read from Calcmod in readdef.h. Header of output file such as Green's function.
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
struct BindStruct Bind
Binded struct.
static void Initialize_wave(struct BindStruct *X, double complex **wave)
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log.
unsigned int Lanczos_max
Maximum number of iterations.
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
double * all_energy
[CheckList::idim_max+1] Energy for FullDiag and LOBPCG. malloc in setmem_large(). ...
unsigned int k_exct
Read from Calcmod in readdef.h.