HΦ  3.2.0
CalcByLOBPCG.c
Go to the documentation of this file.
1 /* HPhi - Quantum Lattice Model Simulator */
2 /* Copyright (C) 2015 The University of Tokyo */
3 
4 /* This program is free software: you can redistribute it and/or modify */
5 /* it under the terms of the GNU General Public License as published by */
6 /* the Free Software Foundation, either version 3 of the License, or */
7 /* (at your option) any later version. */
8 
9 /* This program is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU General Public License for more details. */
13 
14 /* You should have received a copy of the GNU General Public License */
15 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "Common.h"
21 #include "xsetmem.h"
22 #include "mltply.h"
23 #include "FileIO.h"
24 #include "wrapperMPI.h"
25 #include "expec_cisajs.h"
26 #include "expec_cisajscktaltdc.h"
27 #include "expec_totalspin.h"
28 #include "expec_energy_flct.h"
29 #include "phys.h"
30 #include <math.h>
31 #include "./common/setmemory.h"
32 
33 void zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int * lrwork, int *iwork, int *liwork, int *info);
34 void zgemm_(char *transa, char *transb, int *m, int *n, int *k, double complex *alpha, double complex *a, int *lda, double complex *b, int *ldb, double complex *beta, double complex *c, int *ldc);
43 static int diag_ovrp(
44  int nsub,
45  double complex *hsub,
46  double complex *ovlp,
47  double *eig
48 )
49 {
50  int *iwork, info, isub, jsub, nsub2;
51  char jobz = 'V', uplo = 'U', transa = 'N', transb = 'N';
52  double *rwork;
53  double complex *work, *mat;
54  int liwork, lwork, lrwork;
55  double complex one = 1.0, zero = 0.0;
56 
57  liwork = 5 * nsub + 3;
58  lwork = nsub*nsub + 2 * nsub;
59  lrwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1;
60 
61  iwork = (int*)malloc(liwork * sizeof(int));
62  rwork = (double*)malloc(lrwork * sizeof(double));
63  work = (double complex*)malloc(lwork * sizeof(double complex));
64  mat = (double complex*)malloc(nsub*nsub * sizeof(double complex));
65  for (isub = 0; isub < nsub*nsub; isub++)mat[isub] = 0.0;
69  zheevd_(&jobz, &uplo, &nsub, ovlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
80  nsub2 = 0;
81  for (isub = 0; isub < nsub; isub++) {
82  if (eig[isub] > 1.0e-14) {
83  for (jsub = 0; jsub < nsub; jsub++)
84  ovlp[jsub + nsub*nsub2] = ovlp[jsub + nsub*isub] / sqrt(eig[isub]);
85  nsub2 += 1;
86  }
87  }
88  for (isub = nsub2; isub < nsub; isub++)
89  for (jsub = 0; jsub < nsub; jsub++)
90  ovlp[jsub + nsub*isub] = 0.0;
95  transa = 'N';
96  zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovlp, &nsub, &zero, mat, &nsub);
97  transa = 'C';
98  zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, mat, &nsub, &zero, hsub, &nsub);
105  zheevd_(&jobz, &uplo, &nsub2, hsub, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info);
112  transa = 'N';
113  zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, hsub, &nsub, &zero, mat, &nsub);
114  // printf("%d %d %15.5f %15.5f %15.5f\n", info, nsub2, eig[0], eig[1], eig[2]);
115  for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub];
116 
117  free(mat);
118  free(work);
119  free(rwork);
120  free(iwork);
121 
122  return(nsub2);
123 }/*void diag_ovrp*/
129 static double calc_preshift(
130  double eig,
131  double res,
132  double eps_LOBPCG
133 )
134 {
135  double k, i;
136  double preshift;
137 
138  if (fabs(eig) > 10.0) k = trunc(log10(fabs(eig)));
139  else k = 1.0;
140 
141  if (res < 1.0) {
142  if (eps_LOBPCG > res) i = ceil(log10(eps_LOBPCG));
143  else i = ceil(log10(res));
144 
145  preshift = trunc(eig / pow(10.0, k + i))*pow(10.0, k + i);
146  }
147  else preshift = 0.0;
148 
149  return(preshift);
150 }/*void calc_preshift*/
151 /*
152 @brief Compute initial guess for LOBPCG.
153 If this is resuterting run, read from files.
154 */
155 static void Initialize_wave(
156  struct BindStruct *X,
157  double complex **wave
158 )
159 {
160  FILE *fp;
161  char sdt[D_FileNameMax];
162  size_t byte_size;
163 
164  int iproc, ie, ierr;
165  long int idim, iv, i_max;
166  unsigned long int i_max_tmp, sum_i_max;
167  int mythread;
168  double dnorm;
169  /*
170  For DSFMT
171  */
172  long unsigned int u_long_i;
173  dsfmt_t dsfmt;
177  if (X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN) {
178  //StartTimer(3600);
179  //TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i);
180  fprintf(stdoutMPI, "%s", cLogInputVecStart);
181 
182  ierr = 0;
183  for (ie = 0; ie < X->Def.k_exct; ie++) {
184 
185  sprintf(sdt, cFileNameInputVector, ie, myrank);
186  childfopenALL(sdt, "rb", &fp);
187  if (fp == NULL) {
188  fprintf(stdout, "Restart file is not found.\n");
189  fprintf(stdout, "Start from scratch.\n");
190  ierr = 1;
191  break;
192  }
193  else {
194  byte_size = fread(&iproc, sizeof(int), 1, fp);
195  byte_size = fread(&i_max, sizeof(unsigned long int), 1, fp);
196  //fprintf(stdoutMPI, "Debug: i_max=%ld, step_i=%d\n", i_max, step_i);
197  if (i_max != X->Check.idim_max) {
198  fprintf(stderr, "Error: Invalid restart file.\n");
199  exitMPI(-1);
200  }
201  byte_size = fread(wave[ie], sizeof(complex double), X->Check.idim_max + 1, fp);
202  fclose(fp);
203  }
204  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
205 
206  if (ierr == 0) {
207  //TimeKeeperWithRandAndStep(X, cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i);
208  fprintf(stdoutMPI, "%s", cLogInputVecFinish);
209  //StopTimer(3600);
210  if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size);
211  return;
212  }/*if (ierr == 0)*/
213 
214  }/*X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN*/
215 
220  i_max = X->Check.idim_max;
221 
222  if (initial_mode == 0) {
223 
224  for (ie = 0; ie < X->Def.k_exct; ie++) {
225 
226  sum_i_max = SumMPI_li(X->Check.idim_max);
227  X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv + ie) % sum_i_max + 1;
228  iv = X->Large.iv;
229  fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n",
230  initial_mode, iv, i_max, X->Def.k_exct);
231 #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie)
232  for (idim = 1; idim <= i_max; idim++) wave[ie][idim] = 0.0;
233 
234  sum_i_max = 0;
235  for (iproc = 0; iproc < nproc; iproc++) {
236 
237  i_max_tmp = BcastMPI_li(iproc, i_max);
238  if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) {
239 
240  if (myrank == iproc) {
241  wave[ie][iv - sum_i_max + 1] = 1.0;
242  if (X->Def.iInitialVecType == 0) {
243  wave[ie][iv - sum_i_max + 1] += 1.0*I;
244  wave[ie][iv - sum_i_max + 1] /= sqrt(2.0);
245  }
246  }/*if (myrank == iproc)*/
247  }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/
248 
249  sum_i_max += i_max_tmp;
250 
251  }/*for (iproc = 0; iproc < nproc; iproc++)*/
252  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
253  }/*if(initial_mode == 0)*/
254  else if (initial_mode == 1) {
255  iv = X->Def.initial_iv;
256  fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d\n\n",
257  initial_mode, iv, i_max, X->Def.k_exct);
258 #pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt, ie) \
259  shared(wave, iv, X, nthreads, myrank, i_max)
260  {
261  /*
262  Initialise MT
263  */
264 #ifdef _OPENMP
265  mythread = omp_get_thread_num();
266 #else
267  mythread = 0;
268 #endif
269  u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank;
270  dsfmt_init_gen_rand(&dsfmt, u_long_i);
271 
272  for (ie = 0; ie < X->Def.k_exct; ie++) {
273  if (X->Def.iInitialVecType == 0) {
274 #pragma omp for
275  for (idim = 1; idim <= i_max; idim++)
276  wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I;
277  }
278  else {
279 #pragma omp for
280  for (idim = 1; idim <= i_max; idim++)
281  wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5);
282  }
283  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
284 
285  }/*#pragma omp parallel*/
286 
287  for (ie = 0; ie < X->Def.k_exct; ie++) {
288  dnorm = sqrt(creal(VecProdMPI(i_max, wave[ie], wave[ie])));
289 #pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie) private(idim)
290  for (idim = 1; idim <= i_max; idim++) wave[ie][idim] /= dnorm;
291  }
292  }/*else if(initial_mode==1)*/
293 }/*static void Initialize_wave*/
297 static void Output_restart(
298  struct BindStruct *X,
299  double complex **wave
300 )
301 {
302  FILE *fp;
303  size_t byte_size;
304  char sdt[D_FileNameMax];
305  int ie;
306 
307  //TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i);
308  fprintf(stdoutMPI, "%s", cLogOutputVecStart);
309 
310  for (ie = 0; ie < X->Def.k_exct; ie++) {
311  sprintf(sdt, cFileNameOutputVector, ie, myrank);
312  if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1);
313  byte_size = fwrite(&X->Large.itr, sizeof(X->Large.itr), 1, fp);
314  byte_size = fwrite(&X->Check.idim_max, sizeof(X->Check.idim_max), 1, fp);
315  byte_size = fwrite(wave[ie], sizeof(complex double), X->Check.idim_max + 1, fp);
316  fclose(fp);
317  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
318  //TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i);
319  fprintf(stdoutMPI, "%s", cLogOutputVecFinish);
320  if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size);
321 }/*static void Output_restart*/
331  struct BindStruct *X
332 )
333 {
334  char sdt[D_FileNameMax], sdt_2[D_FileNameMax];
335  FILE *fp;
336  int iconv = -1;
337  long int idim, i_max;
338  int ii, jj, ie, je, nsub, stp, mythread, nsub_cut;
339  double complex ***wxp/*[0] w, [1] x, [2] p of Ref.1*/,
340  ***hwxp/*[0] h*w, [1] h*x, [2] h*p of Ref.1*/,
341  *hsub, *ovlp /*Subspace Hamiltonian and Overlap*/,
342  **work;
343  double *eig, dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub;
344  int do_precon = 0;//If = 1, use preconditioning (experimental)
345 
346  nsub = 3 * X->Def.k_exct;
347 
348  eig = d_1d_allocate(X->Def.k_exct);
349  eigsub = d_1d_allocate(nsub);
350  hsub = cd_1d_allocate(nsub*nsub);
351  ovlp = cd_1d_allocate(nsub*nsub);
352  work = cd_2d_allocate(nthreads, nsub);
353 
354  i_max = X->Check.idim_max;
355 
356  free(v0);
357  free(v1);
358  free(vg);
359  wxp = cd_3d_allocate(3, X->Def.k_exct, X->Check.idim_max + 1);
360  hwxp = cd_3d_allocate(3, X->Def.k_exct, X->Check.idim_max + 1);
366  Initialize_wave(X, wxp[1]);
367 
369 
370  for (ie = 0; ie < X->Def.k_exct; ie++)
371  for (idim = 1; idim <= i_max; idim++) hwxp[1][ie][idim] = 0.0;
372  for (ie = 0; ie < X->Def.k_exct; ie++)
373  mltply(X, hwxp[1][ie], wxp[1][ie]);
374  stp = 1;
376 
377  for (ie = 0; ie < X->Def.k_exct; ie++){
378  for (idim = 1; idim <= i_max; idim++) wxp[2][ie][idim] = 0.0;
379  for (idim = 1; idim <= i_max; idim++) hwxp[2][ie][idim] = 0.0;
380 
381  eig[ie] = creal(VecProdMPI(i_max, wxp[1][ie], hwxp[1][ie]));
382  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
383 
384  sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead);
385  childfopenMPI(sdt_2, "w", &fp);
386  fprintf(stdoutMPI, " Step Residual-2-norm Threshold Energy\n");
387  fprintf(fp, " Step Residual-2-norm Threshold Energy\n");
388  fclose(fp);
389 
390  nsub_cut = nsub;
395  for (stp = 1; stp <= X->Def.Lanczos_max; stp++) {
400  eigabs_max = 0.0;
401  for (ie = 0; ie < X->Def.k_exct; ie++)
402  if (fabs(eig[ie]) > eigabs_max) eigabs_max = fabs(eig[ie]);
403  eps_LOBPCG = pow(10, -0.5 *X->Def.LanczosEps);
404  if (eigabs_max > 1.0) eps_LOBPCG *= eigabs_max;
409  dnormmax = 0.0;
410  for (ie = 0; ie < X->Def.k_exct; ie++) {
414 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,ie) private(idim)
415  for (idim = 1; idim <= i_max; idim++)
416  wxp[0][ie][idim] = hwxp[1][ie][idim] - eig[ie] * wxp[1][ie][idim];
417  dnorm = sqrt(creal(VecProdMPI(i_max, wxp[0][ie], wxp[0][ie])));
418  if (dnorm > dnormmax) dnormmax = dnorm;
419 
420  if (stp /= 1) {
424  if (do_precon == 1) {
425  preshift = calc_preshift(eig[ie], dnorm, eps_LOBPCG);
426 #pragma omp parallel for default(none) shared(wxp,ie,list_Diagonal,preshift,i_max,eps_LOBPCG) private(idim,precon)
427  for (idim = 1; idim <= i_max; idim++) {
428  precon = list_Diagonal[idim] - preshift;
429  if(fabs(precon) > eps_LOBPCG) wxp[0][ie][idim] /= precon;
430  }
431  }/*if(do_precon == 1)*/
435  dnorm = sqrt(creal(VecProdMPI(i_max, wxp[0][ie], wxp[0][ie])));
436 #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie) private(idim)
437  for (idim = 1; idim <= i_max; idim++) wxp[0][ie][idim] /= dnorm;
438  }/*if (stp /= 1)*/
439 
440  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
446  childfopenMPI(sdt_2, "a", &fp);
447  fprintf(stdoutMPI, "%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
448  fprintf(fp, "%9d %15.5e %15.5e ", stp, dnormmax, eps_LOBPCG);
449  for (ie = 0; ie < X->Def.k_exct; ie++) {
450  fprintf(stdoutMPI, " %15.5e", eig[ie]);
451  fprintf(fp, " %15.5e", eig[ie]);
452  }
453  if(nsub_cut == 0) printf("nsub_cut : %d", nsub_cut);
454  fprintf(stdoutMPI, "\n");
455  fprintf(fp, "\n");
456  fclose(fp);
457 
458  if (dnormmax < eps_LOBPCG) {
459  iconv = 0;
460  break;
461  }
465 #pragma omp parallel default(none) shared(hwxp,i_max,X) private(idim,ie)
466  {
467 #pragma omp for nowait
468  for (ie = 0; ie < X->Def.k_exct; ie++)
469  for (idim = 1; idim <= i_max; idim++) hwxp[0][ie][idim] = 0.0;
470  }
471  for (ie = 0; ie < X->Def.k_exct; ie++)
472  mltply(X, hwxp[0][ie], wxp[0][ie]);
473 
481  for (ii = 0; ii < 3; ii++) {
482  for (ie = 0; ie < X->Def.k_exct; ie++){
483  for (jj = 0; jj < 3; jj++) {
484  for (je = 0; je < X->Def.k_exct; je++){
485  hsub[je + jj*X->Def.k_exct + ie * nsub + ii * nsub*X->Def.k_exct]
486  = VecProdMPI(i_max, wxp[jj][je], hwxp[ii][ie]);
487  ovlp[je + jj*X->Def.k_exct + ie * nsub + ii * nsub*X->Def.k_exct]
488  = VecProdMPI(i_max, wxp[jj][je], wxp[ii][ie]);
489  }/*for (je = 0; je < X->Def.k_exct; je++)*/
490  }/*for (jj = 0; jj < 3; jj++)*/
491  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
492  }/*for (ii = 0; ii < 3; ii++)*/
493  for (ie = 0; ie < X->Def.k_exct; ie++)
494  eig[ie] =
495  creal(hsub[ie + 1 * X->Def.k_exct + ie * nsub + 1 * nsub*X->Def.k_exct]);
501  nsub_cut = diag_ovrp(nsub, hsub, ovlp, eigsub);
505  for (ie = 0; ie < X->Def.k_exct; ie++)
506  eig[ie] = 0.5 * (eig[ie] + eigsub[ie]);
507 
508 #pragma omp parallel default(none) shared(i_max,X,wxp,hwxp,hsub,nsub,work) private(idim,ie,je,jj,mythread)
509  {
510 #if defined(_OPENMP)
511  mythread = omp_get_thread_num();
512 #else
513  mythread = 0;
514 #endif
515 
516 #pragma omp for
517  for (idim = 1; idim <= i_max; idim++) {
522  for (ie = 0; ie < X->Def.k_exct; ie++) {
523  work[mythread][ie] = 0.0;
524  for (jj = 0; jj < 3; jj++)
525  for (je = 0; je < X->Def.k_exct; je++)
526  work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie];
527  }
528  for (ie = 0; ie < X->Def.k_exct; ie++) wxp[1][ie][idim] = work[mythread][ie];
533  for (ie = 0; ie < X->Def.k_exct; ie++) {
534  work[mythread][ie] = 0.0;
535  for (jj = 0; jj < 3; jj++)
536  for (je = 0; je < X->Def.k_exct; je++)
537  work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie];
538  }
539  for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[1][ie][idim] = work[mythread][ie];
544  for (ie = 0; ie < X->Def.k_exct; ie++) {
545  work[mythread][ie] = 0.0;
546  for (jj = 0; jj < 3; jj += 2) {
547  for (je = 0; je < X->Def.k_exct; je++)
548  work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie];
549  }
550  }
551  for (ie = 0; ie < X->Def.k_exct; ie++) wxp[2][ie][idim] = work[mythread][ie];
556  for (ie = 0; ie < X->Def.k_exct; ie++) {
557  work[mythread][ie] = 0.0;
558  for (jj = 0; jj < 3; jj += 2)
559  for (je = 0; je < X->Def.k_exct; je++)
560  work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie];
561  }
562  for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[2][ie][idim] = work[mythread][ie];
563 
564  }/*for (idim = 1; idim <= i_max; idim++)*/
565  }/*pragma omp parallel*/
569  for (ii = 1; ii < 3; ii++) {
570  for (ie = 0; ie < X->Def.k_exct; ie++) {
571  dnorm = sqrt(creal(VecProdMPI(i_max, wxp[ii][ie], wxp[ii][ie])));
572 #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ie,ii) private(idim)
573  for (idim = 1; idim <= i_max; idim++) {
574  wxp[ii][ie][idim] /= dnorm;
575  hwxp[ii][ie][idim] /= dnorm;
576  }
577  }/* for (ie = 0; ie < X->Def.k_exct; ie++)*/
578  }/*for (ii = 1; ii < 3; ii++)*/
579 
580  }/*for (stp = 1; stp <= X->Def.Lanczos_max; stp++)*/
585  //fclose(fp);
586 
587  X->Large.itr = stp;
588  sprintf(sdt, cFileNameTimeKeep, X->Def.CDataFileHead);
589 
591  fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd);
592 
593  free_d_1d_allocate(eig);
594  free_d_1d_allocate(eigsub);
595  free_cd_1d_allocate(hsub);
596  free_cd_1d_allocate(ovlp);
597  free_cd_2d_allocate(work);
598  free_cd_3d_allocate(hwxp);
602  if (X->Def.iReStart == RESTART_OUT || X->Def.iReStart == RESTART_INOUT){
603  Output_restart(X, wxp[1]);
604  if(iconv != 0) {
605  sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged);
606  return 1;
607  }
608  }
613  L_vec = cd_2d_allocate(X->Def.k_exct, X->Check.idim_max + 1);
614 #pragma omp parallel default(none) shared(i_max,wxp,L_vec,X) private(idim,ie)
615  {
616  for (ie = 0; ie < X->Def.k_exct; ie++) {
617 #pragma omp for nowait
618  for (idim = 0; idim < i_max; idim++)
619  L_vec[ie][idim] = wxp[1][ie][idim + 1];
620  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
621  }/*X->Def.k_exct, X->Check.idim_max + 1);*/
622  free_cd_3d_allocate(wxp);
623 
624  v0 = cd_1d_allocate(X->Check.idim_max + 1);
625  v1 = cd_1d_allocate(X->Check.idim_max + 1);
626  vg = cd_1d_allocate(X->Check.idim_max + 1);
627  if (iconv != 0) {
628  sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged);
629  return -1;
630  }
631  else {
632  return 0;
633  }
634 }/*int LOBPCG_Main*/
639  struct EDMainCalStruct *X
640 )
641 {
642  char sdt[D_FileNameMax];
643  size_t byte_size;
644  long int i_max = 0, ie, idim;
645  FILE *fp;
646 
647  fprintf(stdoutMPI, "###### Eigenvalue with LOBPCG #######\n\n");
648 
649  if (X->Bind.Def.iInputEigenVec == FALSE) {
650 
651  // this part will be modified
652  switch (X->Bind.Def.iCalcModel) {
653  case HubbardGC:
654  case SpinGC:
655  case KondoGC:
656  case SpinlessFermionGC:
657  initial_mode = 1; // 1 -> random initial vector
658  break;
659  case Hubbard:
660  case Kondo:
661  case Spin:
662  case SpinlessFermion:
663 
664  if (X->Bind.Def.iFlgGeneralSpin == TRUE) {
665  initial_mode = 1;
666  }
667  else {
668  if (X->Bind.Def.initial_iv>0) {
669  initial_mode = 0; // 0 -> only v[iv] = 1
670  }
671  else {
672  initial_mode = 1; // 1 -> random initial vector
673  }
674  }
675  break;
676  default:
677  //fclose(fp);
678  exitMPI(-1);
679  }
680 
681  int iret = LOBPCG_Main(&(X->Bind));
682  if (iret != 0) {
683  if(iret ==1) return (TRUE);
684  else{
685  fprintf(stdoutMPI, " LOBPCG is not converged in this process.\n");
686  return(FALSE);
687  }
688  }
689  }/*if (X->Bind.Def.iInputEigenVec == FALSE)*/
690  else {// X->Bind.Def.iInputEigenVec=true :input v1:
695  fprintf(stdoutMPI, "An Eigenvector is inputted.\n");
696  L_vec = cd_2d_allocate(X->Bind.Def.k_exct, X->Bind.Check.idim_max + 1);
697  for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
699  sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, ie, myrank);
700  childfopenALL(sdt, "rb", &fp);
701  if (fp == NULL) {
702  fprintf(stderr, "Error: Inputvector file is not found.\n");
703  exitMPI(-1);
704  }
705  byte_size = fread(&step_i, sizeof(int), 1, fp);
706  byte_size = fread(&i_max, sizeof(long int), 1, fp);
707  if (i_max != X->Bind.Check.idim_max) {
708  fprintf(stderr, "Error: Invalid Inputvector file.\n");
709  exitMPI(-1);
710  }
711  byte_size = fread(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp);
712 #pragma omp parallel for default(none) shared(L_vec, v1) firstprivate(i_max, ie), private(idim)
713  for (idim = 0; idim < i_max; idim++) {
714  L_vec[ie][idim] = v1[idim + 1];
715  }
716  }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/
717  fclose(fp);
719 
720  if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size);
721  }/*X->Bind.Def.iInputEigenVec == TRUE*/
722 
723  fprintf(stdoutMPI, "%s", cLogLanczos_EigenVecEnd);
728  phys(&(X->Bind), X->Bind.Def.k_exct);
729 
730  X->Bind.Def.St=1;
731  if (X->Bind.Def.St == 0) {
732  sprintf(sdt, cFileNameEnergy_Lanczos, X->Bind.Def.CDataFileHead);
733  }
734  else if (X->Bind.Def.St == 1) {
735  sprintf(sdt, cFileNameEnergy_CG, X->Bind.Def.CDataFileHead);
736  }
737 
738  if (childfopenMPI(sdt, "w", &fp) != 0) {
739  exitMPI(-1);
740  }
741  for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
742  //phys(&(X->Bind), ie);
743  fprintf(fp, "State %ld\n", ie);
744  fprintf(fp, " Energy %.16lf \n", X->Bind.Phys.all_energy[ie]);
745  fprintf(fp, " Doublon %.16lf \n", X->Bind.Phys.all_doublon[ie]);
746  fprintf(fp, " Sz %.16lf \n", X->Bind.Phys.all_sz[ie]);
747  //fprintf(fp, " S^2 %.16lf \n", X->Bind.Phys.all_s2[ie]);
748  //fprintf(fp, " N_up %.16lf \n", X->Bind.Phys.all_num_up[ie]);
749  //fprintf(fp, " N_down %.16lf \n", X->Bind.Phys.all_num_down[ie]);
750  fprintf(fp, "\n");
751  }
752  fclose(fp);
753  /*
754  Output Eigenvector to a file
755  */
756  if (X->Bind.Def.iOutputEigenVec == TRUE) {
758 
759  for (ie = 0; ie < X->Bind.Def.k_exct; ie++) {
760 
761 #pragma omp parallel for default(none) shared(X,v1,L_vec,ie) private(idim)
762  for (idim = 0; idim < X->Bind.Check.idim_max; idim++)
763  v1[idim + 1] = L_vec[ie][idim];
764 
765  sprintf(sdt, cFileNameOutputEigen, X->Bind.Def.CDataFileHead, ie, myrank);
766  if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1);
767  byte_size = fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr), 1, fp);
768  byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp);
769  byte_size = fwrite(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp);
770  fclose(fp);
771  }/*for (ie = 0; ie < X->Bind.Def.k_exct; ie++)*/
772 
774  }/*if (X->Bind.Def.iOutputEigenVec == TRUE)*/
775 
776  return TRUE;
777 
778 }/*int CalcByLOBPCG*/
const char * cFileNameEnergy_CG
Definition: global.c:42
int childfopenALL(const char *_cPathChild, const char *_cmode, FILE **_fp)
All processes open file in output/ directory.
Definition: FileIO.c:50
int LanczosEps
log(10 base) of the convergence threshold. Read from Calcmod in readdef.h
Definition: struct.h:48
void exitMPI(int errorcode)
MPI Abortation wrapper.
Definition: wrapperMPI.c:86
struct DefineList Def
Definision of system (Hamiltonian) etc.
Definition: struct.h:410
int mltply(struct BindStruct *X, double complex *tmp_v0, double complex *tmp_v1)
Parent function of multiplying the wavefunction by the Hamiltonian. . First, the calculation of diago...
Definition: mltply.c:56
int St
0 or 1, but it affects nothing.
Definition: struct.h:80
const char * cOutputEigenVecStart
Definition: LogMessage.c:45
void zgemm_(char *transa, char *transb, int *m, int *n, int *k, double complex *alpha, double complex *a, int *lda, double complex *b, int *ldb, double complex *beta, double complex *c, int *ldc)
int itr
Iteration number.
Definition: struct.h:315
unsigned long int idim_max
The dimension of the Hilbert space of this process.
Definition: struct.h:303
int iReStart
Definition: struct.h:221
int initial_mode
Definition: global.h:60
const char * cFileNameInputEigen
Definition: global.c:50
const char * cLogLanczos_EigenValueNotConverged
Definition: LogMessage.c:89
const char * cLogLanczos_EigenVecEnd
unsigned long int BcastMPI_li(int root, unsigned long int idim)
MPI wrapper function to broadcast unsigned long integer across processes.
Definition: wrapperMPI.c:273
static double calc_preshift(double eig, double res, double eps_LOBPCG)
Compute adaptively shifted preconditionar written in S. Yamada, et al., Transactions of JSCES...
Definition: CalcByLOBPCG.c:129
double complex * v1
Definition: global.h:35
#define D_FileNameMax
Definition: global.h:23
int iOutputEigenVec
ASwitch for outputting an eigenvector. 0: no output, 1:output.
Definition: struct.h:202
void phys(struct BindStruct *X, unsigned long int neig)
A main function to calculate physical quantities by full diagonalization method.
Definition: phys.c:48
struct LargeList Large
Variables for Matrix-Vector product.
Definition: struct.h:412
int childfopenMPI(const char *_cPathChild, const char *_cmode, FILE **_fp)
Only the root process open file in output/ directory.
Definition: FileIO.c:27
const char * cFileNameOutputEigen
Definition: global.c:49
const char * cLogInputVecFinish
Definition: LogMessage.c:150
const char * cLanczos_EigenValueFinish
Definition: LogMessage.c:93
struct PhysList Phys
Physical quantities.
Definition: struct.h:413
static void Output_restart(struct BindStruct *X, double complex **wave)
Output eigenvectors for restart LOBPCG method.
Definition: CalcByLOBPCG.c:297
const char * cFileNameLanczosStep
Definition: global.c:39
int CalcByLOBPCG(struct EDMainCalStruct *X)
Driver routine for LOB(P)CG method.
Definition: CalcByLOBPCG.c:638
#define TRUE
Definition: global.h:26
Bind.
Definition: struct.h:419
int nthreads
Number of Threads, defined in InitializeMPI()
Definition: global.h:164
const char * cLogInputVecStart
Definition: LogMessage.c:149
long int iv
Used for initializing vector.
Definition: struct.h:316
int nproc
Number of processors, defined in InitializeMPI()
Definition: global.h:162
Bind.
Definition: struct.h:409
void zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info)
const char * cFileNameInputVector
Definition: global.c:65
const char * cLogLanczos_EigenValueEnd
const char * cReadEigenVecFinish
Definition: LogMessage.c:44
const char * cLanczos_EigenValueStart
Definition: LogMessage.c:91
double * all_doublon
[CheckList::idim_max+1] Doublon for FullDiag and LOBPCG. malloc in setmem_large().
Definition: struct.h:373
double complex ** L_vec
Definition: global.h:74
double * alpha
Definition: global.h:44
const char * cLogOutputVecStart
Definition: LogMessage.c:151
int iFlgGeneralSpin
Flag for the general (Sz/=1/2) spin.
Definition: struct.h:86
#define FALSE
Definition: global.h:25
static int diag_ovrp(int nsub, double complex *hsub, double complex *ovlp, double *eig)
Solve the generalized eigenvalue problem with the Lowdin&#39;s orthogonalization.
Definition: CalcByLOBPCG.c:43
const char * cFileNameTimeKeep
Definition: global.c:23
double complex * vg
Definition: global.h:41
const char * cReadEigenVecStart
Definition: LogMessage.c:43
double complex VecProdMPI(long unsigned int ndim, double complex *v1, double complex *v2)
Compute conjugate scaler product of process-distributed vector .
Definition: wrapperMPI.c:314
double * beta
Definition: global.h:44
const char * cLanczos_EigenValueStep
Definition: LogMessage.c:92
const char * cLogOutputVecFinish
Definition: LogMessage.c:152
struct EDMainCalStruct X
Definition: struct.h:432
double * list_Diagonal
Definition: global.h:46
double * all_sz
[CheckList::idim_max+1] for FullDiag and LOBPCG. malloc in setmem_large().
Definition: struct.h:375
int iCalcModel
Switch for model. 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC, 6:HubbardNConserved.
Definition: struct.h:198
long int initial_iv
Seed of random number for initial guesss of wavefunctions.
Definition: struct.h:76
int myrank
Process ID, defined in InitializeMPI()
Definition: global.h:163
const char * cFileNameOutputVector
Definition: global.c:64
int iInitialVecType
Switch for type of inital vectors. 0:complex type, 1: real type. default value is set as 0 in readdef...
Definition: struct.h:195
int LOBPCG_Main(struct BindStruct *X)
Core routine for the LOBPCG method This method is introduced inS. Yamada, et al., Transactions of JSC...
Definition: CalcByLOBPCG.c:330
double complex * v0
Definition: global.h:34
int iInputEigenVec
Switch for reading an eigenvector. 0: no input, 1:input.
Definition: struct.h:203
const char * cFileNameEnergy_Lanczos
Definition: global.c:40
struct CheckList Check
Size of the Hilbert space.
Definition: struct.h:411
unsigned long int SumMPI_li(unsigned long int idim)
MPI wrapper function to obtain sum of unsigned long integer across processes.
Definition: wrapperMPI.c:239
int step_i
Definition: global.h:66
char * CDataFileHead
Read from Calcmod in readdef.h. Header of output file such as Green&#39;s function.
Definition: struct.h:42
int TimeKeeperWithStep(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType, const int istep)
Functions for writing a time log.
Definition: log.c:78
struct BindStruct Bind
Binded struct.
Definition: struct.h:420
static void Initialize_wave(struct BindStruct *X, double complex **wave)
Definition: CalcByLOBPCG.c:155
int TimeKeeper(struct BindStruct *X, const char *cFileName, const char *cTimeKeeper_Message, const char *cWriteType)
Functions for writing a time log.
Definition: log.c:42
unsigned int Lanczos_max
Maximum number of iterations.
Definition: struct.h:74
FILE * stdoutMPI
File pointer to the standard output defined in InitializeMPI()
Definition: global.h:165
double * all_energy
[CheckList::idim_max+1] Energy for FullDiag and LOBPCG. malloc in setmem_large(). ...
Definition: struct.h:371
unsigned int k_exct
Read from Calcmod in readdef.h.
Definition: struct.h:47