-
Notifications
You must be signed in to change notification settings - Fork 104
/
Copy pathquda.h
1121 lines (915 loc) · 43.9 KB
/
quda.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#ifndef _QUDA_H
#define _QUDA_H
/**
* @file quda.h
* @brief Main header file for the QUDA library
*
* Note to QUDA developers: When adding new members to QudaGaugeParam
* and QudaInvertParam, be sure to update lib/check_params.h as well
* as the Fortran interface in lib/quda_fortran.F90.
*/
#include <enum_quda.h>
#include <stdio.h> /* for FILE */
#include <quda_constants.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Parameters having to do with the gauge field or the
 * interpretation of the gauge field by various Dirac operators.
 *
 * The member order and types are part of the library interface (see the
 * note to developers at the top of this file), so only append or edit
 * members together with lib/check_params.h and the Fortran interface.
 */
typedef struct QudaGaugeParam_s {
QudaFieldLocation location; /**< The location of the gauge field */
int X[4]; /**< The local space-time dimensions (without checkerboarding) */
double anisotropy; /**< Used for Wilson and Wilson-clover */
double tadpole_coeff; /**< Used for staggered only */
double scale; /**< Used by staggered long links */
QudaLinkType type; /**< The link type of the gauge field (e.g., Wilson, fat, long, etc.) */
QudaGaugeFieldOrder gauge_order; /**< The ordering on the input gauge field */
QudaTboundary t_boundary; /**< The temporal boundary condition that will be used for fermion fields */
QudaPrecision cpu_prec; /**< The precision used by the caller */
QudaPrecision cuda_prec; /**< The precision of the cuda gauge field */
QudaReconstructType reconstruct; /**< The reconstruction type of the cuda gauge field */
QudaPrecision cuda_prec_sloppy; /**< The precision of the sloppy gauge field */
QudaReconstructType reconstruct_sloppy; /**< The reconstruction type of the sloppy gauge field */
QudaPrecision cuda_prec_precondition; /**< The precision of the preconditioner gauge field */
QudaReconstructType reconstruct_precondition; /**< The reconstruction type of the preconditioner gauge field */
QudaGaugeFixed gauge_fix; /**< Whether the input gauge field is in the axial gauge or not */
int ga_pad; /**< The pad size that the cudaGaugeField will use (default=0) */
int site_ga_pad; /**< Used by link fattening and the gauge and fermion forces */
int staple_pad; /**< Used by link fattening */
int llfat_ga_pad; /**< Used by link fattening */
int mom_ga_pad; /**< Used by the gauge and fermion forces */
double gaugeGiB; /**< The storage used by the gauge fields */
QudaStaggeredPhase staggered_phase_type; /**< Set the staggered phase type of the links */
int staggered_phase_applied; /**< Whether the staggered phase has already been applied to the links */
double i_mu; /**< Imaginary chemical potential */
int overlap; /**< Width of overlapping domains */
int overwrite_mom; /**< When computing momentum, should we overwrite it or accumulate to it */
int use_resident_gauge; /**< Use the resident gauge field as input */
int use_resident_mom; /**< Use the resident momentum field as input */
int make_resident_gauge; /**< Make the result gauge field resident */
int make_resident_mom; /**< Make the result momentum field resident */
int return_result_gauge; /**< Return the result gauge field */
int return_result_mom; /**< Return the result momentum field */
size_t gauge_offset; /**< Offset into MILC site struct to the gauge field (only if gauge_order=MILC_SITE_GAUGE_ORDER) */
size_t mom_offset; /**< Offset into MILC site struct to the momentum field (only if gauge_order=MILC_SITE_GAUGE_ORDER) */
size_t site_size; /**< Size of MILC site struct (only if gauge_order=MILC_SITE_GAUGE_ORDER) */
} QudaGaugeParam;
/**
 * Parameters relating to the solver and the choice of Dirac operator.
 *
 * Several members (e.g. true_res, iter, gflops, secs) are documented below
 * as values achieved or measured by the solver rather than as inputs.
 */
typedef struct QudaInvertParam_s {
QudaFieldLocation input_location; /**< The location of the input field */
QudaFieldLocation output_location; /**< The location of the output field */
QudaDslashType dslash_type; /**< The Dirac Dslash type that is being used */
QudaInverterType inv_type; /**< Which linear solver to use */
double mass; /**< Used for staggered only */
double kappa; /**< Used for Wilson and Wilson-clover */
double m5; /**< Domain wall height */
int Ls; /**< Extent of the 5th dimension (for domain wall) */
double b_5[QUDA_MAX_DWF_LS]; /**< MDWF coefficients */
double c_5[QUDA_MAX_DWF_LS]; /**< MDWF coefficients; used only for the Mobius type of fermion */
double mu; /**< Twisted mass parameter */
double epsilon; /**< Twisted mass parameter */
QudaTwistFlavorType twist_flavor; /**< Twisted mass flavor */
double tol; /**< Solver tolerance in the L2 residual norm */
double tol_restart; /**< Solver tolerance in the L2 residual norm (used to restart InitCG) */
double tol_hq; /**< Solver tolerance in the heavy quark residual norm */
int compute_true_res; /**< Whether to compute the true residual post solve */
double true_res; /**< Actual L2 residual norm achieved in solver */
double true_res_hq; /**< Actual heavy quark residual norm achieved in solver */
int maxiter; /**< Maximum number of iterations in the linear solver */
double reliable_delta; /**< Reliable update tolerance */
int use_sloppy_partial_accumulator; /**< Whether to keep the partial solution accumulator in sloppy precision */
/** This parameter determines how often we accumulate into the
    solution vector from the direction vectors in the solver.
    E.g., running with solution_accumulator_pipeline = 4, means we
    will update the solution vector every four iterations using the
    direction vectors from the prior four iterations. This
    increases performance of mixed-precision solvers since it means
    less high-precision vector round-trip memory travel, but
    requires more low-precision memory allocation. */
int solution_accumulator_pipeline;
/** This parameter determines how many consecutive reliable update
    residual increases we tolerate before terminating the solver,
    i.e., how long do we want to keep trying to converge */
int max_res_increase;
/** This parameter determines how many total reliable update
    residual increases we tolerate before terminating the solver,
    i.e., how long do we want to keep trying to converge */
int max_res_increase_total;
/** After how many iterations shall the heavy quark residual be updated */
int heavy_quark_check;
int pipeline; /**< Whether to use a pipelined solver with less global sums */
int num_offset; /**< Number of offsets in the multi-shift solver */
int num_src; /**< Number of sources in the multiple source solver */
int overlap; /**< Width of domain overlaps */
/** Offsets for multi-shift solver */
double offset[QUDA_MAX_MULTI_SHIFT];
/** Solver tolerance for each offset */
double tol_offset[QUDA_MAX_MULTI_SHIFT];
/** Solver tolerance for each shift when refinement is applied using the heavy-quark residual */
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT];
/** Actual L2 residual norm achieved in solver for each offset */
double true_res_offset[QUDA_MAX_MULTI_SHIFT];
/** Iterated L2 residual norm achieved in multi shift solver for each offset */
double iter_res_offset[QUDA_MAX_MULTI_SHIFT];
/** Actual heavy quark residual norm achieved in solver for each offset */
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT];
/** Residues in the partial fraction expansion */
double residue[QUDA_MAX_MULTI_SHIFT];
/** Whether we should evaluate the action after the linear solver */
int compute_action;
/** Computed value of the bilinear action (complex-valued)
    invert: \phi^\dagger A^{-1} \phi
    multishift: \phi^\dagger r(x) \phi = \phi^\dagger (sum_k residue[k] * (A + offset[k])^{-1} ) \phi */
double action[2];
QudaSolutionType solution_type; /**< Type of system to solve */
QudaSolveType solve_type; /**< How to solve it */
QudaMatPCType matpc_type; /**< The preconditioned matrix type */
QudaDagType dagger; /**< Whether we are using the Hermitian conjugate system or not */
QudaMassNormalization mass_normalization; /**< The mass normalization being used by the caller */
QudaSolverNormalization solver_normalization; /**< The normalization desired in the solver */
QudaPreserveSource preserve_source; /**< Preserve the source or not in the linear solver (deprecated) */
QudaPrecision cpu_prec; /**< The precision used by the input fermion fields */
QudaPrecision cuda_prec; /**< The precision used by the QUDA solver */
QudaPrecision cuda_prec_sloppy; /**< The precision used by the QUDA sloppy operator */
QudaPrecision cuda_prec_precondition; /**< The precision used by the QUDA preconditioner */
QudaDiracFieldOrder dirac_order; /**< The order of the input and output fermion fields */
QudaGammaBasis gamma_basis; /**< Gamma basis of the input and output host fields */
QudaFieldLocation clover_location; /**< The location of the clover field */
QudaPrecision clover_cpu_prec; /**< The precision used for the input clover field */
QudaPrecision clover_cuda_prec; /**< The precision used for the clover field in the QUDA solver */
QudaPrecision clover_cuda_prec_sloppy; /**< The precision used for the clover field in the QUDA sloppy operator */
QudaPrecision clover_cuda_prec_precondition; /**< The precision used for the clover field in the QUDA preconditioner */
QudaCloverFieldOrder clover_order; /**< The order of the input clover field */
QudaUseInitGuess use_init_guess; /**< Whether to use an initial guess in the solver or not */
double clover_coeff; /**< Coefficient of the clover term */
double clover_rho; /**< Real number added to the clover diagonal (not to inverse) */
int compute_clover_trlog; /**< Whether to compute the trace log of the clover term */
double trlogA[2]; /**< The trace log of the clover term (even/odd computed separately) */
int compute_clover; /**< Whether to compute the clover field */
int compute_clover_inverse; /**< Whether to compute the clover inverse field */
int return_clover; /**< Whether to copy back the clover matrix field */
int return_clover_inverse; /**< Whether to copy back the inverted clover matrix field */
QudaVerbosity verbosity; /**< The verbosity setting to use in the solver */
int sp_pad; /**< The padding to use for the fermion fields */
int cl_pad; /**< The padding to use for the clover fields */
int iter; /**< The number of iterations performed by the solver */
double spinorGiB; /**< The memory footprint of the fermion fields */
double cloverGiB; /**< The memory footprint of the clover fields */
double gflops; /**< The Gflops rate of the solver */
double secs; /**< The time taken by the solver */
QudaTune tune; /**< Enable auto-tuning? (default = QUDA_TUNE_YES) */
/** Number of steps in s-step algorithms */
int Nsteps;
/** Maximum size of Krylov space used by solver */
int gcrNkrylov;
/*
 * The following parameters are related to the solver
 * preconditioner, if enabled.
 */
/**
 * The inner Krylov solver used in the preconditioner. Set to
 * QUDA_INVALID_INVERTER to disable the preconditioner entirely.
 */
QudaInverterType inv_type_precondition;
/** Preconditioner instance, e.g., multigrid */
void *preconditioner;
/** Deflation instance */
void *deflation_op;
/**
 * Dirac Dslash used in preconditioner
 */
QudaDslashType dslash_type_precondition;
/** Verbosity of the inner Krylov solver */
QudaVerbosity verbosity_precondition;
/** Tolerance in the inner solver */
double tol_precondition;
/** Maximum number of iterations allowed in the inner solver */
int maxiter_precondition;
/** Relaxation parameter used in GCR-DD (default = 1.0) */
double omega;
/** Number of preconditioner cycles to perform per iteration */
int precondition_cycle;
/** Whether to use additive or multiplicative Schwarz preconditioning */
QudaSchwarzType schwarz_type;
/**
 * Whether to use the L2 relative residual, Fermilab heavy-quark
 * residual, or both to determine convergence. To require that both
 * stopping conditions are satisfied, use a bitwise OR as follows:
 *
 * p.residual_type = (QudaResidualType) (QUDA_L2_RELATIVE_RESIDUAL
 * | QUDA_HEAVY_QUARK_RESIDUAL);
 */
QudaResidualType residual_type;
/* Parameters for deflated solvers */
/** The precision of the Ritz vectors */
QudaPrecision cuda_prec_ritz;
/** How many vectors to compute after one solve;
 * for eigCG the recommended values are 8 or 16
 */
int nev;
/** eigCG: search space dimension;
 * gmresdr: Krylov subspace dimension
 */
int max_search_dim;
/** For systems with many RHS: current RHS index */
int rhs_idx;
/** Specifies deflation space volume: total number of eigenvectors is nev*deflation_grid */
int deflation_grid;
/** eigCG: selection criterion for the reduced eigenvector set */
double eigenval_tol;
/** mixed precision eigCG tuning parameter: minimum search vector space restarts */
int eigcg_max_restarts;
/** initCG tuning parameter: maximum restarts */
int max_restart_num;
/** initCG tuning parameter: tolerance for cg refinement corrections in the deflation stage */
double inc_tol;
/** Whether to make the solution vector(s) resident after the solve
 * (NOTE(review): "resident" inferred from the member name - confirm) */
int make_resident_solution;
/** Whether to use the resident solution vector(s) */
int use_resident_solution;
/** Whether to use the solution vector to augment the chronological basis */
int make_resident_chrono;
/** Whether to use the resident chronological basis */
int use_resident_chrono;
/** The maximum length of the chronological history to store */
int max_chrono_dim;
/** The index to indicate which chrono history we are augmenting */
int chrono_index;
/** Which external library to use in the linear solvers (MAGMA or Eigen) */
QudaExtLibType extlib_type;
} QudaInvertParam;
/**
 * Parameter set for solving eigenvalue problems.
 *
 * Eigenproblems are closely tied to the Ritz algorithm, and the Lanczos
 * algorithm uses the Ritz operator. For the Ritz matrix operation we
 * need to know the solution type of the Dirac operator. For
 * acceleration, a Chebyshev polynomial method is also used. The nk and
 * np values are needed by the implicitly restarted Lanczos method,
 * which is an optimized form of the Lanczos algorithm.
 */
typedef struct QudaEigParam_s {
QudaInvertParam *invert_param;
/* Specific to the Lanczos method (members below are undocumented in this header): */
QudaSolutionType RitzMat_lanczos;
QudaSolutionType RitzMat_Convcheck;
QudaEigType eig_type;
double *MatPoly_param;
int NPoly;
double Stp_residual;
int nk;
int np;
int f_size;
double eigen_shift;
/* More general parameters: */
/** Whether to load eigenvectors */
QudaBoolean import_vectors;
/** The precision of the Ritz vectors */
QudaPrecision cuda_prec_ritz;
/** The memory type used to keep the Ritz vectors */
QudaMemoryType mem_type_ritz;
/** Location where deflation should be done */
QudaFieldLocation location;
/** Whether to run the verification checks once set up is complete */
QudaBoolean run_verify;
/* NOTE(review): the wording of the next four comments ("null-space
 * vectors", "multigrid solver setup") matches QudaMultigridParam and
 * may have been copied from there; presumably these refer to the
 * eigenvectors / eigensolver here - confirm. */
/** Filename prefix where to load the null-space vectors */
char vec_infile[256];
/** Filename prefix for where to save the null-space vectors */
char vec_outfile[256];
/** The Gflops rate of the multigrid solver setup */
double gflops;
/** The time taken by the multigrid solver setup */
double secs;
/** Which external library to use in the deflation operations (MAGMA or Eigen) */
QudaExtLibType extlib_type;
} QudaEigParam;
/**
 * Parameters for the multigrid solver; this is the structure passed to
 * newMultigridQuda() and updateMultigridQuda(). Per-level settings are
 * arrays of length QUDA_MAX_MG_LEVEL.
 */
typedef struct QudaMultigridParam_s {
QudaInvertParam *invert_param;
/** Number of multigrid levels */
int n_level;
/** Geometric block sizes to use on each level */
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM];
/** Spin block sizes to use on each level */
int spin_block_size[QUDA_MAX_MG_LEVEL];
/** Number of null-space vectors to use on each level */
int n_vec[QUDA_MAX_MG_LEVEL];
/** Verbosity on each level of the multigrid */
QudaVerbosity verbosity[QUDA_MAX_MG_LEVEL];
/** Inverter to use in the setup phase */
QudaInverterType setup_inv_type[QUDA_MAX_MG_LEVEL];
/** Tolerance to use in the setup phase */
double setup_tol[QUDA_MAX_MG_LEVEL];
/** Smoother to use on each level */
QudaInverterType smoother[QUDA_MAX_MG_LEVEL];
/** The type of residual to send to the next coarse grid, and thus the
    type of solution to receive back from this coarse grid */
QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL];
/** The type of smoother solve to do on each grid (e/o preconditioning or not) */
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL];
/** The type of multigrid cycle to perform at each level */
QudaMultigridCycleType cycle_type[QUDA_MAX_MG_LEVEL];
/** Number of pre-smoother applications on each level */
int nu_pre[QUDA_MAX_MG_LEVEL];
/** Number of post-smoother applications on each level */
int nu_post[QUDA_MAX_MG_LEVEL];
/** Tolerance to use for the smoother / solver on each level */
double smoother_tol[QUDA_MAX_MG_LEVEL];
/** Over/under relaxation factor for the smoother at each level */
double omega[QUDA_MAX_MG_LEVEL];
/** Whether to use global reductions or not for the smoother / solver at each level */
QudaBoolean global_reduction[QUDA_MAX_MG_LEVEL];
/** Location where each level should be done */
QudaFieldLocation location[QUDA_MAX_MG_LEVEL];
/** Whether to compute the null vectors or reload them */
QudaComputeNullVector compute_null_vector;
/** Whether to generate on all levels or just on level 0 */
QudaBoolean generate_all_levels;
/** Whether to run the verification checks once set up is complete */
QudaBoolean run_verify;
/** Filename prefix where to load the null-space vectors */
char vec_infile[256];
/** Filename prefix for where to save the null-space vectors */
char vec_outfile[256];
/** The Gflops rate of the multigrid solver setup */
double gflops;
/** The time taken by the multigrid solver setup */
double secs;
/** Multiplicative factor for the mu parameter */
double mu_factor[QUDA_MAX_MG_LEVEL];
} QudaMultigridParam;
/*
* Interface functions, found in interface_quda.cpp
*/
/**
* Set parameters related to status reporting.
*
* In typical usage, this function will be called once (or not at
* all) just before the call to initQuda(), but it's valid to call
* it any number of times at any point during execution. Prior to
* the first time it's called, the parameters take default values
* as indicated below.
*
* @param verbosity Default verbosity, ranging from QUDA_SILENT to
* QUDA_DEBUG_VERBOSE. Within a solver, this
* parameter is overridden by the "verbosity"
* member of QudaInvertParam. The default value
* is QUDA_SUMMARIZE.
*
* @param prefix String to prepend to all messages from QUDA. This
* defaults to the empty string (""), but you may
* wish to specify something like "QUDA: " to
* distinguish QUDA's output from that of your
* application.
*
* @param outfile File pointer (such as stdout, stderr, or a handle
* returned by fopen()) where messages should be
* printed. The default is stdout.
*/
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[],
FILE *outfile);
/**
 * initCommsGridQuda() takes an optional "rank_from_coords" argument that
 * should be a pointer to a user-defined function with this prototype.
 *
 * @param coords Node coordinates
 * @param fdata Any auxiliary data needed by the function
 * @return MPI rank or QMP node ID corresponding to the node coordinates
 *
 * @see initCommsGridQuda
 */
typedef int (*QudaCommsMap)(const int *coords, void *fdata);
/**
* Declare the grid mapping ("logical topology" in QMP parlance)
* used for communications in a multi-GPU grid. This function
* should be called prior to initQuda(). The only case in which
* it's optional is when QMP is used for communication and the
* logical topology has already been declared by the application.
*
* @param nDim Number of grid dimensions. "4" is the only supported
* value currently.
*
* @param dims Array of grid dimensions. dims[0]*dims[1]*dims[2]*dims[3]
* must equal the total number of MPI ranks or QMP nodes.
*
* @param func Pointer to a user-supplied function that maps coordinates
* in the communication grid to MPI ranks (or QMP node IDs).
* If the pointer is NULL, the default mapping depends on
* whether QMP or MPI is being used for communication. With
* QMP, the existing logical topology is used if it's been
* declared. With MPI or as a fallback with QMP, the default
* ordering is lexicographical with the fourth ("t") index
* varying fastest.
*
* @param fdata Pointer to any data required by "func" (may be NULL)
*
* @see QudaCommsMap
*/
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata);
/**
* Initialize the library. This is a low-level interface that is
* called by initQuda. Calling initQudaDevice requires that the
* user also call initQudaMemory before using QUDA.
*
* @param device CUDA device number to use. In a multi-GPU build,
* this parameter may either be set explicitly on a
* per-process basis or set to -1 to enable a default
* allocation of devices to processes.
*/
void initQudaDevice(int device);
/**
 * Initialize the library persistent memory allocations (both host
 * and device). This is a low-level interface that is called by
 * initQuda. Calling initQudaMemory requires that the user has
 * previously called initQudaDevice.
 *
 * The parameter list is spelled (void) so that C callers see a real
 * prototype: in C an empty list "()" leaves the arguments unspecified
 * and unchecked. For C++ callers the two spellings are equivalent.
 */
void initQudaMemory(void);
/**
* Initialize the library. This function is actually a wrapper
* around calls to initQudaDevice() and initQudaMemory().
*
* @param device CUDA device number to use. In a multi-GPU build,
* this parameter may either be set explicitly on a
* per-process basis or set to -1 to enable a default
* allocation of devices to processes.
*/
void initQuda(int device);
/**
* Finalize the library.
*/
void endQuda(void);
/**
 * @brief Update the radius for halos.
 * @details This should only be needed for automated testing when
 * different partitioning is applied within a single run.
 *
 * The parameter list is spelled (void) so that C callers see a real
 * prototype: in C an empty list "()" leaves the arguments unspecified
 * and unchecked. For C++ callers the two spellings are equivalent.
 */
void updateR(void);
/**
* A new QudaGaugeParam should always be initialized immediately
* after it's defined (and prior to explicitly setting its members)
* using this function. Typical usage is as follows:
*
* QudaGaugeParam gauge_param = newQudaGaugeParam();
*/
QudaGaugeParam newQudaGaugeParam(void);
/**
* A new QudaInvertParam should always be initialized immediately
* after it's defined (and prior to explicitly setting its members)
* using this function. Typical usage is as follows:
*
* QudaInvertParam invert_param = newQudaInvertParam();
*/
QudaInvertParam newQudaInvertParam(void);
/**
* A new QudaMultigridParam should always be initialized immediately
* after it's defined (and prior to explicitly setting its members)
* using this function. Typical usage is as follows:
*
* QudaMultigridParam mg_param = newQudaMultigridParam();
*/
QudaMultigridParam newQudaMultigridParam(void);
/**
* A new QudaEigParam should always be initialized immediately
* after it's defined (and prior to explicitly setting its members)
* using this function. Typical usage is as follows:
*
* QudaEigParam eig_param = newQudaEigParam();
*/
QudaEigParam newQudaEigParam(void);
/**
* Print the members of QudaGaugeParam.
* @param param The QudaGaugeParam whose elements we are to print.
*/
void printQudaGaugeParam(QudaGaugeParam *param);
/**
* Print the members of QudaInvertParam.
* @param param The QudaInvertParam whose elements we are to print.
*/
void printQudaInvertParam(QudaInvertParam *param);
/**
* Print the members of QudaMultigridParam.
* @param param The QudaMultigridParam whose elements we are to print.
*/
void printQudaMultigridParam(QudaMultigridParam *param);
/**
* Print the members of QudaEigParam.
* @param param The QudaEigParam whose elements we are to print.
*/
void printQudaEigParam(QudaEigParam *param);
/**
* Load the gauge field from the host.
* @param h_gauge Base pointer to host gauge field (regardless of dimensionality)
* @param param Contains all metadata regarding host and device storage
*/
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param);
/**
* Free QUDA's internal copy of the gauge field.
*/
void freeGaugeQuda(void);
/**
* Save the gauge field to the host.
* @param h_gauge Base pointer to host gauge field (regardless of dimensionality)
* @param param Contains all metadata regarding host and device storage
*/
void saveGaugeQuda(void *h_gauge, QudaGaugeParam *param);
/**
 * Load the clover term and/or the clover inverse from the host.
 * Either h_clover or h_clovinv may be set to NULL.
 * @param h_clover Base pointer to host clover field
 * @param h_clovinv Base pointer to host clover inverse field
 * @param inv_param Contains all metadata regarding host and device storage
 */
void loadCloverQuda(void *h_clover, void *h_clovinv,
QudaInvertParam *inv_param);
/**
* Free QUDA's internal copy of the clover term and/or clover inverse.
*/
void freeCloverQuda(void);
/**
 * Lanczos entry point, configured through a QudaEigParam.
 *
 * NOTE(review): the comment previously attached to this declaration was
 * a copy of the invertQuda() documentation and described parameters
 * (h_x, h_b, param) that this function does not have. The descriptions
 * below are inferred from the parameter names only and must be confirmed
 * against the implementation.
 *
 * @param k0 TODO(review): undocumented - presumably a starting index
 * @param m TODO(review): undocumented - presumably the number of Lanczos steps / subspace size
 * @param hp_Apsi TODO(review): undocumented host pointer
 * @param hp_r TODO(review): undocumented host pointer - presumably the residual vector
 * @param hp_V TODO(review): undocumented host pointer - presumably the Lanczos basis vectors
 * @param hp_alpha TODO(review): undocumented host pointer - presumably the alpha coefficients
 * @param hp_beta TODO(review): undocumented host pointer - presumably the beta coefficients
 * @param eig_param The eigensolver parameter set (see QudaEigParam)
 */
void lanczosQuda(int k0, int m, void *hp_Apsi, void *hp_r, void *hp_V,
void *hp_alpha, void *hp_beta, QudaEigParam *eig_param);
/**
* Perform the solve, according to the parameters set in param. It
* is assumed that the gauge field has already been loaded via
* loadGaugeQuda().
* @param h_x Solution spinor field
* @param h_b Source spinor field
* @param param Contains all metadata regarding host and device
* storage and solver parameters
*/
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param);
/**
 * Perform the solve like invertQuda() but for multiple right-hand sides.
 *
 * @param _hp_x Array of solution spinor fields
 * @param _hp_b Array of source spinor fields
 * @param param Contains all metadata regarding host and device
 * storage and solver parameters
 */
void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param);
/**
 * Solve for multiple shifts (e.g., masses).
 * @param _hp_x Array of solution spinor fields
 * @param _hp_b Source spinor field (a single pointer, unlike _hp_x)
 * @param param Contains all metadata regarding host and device
 * storage and solver parameters
 */
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param);
/**
 * Setup the multigrid solver, according to the parameters set in param. It
 * is assumed that the gauge field has already been loaded via
 * loadGaugeQuda().
 * @param param Contains all metadata regarding host and device
 * storage and solver parameters
 * @return Opaque pointer; presumably the multigrid instance expected by
 * updateMultigridQuda() and destroyMultigridQuda() - confirm
 */
void* newMultigridQuda(QudaMultigridParam *param);
/**
* @brief Free resources allocated by the multigrid solver
* @param mg_instance Pointer to instance of multigrid_solver
*/
void destroyMultigridQuda(void *mg_instance);
/**
 * @brief Updates the multigrid preconditioner for the new gauge / clover field
 * @param mg_instance Pointer to instance of multigrid_solver
 * @param param The multigrid parameter struct (same type as passed to newMultigridQuda())
 */
void updateMultigridQuda(void *mg_instance, QudaMultigridParam *param);
/**
 * Apply the Dslash operator (D_{eo} or D_{oe}).
 * @param h_out Result spinor field
 * @param h_in Input spinor field
 * @param inv_param Contains all metadata regarding host and device
 * storage
 * @param parity The destination parity of the field
 */
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param,
QudaParity parity);
/**
 * Apply the Dslash operator (D_{eo} or D_{oe}) for 4D EO preconditioned DWF.
 * @param h_out Result spinor field
 * @param h_in Input spinor field
 * @param inv_param Contains all metadata regarding host and device
 * storage
 * @param parity The destination parity of the field
 * @param test_type Choose a type of dslash operators
 */
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param,
QudaParity parity, int test_type);
/**
 * Apply the Dslash operator (D_{eo} or D_{oe}) for Mobius DWF.
 * @param h_out Result spinor field
 * @param h_in Input spinor field
 * @param inv_param Contains all metadata regarding host and device
 * storage
 * @param parity The destination parity of the field
 * @param test_type Choose a type of dslash operators
 */
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param,
QudaParity parity, int test_type);
/**
 * Apply the clover operator or its inverse.
 * @param h_out Result spinor field
 * @param h_in Input spinor field
 * @param inv_param Contains all metadata regarding host and device
 * storage
 * @param parity The source and destination parity of the field. Note
 * that, unlike dslashQuda(), this is passed as a pointer.
 * @param inverse Whether to apply the inverse of the clover term
 */
void cloverQuda(void *h_out, void *h_in, QudaInvertParam *inv_param,
QudaParity *parity, int inverse);
/**
 * Apply the full Dslash matrix, possibly even/odd preconditioned.
 * @param h_out Result spinor field
 * @param h_in Input spinor field
 * @param inv_param Contains all metadata regarding host and device
 * storage
 */
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param);
/**
* Apply M^{\dag}M, possibly even/odd preconditioned.
* @param h_out Result spinor field
* @param h_in Input spinor field
* @param inv_param Contains all metadata regarding host and device
* storage
*/
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param);
/*
* The following routines are temporary additions used by the HISQ
* link-fattening code.
*/
/* NOTE(review): the parameter is unnamed; presumably an array of lattice
* dimensions - confirm against the implementation. */
void set_dim(int *);
/**
* Temporary helper used by the HISQ link-fattening code.
* NOTE(review): the descriptions below are inferred from the parameter
* names only - confirm against the implementation.
* @param cpuLink Array of host link-field pointers (presumably the source)
* @param cpuGhost Array of host ghost-buffer pointers (presumably the destination)
* @param nFace Number of faces (presumably the ghost-zone depth)
* @param precision Precision of the fields
*/
void pack_ghost(void **cpuLink, void **cpuGhost, int nFace,
QudaPrecision precision);
/**
* Temporary helper used by the HISQ link-fattening code.
* NOTE(review): the descriptions below are inferred from the parameter
* names only - confirm against the implementation.
* @param fatlink Fat-link field (presumably an output)
* @param longlink Long-link field (presumably an output)
* @param ulink Unitarized-link field (presumably an output)
* @param inlink Input link field
* @param path_coeff Array of path coefficients
* @param param Gauge field parameters
*/
void computeKSLinkQuda(void* fatlink, void* longlink, void* ulink, void* inlink,
double *path_coeff, QudaGaugeParam *param);
/**
* Compute the gauge force and update the momentum field
*
* @param mom The momentum field to be updated
* @param sitelink The gauge field from which we compute the force
* @param input_path_buf Path description, indexed as [dim][num_paths][path_length]
* @param path_length One less than the number of links in a loop (e.g., 3 for a staple)
* @param loop_coeff Coefficients of the different loops in the Symanzik action
* @param num_paths How many contributions from path_length different "staples"
* @param max_length The maximum number of non-zero links in any path in the action
* @param dt The integration step size (for MILC this is dt*beta/3)
* @param qudaGaugeParam The parameters of the external fields and the computation settings
* @return Integer status; its meaning is not visible from this header
* (NOTE(review): confirm against the implementation)
*/
int computeGaugeForceQuda(void* mom, void* sitelink, int*** input_path_buf, int* path_length,
double* loop_coeff, int num_paths, int max_length, double dt,
QudaGaugeParam* qudaGaugeParam);
/**
* Evolve the gauge field by step size dt, using the momentum field
* I.e., Evaluate U(t+dt) = exp(dt pi) U(t)
*
* @param gauge The gauge field to be updated
* @param momentum The momentum field
* @param dt The integration step size
* @param conj_mom Whether to conjugate the momentum matrix
* @param exact Whether to use an exact exponential or Taylor expand
* @param param The parameters of the external fields and the computation settings
*/
void updateGaugeFieldQuda(void* gauge, void* momentum, double dt,
int conj_mom, int exact, QudaGaugeParam* param);
/**
* Apply the staggered phase factors to the gauge field. If the
* imaginary chemical potential is non-zero, then the phase factor
* exp(imu/T) will also be applied to the links in the temporal
* direction.
*
* @param gauge_h The gauge field to which the phases are applied
* @param param The parameters of the gauge field
*/
void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param);
/**
* Project the input field onto the SU(3) group. If the target
* tolerance is not met, this routine will give a runtime error.
*
* @param gauge_h The gauge field to be updated
* @param tol The tolerance to which we iterate
* @param param The parameters of the gauge field
*/
void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param);
/**
* Evaluate the momentum contribution to the Hybrid Monte Carlo
* action.
*
* @param momentum The momentum field
* @param param The parameters of the external fields and the computation settings
* @return The momentum action
*/
double momActionQuda(void* momentum, QudaGaugeParam* param);
/**
* Allocate a gauge (matrix) field on the device and optionally download a host gauge field.
*
* @param gauge The host gauge field (optional - if set to 0 then the gauge field is zeroed)
* @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
* @param param The parameters of the external field and the field to be created
* @return Pointer to the gauge field (cast as a void*)
*/
void* createGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param);
/**
* Copy the QUDA gauge (matrix) field on the device to the CPU.
*
* @param outGauge Pointer to the host gauge field (destination)
* @param inGauge Pointer to the device gauge field (QUDA device field, source)
* @param param The parameters of the host and device fields
*/
void saveGaugeFieldQuda(void* outGauge, void* inGauge, QudaGaugeParam* param);
/**
* Reinterpret gauge as a pointer to cudaGaugeField and call its destructor.
*
* @param gauge Gauge field to be freed
*/
void destroyGaugeFieldQuda(void* gauge);
/**
* Compute the clover field and its inverse from the resident gauge field.
*
* @param param The parameters of the clover field to create
*/
void createCloverQuda(QudaInvertParam* param);
/**
* Compute the clover force contributions in each dimension mu given
* the array of solution fields, and compute the resulting momentum
* field.
*
* @param mom Force matrix
* @param dt Integrating step size
* @param x Array of solution vectors
* @param p Array of intermediate vectors
* @param coeff Array of residues for each contribution (multiplied by stepsize)
* @param kappa2 -kappa*kappa parameter
* @param ck -clover_coefficient * kappa / 8
* @param nvector Number of vectors
* @param multiplicity Number of fermions this bilinear represents
* @param gauge Gauge Field
* @param gauge_param Gauge field meta data
* @param inv_param Dirac and solver meta data
*/
void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck,
int nvector, double multiplicity, void *gauge,
QudaGaugeParam *gauge_param, QudaInvertParam *inv_param);
/**
* Compute the naive staggered force. All fields must be in the same precision.
*
* @param mom Momentum field
* @param dt Integrating step size
* @param delta Additional scale factor when updating momentum (mom += delta * [force]_TA)
* @param x Array of single-parity solution vectors (at present only supports resident solutions)
* @param gauge Gauge field (at present only supports resident gauge field)
* @param gauge_param Gauge field meta data
* @param invert_param Dirac and solver meta data
*/
void computeStaggeredForceQuda(void* mom, double dt, double delta, void **x, void *gauge,
QudaGaugeParam *gauge_param, QudaInvertParam *invert_param);
/**
* Compute the fermion force for the HISQ quark action.
* @param momentum The momentum contribution from the quark action.
* @param flops Pointer to a flop counter (NOTE(review): presumably an
* output receiving the operation count - confirm against the implementation)
* @param level2_coeff The coefficients for the second level of smearing in the quark action.
* @param fat7_coeff The coefficients for the first level of smearing (fat7) in the quark action.
* @param w_link Unitarized link variables obtained by applying fat7 smearing and unitarization to the original links.
* @param v_link Fat7 link variables.
* @param u_link SU(3) thin link variables.
* @param quark The input fermion field.
* @param num The number of quark fields
* @param num_naik The number of naik contributions
* @param coeff The coefficient multiplying the fermion fields in the outer product
* @param param The field parameters.
*/
void computeHISQForceQuda(void* momentum,
long long* flops,
const double level2_coeff[6],
const double fat7_coeff[6],
const void* const w_link,
const void* const v_link,
const void* const u_link,
void** quark,
int num,
int num_naik,
double** coeff,
QudaGaugeParam* param);
/**
* Generate a Gaussian distributed gauge field.
* @param seed Seed for the random number generation
*/
void gaussGaugeQuda(long seed);
/**
* Computes the total, spatial and temporal plaquette averages of the loaded gauge configuration.
* @param plaq Array for storing the averages (total, spatial, temporal)
*/
void plaqQuda(double plaq[3]);