coffee
Coronagraph Optimization For Fast Exoplanet Exploration
cudacomp.h
Go to the documentation of this file.
1 
15 #ifndef _CUDACOMP_H
16 #define _CUDACOMP_H
17 
18 
19 #ifdef HAVE_CUDA
20 
21 #include <cuda_runtime_api.h>
22 #include <cuda_runtime.h>
23 #include <cublas_v2.h>
24 #include <device_types.h>
25 #include <pthread.h>
26 
27 #endif
28 
29 #ifdef HAVE_CUDA
30 
31 // data passed to each thread: identifiers, a status output pointer and six timer slots
32 typedef struct
33 {
34  int thread_no; // presumably the index of the owning thread -- TODO confirm in cudacomp.c
35  long numl0; // NOTE(review): meaning is not visible from this header
36  int cindex; // computation index
37  int_fast8_t *status; // where to write status
38 
39  // timers: six timespec pointers; what each one marks is not shown in this header
40  struct timespec *t0;
41  struct timespec *t1;
42  struct timespec *t2;
43  struct timespec *t3;
44  struct timespec *t4;
45  struct timespec *t5;
46 
47 } THDATA;
48 
49 
50 
58 typedef struct
59 {
60  int_fast8_t init;
61  int_fast8_t *refWFSinit;
62  int_fast8_t alloc;
63  long CM_ID;
64  long CM_cnt;
65  long timerID;
66 
67  uint_fast32_t M;
68  uint_fast32_t N;
69 
70 
72  int_fast8_t sem;
73  int_fast8_t gpuinit;
74 
76  sem_t **semptr1;
77  sem_t **semptr2;
78  sem_t **semptr3;
79  sem_t **semptr4;
80  sem_t **semptr5;
82  // computer memory (host)
83  float *cMat;
84  float **cMat_part;
85  float *wfsVec;
86  float **wfsVec_part;
87  float *wfsRef;
88  float **wfsRef_part;
89  float *dmVec;
90  float *dmVecTMP;
91  float **dmVec_part;
92  float **dmRef_part;
93 
94  // GPU memory (device)
95  float **d_cMat;
96  float **d_wfsVec;
97  float **d_dmVec;
98  float **d_wfsRef;
99  float **d_dmRef;
100 
101  // threads
102  THDATA *thdata;
103  int *iret;
104  pthread_t *threadarray;
105  int_fast8_t NBstreams;
106  cudaStream_t *stream;
107  cublasHandle_t *handle;
108 
109  // splitting limits
110  uint_fast32_t *Nsize;
111  uint_fast32_t *Noffset;
112 
113  int *GPUdevice;
114 
115  int_fast8_t orientation;
116 
117  long IDout;
118 
119 
121 #endif
122 
123 
124 
125 
126 
127 
128 /* =============================================================================================== */
129 /* =============================================================================================== */
133 /* =============================================================================================== */
135 /* =============================================================================================== */
136 
137 
138 void __attribute__ ((constructor)) libinit_cudacomp(); // constructor attribute: invoked automatically at load time, before main()
139 
143 int_fast8_t init_cudacomp(); // initialize cudacomp module and command line interface
144 
145 
155 int_fast8_t CUDACOMP_init(); // initialize CUDA and MAGMA
156 
157 int CUDACOMP_printGPUMATMULTCONF(int index);
158 
159 int_fast8_t GPUcomp_test(long NBact, long NBmodes, long WFSsize, long GPUcnt);
160 
162 
163 
164 
165 
166 
167 #ifdef HAVE_CUDA
168 
169 /* =============================================================================================== */
170 /* =============================================================================================== */
176 /* =============================================================================================== */
178 /* =============================================================================================== */
179 
180 
181 
183 void matrixMulCPU(float *cMat, float *wfsVec, float *dmVec, int M, int N); // CPU-based matrix vector multiplication
184 
185 
186 void *compute_function( void *ptr ); // void *(void *) signature, as pthread start routines take -- presumably the worker entry point; confirm in cudacomp.c
187 
188 
189 int GPUloadCmat(int index);
190 
191 
193 int GPU_loop_MultMat_setup(int index, const char *IDcontrM_name, const char *IDwfsim_name, const char *IDoutdmmodes_name, long NBGPUs, int *GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb); // setup memory and process for GPU-based matrix-vector multiply
194 
195 
196 int GPU_loop_MultMat_execute(int index, int_fast8_t *status, int_fast8_t *GPUstatus, float alpha, float beta, int timing, int TimerOffsetIndex);
197 
198 
199 int GPU_loop_MultMat_free(int index);
200 
202 
203 
204 
205 #ifdef HAVE_MAGMA
206 /* =============================================================================================== */
207 /* =============================================================================================== */
211 /* =============================================================================================== */
213 /* =============================================================================================== */
214 
215 
216 long CUDACOMP_MatMatMult_testPseudoInverse(const char *IDmatA_name, const char *IDmatAinv_name, const char *IDmatOut_name); // test pseudo inverse
217 
218 
219 
223 int CUDACOMP_magma_compute_SVDpseudoInverse_SVD(const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name); // compute pseudoinverse using MAGMA-based SVD
224 
225 
226 
246 int CUDACOMP_magma_compute_SVDpseudoInverse(const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode, int PSINV_MODE, double qdwh_s, float qdwh_tol, int testmode); // computes matrix pseudo-inverse (A^T A)^-1 A^T using eigenvector/eigenvalue decomposition of A^T A
247 
248 
249 
250 int GPU_SVD_computeControlMatrix(int device, const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name);
251 
253 
254 #endif
255 
256 
257 
258 /* =============================================================================================== */
259 /* =============================================================================================== */
263 /* =============================================================================================== */
265 /* =============================================================================================== */
266 
267 
268 int CUDACOMP_Coeff2Map_Loop(const char *IDmodes_name, const char *IDcoeff_name, int GPUindex, const char *IDoutmap_name, int offsetmode, const char *IDoffset_name);
269 
270 
271 
296 int CUDACOMP_extractModesLoop(const char *in_stream, const char *intot_stream, const char *IDmodes_name, const char *IDrefin_name, const char *IDrefout_name, const char *IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait); // extract mode coefficients from data stream
297 
298 
299 
300 #endif
301 
302 
303 #endif
int_fast8_t CUDACOMP_init()
Initialize CUDA and MAGMA.
Definition: cudacomp.c:567
struct timespec * t3
Definition: cudacomp.h:43
struct timespec * t4
Definition: cudacomp.h:44
void matrixMulCPU(float *cMat, float *wfsVec, float *dmVec, int M, int N)
CPU-based matrix vector multiplication.
Definition: cudacomp.c:844
int CUDACOMP_Coeff2Map_Loop(const char *IDmodes_name, const char *IDcoeff_name, int GPUindex, const char *IDoutmap_name, int offsetmode, const char *IDoffset_name)
Definition: cudacomp.c:4836
int GPUloadCmat(int index)
Definition: cudacomp.c:1228
int_fast8_t GPUcomp_test(long NBact, long NBmodes, long WFSsize, long GPUcnt)
Definition: cudacomp.c:680
int CUDACOMP_magma_compute_SVDpseudoInverse_SVD(const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name)
Compute pseudoinverse using MAGMA-based SVD.
Definition: cudacomp.c:2448
int CUDACOMP_printGPUMATMULTCONF(int index)
Definition: cudacomp.c:600
long CUDACOMP_MatMatMult_testPseudoInverse(const char *IDmatA_name, const char *IDmatAinv_name, const char *IDmatOut_name)
Test pseudo inverse.
Definition: cudacomp.c:2313
void __attribute__((constructor)) libinit_cudacomp()
Definition: 00CORE.c:87
void * compute_function(void *ptr)
Definition: cudacomp.c:905
struct timespec * t5
Definition: cudacomp.h:45
struct timespec * t1
Definition: cudacomp.h:41
THDATA
Definition: cudacomp.h:32
int GPU_SVD_computeControlMatrix(int device, const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name)
Definition: cudacomp.c:4404
int GPU_loop_MultMat_free(int index)
Definition: cudacomp.c:2178
This structure holds the GPU computation setup for matrix multiplication.
Definition: cudacomp.h:49
struct timespec * t2
Definition: cudacomp.h:42
int GPU_loop_MultMat_execute(int index, int_fast8_t *status, int_fast8_t *GPUstatus, float alpha, float beta, int timing, int TimerOffsetIndex)
Definition: cudacomp.c:1933
int CUDACOMP_extractModesLoop(const char *in_stream, const char *intot_stream, const char *IDmodes_name, const char *IDrefin_name, const char *IDrefout_name, const char *IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait)
Extract mode coefficients from data stream.
Definition: cudacomp.c:5159
int GPU_loop_MultMat_setup(int index, const char *IDcontrM_name, const char *IDwfsim_name, const char *IDoutdmmodes_name, long NBGPUs, int *GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb)
Setup memory and process for GPU-based matrix-vector multiply.
Definition: cudacomp.c:1286
int_fast8_t init_cudacomp()
Initialize cudacomp module and command line interface.
Definition: cudacomp.c:429
struct timespec * t0
Definition: cudacomp.h:40
int CUDACOMP_magma_compute_SVDpseudoInverse(const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode, int PSINV_MODE, double qdwh_s, float qdwh_tol, int testmode)
Computes matrix pseudo-inverse $(A^T A)^{-1} A^T$, using eigenvector/eigenvalue decomposition of $A^T A$...