coffee
Coronagraph Optimization For Fast Exoplanet Exploration
|
CUDA functions wrapper. More...
Functions | |
int | QDWHpartial (int M, int N, int fact, int psinv, double s, float tol, float *d_A, int ldda, float *S, float *d_U, int lddu, float *d_VT, int lddvt, float *d_B, int lddb, float *A, int lda, int *sizeS, int *sizeK, int *it, float *flops, magma_queue_t queue, cublasHandle_t handle) |
CLI bindings | |
int_fast8_t | CUDACOMP_test_cli () |
int_fast8_t | CUDACOMP_MatMatMult_testPseudoInverse_cli () |
int_fast8_t | CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli () |
int_fast8_t | CUDACOMP_magma_compute_SVDpseudoInverse_cli () |
int_fast8_t | CUDACOMP_Coeff2Map_Loop_cli () |
int_fast8_t | CUDACOMP_Coeff2Map_offset_Loop_cli () |
int_fast8_t | CUDACOMP_extractModesLoop_cli () |
Module initialization | |
void | __attribute__ ((constructor)) |
int_fast8_t | init_cudacomp () |
Initialize cudacomp module and command line interface. More... | |
int_fast8_t | CUDACOMP_init () |
Initialize CUDA and MAGMA. More... | |
int | CUDACOMP_printGPUMATMULTCONF (int index) |
int_fast8_t | GPUcomp_test (long NBact, long NBmodes, long WFSsize, long GPUcnt) |
void | matrixMulCPU (float *cMat, float *wfsVec, float *dmVec, int M, int N) |
CPU-based matrix vector multiplication. More... | |
void * | compute_function (void *ptr) |
int | GPUloadCmat (int index) |
int | GPU_loop_MultMat_setup (int index, const char *IDcontrM_name, const char *IDwfsim_name, const char *IDoutdmmodes_name, long NBGPUs, int *GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb) |
Setup memory and process for GPU-based matrix-vector multiply. More... | |
int | GPU_loop_MultMat_execute (int index, int_fast8_t *status, int_fast8_t *GPUstatus, float alpha, float beta, int timing, int TimerOffsetIndex) |
int | GPU_loop_MultMat_free (int index) |
long | CUDACOMP_MatMatMult_testPseudoInverse (const char *IDmatA_name, const char *IDmatAinv_name, const char *IDmatOut_name) |
Test pseudo inverse. More... | |
int | CUDACOMP_magma_compute_SVDpseudoInverse_SVD (const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name) |
Compute pseudoinverse using MAGMA-based SVD. More... | |
int | CUDACOMP_magma_compute_SVDpseudoInverse (const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode, int PSINV_MODE, double qdwh_s, float qdwh_tol, int testmode) |
Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A. More... | |
int | GPU_SVD_computeControlMatrix (int device, const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name) |
int | CUDACOMP_Coeff2Map_Loop (const char *IDmodes_name, const char *IDcoeff_name, int GPUindex, const char *IDoutmap_name, int offsetmode, const char *IDoffset_name) |
int | CUDACOMP_extractModesLoop (const char *in_stream, const char *intot_stream, const char *IDmodes_name, const char *IDrefin_name, const char *IDrefout_name, const char *IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait) |
extract mode coefficients from data stream More... | |
Variables | |
int | FORCESEMINIT = 1 |
pid_t | CLIPID |
important directories and info More... | |
GPUMATMULTCONF | gpumatmultconf [20] |
CUDA functions wrapper.
Also uses MAGMA library
void __attribute__ | ( | (constructor) | ) |
void* compute_function | ( | void * | ptr | ) |
int CUDACOMP_Coeff2Map_Loop | ( | const char * | IDmodes_name, |
const char * | IDcoeff_name, | ||
int | GPUindex, | ||
const char * | IDoutmap_name, | ||
int | offsetmode, | ||
const char * | IDoffset_name | ||
) |
int_fast8_t CUDACOMP_Coeff2Map_Loop_cli | ( | ) |
int_fast8_t CUDACOMP_Coeff2Map_offset_Loop_cli | ( | ) |
int CUDACOMP_extractModesLoop | ( | const char * | in_stream, |
const char * | intot_stream, | ||
const char * | IDmodes_name, | ||
const char * | IDrefin_name, | ||
const char * | IDrefout_name, | ||
const char * | IDmodes_val_name, | ||
int | GPUindex, | ||
int | PROCESS, | ||
int | TRACEMODE, | ||
int | MODENORM, | ||
int | insem, | ||
int | axmode, | ||
long | twait | ||
) |
extract mode coefficients from data stream
int_fast8_t CUDACOMP_extractModesLoop_cli | ( | ) |
int_fast8_t CUDACOMP_init | ( | ) |
Initialize CUDA and MAGMA.
Finds CUDA devices Initializes CUDA and MAGMA libraries
int CUDACOMP_magma_compute_SVDpseudoInverse | ( | const char * | ID_Rmatrix_name, |
const char * | ID_Cmatrix_name, | ||
double | SVDeps, | ||
long | MaxNBmodes, | ||
const char * | ID_VTmatrix_name, | ||
int | LOOPmode, | ||
int | PSINV_MODE, | ||
double | qdwh_s, | ||
float | qdwh_tol, | ||
int | testmode | ||
) |
Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A.
Computes pseuso inverse of a matrix.
Column-major representation used to match magma and lapack.
When viewed as an image, matrix leading dimension is size[0] = horizontal axis. When viewed in an image viewer, the first column is on the bottom side with the first element in bottom left corner, so the matrix appears rotated counter-clockwise by 90deg from its conventional representation where first column is on the left side.
Returns transpose of pseudoinverse.
Using column-major indexing
When viewed as a FITS file, the first matrix column (= vector) appears as the bottom line of the FITS image.
First matrix element is bottom left corner, second element is immediately to the right of it.
Noting elements as a[row,column] = a[i,j], elements are accessed as in memory as: a[ j * M + i ]
FITS file representation (ds9 view) starts from bottom left corner.
a[000,N-1] -> a[001,N-1] -> ... -> a[M-1,N-1] ............................................. ^ a[000,001] -> a[001,001] -> ... -> a[M-1,001] ^ a[000,000] -> a[001,000] -> ... -> a[M-1,000] ^ : this is the first matrix row
Note that a tall input matrix (M>N) will appear short if viewed as an image. To view the FITS file in the conventional matrix view, rotate by 90 deg clockwise.
Use LOOPmode = 1 for computing the same size SVD, same input and output location
When using function to invert AO response matrix with AOloopControl module, input is 2D or 3D image: M: number of sensors (AO control) = size[0] (2D) = size[0]*size[1] (3D) N: number of actuators (AO control) = size[1] (2D) = size[2] (3D)
We assume M>N
When using function to compute pseudo-inverse of data matrix (predictive control), input matrix is a 2D image which is the Transpose of the data matrix. M: number of measurements samples = size[0] (2D) N: dimension of each measurement = size[1] (2D)
We assume M>N
Notations: AT is transpose of A A+ is pseudo inverse of A
Computes pseudo-inverse : A+ = (AT A)^-1 AT Inverse of AT A is computed by SVD
SVD: A = U S V^T U are eigenvectors of A A^T V are eigenvectors of A^T A, computed at step 4 below
Linear algebra reminder: equivalence between (AT A)^-1 AT and V S^-1 UT
Definition of pseudoinverse: A+ = (AT A)^-1 AT singular value decomposition of A = U S VT A+ = ( V S UT U S VT )^-1 V S UT Since U is unitary, UT U = Id -> A+ = ( V S^2 VT )^-1 V S UT A+ = VT^-1 S^-2 V^-1 V S UT A+ = V S^-1 UT
Main steps (non-QDWH):
STEP 1 : Fill input data into magmaf_h_A on host
STEP 2 : Copy input data to GPU -> magmaf_d_A (MxN matrix on device)
STEP 3 : Compute trans(A) x A : magmaf_d_A x magmaf_d_A -> magmaf_d_AtA (NxN matrix on device)
STEP 4 : Compute eigenvalues and eigenvectors of A^T A -> magmaf_d_AtA (NxN matrix on device) Calls magma_ssyevd_gpu : Compute the eigenvalues and optionally eigenvectors of a symmetric real matrix in single precision, GPU interface, big matrix. This function computes in single precision all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A defined on the device. The first parameter can take the values MagmaVec,'V' or MagmaNoVec,'N' and answers the question whether the eigenvectors are desired. If the eigenvectors are desired, it uses a divide and conquer algorithm. The symmetric matrix A can be stored in lower (MagmaLower,'L') or upper (MagmaUpper,'U') mode. If the eigenvectors are desired, then on exit A contains orthonormal eigenvectors. The eigenvalues are stored in an array w
STEP 5 : Set eigenvalue limit
STEP 6 : Write eigenvectors to V^T matrix
STEP 7 : Write eigenvectors/eigenvalue to magma_h_VT1 if eigenvalue > limit Copy to magma_d_VT1
STEP 8 : Compute M2 = VT1 VT. M2 is (AT A)^-1
STEP 9 : Compute Ainv = M2 A. This is the pseudo inverse
TEST MODE OUTPOUT
non-QDWH mode:
test_mA.fits content of magmaf_h_A test_mAtA.fits content of transpose(A) x A = magmaf_d_AtA (output of STEP 3) test_eigenv.dat list of eigenvalues test_SVDmodes.log number of singular values kept test_mM2.fits matrix M2 (output of STEP 8) test_mVT.fits matrix transpose(V) = eigenvectors (output of step 6) test_mAinv.fits transpose of pseudoinverse test_AinvA.fits product of Ainv with A, should be close to identity matrix size NxN
QDWH mode:
test_mA.QDWH.fits content of magmaf_h_A test_Aorig.QDWH.txt content of magmaf_h_A prior to calling psinv function test_sv.QDWH.dat singular values after call to psinv function test_SVDmodes.QDWH.log number of singular values kept (note : independent form pseudo-inverse computation) test_mAinv.QDWH.fits transpose of pseudoinverse test_AinvA.QDWH.fits product of Ainv with A, should be close to identity matrix size NxN
Timing tests
< Timers
< Times in sec
< 1 if single precision, 0 if double precision
MATRIX REPRESENTATION CONVENTION
MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M : row index, coefficient of a vector j = 0 ... N : column index, vector index M is the matrix leading dimension = lda M = number of rows N = number of columns (assuming here that vector = column of the matrix)
each column (N=cst) of A is a z=cst slice of image Rmatrix
each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)
Initialize MAGMA if needed
memory is only allocated on first pass
memory is only allocated on first pass
w1 values are the EIGENVALUES of AT A Note: w1 values are the SQUARE of the singular values of A
if pseudo-inverse is only computed once, these arrays can be freed
int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_cli | ( | ) |
int CUDACOMP_magma_compute_SVDpseudoInverse_SVD | ( | const char * | ID_Rmatrix_name, |
const char * | ID_Cmatrix_name, | ||
double | SVDeps, | ||
long | MaxNBmodes, | ||
const char * | ID_VTmatrix_name | ||
) |
Compute pseudoinverse using MAGMA-based SVD.
int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli | ( | ) |
long CUDACOMP_MatMatMult_testPseudoInverse | ( | const char * | IDmatA_name, |
const char * | IDmatAinv_name, | ||
const char * | IDmatOut_name | ||
) |
Test pseudo inverse.
IDmatA is an image loaded as a M x N matrix IDmatAinv is an image loaded as a M x M matrix, representing the transpose of the pseudo inverse of IDmatA
The input matrices can be 2D or a 3D images
If 2D image : IDmatA M = xsize IDmatA N = ysize
If 3D image : IDmatA M = xsize*ysize IDmatA N = ysize
MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N
each column (N=cst) of A is a z=cst slice of image Rmatrix
each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)
Initialize MAGAM if needed
load matA in h_A -> d_A
load matAinv in h_Ainv -> d_Ainv
int_fast8_t CUDACOMP_MatMatMult_testPseudoInverse_cli | ( | ) |
int CUDACOMP_printGPUMATMULTCONF | ( | int | index | ) |
synchronization
one semaphore per thread
int_fast8_t CUDACOMP_test_cli | ( | ) |
int GPU_loop_MultMat_execute | ( | int | index, |
int_fast8_t * | status, | ||
int_fast8_t * | GPUstatus, | ||
float | alpha, | ||
float | beta, | ||
int | timing, | ||
int | TimerOffsetIndex | ||
) |
int GPU_loop_MultMat_free | ( | int | index | ) |
int GPU_loop_MultMat_setup | ( | int | index, |
const char * | IDcontrM_name, | ||
const char * | IDwfsim_name, | ||
const char * | IDoutdmmodes_name, | ||
long | NBGPUs, | ||
int * | GPUdevice, | ||
int | orientation, | ||
int | USEsem, | ||
int | initWFSref, | ||
long | loopnb | ||
) |
Setup memory and process for GPU-based matrix-vector multiply.
setup matrix multiplication using multiple GPUs
Load Control Matrix
Load Input vectors
int GPU_SVD_computeControlMatrix | ( | int | device, |
const char * | ID_Rmatrix_name, | ||
const char * | ID_Cmatrix_name, | ||
double | SVDeps, | ||
const char * | ID_VTmatrix_name | ||
) |
int_fast8_t GPUcomp_test | ( | long | NBact, |
long | NBmodes, | ||
long | WFSsize, | ||
long | GPUcnt | ||
) |
int GPUloadCmat | ( | int | index | ) |
int_fast8_t init_cudacomp | ( | ) |
Initialize cudacomp module and command line interface.
void matrixMulCPU | ( | float * | cMat, |
float * | wfsVec, | ||
float * | dmVec, | ||
int | M, | ||
int | N | ||
) |
CPU-based matrix vector multiplication.
int QDWHpartial | ( | int | M, |
int | N, | ||
int | fact, | ||
int | psinv, | ||
double | s, | ||
float | tol, | ||
float * | d_A, | ||
int | ldda, | ||
float * | S, | ||
float * | d_U, | ||
int | lddu, | ||
float * | d_VT, | ||
int | lddvt, | ||
float * | d_B, | ||
int | lddb, | ||
float * | A, | ||
int | lda, | ||
int * | sizeS, | ||
int * | sizeK, | ||
int * | it, | ||
float * | flops, | ||
magma_queue_t | queue, | ||
cublasHandle_t | handle | ||
) |
pid_t CLIPID |
important directories and info
int FORCESEMINIT = 1 |
GPUMATMULTCONF gpumatmultconf[20] |