coffee
Coronagraph Optimization For Fast Exoplanet Exploration
cudacomp.c File Reference

CUDA functions wrapper. More...

Functions

int QDWHpartial (int M, int N, int fact, int psinv, double s, float tol, float *d_A, int ldda, float *S, float *d_U, int lddu, float *d_VT, int lddvt, float *d_B, int lddb, float *A, int lda, int *sizeS, int *sizeK, int *it, float *flops, magma_queue_t queue, cublasHandle_t handle)
 
CLI bindings
int_fast8_t CUDACOMP_test_cli ()
 
int_fast8_t CUDACOMP_MatMatMult_testPseudoInverse_cli ()
 
int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli ()
 
int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_cli ()
 
int_fast8_t CUDACOMP_Coeff2Map_Loop_cli ()
 
int_fast8_t CUDACOMP_Coeff2Map_offset_Loop_cli ()
 
int_fast8_t CUDACOMP_extractModesLoop_cli ()
 
Module initialization
void __attribute__ ((constructor))
 
int_fast8_t init_cudacomp ()
 Initialize cudacomp module and command line interface. More...
 
int_fast8_t CUDACOMP_init ()
 Initialize CUDA and MAGMA. More...
 
int CUDACOMP_printGPUMATMULTCONF (int index)
 
int_fast8_t GPUcomp_test (long NBact, long NBmodes, long WFSsize, long GPUcnt)
 
void matrixMulCPU (float *cMat, float *wfsVec, float *dmVec, int M, int N)
 CPU-based matrix vector multiplication. More...
 
void * compute_function (void *ptr)
 
int GPUloadCmat (int index)
 
int GPU_loop_MultMat_setup (int index, const char *IDcontrM_name, const char *IDwfsim_name, const char *IDoutdmmodes_name, long NBGPUs, int *GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb)
 Setup memory and process for GPU-based matrix-vector multiply. More...
 
int GPU_loop_MultMat_execute (int index, int_fast8_t *status, int_fast8_t *GPUstatus, float alpha, float beta, int timing, int TimerOffsetIndex)
 
int GPU_loop_MultMat_free (int index)
 
long CUDACOMP_MatMatMult_testPseudoInverse (const char *IDmatA_name, const char *IDmatAinv_name, const char *IDmatOut_name)
 Test pseudo inverse. More...
 
int CUDACOMP_magma_compute_SVDpseudoInverse_SVD (const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name)
 Compute pseudoinverse using MAGMA-based SVD. More...
 
int CUDACOMP_magma_compute_SVDpseudoInverse (const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode, int PSINV_MODE, double qdwh_s, float qdwh_tol, int testmode)
 Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A. More...
 
int GPU_SVD_computeControlMatrix (int device, const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name)
 
int CUDACOMP_Coeff2Map_Loop (const char *IDmodes_name, const char *IDcoeff_name, int GPUindex, const char *IDoutmap_name, int offsetmode, const char *IDoffset_name)
 
int CUDACOMP_extractModesLoop (const char *in_stream, const char *intot_stream, const char *IDmodes_name, const char *IDrefin_name, const char *IDrefout_name, const char *IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait)
 extract mode coefficients from data stream More...
 

Variables

int FORCESEMINIT = 1
 
pid_t CLIPID
 important directories and info More...
 
GPUMATMULTCONF gpumatmultconf [20]
 

Detailed Description

CUDA functions wrapper.

Also uses MAGMA library

Author
O. Guyon
Date
3 Jul 2017
Bug:
MAGMA execution can hang on dsyevd routine. This seems to be a MAGMA issue.

Function Documentation

void __attribute__ ( (constructor)  )
void* compute_function ( void *  ptr)
int CUDACOMP_Coeff2Map_Loop ( const char *  IDmodes_name,
const char *  IDcoeff_name,
int  GPUindex,
const char *  IDoutmap_name,
int  offsetmode,
const char *  IDoffset_name 
)
int_fast8_t CUDACOMP_Coeff2Map_Loop_cli ( )
int_fast8_t CUDACOMP_Coeff2Map_offset_Loop_cli ( )
int CUDACOMP_extractModesLoop ( const char *  in_stream,
const char *  intot_stream,
const char *  IDmodes_name,
const char *  IDrefin_name,
const char *  IDrefout_name,
const char *  IDmodes_val_name,
int  GPUindex,
int  PROCESS,
int  TRACEMODE,
int  MODENORM,
int  insem,
int  axmode,
long  twait 
)

extract mode coefficients from data stream

int_fast8_t CUDACOMP_extractModesLoop_cli ( )
int_fast8_t CUDACOMP_init ( )

Initialize CUDA and MAGMA.

Finds CUDA devices Initializes CUDA and MAGMA libraries

Returns
number of CUDA devices found
int CUDACOMP_magma_compute_SVDpseudoInverse ( const char *  ID_Rmatrix_name,
const char *  ID_Cmatrix_name,
double  SVDeps,
long  MaxNBmodes,
const char *  ID_VTmatrix_name,
int  LOOPmode,
int  PSINV_MODE,
double  qdwh_s,
float  qdwh_tol,
int  testmode 
)

Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A.

Computes pseuso inverse of a matrix.
Column-major representation used to match magma and lapack.
When viewed as an image, matrix leading dimension is size[0] = horizontal axis. When viewed in an image viewer, the first column is on the bottom side with the first element in bottom left corner, so the matrix appears rotated counter-clockwise by 90deg from its conventional representation where first column is on the left side.
Returns transpose of pseudoinverse.

Matrix representation details

Using column-major indexing
When viewed as a FITS file, the first matrix column (= vector) appears as the bottom line of the FITS image.
First matrix element is bottom left corner, second element is immediately to the right of it.

Noting elements as a[row,column] = a[i,j], elements are accessed as in memory as: a[ j * M + i ]

FITS file representation (ds9 view) starts from bottom left corner.

    a[000,N-1] -> a[001,N-1] -> ... -> a[M-1,N-1]
    ............................................. ^
    a[000,001] -> a[001,001] -> ... -> a[M-1,001] ^
    a[000,000] -> a[001,000] -> ... -> a[M-1,000] ^     : this is the first matrix row

Note that a tall input matrix (M>N) will appear short if viewed as an image. To view the FITS file in the conventional matrix view, rotate by 90 deg clockwise.

Application Notes

Use LOOPmode = 1 for computing the same size SVD, same input and output location

Use case: Response matrix to compute control matrix

When using function to invert AO response matrix with AOloopControl module, input is 2D or 3D image: M: number of sensors (AO control) = size[0] (2D) = size[0]*size[1] (3D) N: number of actuators (AO control) = size[1] (2D) = size[2] (3D)

We assume M>N

Use case: Predictive control

When using function to compute pseudo-inverse of data matrix (predictive control), input matrix is a 2D image which is the Transpose of the data matrix. M: number of measurements samples = size[0] (2D) N: dimension of each measurement = size[1] (2D)

We assume M>N

Algorithm details and main computation steps

Notations: AT is transpose of A A+ is pseudo inverse of A

Computes pseudo-inverse : A+ = (AT A)^-1 AT Inverse of AT A is computed by SVD

SVD: A = U S V^T U are eigenvectors of A A^T V are eigenvectors of A^T A, computed at step 4 below

Linear algebra reminder: equivalence between (AT A)^-1 AT and V S^-1 UT

Definition of pseudoinverse: A+ = (AT A)^-1 AT singular value decomposition of A = U S VT A+ = ( V S UT U S VT )^-1 V S UT Since U is unitary, UT U = Id -> A+ = ( V S^2 VT )^-1 V S UT A+ = VT^-1 S^-2 V^-1 V S UT A+ = V S^-1 UT

Main steps (non-QDWH):

STEP 1 : Fill input data into magmaf_h_A on host

STEP 2 : Copy input data to GPU -> magmaf_d_A (MxN matrix on device)

STEP 3 : Compute trans(A) x A : magmaf_d_A x magmaf_d_A -> magmaf_d_AtA (NxN matrix on device)

STEP 4 : Compute eigenvalues and eigenvectors of A^T A -> magmaf_d_AtA (NxN matrix on device) Calls magma_ssyevd_gpu : Compute the eigenvalues and optionally eigenvectors of a symmetric real matrix in single precision, GPU interface, big matrix. This function computes in single precision all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A defined on the device. The first parameter can take the values MagmaVec,'V' or MagmaNoVec,'N' and answers the question whether the eigenvectors are desired. If the eigenvectors are desired, it uses a divide and conquer algorithm. The symmetric matrix A can be stored in lower (MagmaLower,'L') or upper (MagmaUpper,'U') mode. If the eigenvectors are desired, then on exit A contains orthonormal eigenvectors. The eigenvalues are stored in an array w

STEP 5 : Set eigenvalue limit

STEP 6 : Write eigenvectors to V^T matrix

STEP 7 : Write eigenvectors/eigenvalue to magma_h_VT1 if eigenvalue > limit Copy to magma_d_VT1

STEP 8 : Compute M2 = VT1 VT. M2 is (AT A)^-1

STEP 9 : Compute Ainv = M2 A. This is the pseudo inverse

Note
SVDeps^2 is applied as a limit to the eigenvectors of AT A, which are equal to the squares of the singular values of A, so this is equivalent to applying SVDeps as a limit on the singular values of A
When used to compute AO control matrix, N=number of actuators/modes, M=number of WFS elements
EIGENVALUES are good to about 1e-6 of peak eigenvalue in single precision, much better with double precision
2.5x faster in single precision
If provided with an additional data matrix named "", an additional Matrix Matrix product between Ainv and the provided matrix will be performed. This feature is used for predictive control and sensor fusion to create a control matrix.

TEST MODE OUTPOUT

non-QDWH mode:

test_mA.fits content of magmaf_h_A test_mAtA.fits content of transpose(A) x A = magmaf_d_AtA (output of STEP 3) test_eigenv.dat list of eigenvalues test_SVDmodes.log number of singular values kept test_mM2.fits matrix M2 (output of STEP 8) test_mVT.fits matrix transpose(V) = eigenvectors (output of step 6) test_mAinv.fits transpose of pseudoinverse test_AinvA.fits product of Ainv with A, should be close to identity matrix size NxN

QDWH mode:

test_mA.QDWH.fits content of magmaf_h_A test_Aorig.QDWH.txt content of magmaf_h_A prior to calling psinv function test_sv.QDWH.dat singular values after call to psinv function test_SVDmodes.QDWH.log number of singular values kept (note : independent form pseudo-inverse computation) test_mAinv.QDWH.fits transpose of pseudoinverse test_AinvA.QDWH.fits product of Ainv with A, should be close to identity matrix size NxN

Timing tests

< Timers

< Times in sec

< 1 if single precision, 0 if double precision

MATRIX REPRESENTATION CONVENTION

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M : row index, coefficient of a vector j = 0 ... N : column index, vector index M is the matrix leading dimension = lda M = number of rows N = number of columns (assuming here that vector = column of the matrix)

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGMA if needed

memory is only allocated on first pass

memory is only allocated on first pass

w1 values are the EIGENVALUES of AT A Note: w1 values are the SQUARE of the singular values of A

if pseudo-inverse is only computed once, these arrays can be freed

int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_cli ( )
int CUDACOMP_magma_compute_SVDpseudoInverse_SVD ( const char *  ID_Rmatrix_name,
const char *  ID_Cmatrix_name,
double  SVDeps,
long  MaxNBmodes,
const char *  ID_VTmatrix_name 
)

Compute pseudoinverse using MAGMA-based SVD.

int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli ( )
long CUDACOMP_MatMatMult_testPseudoInverse ( const char *  IDmatA_name,
const char *  IDmatAinv_name,
const char *  IDmatOut_name 
)

Test pseudo inverse.

IDmatA is an image loaded as a M x N matrix IDmatAinv is an image loaded as a M x M matrix, representing the transpose of the pseudo inverse of IDmatA

The input matrices can be 2D or a 3D images

If 2D image : IDmatA M = xsize IDmatA N = ysize

If 3D image : IDmatA M = xsize*ysize IDmatA N = ysize

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGAM if needed

load matA in h_A -> d_A

load matAinv in h_Ainv -> d_Ainv

int_fast8_t CUDACOMP_MatMatMult_testPseudoInverse_cli ( )
int CUDACOMP_printGPUMATMULTCONF ( int  index)

synchronization

one semaphore per thread

int_fast8_t CUDACOMP_test_cli ( )
int GPU_loop_MultMat_execute ( int  index,
int_fast8_t *  status,
int_fast8_t *  GPUstatus,
float  alpha,
float  beta,
int  timing,
int  TimerOffsetIndex 
)
int GPU_loop_MultMat_free ( int  index)
int GPU_loop_MultMat_setup ( int  index,
const char *  IDcontrM_name,
const char *  IDwfsim_name,
const char *  IDoutdmmodes_name,
long  NBGPUs,
int *  GPUdevice,
int  orientation,
int  USEsem,
int  initWFSref,
long  loopnb 
)

Setup memory and process for GPU-based matrix-vector multiply.

setup matrix multiplication using multiple GPUs

Load Control Matrix

Load Input vectors

int GPU_SVD_computeControlMatrix ( int  device,
const char *  ID_Rmatrix_name,
const char *  ID_Cmatrix_name,
double  SVDeps,
const char *  ID_VTmatrix_name 
)
int_fast8_t GPUcomp_test ( long  NBact,
long  NBmodes,
long  WFSsize,
long  GPUcnt 
)
int GPUloadCmat ( int  index)
int_fast8_t init_cudacomp ( )

Initialize cudacomp module and command line interface.

void matrixMulCPU ( float *  cMat,
float *  wfsVec,
float *  dmVec,
int  M,
int  N 
)

CPU-based matrix vector multiplication.

int QDWHpartial ( int  M,
int  N,
int  fact,
int  psinv,
double  s,
float  tol,
float *  d_A,
int  ldda,
float *  S,
float *  d_U,
int  lddu,
float *  d_VT,
int  lddvt,
float *  d_B,
int  lddb,
float *  A,
int  lda,
int *  sizeS,
int *  sizeK,
int *  it,
float *  flops,
magma_queue_t  queue,
cublasHandle_t  handle 
)

Variable Documentation

pid_t CLIPID

important directories and info

int FORCESEMINIT = 1
GPUMATMULTCONF gpumatmultconf[20]