Doxygen 1.9.1
Toolkit for Adaptive Stochastic Modeling and Non-Intrusive ApproximatioN: Tasmanian v8.2 (development)
TasGrid::TasGpu Namespace Reference

Wrappers around custom CUDA kernels to handle domain transforms and basis evaluations; the kernels are instantiated in tsgCudaKernels.cu. More...

Functions

template<typename T >
void dtrans2can (AccelerationContext const *acc, bool use01, int dims, int num_x, int pad_size, const double *gpu_trans_a, const double *gpu_trans_b, const T *gpu_x_transformed, T *gpu_x_canonical)
 Uses a custom kernel to convert transformed points to canonical points; all arrays live on the CUDA device. More...
 
template<typename T >
void devalpwpoly (AccelerationContext const *acc, int order, TypeOneDRule rule, int num_dimensions, int num_x, int num_basis, const T *gpu_x, const T *gpu_nodes, const T *gpu_support, T *gpu_y)
 Evaluate the basis functions for a local polynomial grid using the DENSE algorithm. More...
 
template<typename T >
void devalpwpoly_sparse (AccelerationContext const *acc, int order, TypeOneDRule rule, int dims, int num_x, const T *gpu_x, const GpuVector< T > &gpu_nodes, const GpuVector< T > &gpu_support, const GpuVector< int > &gpu_hpntr, const GpuVector< int > &gpu_hindx, const GpuVector< int > &gpu_hroots, GpuVector< int > &gpu_spntr, GpuVector< int > &gpu_sindx, GpuVector< T > &gpu_svals)
 Evaluate the basis functions for a local polynomial grid using the SPARSE algorithm. More...
 
template<typename T >
void devalseq (AccelerationContext const *acc, int dims, int num_x, const std::vector< int > &max_levels, const T *gpu_x, const GpuVector< int > &num_nodes, const GpuVector< int > &points, const GpuVector< T > &nodes, const GpuVector< T > &coeffs, T *gpu_result)
 Evaluate the basis for a Sequence grid. More...
 
template<typename T >
void devalfor (AccelerationContext const *acc, int dims, int num_x, const std::vector< int > &max_levels, const T *gpu_x, const GpuVector< int > &num_nodes, const GpuVector< int > &points, T *gpu_wreal, typename GpuVector< T >::value_type *gpu_wimag)
 Evaluate the basis for a Fourier grid. More...
 
template<typename T >
void devalglo (AccelerationContext const *acc, bool is_nested, bool is_clenshawcurtis0, int dims, int num_x, int num_p, int num_basis, T const *gpu_x, GpuVector< T > const &nodes, GpuVector< T > const &coeff, GpuVector< T > const &tensor_weights, GpuVector< int > const &nodes_per_level, GpuVector< int > const &offset_per_level, GpuVector< int > const &map_dimension, GpuVector< int > const &map_level, GpuVector< int > const &active_tensors, GpuVector< int > const &active_num_points, GpuVector< int > const &dim_offsets, GpuVector< int > const &map_tensor, GpuVector< int > const &map_index, GpuVector< int > const &map_reference, T *gpu_result)
 Evaluate the basis for a Global grid. More...
 
void fillDataGPU (AccelerationContext const *acc, double value, long long N, long long stride, double data[])
 Fills the data with the provided real number at the given stride.
 
template<typename T >
void load_n (AccelerationContext const *acc, T const *cpu_data, size_t num_entries, T *gpu_data)
 Similar to copy_n, copies the data from the CPU to the GPU.
 
template<typename T , typename U >
Utils::use_if<!std::is_same< U, T >::value > load_n (AccelerationContext const *acc, U const *cpu_data, size_t num_entries, T *gpu_data)
 Similar to copy_n, copies the data from the CPU to the GPU.
 
template<typename scalar_type >
void solveLSmultiGPU (AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[])
 Least squares solver with data sitting on the gpu device. More...
 
template<typename scalar_type >
void solveLSmultiOOC (AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[])
 Identical to TasGpu::solveLSmultiGPU() but the arrays are on the CPU and the MAGMA out-of-core implementation is used.
 
template<typename scalar_type >
void solveLSmulti (AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[])
 Identical to TasGpu::solveLSmultiGPU() but the data starts on the CPU and gets uploaded to the GPU first.
 
void factorizePLU (AccelerationContext const *acceleration, int n, double A[], int_gpu_lapack ipiv[])
 Factorize $ A = P L U $; the arrays are on the GPU.
 
void solvePLU (AccelerationContext const *acceleration, char trans, int n, double const A[], int_gpu_lapack const ipiv[], double b[])
 Solve A x = b using a PLU factorization.
 
void solvePLU (AccelerationContext const *acceleration, char trans, int n, double const A[], int_gpu_lapack const ipiv[], int nrhs, double B[])
 Solve A x = b using a PLU factorization; B is in row-major format.
 
template<typename scalar_type >
void denseMultiply (AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< scalar_type >::value_type alpha, GpuVector< scalar_type > const &A, GpuVector< scalar_type > const &B, typename GpuVector< scalar_type >::value_type beta, scalar_type C[])
 Wrapper to GPU BLAS that multiplies dense matrices (e.g., cuBlas, MAGMA). More...
 
template<typename scalar_type >
void denseMultiplyMixed (AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< scalar_type >::value_type alpha, GpuVector< scalar_type > const &A, scalar_type const B[], typename GpuVector< scalar_type >::value_type beta, scalar_type C[])
 Identical to TasGpu::denseMultiply() but both B and C are arrays in CPU memory.
 
template<typename scalar_type >
void sparseMultiply (AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< scalar_type >::value_type alpha, const GpuVector< scalar_type > &A, const GpuVector< int > &pntr, const GpuVector< int > &indx, const GpuVector< scalar_type > &vals, scalar_type C[])
 Wrapper to GPU methods that multiplies a sparse and a dense matrix. More...
 
template<typename T >
void sparseMultiplyMixed (AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< T >::value_type alpha, const GpuVector< T > &A, const std::vector< int > &pntr, const std::vector< int > &indx, const std::vector< T > &vals, T C[])
 Identical to TasGpu::sparseMultiply() but the sparse matrix and the result C are in CPU memory.
 

Detailed Description

Wrappers around custom CUDA kernels to handle domain transforms and basis evaluations; the kernels are instantiated in tsgCudaKernels.cu.

Function Documentation

◆ solveLSmultiGPU()

template<typename scalar_type >
void TasGrid::TasGpu::solveLSmultiGPU ( AccelerationContext const *  acceleration,
int  n,
int  m,
scalar_type  A[],
int  nrhs,
scalar_type  B[] 
)

Least squares solver with data sitting on the gpu device.

Solves the least squares problem $ \min_x \| A x - B \|^2_2 $ where A has n rows and m columns (n >= m) and B has n rows and nrhs columns. Both matrices are stored in row-major format on the GPU device associated with the given acceleration. The matrix B will be overwritten with the solution.

◆ denseMultiply()

template<typename scalar_type >
void TasGrid::TasGpu::denseMultiply ( AccelerationContext const *  acceleration,
int  M,
int  N,
int  K,
typename GpuVector< scalar_type >::value_type  alpha,
GpuVector< scalar_type > const &  A,
GpuVector< scalar_type > const &  B,
typename GpuVector< scalar_type >::value_type  beta,
scalar_type  C[] 
)

Wrapper to GPU BLAS that multiplies dense matrices (e.g., cuBlas, MAGMA).

Computes $ C = \alpha A B + \beta C $ where A is M by K, B is K by N, and C is M by N, all stored in column-major format on the GPU device associated with the acceleration. The signature is nearly identical to BLAS sgemm() or dgemm() but there are no transpose variants and the leading dimensions are inferred as if the matrices have no padding.

◆ sparseMultiply()

template<typename scalar_type >
void TasGrid::TasGpu::sparseMultiply ( AccelerationContext const *  acceleration,
int  M,
int  N,
int  K,
typename GpuVector< scalar_type >::value_type  alpha,
const GpuVector< scalar_type > &  A,
const GpuVector< int > &  pntr,
const GpuVector< int > &  indx,
const GpuVector< scalar_type > &  vals,
scalar_type  C[] 
)

Wrapper to GPU methods that multiplies a sparse and a dense matrix.

Computes $ C = \alpha A B $ where A is M by K, B is K by N, and C is M by N; the matrices are in column-major format with B being sparse in column compressed format.