Doxygen 1.9.1
Toolkit for Adaptive Stochastic Modeling and Non-Intrusive ApproximatioN: Tasmanian v8.2 (development)
tsgGpuWrappers.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017, Miroslav Stoyanov
3  *
4  * This file is part of
5  * Toolkit for Adaptive Stochastic Modeling And Non-Intrusive ApproximatioN: TASMANIAN
6  *
7  * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
12  * and the following disclaimer in the documentation and/or other materials provided with the distribution.
13  *
14  * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse
15  * or promote products derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  * UT-BATTELLE, LLC AND THE UNITED STATES GOVERNMENT MAKE NO REPRESENTATIONS AND DISCLAIM ALL WARRANTIES, BOTH EXPRESSED AND IMPLIED.
25  * THERE ARE NO EXPRESS OR IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY PATENT,
26  * COPYRIGHT, TRADEMARK, OR OTHER PROPRIETARY RIGHTS, OR THAT THE SOFTWARE WILL ACCOMPLISH THE INTENDED RESULTS OR THAT THE SOFTWARE OR ITS USE WILL NOT RESULT IN INJURY OR DAMAGE.
27  * THE USER ASSUMES RESPONSIBILITY FOR ALL LIABILITIES, PENALTIES, FINES, CLAIMS, CAUSES OF ACTION, AND COSTS AND EXPENSES, CAUSED BY, RESULTING FROM OR ARISING OUT OF,
28  * IN WHOLE OR IN PART THE USE, STORAGE OR DISPOSAL OF THE SOFTWARE.
29  */
30 
31 #ifndef __TASMANIAN_GPU_WRAPPERS_HPP
32 #define __TASMANIAN_GPU_WRAPPERS_HPP
33 
47 
48 namespace TasGrid{
49 namespace TasGpu{
50 
59 template<typename scalar_type>
60 void solveLSmultiGPU(AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[]);
61 
65 template<typename scalar_type>
66 void solveLSmultiOOC(AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[]);
67 
69 template<typename scalar_type>
70 void solveLSmulti(AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[]){
71  GpuVector<scalar_type> gpuA(acceleration, m, n, A);
72  GpuVector<scalar_type> gpuB(acceleration, nrhs, n, B);
73  solveLSmultiGPU(acceleration, n, m, gpuA.data(), nrhs, gpuB.data());
74  gpuB.unload(acceleration, B);
75 }
76 
78 void factorizePLU(AccelerationContext const *acceleration, int n, double A[], int_gpu_lapack ipiv[]);
80 void solvePLU(AccelerationContext const *acceleration, char trans, int n, double const A[], int_gpu_lapack const ipiv[], double b[]);
82 void solvePLU(AccelerationContext const *acceleration, char trans, int n, double const A[], int_gpu_lapack const ipiv[], int nrhs, double B[]);
83 
92 template<typename scalar_type>
93 void denseMultiply(AccelerationContext const *acceleration, int M, int N, int K,
95  GpuVector<scalar_type> const &B, typename GpuVector<scalar_type>::value_type beta, scalar_type C[]);
96 
98 template<typename scalar_type>
99 void denseMultiplyMixed(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector<scalar_type>::value_type alpha,
100  GpuVector<scalar_type> const &A, scalar_type const B[],
101  typename GpuVector<scalar_type>::value_type beta, scalar_type C[]){
102  GpuVector<scalar_type> gpuB(acceleration, K, N, B), gpuC(acceleration, M, N);
103  denseMultiply(acceleration, M, N, K, alpha, A, gpuB, beta, gpuC.data());
104  gpuC.unload(acceleration, C);
105 }
106 
113 template<typename scalar_type>
114 void sparseMultiply(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector<scalar_type>::value_type alpha,
115  const GpuVector<scalar_type> &A, const GpuVector<int> &pntr, const GpuVector<int> &indx,
116  const GpuVector<scalar_type> &vals, scalar_type C[]);
117 
119 template<typename T>
120 void sparseMultiplyMixed(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector<T>::value_type alpha, const GpuVector<T> &A,
121  const std::vector<int> &pntr, const std::vector<int> &indx, const std::vector<T> &vals, T C[]){
122  GpuVector<int> gpu_pntr(acceleration, pntr), gpu_indx(acceleration, indx);
123  GpuVector<T> gpu_vals(acceleration, vals), gpu_c(acceleration, M, N);
124  sparseMultiply(acceleration, M, N, K, alpha, A, gpu_pntr, gpu_indx, gpu_vals, gpu_c.data());
125  gpu_c.unload(acceleration, C);
126 }
127 
128 }
129 }
130 
131 #endif
Template class that wraps around a single GPU array, providing functionality that mimics std::vector.
Definition: tsgAcceleratedDataStructures.hpp:95
void unload(AccelerationContext const *acc, std::vector< T > &cpu_data) const
Copy the data from the GPU array to cpu_data, the cpu_data will be resized and overwritten.
Definition: tsgAcceleratedDataStructures.hpp:196
T * data()
Get a reference to the GPU array, which can be used as input to GPU libraries and kernels.
Definition: tsgAcceleratedDataStructures.hpp:145
T value_type
The data-type of the vector entries.
Definition: tsgAcceleratedDataStructures.hpp:220
void solveLSmultiOOC(AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[])
Identical to TasGpu::solveLSmultiGPU() but the arrays are on the CPU and the MAGMA out-of-core implem...
void sparseMultiplyMixed(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< T >::value_type alpha, const GpuVector< T > &A, const std::vector< int > &pntr, const std::vector< int > &indx, const std::vector< T > &vals, T C[])
Identical to TasGpu::sparseMultiply() but the sparse matrix and the result C are in CPU memory.
Definition: tsgGpuWrappers.hpp:120
void solveLSmultiGPU(AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[])
Least squares solver with data sitting on the gpu device.
void solvePLU(AccelerationContext const *acceleration, char trans, int n, double const A[], int_gpu_lapack const ipiv[], double b[])
Solve A x = b using a PLU factorization.
void sparseMultiply(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< scalar_type >::value_type alpha, const GpuVector< scalar_type > &A, const GpuVector< int > &pntr, const GpuVector< int > &indx, const GpuVector< scalar_type > &vals, scalar_type C[])
Wrapper to GPU methods that multiplies a sparse and a dense matrix.
void factorizePLU(AccelerationContext const *acceleration, int n, double A[], int_gpu_lapack ipiv[])
Factorize $A = P L U$, arrays are on the GPU.
void denseMultiplyMixed(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< scalar_type >::value_type alpha, GpuVector< scalar_type > const &A, scalar_type const B[], typename GpuVector< scalar_type >::value_type beta, scalar_type C[])
Identical to TasGpu::denseMultiply() but both B and C are arrays in CPU memory.
Definition: tsgGpuWrappers.hpp:99
void denseMultiply(AccelerationContext const *acceleration, int M, int N, int K, typename GpuVector< scalar_type >::value_type alpha, GpuVector< scalar_type > const &A, GpuVector< scalar_type > const &B, typename GpuVector< scalar_type >::value_type beta, scalar_type C[])
Wrapper to GPU BLAS that multiplies dense matrices (e.g., cuBlas, MAGMA).
void solveLSmulti(AccelerationContext const *acceleration, int n, int m, scalar_type A[], int nrhs, scalar_type B[])
Identical to TasGpu::solveLSmultiGPU() but the data starts with the CPU and gets uploaded to the GPU ...
Definition: tsgGpuWrappers.hpp:70
Encapsulates the Tasmanian Sparse Grid module.
Definition: TasmanianSparseGrid.hpp:68
int int_gpu_lapack
Defines the integer used by the LAPACK methods, usually int but DPC++ uses int64_t.
Definition: tsgEnumerates.hpp:97
Wrapper class around GPU device ID, acceleration type and GpuEngine.
Definition: tsgAcceleratedDataStructures.hpp:576
Data structures for interacting with CUDA and MAGMA environments.