how can i compile a mex function which contain openmp and cuda ?

4 views (last 30 days)
俊凯 王
俊凯 王 on 8 Sep 2021
i want to use cpu parallel and gpu parallel both!how can i compile mex ,i know mexcuda mexGPUExample.cu can compile cuda project,but how can i compile openmp in .cu project?
/*
* Example of how to use the mxGPUArray API in a MEX file. This example shows
* how to write a MEX function that takes a gpuArray input and returns a
* gpuArray output, e.g. B=mexFunction(A).
*
* Copyright 2012 The MathWorks, Inc.
*/
#include "mex.h"
#include "gpu/mxGPUArray.h"
#include"omp.h"
#include<stdio.h>
#include<stdlib.h>
/*
* Device code
*/
void __global__ TimesTwo(double const * const A,
double * const B,
int const N)
{
/* Calculate the global linear index, assuming a 1-d grid. */
int const i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < N) {
B[i] = 2.0 * A[i];
}
}
/*
* Host code
*/
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, mxArray const *prhs[])
{
size_t test=omp_get_thread_num();
#pragma omp parallel for num_threads(6)
for(int i=0;i<5;i++)
{
test+=omp_get_thread_num();
}
mexPrintf("%d\n",test);
/* Declare all variables.*/
mxGPUArray const *A;
mxGPUArray *B;
double const *d_A;
double *d_B;
int N;
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
/* Choose a reasonably sized number of threads for the block. */
int const threadsPerBlock = 256;
int blocksPerGrid;
/* Initialize the MathWorks GPU API. */
mxInitGPU();
/* Throw an error if the input is not a GPU array. */
if ((nrhs!=1) || !(mxIsGPUArray(prhs[0]))) {
mexErrMsgIdAndTxt(errId, errMsg);
}
A = mxGPUCreateFromMxArray(prhs[0]);
/*
* Verify that A really is a double array before extracting the pointer.
*/
if (mxGPUGetClassID(A) != mxDOUBLE_CLASS) {
mexErrMsgIdAndTxt(errId, errMsg);
}
/*
* Now that we have verified the data type, extract a pointer to the input
* data on the device.
*/
d_A = (double const *)(mxGPUGetDataReadOnly(A));
/* Create a GPUArray to hold the result and get its underlying pointer. */
B = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
mxGPUGetDimensions(A),
mxGPUGetClassID(A),
mxGPUGetComplexity(A),
MX_GPU_DO_NOT_INITIALIZE);
d_B = (double *)(mxGPUGetData(B));
/*
* Call the kernel using the CUDA runtime API. We are using a 1-d grid here,
* and it would be possible for the number of elements to be too large for
* the grid. For this example we are not guarding against this possibility.
*/
N = (int)(mxGPUGetNumberOfElements(A));
blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
TimesTwo<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, N);
/* Wrap the result up as a MATLAB gpuArray for return. */
plhs[0] = mxGPUCreateMxArrayOnGPU(B);
/*
* The mxGPUArray pointers are host-side structures that refer to device
* data. These must be destroyed before leaving the MEX function.
*/
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
}
this code is official cuda test code which i have add openmp test
  2 Comments

Sign in to comment.

Answers (0)

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!