// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#include "gpu/exampleKernel.hpp"
#include "deviceBuffer.hpp"
#include "deviceStream.hpp"
#include "cuda/error.hpp"
#include "cuda/util.hpp"
namespace {
/**
 * Element-wise scaling kernel: output[i] = mult * input[i] for every i in [0, nbElem).
 * Despite the "Add" in its name, the kernel only multiplies.
 *
 * Launch layout: 1D grid of 1D blocks. Only the x components of blockIdx, blockDim,
 * gridDim and threadIdx are read. Each thread starts at its global index and advances
 * by the total number of launched threads, so any grid size covers the whole range.
 * No shared memory is used.
 *
 * @param output Destination array; must hold at least nbElem floats.
 * @param input Source array; must hold at least nbElem floats.
 * @param nbElem Number of elements to process.
 * @param mult Factor applied to every input element.
 */
__global__ void vecAddDummy(float* output, const float* input, unsigned int nbElem, float mult) {
  // Index arithmetic is done in size_t: nbElem is unsigned, and a signed 32-bit index
  // would overflow when it is advanced past INT_MAX for very large element counts.
  const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
  for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x; i < nbElem; i += stride) {
    output[i] = mult * input[i];
  }
}
} // namespace
namespace VideoStitch {
namespace Core {
/**
 * Enqueues vecAddDummy on the given stream so that, for each of the first nbElements
 * elements, outputBuff receives mult times the matching element of inputBuff.
 *
 * The launch is only enqueued; this function issues no explicit synchronization itself.
 *
 * @param outputBuff Device buffer written by the kernel.
 * @param inputBuff Device buffer read by the kernel.
 * @param nbElements Number of floats to process. Zero is accepted and launches nothing.
 * @param mult Factor applied to every input element.
 * @param stream Stream the kernel is enqueued on.
 * @return The value of CUDA_STATUS (macro from cuda/error.hpp; presumably reflects the
 *         last CUDA runtime error -- TODO confirm).
 */
Status callDummyKernel(GPU::Buffer<float> outputBuff, const GPU::Buffer<const float>& inputBuff,
                       unsigned int nbElements, float mult, GPU::Stream stream) {
  // A grid with zero blocks is an invalid launch configuration, so an empty input must
  // not reach the launch: it would otherwise be reported as a CUDA error.
  if (nbElements > 0) {
    // NOTE(review): 16 threads per block is half a warp; a multiple of 32 would use the
    // hardware better. Left unchanged here.
    dim3 dimBlock(16, 1, 1);
    dim3 dimGrid((unsigned)Cuda::ceilDiv(nbElements, dimBlock.x), 1, 1);
    vecAddDummy<<<dimGrid, dimBlock, 0, stream.get()>>>(outputBuff.get().raw(), inputBuff.get().raw(), nbElements,
                                                        mult);
  }
  return CUDA_STATUS;
}
} // namespace Core
} // namespace VideoStitch
// stitchEm authored f1d60797