// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm

#include "gpu/core1/boundsKernel.hpp"

#include "deviceBuffer.hpp"
#include "deviceStream.hpp"

// NOTE(review): the targets of the two angle-bracket includes below, the template arguments of
// GPU::Buffer, and the kernel launch configurations were not legible in the reviewed copy of this file.
// They are restored from how the names are used (CUDA_STATUS, Cuda::ceilDiv, the kernels' pointer types,
// the otherwise unused dimGrid/dimBlock/stream locals) -- confirm against the repository before merging.
#include <cuda/error.hpp>
#include <cuda/util.hpp>

#define REDUCE_THREADS_PER_BLOCK 512

namespace VideoStitch {
namespace Core {

namespace {
/**
 * Computes, for every column, the bitwise OR of all pixels in that column and puts the result in
 * colHasImage[col].
 *
 * Launch layout: 1D grid of 1D blocks, one thread per column. Threads whose column index is
 * >= panoWidth do nothing, so the grid may overshoot the width.
 *
 * @param contrib Input pixels, indexed as contrib[panoWidth * row + col].
 * @param colHasImage Output, one entry written per column (panoWidth entries).
 * @param panoWidth Number of columns.
 * @param panoHeight Number of rows.
 *
 * FIXME do it with parallel reduction
 */
__global__ void vertOrKernel(const uint32_t* __restrict__ contrib, uint32_t* __restrict__ colHasImage,
                             unsigned panoWidth, unsigned panoHeight) {
  const unsigned col = blockIdx.x * blockDim.x + threadIdx.x;
  if (col < panoWidth) {
    uint32_t accum = 0;
    for (unsigned row = 0; row < panoHeight; ++row) {
      // Index in size_t: a 32-bit panoWidth * row wraps around once the image exceeds 2^32 elements.
      accum |= contrib[(size_t)panoWidth * row + col];
    }
    colHasImage[col] = accum;
  }
}

/**
 * Computes, for every row, the bitwise OR of all pixels in that row and puts the result in
 * rowHasImage[row].
 *
 * Launch layout: 1D grid of 1D blocks, one thread per row. Threads whose row index is
 * >= panoHeight do nothing, so the grid may overshoot the height.
 *
 * @param contrib Input pixels, indexed as contrib[panoWidth * row + col].
 * @param rowHasImage Output, one entry written per row (panoHeight entries).
 * @param panoWidth Number of columns.
 * @param panoHeight Number of rows.
 */
__global__ void horizOrKernel(const uint32_t* __restrict__ contrib, uint32_t* __restrict__ rowHasImage,
                              unsigned panoWidth, unsigned panoHeight) {
  const unsigned row = blockIdx.x * blockDim.x + threadIdx.x;
  if (row < panoHeight) {
    // The row pointer is formed only for in-range rows, and its offset is computed in size_t so that
    // panoWidth * row cannot wrap around in 32-bit arithmetic.
    const uint32_t* rowp = contrib + (size_t)panoWidth * row;
    uint32_t accum = 0;
    for (unsigned col = 0; col < panoWidth; ++col) {
      accum |= rowp[col];
    }
    rowHasImage[row] = accum;
  }
}
}  // namespace

/**
 * Launches vertOrKernel with one thread per column, in blocks of REDUCE_THREADS_PER_BLOCK threads.
 * No explicit stream synchronization is performed here.
 *
 * @param croppedWidth Number of columns; narrowed to unsigned for the kernel.
 * @param croppedHeight Number of rows; narrowed to unsigned for the kernel.
 * @param contrib Input pixel buffer.
 * @param colHasImage Output buffer, one entry per column.
 * @param stream Stream the kernel is launched on.
 * @return CUDA_STATUS (presumably the CUDA error state after the launch -- see its definition).
 */
Status vertOr(std::size_t croppedWidth, std::size_t croppedHeight, GPU::Buffer<const uint32_t> contrib,
              GPU::Buffer<uint32_t> colHasImage, GPU::Stream stream) {
  dim3 dimBlock(REDUCE_THREADS_PER_BLOCK, 1, 1);
  // Round up so that a partially filled last block still covers the trailing columns.
  const unsigned numBlocks = (unsigned)Cuda::ceilDiv(croppedWidth, dimBlock.x);
  dim3 dimGrid(numBlocks, 1, 1);
  vertOrKernel<<<dimGrid, dimBlock, 0, stream.get()>>>(contrib.get(), colHasImage.get(), (unsigned)croppedWidth,
                                                       (unsigned)croppedHeight);
  return CUDA_STATUS;
}

/**
 * Launches horizOrKernel with one thread per row, in blocks of REDUCE_THREADS_PER_BLOCK threads.
 * No explicit stream synchronization is performed here.
 *
 * @param croppedWidth Number of columns; narrowed to unsigned for the kernel.
 * @param croppedHeight Number of rows; narrowed to unsigned for the kernel.
 * @param contrib Input pixel buffer.
 * @param rowHasImage Output buffer, one entry per row.
 * @param stream Stream the kernel is launched on.
 * @return CUDA_STATUS (presumably the CUDA error state after the launch -- see its definition).
 */
Status horizOr(std::size_t croppedWidth, std::size_t croppedHeight, GPU::Buffer<const uint32_t> contrib,
               GPU::Buffer<uint32_t> rowHasImage, GPU::Stream stream) {
  dim3 dimBlock(REDUCE_THREADS_PER_BLOCK, 1, 1);
  // Round up so that a partially filled last block still covers the trailing rows.
  const unsigned numBlocks = (unsigned)Cuda::ceilDiv(croppedHeight, dimBlock.x);
  dim3 dimGrid(numBlocks, 1, 1);
  horizOrKernel<<<dimGrid, dimBlock, 0, stream.get()>>>(contrib.get(), rowHasImage.get(), (unsigned)croppedWidth,
                                                        (unsigned)croppedHeight);
  return CUDA_STATUS;
}

}  // namespace Core
}  // namespace VideoStitch