// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm #include "gpu/image/imgExtract.hpp" #include "../deviceBuffer.hpp" #include "../deviceStream.hpp" #include "cuda/util.hpp" #include #include namespace VideoStitch { namespace Image { /** * This kernel extract a part of the content of the image @src at * offset (@offsetX, offsetY) with size (dstWidth x dstHeight) and writes it in into a packed buffer @dst. * @dst must be large enough to hold the dstWidth * dstHeight pixels. * On overflow, the source image wraps if hWrap is true. Else pixels are filled with 0. * 2D version: We assume that the @dst (but not the @src) image is divisible * by the block size on each dimension. */ __global__ void imgExtractFromKernel(uint32_t* __restrict__ dst, unsigned dstWidth, unsigned dstHeight, const uint32_t* __restrict__ src, unsigned srcWidth, unsigned srcHeight, int offsetX, int offsetY, bool hWrap) { int dstX = blockIdx.x * blockDim.x + threadIdx.x; int dstY = blockIdx.y * blockDim.y + threadIdx.y; int srcX = offsetX + dstX; int srcY = offsetY + dstY; uint32_t res = 0; if (dstX < dstWidth && dstY < dstHeight) { if (0 <= srcY && srcY < srcHeight) { if (hWrap) { if (0 <= srcX) { if (srcX < srcWidth) { res = src[srcWidth * srcY + srcX]; } else { res = src[srcWidth * srcY + (srcX % srcWidth)]; } } else { res = src[srcWidth * srcY + srcWidth + (srcX % srcWidth)]; // modulo has sign of dividend } } else if (0 <= srcX & srcX < srcWidth) { res = src[srcWidth * srcY + srcX]; } } dst[dstWidth * dstY + dstX] = res; } } Status imgExtractFrom(GPU::Buffer dst, std::size_t dstWidth, std::size_t dstHeight, GPU::Buffer src, std::size_t srcWidth, std::size_t srcHeight, std::size_t offsetX, std::size_t offsetY, bool hWrap, GPU::Stream stream) { dim3 dimBlock(16, 16, 1); dim3 dimGrid((unsigned)Cuda::ceilDiv(dstWidth, dimBlock.x), (unsigned)Cuda::ceilDiv(dstHeight, dimBlock.y), 1); imgExtractFromKernel<<>>(dst.get(), (unsigned)dstWidth, (unsigned)dstHeight, src.get(), (unsigned)srcWidth, (unsigned)srcHeight, (unsigned)offsetX, (unsigned)offsetY, hWrap); // wraps return CUDA_STATUS; } } // namespace Image } // namespace VideoStitch