// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm #include "imageProcessingGPUUtils.hpp" #include "core1/imageMerger.hpp" #include "core1/imageMapping.hpp" #include "core1/bounds.hpp" #include "gpu/image/sampling.hpp" //#define MERGER_PAIR_DEBUG #if defined(MERGER_PAIR_DEBUG) #include "util/debugUtils.hpp" #include #endif namespace VideoStitch { namespace Util { template <> Status ImageProcessingGPU::subsampleImage(GPU::Buffer dst, GPU::Buffer src, std::size_t srcWidth, std::size_t srcHeight, GPU::Stream gpuStream, const bool isNearest) { if (isNearest) { assert(false); /* return VideoStitch::Image::subsample22Nearest( dst, src, srcWidth, srcHeight, Core::ImageMerger::CudaBlockSize, gpuStream); */ return {Origin::Stitcher, ErrType::ImplementationError, "Unsupported subsampling mode"}; } else { return VideoStitch::Image::subsample22RGBA(dst, src, srcWidth, srcHeight, gpuStream); } } template <> Status ImageProcessingGPU::subsampleImage(GPU::Buffer dst, GPU::Buffer src, std::size_t srcWidth, std::size_t srcHeight, GPU::Stream gpuStream, const bool isNearest) { if (isNearest) { assert(false); /* return VideoStitch::Image::subsample22Nearest( dst, src, srcWidth, srcHeight, Core::ImageMerger::CudaBlockSize, gpuStream); */ return {Origin::Stitcher, ErrType::ImplementationError, "Unsupported subsampling mode"}; } else { return VideoStitch::Image::subsample22(dst, src, srcWidth, srcHeight, gpuStream); } } template Status ImageProcessingGPU::downSampleImages(const int levelCount, int& bufferWidth, int& bufferHeight, GPU::Buffer buffer, GPU::Stream stream, const bool isNearest) { GPU::UniqueBuffer tmpBuffer; FAIL_RETURN(tmpBuffer.alloc(bufferWidth * bufferHeight, "ImageProcessingGPU")); int width = bufferWidth; int height = bufferHeight; for (int t = 0; t < levelCount; t++) { // Down-sample the image as well if (t % 2 == 0) { FAIL_RETURN(subsampleImage(tmpBuffer.borrow(), buffer.as_const(), width, height, stream, isNearest)); } else { FAIL_RETURN(subsampleImage(buffer, tmpBuffer.borrow_const(), width, height, stream, isNearest)); } const int dstWidth = (width + 1) / 2; const int dstHeight = (height + 1) / 2; width = dstWidth; height = dstHeight; } if (levelCount % 2 == 1) { FAIL_RETURN(GPU::memcpyBlocking(buffer, tmpBuffer.borrow_const(), width * height * sizeof(T))); } bufferWidth = width; bufferHeight = height; return stream.synchronize(); } template Status ImageProcessingGPU::downSampleImages(const int levelCount, int& bufferWidth, int& bufferHeight, GPU::Buffer buffer, GPU::Stream stream, const bool isNearest); template Status ImageProcessingGPU::downSampleImages(const int levelCount, int& bufferWidth, int& bufferHeight, GPU::Buffer buffer, GPU::Stream stream, const bool isNearest); Status ImageProcessingGPU::findBBox(Core::TextureTarget t, const bool canWarp, const Core::StereoRigDefinition* rigDef, const int width, const int height, const GPU::Buffer& maskBuffer, Core::Rect& boundingRect, GPU::Stream gpuStream) { // Now find the bounding rect GPU::UniqueBuffer tmpBinarizedMaskBuffer; GPU::UniqueBuffer tmpDevBuffer; FAIL_RETURN(tmpDevBuffer.alloc(size_t(std::max(width, height)), "Image Processing GPU")); FAIL_RETURN(tmpBinarizedMaskBuffer.alloc(width * height, "Image Processing GPU")); FAIL_RETURN(binarizeMask(make_int2(width, height), maskBuffer, tmpBinarizedMaskBuffer.borrow(), gpuStream)); auto tmpHostBuffer = GPU::HostBuffer::allocate(size_t(std::max(width, height) * sizeof(uint32_t)), "Image Processing GPU"); FAIL_RETURN(tmpHostBuffer.status()); std::map imageMappings; imageMappings[0] = new Core::ImageMapping(0); // Compute H-Bound FAIL_RETURN(Core::computeHBounds(t, width, height, imageMappings, rigDef, Eye::LeftEye, tmpBinarizedMaskBuffer.borrow_const(), tmpHostBuffer.value(), tmpDevBuffer.borrow(), gpuStream, canWarp)); // Compute V-Bound FAIL_RETURN(Core::computeVBounds(t, width, height, imageMappings, tmpBinarizedMaskBuffer.borrow_const(), tmpHostBuffer.value(), tmpDevBuffer.borrow(), gpuStream)); FAIL_RETURN(tmpHostBuffer.value().release()); boundingRect = imageMappings[0]->getOutputRect(t); delete imageMappings[0]; imageMappings.clear(); return Status::OK(); } Status ImageProcessingGPU::downSampleCoordImage(const int inputWidth, const int inputHeight, const int levelCount, GPU::UniqueBuffer& coordBuffer, GPU::UniqueBuffer& weightBuffer, GPU::Stream stream) { GPU::UniqueBuffer tmpCoordBuffer; FAIL_RETURN(tmpCoordBuffer.alloc(inputWidth * inputHeight, "Image Processing GPU")); GPU::UniqueBuffer tmpWeightBuffer; FAIL_RETURN(tmpWeightBuffer.alloc(inputWidth * inputHeight, "Image Processing GPU")); int width = inputWidth; int height = inputHeight; for (int t = 0; t < levelCount; t++) { // Down-sample the image as well if (t % 2 == 0) { FAIL_RETURN(VideoStitch::Image::subsample22Mask(tmpCoordBuffer.borrow(), tmpWeightBuffer.borrow(), coordBuffer.borrow_const(), weightBuffer.borrow_const(), width, height, Core::ImageMerger::CudaBlockSize, stream)); } else { FAIL_RETURN(VideoStitch::Image::subsample22Mask( coordBuffer.borrow(), weightBuffer.borrow(), tmpCoordBuffer.borrow_const(), tmpWeightBuffer.borrow_const(), width, height, Core::ImageMerger::CudaBlockSize, stream)); } const int dstWidth = (width + 1) / 2; const int dstHeight = (height + 1) / 2; width = dstWidth; height = dstHeight; } if (levelCount % 2 == 1) { FAIL_RETURN( GPU::memcpyBlocking(coordBuffer.borrow(), tmpCoordBuffer.borrow_const(), width * height * sizeof(float2))); FAIL_RETURN( GPU::memcpyBlocking(weightBuffer.borrow(), tmpWeightBuffer.borrow_const(), width * height * sizeof(uint32_t))); } return stream.synchronize(); } Status ImageProcessingGPU::computeTightOverlappingRect(Core::TextureTarget t, const int warpWidth, const Core::Rect& boundingRect0, const GPU::Buffer& buffer0, const Core::Rect& boundingRect1, const GPU::Buffer& buffer1, Core::Rect& overlappingRect, GPU::Stream stream) { Core::Rect iRect, uRect; if (boundingRect0.right() < warpWidth) { uRect = boundingRect0; } else if (boundingRect1.right() < warpWidth) { uRect = boundingRect1; } else { Core::Rect::getInterAndUnion(boundingRect0, boundingRect1, iRect, uRect, warpWidth); uRect.setLeft(0); uRect.setRight(warpWidth - 1); } GPU::UniqueBuffer maskBuffer; FAIL_RETURN(maskBuffer.alloc(uRect.getArea(), "Image Processing GPU")); FAIL_RETURN(onBothBufferOperator(warpWidth, boundingRect0, buffer0, boundingRect1, buffer1, uRect, maskBuffer.borrow(), stream)); FAIL_RETURN(stream.synchronize()); #ifdef MERGER_PAIR_DEBUG static videoreaderid_t id = 0; std::stringstream ss; ss.str(""); ss << "andMaskOut-" << id << "_1.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), buffer1, boundingRect1.getWidth(), boundingRect1.getHeight()); id++; #endif FAIL_RETURN(findBBox(t, false, nullptr, (int)uRect.getWidth(), (int)uRect.getHeight(), maskBuffer.borrow_const(), overlappingRect, stream)); overlappingRect.setTop(uRect.top() + overlappingRect.top()); overlappingRect.setBottom(uRect.top() + overlappingRect.bottom()); overlappingRect.setLeft(uRect.left() + overlappingRect.left()); overlappingRect.setRight(uRect.left() + overlappingRect.right()); return Status::OK(); } } // namespace Util } // namespace VideoStitch