// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm #include "mergerMask.hpp" #include "seamFinder.hpp" #include "core/controllerInputFrames.hpp" #include "core1/bounds.hpp" #include "core1/imageMapping.hpp" #include "core1/imageMerger.hpp" #include "core1/panoRemapper.hpp" #include "core1/inputsMap.hpp" #include "core1/imageMerger.hpp" #include "core1/textureTarget.hpp" #include "gpu/allocator.hpp" #include "gpu/core1/strip.hpp" #include "gpu/core1/transform.hpp" #include "gpu/core1/voronoi.hpp" #include "gpu/uniqueBuffer.hpp" #include "gpu/stream.hpp" #include "gpu/memcpy.hpp" #include "gpu/image/sampling.hpp" #include "gpu/image/blur.hpp" #include "util/registeredAlgo.hpp" #include "util/compressionUtils.hpp" #include "util/imageProcessingGPUUtils.hpp" #include #include //#define MERGERMASK_SETUP_DISTORTIONMAP //#define MERGERMASK_SETUP_MAPPEDRECT //#define MERGERMASK_SETUP_FRAMES //#define MERGERMASK_GET_MERGERMASKS //#define MERGERMASK_GET_MERGERMASKS_FULL //#define MERGERMASK_GET_MERGERMASKS_FINAL //#define MERGERMASK_INPUT_SPACE //#define MERGERMASK_SEAM_INPUT //#define MERGERMASK_SEAM_OPTIMIZATION //#define MERGERMASK_SEAM_MASK #if defined(MERGERMASK_SETUP_MAPPEDRECT) || defined(MERGERMASK_SEAM_OPTIMIZATION) || defined(MERGERMASK_SEAM_MASK) || \ defined(MERGERMASK_SEAM_INPUT) || defined(MERGERMASK_GET_MERGERMASKS_FINAL) || defined(MERGERMASK_INPUT_SPACE) #ifdef NDEBUG #error "This is not supposed to be included in non-debug mode." #endif #include "backend/cuda/deviceBuffer.hpp" #include "backend/cuda/deviceStream.hpp" #endif #if defined(MERGERMASK_GET_MERGERMASKS_FULL) || defined(MERGERMASK_SETUP_FRAMES) || \ defined(MERGERMASK_SETUP_DISTORTIONMAP) || defined(MERGERMASK_GET_MERGERMASKS) || \ defined(MERGERMASK_SEAM_OPTIMIZATION) || defined(MERGERMASK_SEAM_MASK) || defined(MERGERMASK_SEAM_INPUT) || \ defined(MERGERMASK_GET_MERGERMASKS_FINAL) || defined(MERGERMASK_INPUT_SPACE) #ifdef NDEBUG #error "This is not supposed to be included in non-debug mode." #endif #include "util/pngutil.hpp" #include "util/pnm.hpp" #include "util/debugUtils.hpp" #include "util/opticalFlowUtils.hpp" #endif namespace VideoStitch { namespace MergerMask { Potential MergerMask::create(const std::map& readers, const Core::PanoDefinition& pano, const Core::StereoRigDefinition* rigDef, const MergerMaskConfig& config, const MergerMaskProgress& progress) { std::unique_ptr mergerMask; mergerMask.reset(new MergerMask(pano, rigDef, config, progress)); FAIL_RETURN(mergerMask->setup(readers)); return mergerMask.release(); } MergerMask::MergerMask(const Core::PanoDefinition& pano, const Core::StereoRigDefinition* rigDef, const MergerMaskConfig& config, const MergerMaskProgress& progress) : pano(pano), rigDef(rigDef), mergerMaskConfig(config), // downsamplingLevelCount(getDownLevel(pano, (int)config.getSizeThreshold())), downsamplingLevelCount(0), progress(progress) {} MergerMask::~MergerMask() { for (size_t i = 0; i < imageMappings.size(); i++) { delete imageMappings[(int)i]; } imageMappings.clear(); for (size_t i = 0; i < transforms.size(); i++) { delete transforms[(int)i]; } transforms.clear(); for (size_t i = 0; i < distortionMaps.size(); i++) { distortionMaps[(int)i].release(); } for (size_t i = 0; i < originalDistortionMaps.size(); i++) { originalDistortionMaps[(int)i].release(); } distortionMaps.clear(); originalDistortionMaps.clear(); } int MergerMask::getDownSize(const int size) const { int level = downsamplingLevelCount; int downSize = size; while (level > 0) { downSize = (downSize + 1) / 2; level--; } return downSize; } int MergerMask::getDownCoord(const int coord) const { int level = downsamplingLevelCount; float downCoord = (float)coord; while (level > 0) { downCoord = (downCoord + 1.0f) / 2.0f; level--; } return (int)downCoord; } int MergerMask::getDownLevel(const Core::PanoDefinition& pano, const int sizeThreshold) { int width = (int)pano.getWidth(); int height = (int)pano.getHeight(); int level = 0; while (width > sizeThreshold || height > sizeThreshold) { width = (width + 1) / 2; height = (height + 1) / 2; level++; } return level; } Status MergerMask::setupMappings(const std::map& readers) { // Prepare input maps GPU::Stream stream = GPU::Stream::getDefault(); std::unique_ptr inputsMap; Potential potInputsMap = Core::InputsMap::create(pano); FAIL_RETURN(potInputsMap.status()); inputsMap = std::unique_ptr(potInputsMap.release()); FAIL_RETURN(inputsMap->compute(readers, pano, rigDef, LeftEye, false)); // Create the mappers. imageMappings.clear(); for (auto reader : readers) { imageMappings[reader.second->id] = new Core::ImageMapping(reader.second->id); } // Prepare mappings auto tmpDevBuffer = GPU::Buffer::allocate(size_t(std::max(pano.getWidth(), pano.getHeight())), "Input Bounding boxes"); FAIL_RETURN(tmpDevBuffer.status()); auto tmpHostBuffer = GPU::HostBuffer::allocate((unsigned)std::max(pano.getWidth(), pano.getHeight()), "Input Bounding boxes"); FAIL_RETURN(tmpHostBuffer.status()); FAIL_RETURN(computeHBounds(Core::EQUIRECTANGULAR, pano.getWidth(), pano.getHeight(), imageMappings, rigDef, LeftEye, inputsMap->getMask(), tmpHostBuffer.value(), tmpDevBuffer.value(), stream, true)); FAIL_RETURN(computeVBounds(Core::EQUIRECTANGULAR, pano.getWidth(), pano.getHeight(), imageMappings, inputsMap->getMask(), tmpHostBuffer.value(), tmpDevBuffer.value(), stream)); int devWidth = (int)pano.getWidth(); int devHeight = (int)pano.getHeight(); // Duplicate the original inputs map FAIL_RETURN(inputsMapOriginalBuffer.alloc(devWidth * devHeight, "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(inputsMapOriginalBuffer.borrow(), inputsMap->getMask(), devWidth * devHeight * sizeof(uint32_t))); #ifdef MERGERMASK_GET_MERGERMASKS_FULL { std::stringstream ss; ss.str(""); ss << "input-full-index.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputsMapOriginalBuffer.borrow_const(), devWidth, devHeight); for (int i = 0; i < pano.numInputs(); i++) { std::stringstream ss; ss.str(""); ss << "input-full-index" << i << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputsMapOriginalBuffer.borrow_const(), devWidth, devHeight, i); } } #endif // Generate the down-sampled inputs map FAIL_RETURN(Util::ImageProcessingGPU::downSampleImages(downsamplingLevelCount, devWidth, devHeight, inputsMap->getMask(), stream, true)); FAIL_RETURN(inputsMapBuffer.alloc(devWidth * devHeight, "Merger Mask")); FAIL_RETURN( GPU::memcpyBlocking(inputsMapBuffer.borrow(), inputsMap->getMask(), devWidth * devHeight * sizeof(uint32_t))); FAIL_RETURN(tmpHostBuffer.value().release()); FAIL_RETURN(tmpDevBuffer.value().release()); return Status::OK(); } Status MergerMask::setupTransform(const std::map& readers) { // Prepare transform for (auto reader : readers) { const Core::InputDefinition& inputDef = pano.getInput(reader.second->id); Core::Transform* transform = Core::Transform::create(inputDef); if (!transform) { return {Origin::Stitcher, ErrType::SetupFailure, "Cannot create v1 transformation for input " + std::to_string(reader.second->id)}; } transforms[reader.second->id] = transform; } return Status::OK(); } Status MergerMask::setupDistortionMap() { // Prepare distortion maps GPU::Stream stream = GPU::Stream::getDefault(); for (auto transform : transforms) { Core::ImageMapping* mapping = imageMappings[transform.first]; auto potDevOut = GPU::uniqueBuffer(mapping->getOutputRect(Core::EQUIRECTANGULAR).getArea(), "Merger Mask Algorithm"); FAIL_RETURN(potDevOut.status()); const Core::InputDefinition& im = pano.getInput(transform.first); FAIL_RETURN(transform.second->mapDistortion(0, potDevOut.borrow(), mapping->getOutputRect(Core::EQUIRECTANGULAR), pano, im, stream)); int devWidth = (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getWidth(); int devHeight = (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getHeight(); FAIL_RETURN(updateDistortionFromMask((int)transform.first, make_int2(devWidth, devHeight), make_int2((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).left(), (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).top()), potDevOut.borrow(), make_int2((int)pano.getWidth(), (int)pano.getHeight()), inputsMapOriginalBuffer.borrow_const(), stream)); #ifdef MERGERMASK_SETUP_DISTORTIONMAP { std::stringstream ss; ss.str(""); ss << "distortion-ori" << i << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), potDevOut.borrow_const(), mapping->getOutputRect().getWidth(), mapping->getOutputRect().getHeight()); } #endif // Down-sampling the image to the appropriate level auto originalCacheBuffer = GPU::uniqueBuffer(devWidth * devHeight, "Merger Mask Algorithm"); FAIL_RETURN(originalCacheBuffer.status()); FAIL_RETURN(GPU::memcpyBlocking(originalCacheBuffer.borrow(), potDevOut.borrow_const(), devWidth * devHeight * sizeof(unsigned char))); FAIL_RETURN(Util::ImageProcessingGPU::downSampleImages(downsamplingLevelCount, devWidth, devHeight, potDevOut.borrow(), stream, false)); // Put the down-sampled image into the cached map auto cacheBuffer = GPU::uniqueBuffer(devWidth * devHeight, "Merger Mask Algorithm"); FAIL_RETURN(cacheBuffer.status()); FAIL_RETURN(GPU::memcpyBlocking(cacheBuffer.borrow(), potDevOut.borrow().as_const(), devWidth * devHeight * sizeof(unsigned char))); FAIL_RETURN(transformDistortion(make_int2(devWidth, devHeight), cacheBuffer.borrow(), stream)); #ifdef MERGERMASK_SETUP_DISTORTIONMAP { std::stringstream ss; ss.str(""); ss << "distortion-" << i << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), cacheBuffer.borrow().as_const(), devWidth, devHeight); } #endif distortionMaps[transform.first] = cacheBuffer.releaseOwnership(); originalDistortionMaps[transform.first] = originalCacheBuffer.releaseOwnership(); } return Status::OK(); } Status MergerMask::setupMappedRect() { // Precompute data for the low res cachedMappedRects.clear(); std::vector rectOffset; std::vector rectSize; std::vector frameOffsets; int offset = 0; // Precompute data for the original res cachedOriginalMappedRects.clear(); std::vector originalRectOffset; std::vector originalRectSize; std::vector originalFrameOffsets; int originalOffset = 0; for (auto transform : transforms) { Core::ImageMapping* mapping = imageMappings[transform.first]; // Data for the original res cachedOriginalMappedRects.push_back(mapping->getOutputRect(Core::EQUIRECTANGULAR)); originalRectOffset.push_back(make_int2((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).left(), (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).top())); originalRectSize.push_back(make_int2((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getWidth(), (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getHeight())); originalFrameOffsets.push_back(originalOffset); originalOffset += (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getArea(); // Data for the low res int devWidth = (int)getDownSize((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getWidth()); int devHeight = (int)getDownSize((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getHeight()); int devLeft = (int)getDownCoord((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).left()); int devTop = (int)getDownCoord((int)mapping->getOutputRect(Core::EQUIRECTANGULAR).top()); cachedMappedRects.push_back( Core::Rect::fromInclusiveTopLeftBottomRight(devTop, devLeft, devTop + devHeight - 1, devLeft + devWidth - 1)); rectOffset.push_back(make_int2(devLeft, devTop)); rectSize.push_back(make_int2(devWidth, devHeight)); frameOffsets.push_back(offset); offset += devWidth * devHeight; } // For the original images FAIL_RETURN(originalMappedRectOffset.alloc(pano.numInputs(), "Merger Mask")); FAIL_RETURN(originalMappedRectSize.alloc(pano.numInputs(), "Merger Mask")); FAIL_RETURN(originalMappedOffset.alloc(pano.numInputs(), "Merger Mask")); FAIL_RETURN( GPU::memcpyBlocking(originalMappedRectOffset.borrow(), &originalRectOffset[0], transforms.size() * sizeof(int2))); FAIL_RETURN( GPU::memcpyBlocking(originalMappedRectSize.borrow(), &originalRectSize[0], transforms.size() * sizeof(int2))); FAIL_RETURN(GPU::memcpyBlocking(originalMappedOffset.borrow(), &originalFrameOffsets[0], transforms.size() * sizeof(uint32_t))); // For the resize caches FAIL_RETURN(mappedRectOffset.alloc(pano.numInputs(), "Merger Mask")); FAIL_RETURN(mappedRectSize.alloc(pano.numInputs(), "Merger Mask")); FAIL_RETURN(mappedOffset.alloc(pano.numInputs(), "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(mappedRectOffset.borrow(), &rectOffset[0], transforms.size() * sizeof(int2))); FAIL_RETURN(GPU::memcpyBlocking(mappedRectSize.borrow(), &rectSize[0], transforms.size() * sizeof(int2))); FAIL_RETURN(GPU::memcpyBlocking(mappedOffset.borrow(), &frameOffsets[0], transforms.size() * sizeof(uint32_t))); #ifdef MERGERMASK_SETUP_MAPPEDRECT std::vector debugRectOffset(transforms.size()); std::vector debugRectSize(transforms.size()); std::vector debugFrameOffsets(transforms.size()); cudaMemcpy(&debugRectOffset[0], mappedRectOffset.borrow_const().get(), transforms.size() * sizeof(int2), cudaMemcpyDeviceToHost); cudaMemcpy(&debugRectSize[0], mappedRectSize.borrow_const().get(), transforms.size() * sizeof(int2), cudaMemcpyDeviceToHost); cudaMemcpy(&debugFrameOffsets[0], mappedOffset.borrow_const().get(), transforms.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost); #endif return Status::OK(); } Status MergerMask::setup(const std::map& readers) { FAIL_RETURN(setupMappings(readers)); FAIL_RETURN(setupTransform(readers)); FAIL_RETURN(setupMappedRect()); FAIL_RETURN(setupDistortionMap()); FAIL_RETURN(setupOverlappingPair()); FAIL_RETURN(setupFrames()); FAIL_RETURN(progress.add("coarseMask_setup", "Setup blending mask")); return Status::OK(); } Status MergerMask::retrieveImages(const std::vector& frames, FrameBuffers& frameBuffers, const Core::PanoDefinition& pano) { frameBuffers.clear(); auto container = Core::ControllerInputFrames::create(&pano); FAIL_RETURN(container.status()); for (auto& numFrame : frames) { FAIL_RETURN(container->seek((int)numFrame)); std::map>> loadedFrames; container->load(loadedFrames); for (auto& loadedFrame : loadedFrames) { readerid_t inputid = loadedFrame.first; if (inputid > (int)pano.numInputs()) { continue; } auto potLoadedFrame = loadedFrame.second; FAIL_RETURN(potLoadedFrame.status()); GPU::HostBuffer frame = potLoadedFrame.value(); /* Get the size of the current image */ const Core::InputDefinition& idef = pano.getInput(inputid); const int width = (int)idef.getWidth(); const int height = (int)idef.getHeight(); const size_t frameSize = (size_t)(width * height * sizeof(uint32_t)); std::pair id = std::make_pair(inputid, numFrame); auto frameBuffer = GPU::uniqueBuffer(width * height, "Merger Mask Algorithm"); FAIL_RETURN(frameBuffer.status()); FAIL_RETURN(GPU::memcpyBlocking(frameBuffer.borrow(), frame.hostPtr(), frameSize)); frameBuffers[id] = frameBuffer.releaseValue(); } } return Status::OK(); } Status MergerMask::setupFrames() { const std::vector frames = mergerMaskConfig.getFrames(); FrameBuffers frameBuffers; FAIL_RETURN(retrieveImages(mergerMaskConfig.getFrames(), frameBuffers, pano)); #ifdef MERGERMASK_SETUP_FRAMES for (FrameBuffers::iterator it = frameBuffers.begin(); it != frameBuffers.end(); ++it) { const int frameid = it->first.first; const int camid = it->first.second; const Core::InputDefinition& idef = pano.getInput(camid); const int width = (int)idef.getWidth(); const int height = (int)idef.getHeight(); std::stringstream ss; ss << "inputImage-" << frameid << " " << camid << ".png"; Debug::dumpRGBADeviceBuffer(ss.str().c_str(), (it->second).borrow_const(), width, height); } #endif // Find the total number of element needed for a certain time frame int totalElementCount = 0; for (size_t i = 0; i < cachedMappedRects.size(); i++) { totalElementCount += (int)cachedMappedRects[i].getArea(); } const int width = getDownSize((int)pano.getWidth()); const int height = getDownSize((int)pano.getHeight()); FAIL_RETURN(work1.alloc(pano.getWidth() * pano.getHeight(), "Merger Mask")); FAIL_RETURN(work2.alloc(pano.getWidth() * pano.getHeight(), "Merger Mask")); FAIL_RETURN(workMask.alloc(pano.getWidth() * pano.getHeight(), "Merger Mask")); FAIL_RETURN(workCost.alloc(width * height, "Merger Mask")); // Allocate temporal memory for processing the mapped buffer auto potDevOut = GPU::uniqueBuffer(pano.getWidth() * pano.getHeight(), "Merger Mask Algorithm"); FAIL_RETURN(potDevOut.status()); cachedMappedFrames.clear(); const int camCount = (int)pano.numInputs(); GPU::Stream stream = GPU::Stream::getDefault(); // For all frameid and camid, do the mapping for (size_t j = 0; j < frames.size(); j++) { auto cacheBuffer = GPU::uniqueBuffer(totalElementCount, "Merger Mask"); FAIL_RETURN(cacheBuffer.status()); cachedMappedFrames.push_back(cacheBuffer.releaseValue()); int elementOffset = 0; for (int i = 0; i < camCount; i++) { Core::Transform* transform = transforms[i]; Core::ImageMapping* mapping = imageMappings[i]; const Core::InputDefinition& im = pano.getInput(i); const std::pair inPair = std::make_pair(i, frames[j]); FrameBuffers::const_iterator it = frameBuffers.find(inPair); if (it != frameBuffers.end()) { const GPU::Buffer inputBuffer = it->second.borrow_const(); auto surface = Core::OffscreenAllocator::createSourceSurface(im.getWidth(), im.getHeight(), "Merger Mask"); FAIL_RETURN(GPU::memcpyAsync(*surface->pimpl->surface, inputBuffer, stream)); // Map the input to the output space transform->mapBuffer(frames[j], potDevOut.borrow(), *surface->pimpl->surface, nullptr, mapping->getOutputRect(Core::EQUIRECTANGULAR), pano, im, *surface->pimpl->surface, stream); // Down-sampling the image to the appropriate level int devWidth = (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getWidth(); int devHeight = (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getHeight(); #ifdef MERGERMASK_SETUP_FRAMES { std::stringstream ss; ss << "oriwarped-" << i << " " << frames[j] << ".png"; Debug::dumpRGB210DeviceBuffer(ss.str().c_str(), devOut.borrow_const(), devWidth, devHeight); } #endif Util::ImageProcessingGPU::downSampleImages(downsamplingLevelCount, devWidth, devHeight, potDevOut.borrow(), stream, false); // Put the down-sampled image into the cached map #ifdef MERGERMASK_SETUP_FRAMES { std::stringstream ss; ss << "warped-rgb210-" << i << " " << frames[j] << ".png"; Debug::dumpRGB210DeviceBuffer(ss.str().c_str(), devOut.borrow_const(), devWidth, devHeight); } #endif // Convert from rgb to lab GPU::Buffer subBuffer = cachedMappedFrames.back().borrow().createSubBuffer(elementOffset); FAIL_RETURN(Util::ImageProcessingGPU::convertRGB210ToRGBandGradient( make_int2(devWidth, devHeight), potDevOut.borrow_const(), subBuffer, stream)); #ifdef MERGERMASK_SETUP_FRAMES { std::stringstream ss; ss << "warped-rgb-" << i << " " << frames[j] << ".png"; Debug::dumpRGBADeviceBuffer(ss.str().c_str(), devOut.borrow_const(), devWidth, devHeight); } #endif FAIL_RETURN(Util::ImageProcessingGPU::convertRGBandGradientToNormalizedLABandGradient( make_int2(devWidth, devHeight), subBuffer, stream)); #ifdef MERGERMASK_SETUP_FRAMES { std::stringstream ss; ss << "warped-" << i << " " << frames[j] << ".png"; Debug::dumpRGB210DeviceBuffer(ss.str().c_str(), subBuffer.as_const(), devWidth, devHeight); } { std::stringstream ss; ss << "gradient-" << i << " " << frames[j] << ".png"; GPU::UniqueBuffer gradientBuffer; FAIL_RETURN(gradientBuffer.alloc(devWidth * devHeight, "Merger Mask")); FAIL_RETURN( extractChannel(make_int2(devWidth, devHeight), subBuffer.as_const(), 3, gradientBuffer.borrow(), stream)); Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), gradientBuffer.borrow_const(), devWidth, devHeight); } #endif elementOffset += (int)cachedMappedRects[i].getArea(); } } } return Status::OK(); } Status MergerMask::setupOverlappingPair() { // Prepare overlapping pair GPU::Stream stream = GPU::Stream::getDefault(); const GPU::Buffer mappingMask = inputsMapBuffer.borrow_const(); // Copy data from GPU to CPU PotentialValue> potHostBuffer = GPU::HostBuffer::allocate(mappingMask.numElements(), "Mask Merger Graph"); FAIL_RETURN(potHostBuffer.status()); GPU::HostBuffer mask = potHostBuffer.value(); FAIL_RETURN(GPU::memcpyBlocking(mask, mappingMask)); // Find the overlapping bit set std::unordered_set overlappingValueSet; const uint32_t* maskPtr = mask.hostPtr(); for (size_t i = 0; i < mappingMask.numElements(); i++) { if (maskPtr[i] > 0) { if (overlappingValueSet.find(maskPtr[i]) == overlappingValueSet.end()) { overlappingValueSet.insert(maskPtr[i]); } } } // Find all pair mapping for (readerid_t i = 0; i < pano.numInputs(); i++) { isOverlapping.push_back(std::vector(pano.numInputs(), 0)); } for (auto i = overlappingValueSet.begin(); i != overlappingValueSet.end(); i++) { uint32_t v = *i; std::vector indices; // Find the set of indices int t = 0; while (v > 0) { if ((v & 1) > 0) { indices.push_back(t); } t++; v = v >> 1; } for (size_t i = 0; i < indices.size(); i++) for (size_t j = i + 1; j < indices.size(); j++) { isOverlapping[indices[i]][indices[j]]++; isOverlapping[indices[j]][indices[i]]++; } } mask.release(); return Status::OK(); } // Step (3c) Status MergerMask::getBlendingCost(const std::vector& normalizedMaxOverlappingWidths, const int2& size, const std::vector& allCam, const GPU::Buffer& inputDistortionBuffer, const GPU::Buffer& inputNonOverlappingIndexBuffer, GPU::Buffer nextDistortionBuffer, GPU::Buffer nextNonOverlappingIndexBuffer, GPU::Buffer nextInputIndexBuffer, float& blendingCost) { GPU::Stream stream = GPU::Stream::getDefault(); const std::vector frames = mergerMaskConfig.getFrames(); FAIL_RETURN(updateInputIndexByDistortionMap((int)allCam.back(), size, inputNonOverlappingIndexBuffer, inputDistortionBuffer, nextNonOverlappingIndexBuffer, nextDistortionBuffer, stream)); #ifdef MERGERMASK_GET_MERGERMASKS { std::stringstream ss; ss.str(""); ss << "trial-distortion-"; for (int i = 0; i < allCam.size(); i++) { ss << allCam[i] << "-"; } ss << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), nextDistortionBuffer.as_const(), size.x, size.y); } #endif // To make sure the overlapping area is small (for efficiency), reduce the new area's mask to less than a threshold // - (C3) Make sure that the overlapping width of image pairs is less than a certain threshold FAIL_RETURN(updateOverlappingMap(normalizedMaxOverlappingWidths, size, allCam, nextNonOverlappingIndexBuffer, nextInputIndexBuffer, stream)); // Thirdly, compute the image difference cost metric // Iterate over all frames const int kernelSize = mergerMaskConfig.getKernelSize(); // First, initialize the distortion cost FAIL_RETURN(GPU::memsetToZeroBlocking(workCost.borrow(), sizeof(float) * size.x * size.y)); // Now iterate over all frames and start accumulating the cost into buffers GPU::UniqueBuffer debugBuffer0, debugBuffer1; FAIL_RETURN(debugBuffer0.alloc(size.x * size.y, "Merger Mask")); FAIL_RETURN(debugBuffer1.alloc(size.x * size.y, "Merger Mask")); for (size_t i = 0; i < frames.size(); i++) { FAIL_RETURN(updateStitchingCost(size, kernelSize, nextInputIndexBuffer.as_const(), mappedOffset.borrow_const(), mappedRectOffset.borrow_const(), mappedRectSize.borrow_const(), cachedMappedFrames[i].borrow_const(), workCost.borrow(), debugBuffer0.borrow(), debugBuffer1.borrow(), stream)); } #ifdef MERGERMASK_GET_MERGERMASKS { std::stringstream ss; /*ss.str(""); ss << "cost-map-"; for (int i = 0; i < prevCam.size(); i++) { ss << prevCam[i] << "-"; } ss << camId << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), workCost.borrow_const(), size.x, size.y); */ ss.str(""); ss << "cost-debug-map-zero-"; for (int i = 0; i < allCam.size(); i++) { ss << allCam[i] << "-"; } ss << ".png"; Debug::dumpRGBADeviceBuffer(ss.str().c_str(), debugBuffer0.borrow(), size.x, size.y); ss.str(""); ss << "cost-debug-map-one-"; for (int i = 0; i < allCam.size(); i++) { ss << allCam[i] << "-"; } ss << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), debugBuffer1.borrow(), size.x, size.y); ss.str(""); ss << "cost-overlapping-mask-final-"; for (int i = 0; i < allCam.size(); i++) { ss << allCam[i] << "-"; } ss << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), nextInputIndexBuffer.as_const(), size.x, size.y); } #endif // Calculate cost now float output, mask; FAIL_RETURN( Util::ImageProcessingGPU::calculateSum(size.x * size.y, workCost.borrow_const(), 256, stream, output, mask)); float cost = mask > 0 ? output / mask : 1; float maskRatio = mask / float(size.x * size.y); float preferBiggerMaskCost = expf(-maskRatio * maskRatio * 100); blendingCost = cost + 0.05f * preferBiggerMaskCost; return Status::OK(); } Status MergerMask::updateOverlappingMap(const std::vector& normalizedMaxOverlappingWidths, const int2& size, const std::vector& allCam, const GPU::Buffer& inputNonOverlappingIndexBuffer, GPU::Buffer inputIndexBuffer, GPU::Stream stream, const bool original) { assert(normalizedMaxOverlappingWidths.size() == allCam.size()); assert(allCam.size() > 1); // Get all frame index as max value size_t maxCam = allCam[0]; for (size_t i = 1; i < allCam.size(); i++) { if (allCam[i] > maxCam) { maxCam = allCam[i]; } } std::vector camIndex(maxCam + 1, 0); for (size_t i = 0; i < allCam.size(); i++) { camIndex[(int)allCam[i]] = (char)i; } GPU::UniqueBuffer camIndexBuffer; FAIL_RETURN(camIndexBuffer.alloc(maxCam, "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(camIndexBuffer.borrow(), &camIndex[0])); // Initialize the index buffer from the non-overlapping FAIL_RETURN( GPU::memcpyBlocking(inputIndexBuffer, inputNonOverlappingIndexBuffer, size.x * size.y * sizeof(uint32_t))); // Set up a mask for all camera previous to the current one to calculate the distance transform function for (size_t i = 0; i < allCam.size(); i++) { if (normalizedMaxOverlappingWidths[i] > 0) { // Compute the distance map for the current camera, make sure that it will have overlapping // area that is larger than a predefined threshold const int upSize = int(float(size.x) * float(normalizedMaxOverlappingWidths[i]) / float(getDownSize(size.x))); const int maxOverlappingWidth = original ? upSize : normalizedMaxOverlappingWidths[i]; FAIL_RETURN(Core::computeEuclideanDistanceMap(workMask.borrow(), inputIndexBuffer, work1.borrow(), work2.borrow(), size.x, size.y, 1 << allCam[i], true, (float)maxOverlappingWidth, 1, stream)); } else { FAIL_RETURN(GPU::memsetToZeroBlocking(workMask.borrow(), size.x * size.y)); } #ifdef MERGERMASK_GET_MERGERMASKS_FINAL if (original) { std::stringstream ss; ss.str(""); ss << "work-mask-"; for (int j = 0; j <= i; j++) { ss << allCam[j] << "-"; } ss << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), workMask.borrow_const(), size.x, size.y); } #endif const int2 camSize = original ? make_int2((int)cachedOriginalMappedRects[allCam[i]].getWidth(), (int)cachedOriginalMappedRects[allCam[i]].getHeight()) : make_int2((int)cachedMappedRects[allCam[i]].getWidth(), (int)cachedMappedRects[allCam[i]].getHeight()); const int2 camOffset = original ? make_int2((int)cachedOriginalMappedRects[allCam[i]].left(), (int)cachedOriginalMappedRects[allCam[i]].top()) : make_int2((int)cachedMappedRects[allCam[i]].left(), (int)cachedMappedRects[allCam[i]].top()); FAIL_RETURN( updateIndexMask((int)allCam[i], original ? 10000 : 2, camIndexBuffer.borrow_const(), camSize, camOffset, original ? originalDistortionMaps[(int)allCam[(int)i]] : distortionMaps[(int)allCam[(int)i]], size, inputIndexBuffer, workMask.borrow(), original ? inputsMapOriginalBuffer.borrow_const() : inputsMapBuffer.borrow_const(), stream)); #ifdef MERGERMASK_GET_MERGERMASKS_FINAL if (original) { { std::stringstream ss; ss.str(""); ss << "index-buffer-final-"; for (int j = 0; j <= i; j++) { ss << allCam[j] << "-"; } ss << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputIndexBuffer.as_const(), size.x, size.y); } /*{ std::stringstream ss; ss.str(""); ss << "distortion-buffer-final-"; for (int j = 0; j <= i; j++) { ss << allCam[j] << "-"; } ss << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), originalDistortionMaps[allCam[i]].as_const(), size.x, size.y); }*/ } #endif } return Status::OK(); } Status MergerMask::releaseMergerMaskMemory() { for (size_t i = 0; i < distortionMaps.size(); i++) { distortionMaps[(int)i].release(); } for (size_t i = 0; i < originalDistortionMaps.size(); i++) { originalDistortionMaps[(int)i].release(); } distortionMaps.clear(); originalDistortionMaps.clear(); workCost.releaseOwnership().release(); inputsMapBuffer.releaseOwnership().release(); inputsMapOriginalBuffer.releaseOwnership().release(); cachedMappedFrames.clear(); return Status::OK(); } Status MergerMask::getMergerMasks(GPU::Buffer inputIndexPixelBuffer, std::vector& masksOrder) { const bool useBlendingOrder = mergerMaskConfig.useBlendingOrder(); const int width = (int)getDownSize((int)pano.getWidth()); const int height = (int)getDownSize((int)pano.getHeight()); const int camCount = (int)pano.numInputs(); GPU::UniqueBuffer inputDistortionBuffer; FAIL_RETURN(inputDistortionBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer nextDistortionBuffer; FAIL_RETURN(nextDistortionBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer bestDistortionBuffer; FAIL_RETURN(bestDistortionBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer inputNonOverlappingIndexBuffer; FAIL_RETURN(inputNonOverlappingIndexBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer nextNonOverlappingIndexBuffer; FAIL_RETURN(nextNonOverlappingIndexBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer bestNonOverlappingIndexBuffer; FAIL_RETURN(bestNonOverlappingIndexBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer inputIndexBuffer; FAIL_RETURN(inputIndexBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer nextInputIndexBuffer; FAIL_RETURN(nextInputIndexBuffer.alloc(width * height, "Merger Mask")); GPU::Stream stream = GPU::Stream::getDefault(); std::vector bestCamOrder; float bestFinalCost = -1; std::vector bestWidths; std::vector normalizedMaxOverlappingWidths; if (mergerMaskConfig.getMaxOverlappingWidth() < -1) { normalizedMaxOverlappingWidths.push_back(-1); for (int i = 100; i <= 200; i += 50) { normalizedMaxOverlappingWidths.push_back(i); } } else { normalizedMaxOverlappingWidths.push_back(mergerMaskConfig.getMaxOverlappingWidth()); } // Try it heuristically // For the first image, just try all of them for (int i0 = 0; i0 < camCount; i0++) { // Do put all image into frame std::vector processedCam(camCount, false); std::vector camOrder; std::vector widths; widths.push_back(-1); float finalCost = 0; processedCam[i0] = true; camOrder.push_back(i0); // Initialize the first map FAIL_RETURN(initializeMasks(make_int2(width, height), i0, inputNonOverlappingIndexBuffer.borrow(), inputDistortionBuffer.borrow(), stream)); #ifdef MERGERMASK_GET_MERGERMASKS { std::stringstream ss; ss.str(""); ss << "trial-distortion-" << i0 << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), inputDistortionBuffer.borrow().as_const(), width, height); } #endif for (int t = 1; t < camCount; t++) { FAIL_RETURN(progress.add("coarseMask_lowRes", "Optimizing for the coarse masks")); // Find camera at position [t] float bestCost = 0; int bestNextCam = -1; std::vector bestNextWidths; // Check whether there is at least one overlapping bool atLeastOneOverlapping = false; for (size_t i = 0; i < camOrder.size(); i++) { for (int j = 0; j < camCount; j++) if (!processedCam[j] && isOverlapping[j][camOrder[i]]) { atLeastOneOverlapping = true; break; } } for (int iNext = 0; iNext < camCount; iNext++) if (!processedCam[iNext]) { if (!useBlendingOrder) { iNext = t; atLeastOneOverlapping = false; } if (atLeastOneOverlapping) { bool isOverlapped = false; for (size_t i = 0; i < camOrder.size(); i++) { if (isOverlapping[iNext][camOrder[i]]) { isOverlapped = true; break; } } if (!isOverlapped) { continue; } } for (size_t k = 0; k < normalizedMaxOverlappingWidths.size(); k++) { std::vector trialWidths = widths; trialWidths.push_back(normalizedMaxOverlappingWidths[k]); std::vector allCam = camOrder; allCam.push_back(iNext); // Compute the cost of current configuration float cost = 100000000; FAIL_RETURN(getBlendingCost(trialWidths, make_int2(width, height), allCam, inputDistortionBuffer.borrow_const(), inputNonOverlappingIndexBuffer.borrow_const(), nextDistortionBuffer.borrow(), nextNonOverlappingIndexBuffer.borrow(), nextInputIndexBuffer.borrow(), cost)); if (cost < bestCost || bestNextCam == -1) { bestCost = cost; bestNextCam = iNext; bestNextWidths = trialWidths; FAIL_RETURN(GPU::memcpyBlocking(bestNonOverlappingIndexBuffer.borrow(), nextNonOverlappingIndexBuffer.borrow_const(), width * height * sizeof(uint32_t))); FAIL_RETURN(GPU::memcpyBlocking(bestDistortionBuffer.borrow(), nextDistortionBuffer.borrow_const(), width * height * sizeof(unsigned char))); } } if (!useBlendingOrder) { break; } } // Perform the greedy step here // From all the trial in this step, pick the one that have the smallest cost and use it for the next iteration if (bestNextCam >= 0) { camOrder.push_back(bestNextCam); widths = bestNextWidths; finalCost = bestCost; processedCam[bestNextCam] = true; FAIL_RETURN(GPU::memcpyBlocking(inputNonOverlappingIndexBuffer.borrow(), bestNonOverlappingIndexBuffer.borrow_const(), width * height * sizeof(uint32_t))); FAIL_RETURN(GPU::memcpyBlocking(inputDistortionBuffer.borrow(), bestDistortionBuffer.borrow_const(), width * height * sizeof(unsigned char))); } else { finalCost = -1; break; } } // Find the best results from all initial camera if (camOrder.size() == (size_t)camCount) { if ((finalCost > 0) && (finalCost < bestFinalCost || bestFinalCost < 0)) { bestFinalCost = finalCost; bestCamOrder = camOrder; bestWidths = widths; } } if (!useBlendingOrder) { break; } } // Found the best order, now re-run the process in full resolution to generate the final map masksOrder = bestCamOrder; FAIL_RETURN(getOriginalMaskFromOrder(bestWidths, masksOrder, inputIndexPixelBuffer)); if (mergerMaskConfig.useSeam()) { // Release all that are not needed FAIL_RETURN(releaseMergerMaskMemory()); FAIL_RETURN(performSeamOptimization(masksOrder, inputIndexPixelBuffer)); } return Status::OK(); } Status MergerMask::getOriginalMaskFromOrder(const std::vector& normalizedOverlappingWidths, const std::vector& maskOrder, GPU::Buffer inputIndexPixelBuffer) { const int width = (int)pano.getWidth(); const int height = (int)pano.getHeight(); GPU::Stream stream = GPU::Stream::getDefault(); GPU::UniqueBuffer inputDistortionBuffer; FAIL_RETURN(inputDistortionBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer nextDistortionBuffer; FAIL_RETURN(nextDistortionBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer inputNonOverlappingIndexBuffer; FAIL_RETURN(inputNonOverlappingIndexBuffer.alloc(width * height, "Merger Mask")); GPU::UniqueBuffer nextNonOverlappingIndexBuffer; FAIL_RETURN(nextNonOverlappingIndexBuffer.alloc(width * height, "Merger Mask")); const int2 size = make_int2(width, height); FAIL_RETURN(initializeMasks(size, (int)maskOrder[0], inputNonOverlappingIndexBuffer.borrow(), inputDistortionBuffer.borrow(), stream, true)); for (size_t i = 1; i < maskOrder.size(); i++) { FAIL_RETURN(updateInputIndexByDistortionMap( (int)maskOrder[i], size, inputNonOverlappingIndexBuffer.borrow_const(), inputDistortionBuffer.borrow_const(), nextNonOverlappingIndexBuffer.borrow(), nextDistortionBuffer.borrow(), stream, true)); FAIL_RETURN(GPU::memcpyBlocking(inputNonOverlappingIndexBuffer.borrow(), nextNonOverlappingIndexBuffer.borrow_const(), width * height * sizeof(uint32_t))); FAIL_RETURN(GPU::memcpyBlocking(inputDistortionBuffer.borrow(), nextDistortionBuffer.borrow_const(), width * height * sizeof(unsigned char))); } #ifdef MERGERMASK_GET_MERGERMASKS_FULL { std::stringstream ss; ss.str(""); ss << "final-non-overlapping-result-"; for (int i = 0; i < normalizedOverlappingWidths.size(); i++) ss << normalizedOverlappingWidths[i] << " "; ss << "cam"; for (int i = 0; i < maskOrder.size(); i++) { ss << maskOrder[i] << "-"; } ss << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputNonOverlappingIndexBuffer.borrow_const(), size.x, size.y); } { std::stringstream ss; ss.str(""); ss << "final-distortion-map-"; for (int i = 0; i < normalizedOverlappingWidths.size(); i++) ss << normalizedOverlappingWidths[i] << " "; ss << "cam"; for (int i = 0; i < maskOrder.size(); i++) { ss << maskOrder[i] << "-"; } ss << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), inputDistortionBuffer.borrow_const(), size.x, size.y); } #endif FAIL_RETURN(updateOverlappingMap(normalizedOverlappingWidths, size, maskOrder, nextNonOverlappingIndexBuffer.borrow_const(), inputIndexPixelBuffer, stream, true)); #ifdef MERGERMASK_GET_MERGERMASKS_FULL { std::stringstream ss; ss.str(""); ss << "final-full-result-"; for (int i = 0; i < normalizedOverlappingWidths.size(); i++) ss << normalizedOverlappingWidths[i] << " "; ss << "cam"; for (int i = 0; i < maskOrder.size(); i++) { ss << maskOrder[i] << "-"; } ss << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputIndexPixelBuffer.as_const(), size.x, size.y); } { std::stringstream ss; ss.str(""); ss << "input-full-index.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputsMapOriginalBuffer.borrow_const(), size.x, size.y); } #endif FAIL_RETURN(progress.add("coarseMask_fulRes", "Coarse masks done")); return Status::OK(); } Status MergerMask::performSeamOptimization(const std::vector& maskOrder, GPU::Buffer inputIndexBuffer) { // Copy input index buffer to a temporal buffer GPU::UniqueBuffer tmpInputIndexBuffer; FAIL_RETURN(tmpInputIndexBuffer.alloc(inputIndexBuffer.numElements(), "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(tmpInputIndexBuffer.borrow(), inputIndexBuffer)); const int width = (int)pano.getWidth(); const int height = (int)pano.getHeight(); const int2 size = make_int2(width, height); #ifdef MERGERMASK_SEAM_MASK { std::stringstream ss; ss.str(""); ss << "before-seam-mask" << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputIndexBuffer.as_const(), size.x, size.y); } #endif GPU::UniqueBuffer indexBuffer0; GPU::UniqueBuffer indexBuffer1; FAIL_RETURN(indexBuffer0.alloc(inputIndexBuffer.numElements(), "Merger Mask")); FAIL_RETURN(indexBuffer1.alloc(inputIndexBuffer.numElements(), "Merger Mask")); GPU::Stream stream = GPU::Stream::getDefault(); std::vector frames = {mergerMaskConfig.getFrames()[0]}; // Retrieve the first image from the selected set of frames FrameBuffers frameBuffers; FAIL_MSG(retrieveImages(frames, frameBuffers, pano), "Run out of memory while trying to read input frames for seam optimization"); int totalSize = 0; for (auto mapping : imageMappings) { totalSize += (int)mapping.second->getOutputRect(Core::EQUIRECTANGULAR).getArea(); } auto potMappedFrames = GPU::uniqueBuffer(totalSize, "Merger Mask"); FAIL_RETURN(potMappedFrames.status()); // Transforms image from the input to the output space, store them in the mappedFrames int offset = 0; for (auto transformIterator : transforms) { const videoreaderid_t i = transformIterator.first; Core::Transform* transform = transformIterator.second; Core::ImageMapping* mapping = imageMappings[i]; const Core::InputDefinition& im = pano.getInput(i); const std::pair inPair = std::make_pair(i, frames[0]); FrameBuffers::const_iterator it = frameBuffers.find(inPair); if (it != frameBuffers.end()) { const GPU::Buffer inputBuffer = it->second.borrow_const(); auto surface = Core::OffscreenAllocator::createSourceSurface(im.getWidth(), im.getHeight(), "Merger Mask"); FAIL_RETURN(GPU::memcpyAsync(*surface->pimpl->surface, inputBuffer, stream)); auto potDevOut = GPU::uniqueBuffer(mapping->getOutputRect(Core::EQUIRECTANGULAR).getArea(), "Merger Mask"); FAIL_RETURN(potDevOut.status()); // Map the input to the output space FAIL_RETURN(transform->mapBuffer(frames[0], potDevOut.borrow(), *surface->pimpl->surface, nullptr, mapping->getOutputRect(Core::EQUIRECTANGULAR), pano, im, *surface->pimpl->surface, stream)); // Down-sampling the image to the appropriate level int devWidth = (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getWidth(); int devHeight = (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getHeight(); #ifdef MERGERMASK_SEAM_INPUT { std::stringstream ss; ss << "oriwarped-" << i << ".png"; Debug::dumpRGB210DeviceBuffer(ss.str().c_str(), devOut.borrow_const(), devWidth, devHeight); } #endif // Convert from rgb to lab GPU::Buffer subBuffer = potMappedFrames.borrow().createSubBuffer(offset); FAIL_RETURN(Util::ImageProcessingGPU::convertRGB210ToRGBandGradient(make_int2(devWidth, devHeight), potDevOut.borrow_const(), subBuffer, stream)); #ifdef MERGERMASK_SEAM_INPUT { std::stringstream ss; ss << "warped-rgb-" << i << ".png"; Debug::dumpRGBADeviceBuffer(ss.str().c_str(), devOut.borrow_const(), devWidth, devHeight); } #endif FAIL_RETURN(Util::ImageProcessingGPU::convertRGBandGradientToNormalizedLABandGradient( make_int2(devWidth, devHeight), subBuffer, stream)); #ifdef MERGERMASK_SEAM_INPUT { std::stringstream ss; ss << "warped-" << i << ".png"; Debug::dumpRGB210DeviceBuffer(ss.str().c_str(), subBuffer.as_const(), devWidth, devHeight); } { std::stringstream ss; ss << "gradient-" << i << ".png"; GPU::UniqueBuffer gradientBuffer; FAIL_RETURN(gradientBuffer.alloc(devWidth * devHeight, "Merger Mask")); FAIL_RETURN( extractChannel(make_int2(devWidth, devHeight), subBuffer.as_const(), 3, gradientBuffer.borrow(), stream)); Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), gradientBuffer.borrow_const(), devWidth, devHeight); } #endif } offset += (int)mapping->getOutputRect(Core::EQUIRECTANGULAR).getArea(); } FAIL_RETURN(progress.add("seam_setup", "Setup seam computation")); GPU::UniqueBuffer input0Buffer; GPU::UniqueBuffer input1Buffer; FAIL_RETURN(input0Buffer.alloc(size.x * size.y, "Merger Mask")); FAIL_RETURN(input1Buffer.alloc(size.x * size.y, "Merger Mask")); const int seamFeatheringSize = mergerMaskConfig.getSeamFeatheringSize(); Core::Rect rect = Core::Rect::fromInclusiveTopLeftBottomRight(0, 0, size.y - 1, size.x - 1); uint32_t prevIds = 1 << maskOrder[0]; for (size_t i = 1; i < maskOrder.size(); i++) { // Optimize for the seam using the first image only, more will likely cause more harm than good const uint32_t currentId = 1 << maskOrder[i]; std::vector prevCams; for (size_t j = 0; j < i; j++) { prevCams.push_back((unsigned char)maskOrder[j]); } GPU::UniqueBuffer prevCamsBuffer; FAIL_RETURN(prevCamsBuffer.alloc(prevCams.size(), "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(prevCamsBuffer.borrow(), &prevCams[0])); std::vector currentCam = {(unsigned char)maskOrder[i]}; GPU::UniqueBuffer currentCamBuffer; FAIL_RETURN(currentCamBuffer.alloc(currentCam.size(), "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(currentCamBuffer.borrow(), ¤tCam[0])); FAIL_RETURN(lookupColorBufferFromInputIndex( (int)rect.getWidth(), prevCamsBuffer.borrow_const(), originalMappedRectOffset.borrow_const(), originalMappedRectSize.borrow_const(), originalMappedOffset.borrow_const(), potMappedFrames.borrow_const(), size, inputIndexBuffer, input0Buffer.borrow(), stream)); FAIL_RETURN(lookupColorBufferFromInputIndex( (int)rect.getWidth(), currentCamBuffer.borrow_const(), originalMappedRectOffset.borrow_const(), originalMappedRectSize.borrow_const(), originalMappedOffset.borrow_const(), potMappedFrames.borrow_const(), size, inputIndexBuffer, input1Buffer.borrow(), stream)); auto potSeamFinder = SeamFinder::create(2, 1, (int)pano.getWidth(), rect, input0Buffer.borrow_const(), rect, input1Buffer.borrow_const(), stream, workMask, work1, work2); FAIL_MSG(potSeamFinder.status(), "Failed to initialize seam computation"); std::unique_ptr seamFinder(potSeamFinder.release()); FAIL_MSG(seamFinder->findSeam(), "Failed to find seam"); #ifdef MERGERMASK_SEAM_OPTIMIZATION // Convert input buffers into rgb for dumping { VideoStitch::MergerMask::MergerMask::convertNormalizedLABandGradientToRGBA(size, input0Buffer.borrow(), stream); VideoStitch::MergerMask::MergerMask::convertNormalizedLABandGradientToRGBA(size, input1Buffer.borrow(), stream); } { std::stringstream ss; ss << "ori-" << i << "-0.png"; Debug::dumpRGBADeviceBuffer(ss.str().c_str(), input0Buffer.borrow_const(), size.x, size.y); } { std::stringstream ss; ss << "ori-" << i << "-1.png"; work1 Debug::dumpRGBADeviceBuffer(ss.str().c_str(), input1Buffer.borrow_const(), size.x, size.y); } { std::stringstream ss; ss << "output-image" << i << "-with-seam.png"; seamFinder.saveSeamImage(ss.str(), 2); } { std::stringstream ss; ss << "output-map-" << i << "-0.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), seamFinder.getOutputsMap().as_const(), size.x, size.y); } #endif // If there is no overlapping, just continue the process if (seamFinder->seamFound()) { FAIL_RETURN( updateIndexMaskAfterSeam(prevIds, currentId, size, seamFinder->getOutputsMap(), inputIndexBuffer, stream)); #ifdef MERGERMASK_SEAM_OPTIMIZATION { { std::stringstream ss; ss << "after-seam-" << i << "-index-map.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputIndexBuffer.as_const(), size.x, size.y); } } #endif } prevIds += currentId; FAIL_RETURN(progress.add("seam_fulRes", "Perform seam optimization")); } #ifdef MERGERMASK_SEAM_MASK { std::stringstream ss; ss.str(""); ss << "final-seam-mask-no-feathering" << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputIndexBuffer.as_const(), size.x, size.y); } #endif if (seamFeatheringSize > 0) { // Now perform feathering around borders GPU::UniqueBuffer seamWithFeatheringBuffer; FAIL_RETURN(seamWithFeatheringBuffer.alloc(size.x * size.y, "Merger Mask")); FAIL_RETURN(GPU::memcpyBlocking(seamWithFeatheringBuffer.borrow(), inputIndexBuffer)); for (size_t i = 0; i < maskOrder.size(); i++) { // Compute the distance map for the current camera, make sure that it will have overlapping // area that is larger than a predefined threshold FAIL_RETURN(Core::computeEuclideanDistanceMap(workMask.borrow(), inputIndexBuffer, work1.borrow(), work2.borrow(), size.x, size.y, 1 << maskOrder[i], true, (float)seamFeatheringSize, 1, stream)); #ifdef MERGERMASK_SEAM_OPTIMIZATION { std::stringstream ss; ss.str(""); ss << "work-mask-" << i << ".png"; Debug::dumpMonochromeDeviceBuffer(ss.str().c_str(), workMask.borrow_const(), size.x, size.y); } #endif FAIL_RETURN(updateSeamMask((int)maskOrder[i], size, tmpInputIndexBuffer.borrow_const(), workMask.borrow_const(), seamWithFeatheringBuffer.borrow(), stream)); #ifdef MERGERMASK_SEAM_OPTIMIZATION { std::stringstream ss; ss.str(""); ss << "index-buffer-final-" << i << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), seamWithFeatheringBuffer.borrow_const(), size.x, size.y); } #endif } FAIL_RETURN(GPU::memcpyBlocking(inputIndexBuffer, seamWithFeatheringBuffer.borrow_const())); #ifdef MERGERMASK_SEAM_MASK { std::stringstream ss; ss.str(""); ss << "final-seam-mask.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputIndexBuffer.as_const(), size.x, size.y); } #endif } #ifdef MERGERMASK_SEAM_MASK { std::stringstream ss; ss.str(""); ss << "input-original-mask.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), inputsMapOriginalBuffer.borrow_const(), size.x, size.y); } #endif FAIL_RETURN(progress.add("seam_updateMask", "Update seam mask")); return Status::OK(); } Status MergerMask::transformMasksFromOutputToEncodedInputSpace( const Core::PanoDefinition& pano, const std::map& readers, const GPU::Buffer& maskOutputSpaces, std::map& maskInputSpaces) { // Prepare transform std::map> transforms; for (auto reader : readers) { const Core::InputDefinition& inputDef = pano.getInput(reader.second->id); Core::Transform* transform = Core::Transform::create(inputDef); if (!transform) { return {Origin::Stitcher, ErrType::SetupFailure, "Cannot create v1 transformation for input " + std::to_string(reader.second->id)}; } transforms[reader.second->id] = std::unique_ptr(transform); } GPU::Stream stream = GPU::Stream::getDefault(); Core::Rect outputBounds = Core::Rect::fromInclusiveTopLeftBottomRight(0, 0, pano.getHeight() - 1, pano.getWidth() - 1); const int inputScaleFactor = pano.getBlendingMaskInputScaleFactor(); maskInputSpaces.clear(); #ifdef MERGERMASK_INPUT_SPACE { std::stringstream ss; ss << "inputsMap-result.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), maskOutputSpaces.as_const(), outputBounds.getWidth(), outputBounds.getHeight()); } #endif for (auto& transform : transforms) { const videoreaderid_t imId = transform.first; // Find the precomputed coordinate to map pixels from the output to the input space const Core::InputDefinition& inputDef = pano.getInput(imId); GPU::UniqueBuffer devCoord; FAIL_RETURN(devCoord.alloc(inputDef.getWidth() * inputDef.getHeight() * inputScaleFactor * inputScaleFactor, "Merger Mask")); FAIL_RETURN(transform.second->mapCoordInput(0, inputScaleFactor, devCoord.borrow(), pano, inputDef, stream)); #ifdef MERGERMASK_INPUT_SPACE { std::stringstream ss; ss << "inputsMap-inputcoord-" << imId << ".png"; GPU::UniqueBuffer dst; FAIL_RETURN( dst.alloc(inputDef.getWidth() * inputDef.getHeight() * inputScaleFactor * inputScaleFactor, "Merger Mask")); Util::OpticalFlow::convertFlowToRGBA( make_int2((int)inputDef.getWidth() * inputScaleFactor, (int)inputDef.getHeight() * inputScaleFactor), devCoord.borrow_const(), make_int2((int)pano.getWidth(), (int)pano.getHeight()), dst.borrow(), stream); Debug::dumpRGBADeviceBuffer(ss.str().c_str(), dst.borrow_const(), inputDef.getWidth() * inputScaleFactor, inputDef.getHeight() * inputScaleFactor); } #endif GPU::UniqueBuffer inputMask; FAIL_RETURN(inputMask.alloc(inputDef.getWidth() * inputDef.getHeight() * inputScaleFactor * inputScaleFactor, "Merger Mask")); GPU::UniqueBuffer tmp; FAIL_RETURN( tmp.alloc(inputDef.getWidth() * inputDef.getHeight() * inputScaleFactor * inputScaleFactor, "Merger Mask")); FAIL_RETURN(GPU::memsetToZeroBlocking(inputMask.borrow(), inputMask.borrow().byteSize())); // Accumulate the pixels FAIL_RETURN(getInputMaskFromOutputIndices( imId, inputScaleFactor, make_int2((int)outputBounds.getWidth(), (int)outputBounds.getHeight()), maskOutputSpaces, make_int2((int)inputDef.getWidth(), (int)inputDef.getHeight()), devCoord.borrow(), inputMask.borrow(), stream)); // Perform gaussian filter on the mask to remove spiky edges. // This step will improve polyline compression rate dramatically. FAIL_RETURN(Image::gaussianBlur2D(inputMask.borrow(), tmp.borrow(), (int)inputDef.getWidth() * inputScaleFactor, (int)inputDef.getHeight() * inputScaleFactor, 1, 6, 0, 16, stream)); FAIL_RETURN(Util::ImageProcessingGPU::thresholdingBuffer( make_int2((int)inputDef.getWidth() * inputScaleFactor, (int)inputDef.getHeight() * inputScaleFactor), 0, 0, 255, inputMask.borrow(), stream)); std::vector masks(inputDef.getWidth() * inputDef.getHeight() * inputScaleFactor * inputScaleFactor); FAIL_RETURN( GPU::memcpyBlocking(&masks[0], inputMask.borrow_const(), masks.size() * sizeof(unsigned char))); #ifdef MERGERMASK_INPUT_SPACE { std::stringstream ss; ss << "inputsMap-mask-" << imId << ".png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), masks, inputDef.getWidth() * inputScaleFactor, inputDef.getHeight() * inputScaleFactor); } #endif std::string maskInputSpace; FAIL_RETURN(Util::Compression::polylineEncodeBinaryMask((int)inputDef.getWidth() * inputScaleFactor, (int)inputDef.getHeight() * inputScaleFactor, masks, maskInputSpace)); maskInputSpaces[imId] = maskInputSpace; } #ifdef MERGERMASK_INPUT_SPACE { GPU::UniqueBuffer reconstructMaskOutput; FAIL_RETURN(reconstructMaskOutput.alloc(pano.getWidth() * pano.getHeight(), "Merger Mask")); FAIL_RETURN( transformMasksFromEncodedInputToOutputSpace(pano, readers, maskInputSpaces, reconstructMaskOutput.borrow())); std::stringstream ss; ss << "inputsMap-reconstruction.png"; Debug::dumpRGBAIndexDeviceBuffer(ss.str().c_str(), reconstructMaskOutput.borrow_const(), pano.getWidth(), pano.getHeight()); } #endif return Status::OK(); } Status MergerMask::transformMasksFromEncodedInputToOutputSpace( const Core::PanoDefinition& pano, const std::map& readers, const std::map& maskInputSpaces, GPU::Buffer maskOutputSpaces) { // Prepare transform std::map> transforms; for (auto reader : readers) { const Core::InputDefinition& inputDef = pano.getInput(reader.second->id); Core::Transform* transform = Core::Transform::create(inputDef); if (!transform) { return {Origin::Stitcher, ErrType::SetupFailure, "Cannot create v1 transformation for input " + std::to_string(reader.second->id)}; } transforms[reader.second->id] = std::unique_ptr(transform); } const int inputScaleFactor = pano.getBlendingMaskInputScaleFactor(); GPU::Stream stream = GPU::Stream::getDefault(); Core::Rect outputBounds = Core::Rect::fromInclusiveTopLeftBottomRight(0, 0, pano.getHeight() - 1, pano.getWidth() - 1); auto tex = Core::OffscreenAllocator::createCoordSurface(outputBounds.getWidth(), outputBounds.getHeight(), "Merger Mask"); if (!tex.ok()) { return tex.status(); } std::unique_ptr devCoord(tex.release()); FAIL_RETURN(GPU::memsetToZeroBlocking(maskOutputSpaces, maskOutputSpaces.byteSize())); for (auto& transform : transforms) { const videoreaderid_t imId = transform.first; const Core::InputDefinition& inputDef = pano.getInput(imId); FAIL_RETURN( transform.second->mapBufferCoord(0, *devCoord.get()->pimpl->surface, outputBounds, pano, inputDef, stream)); GPU::UniqueBuffer inputMask; FAIL_RETURN(inputMask.alloc(inputDef.getWidth() * inputDef.getHeight() * inputScaleFactor * inputScaleFactor, "Merger Mask")); std::vector masks; auto it = maskInputSpaces.find(imId); if (it == maskInputSpaces.end()) { return {Origin::BlendingMaskAlgorithm, ErrType::ImplementationError, "Data was not found"}; } FAIL_RETURN(Util::Compression::polylineDecodeBinaryMask( (int)inputDef.getWidth() * inputScaleFactor, (int)inputDef.getHeight() * inputScaleFactor, it->second, masks)); FAIL_RETURN( GPU::memcpyBlocking(inputMask.borrow(), &masks[0], masks.size() * sizeof(unsigned char))); FAIL_RETURN(getOutputIndicesFromInputMask( imId, inputScaleFactor, make_int2((int)inputDef.getWidth(), (int)inputDef.getHeight()), inputMask.borrow_const(), make_int2((int)outputBounds.getWidth(), (int)outputBounds.getHeight()), *devCoord.get()->pimpl->surface, maskOutputSpaces, stream)); } return Status::OK(); } } // namespace MergerMask } // namespace VideoStitch