// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm #include "gpu/core1/mergerKernel.hpp" #include "../deviceBuffer.hpp" #include "../deviceStream.hpp" #include "backend/common/imageOps.hpp" #include "cuda/error.hpp" #include "cuda/util.hpp" #include "libvideostitch/panoDef.hpp" #include "core1/imageMapping.hpp" namespace VideoStitch { namespace { // CUDA/OpenCL shared implementation #include "../gpuKernelDef.h" #include <backend/common/core1/mergerKernel.gpu> } // namespace namespace Core { Status countInputs(TextureTarget t, const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, const ImageMapping& fromIm, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); if (fromIm.getOutputRect(t).empty()) { return Status::OK(); } const dim3 dimBlock(16, 16, 1); const dim3 dimGrid((unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getWidth(), dimBlock.x), (unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getHeight(), dimBlock.y), 1); if (fromIm.getOutputRect(t).right() >= (int64_t)pano.getWidth()) { countInputsKernelWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } else { countInputsKernelNoWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } return CUDA_STATUS; } Status colorMap(const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); const int64_t size = pano.getWidth() * pano.getHeight(); dim3 dimBlock(512); dim3 dimGrid(Cuda::compute2DGridForFlatBuffer(size, 512)); colormapKernel<<<dimGrid, dimBlock, 0, stream>>>(pbo.get(), (unsigned)pano.getWidth(), (unsigned)size); return CUDA_ERROR(cudaStreamSynchronize(stream)); } Status stitchingError(TextureTarget t, const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, const ImageMapping& fromIm, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); if (fromIm.getOutputRect(t).empty()) { return Status::OK(); } const dim3 dimBlock(16, 16, 1); const dim3 dimGrid((unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getWidth(), dimBlock.x), (unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getHeight(), dimBlock.y), 1); if (fromIm.getOutputRect(t).right() >= (int64_t)pano.getWidth()) { stitchingErrorKernelWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } else { stitchingErrorKernelNoWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } return CUDA_STATUS; } Status exposureDiffRGB(TextureTarget t, const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, const ImageMapping& fromIm, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); if (fromIm.getOutputRect(t).empty()) { return Status::OK(); } const dim3 dimBlock(16, 16, 1); const dim3 dimGrid((unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getWidth(), dimBlock.x), (unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getHeight(), dimBlock.y), 1); if (fromIm.getOutputRect(t).right() >= (int64_t)pano.getWidth()) { exposureErrorRGBKernelWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } else { exposureErrorRGBKernelNoWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } return CUDA_STATUS; } Status amplitude(const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); const int64_t size = pano.getWidth() * pano.getHeight(); dim3 dimBlock(512); dim3 dimGrid(Cuda::compute2DGridForFlatBuffer(size, 512)); amplitudeKernel<<<dimGrid, dimBlock, 0, stream>>>(pbo.get(), 0, (3 * 256 * 256), (unsigned)pano.getWidth(), (unsigned)size); return CUDA_ERROR(cudaStreamSynchronize(stream)); } Status disregardNoDiffArea(const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); const int64_t size = pano.getWidth() * pano.getHeight(); dim3 dimBlock(512); dim3 dimGrid(Cuda::compute2DGridForFlatBuffer(size, 512)); maskOutSingleInput<<<dimGrid, dimBlock, 0, stream>>>(pbo.get(), (unsigned)pano.getWidth(), (unsigned)size); return CUDA_ERROR(cudaStreamSynchronize(stream)); } Status checkerMerge(TextureTarget t, const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, const ImageMapping& fromIm, unsigned checkerSize, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); if (fromIm.getOutputRect(t).empty()) { return Status::OK(); } const dim3 dimBlock(16, 16, 1); const dim3 dimGrid((unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getWidth(), dimBlock.x), (unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getHeight(), dimBlock.y), 1); if (fromIm.getOutputRect(t).right() >= (int64_t)pano.getWidth()) { checkerInsertKernelWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top(), checkerSize); } else { checkerInsertKernelNoWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top(), checkerSize); } return CUDA_STATUS; } Status noblend(TextureTarget t, const PanoDefinition& pano, GPU::Buffer<uint32_t> pbo, const ImageMapping& fromIm, GPU::Stream gpuStream) { cudaStream_t stream = gpuStream.get(); if (fromIm.getOutputRect(t).empty()) { return Status::OK(); } const dim3 dimBlock(16, 16, 1); const dim3 dimGrid((unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getWidth(), dimBlock.x), (unsigned)Cuda::ceilDiv(fromIm.getOutputRect(t).getHeight(), dimBlock.y), 1); if (fromIm.getOutputRect(t).right() >= (int64_t)pano.getWidth()) { noblendKernelWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } else { noblendKernelNoWrap<<<dimGrid, dimBlock, 0, stream>>>( pbo.get(), (unsigned)pano.getWidth(), (unsigned)pano.getHeight(), fromIm.getDeviceOutputBuffer(t).get(), (unsigned)fromIm.getOutputRect(t).getWidth(), (unsigned)fromIm.getOutputRect(t).getHeight(), (unsigned)fromIm.getOutputRect(t).left(), (unsigned)fromIm.getOutputRect(t).top()); } return CUDA_STATUS; } } // namespace Core } // namespace VideoStitch