1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#include "parallax/mergerPair.hpp"
#include "backend/cuda/deviceBuffer.hpp"
#include "backend/cuda/deviceStream.hpp"
#include "cuda/error.hpp"
#include "cuda/util.hpp"
#include "core/rect.hpp"
#include "parallax/flowConstant.hpp"
#include <string.h>
namespace VideoStitch {
namespace Core {
#define TILE_X 16
#define TILE_Y 16
__global__ void pairMappingMaskKernel(const int wrapWidth, const int input0OffsetX, const int input0OffsetY,
const int input0Width, const int input0Height, const float2* input0CoordBuffer,
const int input1OffsetX, const int input1OffsetY, const int input1Width,
const int input1Height, const float2* input1CoordBuffer, const int outputOffsetX,
const int outputOffsetY, const int outputWidth, const int outputHeight,
uint32_t* outputMask) {
// calculate normalized texture coordinates
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < outputWidth && y < outputHeight) {
outputMask[y * outputWidth + x] = 0;
// Put the first mask in
const int input0X = (x + outputOffsetX - input0OffsetX + wrapWidth) % wrapWidth;
const int input0Y = (y + outputOffsetY - input0OffsetY);
if (input0X >= 0 && input0X < input0Width && input0Y >= 0 && input0Y < input0Height) {
float2 coord0 = input0CoordBuffer[input0Y * input0Width + input0X];
if (coord0.x != INVALID_FLOW_VALUE && coord0.y != INVALID_FLOW_VALUE) {
outputMask[y * outputWidth + x] += 1 << 1;
}
}
// Put the second mask in
const int input1X = (x + outputOffsetX - input1OffsetX + wrapWidth) % wrapWidth;
const int input1Y = (y + outputOffsetY - input1OffsetY);
if (input1X >= 0 && input1X < input1Width && input1Y >= 0 && input1Y < input1Height) {
float2 coord1 = input1CoordBuffer[input1Y * input1Width + input1X];
if (coord1.x != INVALID_FLOW_VALUE && coord1.y != INVALID_FLOW_VALUE) {
outputMask[y * outputWidth + x] += 1 << 2;
}
}
}
}
Status MergerPair::setupPairMappingMask(GPU::Buffer<uint32_t> devMask, GPU::Stream gpuStream) const {
const Rect rect0 = getBoundingPanoRect(0);
const Rect rect1 = getBoundingPanoRect(1);
Rect iRect = getBoundingPanosIRect();
dim3 dimBlock(TILE_X, TILE_Y, 1);
dim3 dimGrid((unsigned)Cuda::ceilDiv(iRect.getWidth(), dimBlock.x),
(unsigned)Cuda::ceilDiv(iRect.getHeight(), dimBlock.y), 1);
cudaStream_t stream = gpuStream.get();
pairMappingMaskKernel<<<dimGrid, dimBlock, 0, stream>>>(
(int)wrapWidth, (int)rect0.left(), (int)rect0.top(), (int)rect0.getWidth(), (int)rect0.getHeight(),
panoToInputSpaceCoordMapping0.borrow_const().get(), (int)rect1.left(), (int)rect1.top(), (int)rect1.getWidth(),
(int)rect1.getHeight(), panoToInputSpaceCoordMapping1.borrow_const().get(), (int)iRect.left(), (int)iRect.top(),
(int)iRect.getWidth(), (int)iRect.getHeight(), devMask.get());
return CUDA_STATUS;
}
} // namespace Core
} // namespace VideoStitch