1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#ifndef UNROLLED_GAUSSIAN_KERNEL_HPP_
#define UNROLLED_GAUSSIAN_KERNEL_HPP_
#include "backend/common/imageOps.hpp"
#include "backend/common/vectorOps.hpp"
namespace VideoStitch {
namespace Image {
inline __device__ void addToAccumulatorsWeightedRGBA(const int32_t *&argb, int32_t &tr, int32_t &tg, int32_t &tb,
int32_t &acc, int32_t weight) {
int32_t isSolid = (*argb++) * weight;
tr += isSolid * (*argb++);
tg += isSolid * (*argb++);
tb += isSolid * (*argb++);
acc += isSolid;
}
__device__ inline uint32_t unrolledGaussianKernel1(const int32_t *col) {
int32_t tr = 0;
int32_t tg = 0;
int32_t tb = 0;
int32_t acc = 0;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
int32_t isSolid = *col;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 2);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
return RGB210::pack(tr / acc, tr / acc, tr / acc, isSolid);
}
__device__ inline uint32_t unrolledGaussianKernel2(const int32_t *col) {
int32_t tr = 0;
int32_t tg = 0;
int32_t tb = 0;
int32_t acc = 0;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 4);
int32_t isSolid = *col;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 6);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 4);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
return RGB210::pack(tr / acc, tr / acc, tr / acc, isSolid);
}
__device__ inline uint32_t unrolledGaussianKernel3(const int32_t *col) {
int32_t tr = 0;
int32_t tg = 0;
int32_t tb = 0;
int32_t acc = 0;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 6);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 15);
int32_t isSolid = *col;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 20);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 15);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 6);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
return RGB210::pack(tr / acc, tr / acc, tr / acc, isSolid);
}
__device__ inline uint32_t unrolledGaussianKernel4(const int32_t *col) {
int32_t tr = 0;
int32_t tg = 0;
int32_t tb = 0;
int32_t acc = 0;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 8);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 28);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 56);
int32_t isSolid = *col;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 70);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 56);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 28);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 8);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
return RGB210::pack(tr / acc, tr / acc, tr / acc, isSolid);
}
__device__ inline uint32_t unrolledGaussianKernel5(const int32_t *col) {
int32_t tr = 0;
int32_t tg = 0;
int32_t tb = 0;
int32_t acc = 0;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 10);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 45);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 120);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 210);
int32_t isSolid = *col;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 252);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 210);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 120);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 45);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 10);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
return RGB210::pack(tr / acc, tr / acc, tr / acc, isSolid);
}
__device__ inline uint32_t unrolledGaussianKernel6(const int32_t *col) {
int32_t tr = 0;
int32_t tg = 0;
int32_t tb = 0;
int32_t acc = 0;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 12);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 66);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 220);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 495);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 792);
int32_t isSolid = *col;
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 924);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 792);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 495);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 220);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 66);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 12);
addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
return RGB210::pack(tr / acc, tr / acc, tr / acc, isSolid);
}
} // namespace Image
} // namespace VideoStitch
#endif