1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#include <image/unpack.hpp>
#include "../deviceBuffer2D.hpp"
#include "../context.hpp"
#include "../kernel.hpp"
#include "../surface.hpp"
#include "gpu/memcpy.hpp"
#include "gpu/util.hpp"
#ifdef VS_OPENCL
namespace VideoStitch {
namespace Image {
namespace {
#include "unpack.xxd"
}
INDIRECT_REGISTER_OPENCL_PROGRAM(unpack, true);
Status unpackRGBA(GPU::Buffer2D& dst, const GPU::Buffer<const uint32_t>& src, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream stream) {
size_t src_origin[3] = {0, 0, 0};
size_t dst_origin[3] = {0, 0, 0};
size_t region[3] = {dst.getWidth(), dst.getHeight(), 1};
return CL_ERROR(clEnqueueCopyBufferRect(stream.get(), src.get(), dst.get(), src_origin, dst_origin, region, 0,
0, // src_row_pitch, src_slice_pitch
dst.getPitch(), 0, // dst_row_pitch, dst_slice_pitch
0, 0, nullptr));
}
Status unpackRGBA(GPU::Buffer2D& dst, const GPU::Surface& src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelRGBA))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), (unsigned)dst.getPitch(), src.get(), (unsigned)(width),
(unsigned)(height));
}
Status unpackRGB(GPU::Buffer2D& dst, const GPU::Buffer<const uint32_t>& src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D =
GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelRGB)).setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), (unsigned)dst.getPitch(), src.get().raw(), (unsigned)(width),
(unsigned)(height));
}
Status unpackRGB(GPU::Buffer2D& dst, const GPU::Surface& src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelRGBSource))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), (unsigned)dst.getPitch(), src.get(), (unsigned)(width),
(unsigned)(height));
}
Status unpackF32C1(GPU::Buffer2D& dst, const GPU::Buffer<const uint32_t>& src, std::size_t /* width */,
std::size_t /* height */, GPU::Stream stream) {
size_t src_origin[3] = {0, 0, 0};
size_t dst_origin[3] = {0, 0, 0};
size_t region[3] = {dst.getWidth(), dst.getHeight(), 1};
return CL_ERROR(clEnqueueCopyBufferRect(stream.get(), src.get(), dst.get(), src_origin, dst_origin, region, 0,
0, // src_row_pitch, src_slice_pitch
dst.getPitch(), 0, // dst_row_pitch, dst_slice_pitch
0, 0, nullptr));
}
Status unpackF32C1(GPU::Buffer2D& dst, const GPU::Surface& src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelF32C1))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), (unsigned)dst.getPitch(), src.get(), (unsigned)(width),
(unsigned)(height));
}
Status unpackGrayscale16(GPU::Buffer2D& /* dst */, const GPU::Buffer<const uint32_t>& /* input */, size_t /* width */,
size_t /* height */, GPU::Stream /* s */) {
// TODO_OPENCL_IMPL/
return {Origin::GPU, ErrType::UnsupportedAction,
"Color space conversion for Grayscale16 not implemented from buffer"};
}
Status unpackGrayscale16(GPU::Buffer2D& /* dst */, const GPU::Surface& /* surf */, size_t /* width */,
size_t /* height */, GPU::Stream /* s */) {
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for Grayscale16 not implemented in OpenCL"};
}
Status unpackDepth(GPU::Buffer2D& yDst, GPU::Buffer2D& uDst, GPU::Buffer2D& vDst,
const GPU::Buffer<const uint32_t>& src, std::size_t width, std::size_t height, GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelDepth))
.setup2D(stream, (unsigned)((width + 1) / 2), (unsigned)((height + 1) / 2));
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch(), uDst.get().raw(),
(unsigned)uDst.getPitch(), vDst.get().raw(), (unsigned)vDst.getPitch(),
(float*)src.get().raw(), (unsigned)width, (unsigned)height);
}
Status unpackDepth(GPU::Buffer2D& yDst, GPU::Buffer2D& uDst, GPU::Buffer2D& vDst, const GPU::Surface& src,
std::size_t width, std::size_t height, GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelDepthSource))
.setup2D(stream, (unsigned)((width + 1) / 2), (unsigned)((height + 1) / 2));
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch(), uDst.get().raw(),
(unsigned)uDst.getPitch(), vDst.get().raw(), (unsigned)vDst.getPitch(),
src.get(), (unsigned)width, (unsigned)height);
}
Status unpackYV12(GPU::Buffer2D& yDst, GPU::Buffer2D& uDst, GPU::Buffer2D& vDst,
const GPU::Buffer<const uint32_t>& array, size_t width, size_t height, GPU::Stream stream) {
// planar colorspace, 3 planes
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelYV12))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch(), uDst.get().raw(),
(unsigned)uDst.getPitch(), vDst.get().raw(), (unsigned)vDst.getPitch(),
array.get().raw(), (unsigned)(width), (unsigned)(height));
}
Status unpackYV12(GPU::Buffer2D& yDst, GPU::Buffer2D& uDst, GPU::Buffer2D& vDst, const GPU::Surface& array,
size_t width, size_t height, GPU::Stream stream) {
// planar colorspace, 3 planes
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelYV12Source))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch(), uDst.get().raw(),
(unsigned)uDst.getPitch(), vDst.get().raw(), (unsigned)vDst.getPitch(),
array.get(), (unsigned)(width), (unsigned)(height));
}
Status unpackNV12(GPU::Buffer2D& yDst, GPU::Buffer2D& uvDst, const GPU::Buffer<const uint32_t>& array, size_t width,
size_t height, GPU::Stream stream) {
// planar colorspace, 2 planes
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelNV12))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch(), uvDst.get().raw(),
(unsigned)uvDst.getPitch(), array.get().raw(), (unsigned)(width),
(unsigned)(height));
}
Status unpackNV12(GPU::Buffer2D& yDst, GPU::Buffer2D& uvDst, const GPU::Surface& array, size_t width, size_t height,
GPU::Stream stream) {
// planar colorspace, 2 planes
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelNV12Source))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch(), uvDst.get().raw(),
(unsigned)uvDst.getPitch(), array.get(), (unsigned)(width), (unsigned)(height));
}
Status unpackYUY2(GPU::Buffer2D&, const GPU::Buffer<const uint32_t>&, std::size_t, std::size_t, GPU::Stream) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for YUV422 not implemented"};
}
Status unpackYUY2(GPU::Buffer2D&, const GPU::Surface&, std::size_t, std::size_t, GPU::Stream) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for YUV422 not implemented"};
}
Status unpackUYVY(GPU::Buffer2D&, const GPU::Buffer<const uint32_t>&, std::size_t, std::size_t, GPU::Stream) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for YUV422 not implemented"};
}
Status unpackUYVY(GPU::Buffer2D&, const GPU::Surface&, std::size_t, std::size_t, GPU::Stream) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for YUV422 not implemented"};
}
Status unpackYUV422P10(GPU::Buffer2D& yDst, GPU::Buffer2D& uDst, GPU::Buffer2D& vDst,
const GPU::Buffer<const uint32_t>& src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackYUV422P10Kernel))
.setup2D(stream, (unsigned)((width + 1) / 2), (unsigned)height);
return kernel2D.enqueueWithKernelArgs(yDst.get().raw(), (unsigned)yDst.getPitch() / 2, uDst.get().raw(),
(unsigned)uDst.getPitch() / 2, vDst.get().raw(), (unsigned)vDst.getPitch() / 2,
src.get(), (unsigned)width, (unsigned)height);
}
Status unpackYUV422P10(GPU::Buffer2D&, GPU::Buffer2D&, GPU::Buffer2D&, const GPU::Surface&, std::size_t, std::size_t,
GPU::Stream) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for YUV422P10 not implemented"};
}
Status unpackGrayscale(GPU::Buffer2D& /*dst*/, const GPU::Buffer<const uint32_t>& /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for Grayscale not implemented"};
}
Status unpackGrayscale(GPU::Buffer2D& dst, const GPU::Surface& src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(unpackKernelGrayscaleSource))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), (unsigned)dst.getPitch(), src.get(), (unsigned)(width),
(unsigned)(height));
}
// --------------------------------------------------------------------------
Status convertRGBToRGBA(GPU::Surface& dst, GPU::Buffer<const unsigned char> src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(convertRGBToRGBAKernel))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), src, (unsigned)width, (unsigned)height);
}
Status convertRGB210ToRGBA(GPU::Surface& dst, GPU::Buffer<const uint32_t> src, std::size_t width, std::size_t height,
GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(convertRGB210ToRGBAKernel))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), src, (unsigned)width, (unsigned)height);
}
Status unpackYUV422P10(GPU::Buffer<unsigned char> dst, GPU::Buffer<const uint32_t> src, std::size_t width,
std::size_t height, GPU::Stream stream) {
assert(!(width & 1));
assert(!(height & 1));
std::string kernelName = KERNEL_STR(unpackYUV422P10Kernel);
auto kernel1D = GPU::Kernel::get(PROGRAM(unpack), kernelName).setup1D(stream, (unsigned)(width * height / 2));
return kernel1D.enqueueWithKernelArgs(dst.as<unsigned short>(), src, (unsigned)width, (unsigned)height);
}
Status unpackGrayscale(GPU::Buffer<uint32_t> /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion for Grayscale not implemented"};
// const dim3 dimBlock2D(16, 16, 1);
// const dim3 dimGrid2D((unsigned)Cuda::ceilDiv(width, dimBlock2D.x), (unsigned)Cuda::ceilDiv(height, dimBlock2D.y),
// 1); unpackKernelGrayscale <<< dimGrid2D, dimBlock2D, 0, stream.get() >>>(dst, src, (unsigned)width,
// (unsigned)height);
}
Status convertBGRToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion BGR to RGBA not implemented"};
}
Status convertBGRUToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion BGRU to RGBA not implemented"};
}
Status convertBayerRGGBToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion RGGB to RGBA not implemented"};
}
Status convertBayerBGGRToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion BayerBGGR to RGBA not implemented"};
}
Status convertBayerGRBGToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion BayerGRBG to RGBA not implemented"};
}
Status convertBayerGBRGToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion BayerGBRG to RGBA not implemented"};
}
Status convertUYVYToRGBA(GPU::Surface& /*dst*/, GPU::Buffer<const unsigned char> /*src*/, std::size_t /*width*/,
std::size_t /*height*/, GPU::Stream /*stream*/) {
// TODO_OPENCL_IMPL
return {Origin::GPU, ErrType::UnsupportedAction, "Color space conversion UYVY to RGBA not implemented"};
}
Status convertYUV422P10ToRGBA(GPU::Surface& dst, GPU::Buffer<const unsigned char> src, std::size_t width,
std::size_t height, GPU::Stream stream) {
assert(!(width & 1));
assert(!(height & 1));
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(convertYUV422P10ToRGBAKernel))
.setup2D(stream, (unsigned)width / 2, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), src.as<const uint16_t>().get(), (unsigned)(width),
(unsigned)(height));
}
Status convertYUY2ToRGBA(GPU::Surface& dst, GPU::Buffer<const unsigned char> src, std::size_t width, std::size_t height,
GPU::Stream stream) {
assert(!(width & 1));
assert(!(height & 1));
std::string kernelName = KERNEL_STR(convertYUY2ToRGBAKernel);
auto kernel1D = GPU::Kernel::get(PROGRAM(unpack), kernelName).setup1D(stream, (unsigned)(width * height / 2));
return kernel1D.enqueueWithKernelArgs(dst, src, (unsigned)width, (unsigned)height);
}
Status convertYV12ToRGBA(GPU::Surface& dst, GPU::Buffer<const unsigned char> src, std::size_t width, std::size_t height,
GPU::Stream stream) {
assert(!(width & 1));
assert(!(height & 1));
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(convertYV12ToRGBAKernel))
.setup2D(stream, (unsigned)width / 2, (unsigned)height / 2);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), src, (unsigned)width, (unsigned)height);
}
Status convertNV12ToRGBA(GPU::Surface& dst, GPU::Buffer<const unsigned char> src, std::size_t width, std::size_t height,
GPU::Stream stream) {
assert(!(width & 1));
assert(!(height & 1));
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(convertNV12ToRGBAKernel))
.setup2D(stream, (unsigned)width / 2, (unsigned)height / 2);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), src, (unsigned)width, (unsigned)height);
}
Status convertGrayscaleToRGBA(GPU::Surface& dst, GPU::Buffer<const unsigned char> src, std::size_t width,
std::size_t height, GPU::Stream stream) {
auto kernel2D = GPU::Kernel::get(PROGRAM(unpack), KERNEL_STR(convertGrayscaleToRGBAKernel))
.setup2D(stream, (unsigned)width, (unsigned)height);
return kernel2D.enqueueWithKernelArgs(dst.get().raw(), src, (unsigned)width, (unsigned)height);
}
} // namespace Image
} // namespace VideoStitch
#endif // VS_OPENCL