1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#include "kernel.hpp"
namespace VideoStitch {
namespace GPU {
// Granularity used to round up the global sizes when the caller does not supply a block size.
// TODO: confirm whether 16 is the right value here.
static constexpr size_t CEIL{16};
// Rounds `v` up to the next multiple of `d`.
//
// v: value to round up.
// d: granularity. When it is 0 there is nothing to round to, so `v` is returned unchanged
//    instead of performing an integer division by zero.
//
// Returns the smallest multiple of `d` that is greater than or equal to `v`
// (or `v` itself when `d` is 0).
static size_t ceilKernelSize(size_t v, size_t d) {
  if (d == 0) {
    return v;
  }
  const size_t remainder = v % d;
  // Already aligned: keep the value. Otherwise pad up to the next multiple.
  return remainder == 0 ? v : v + (d - remainder);
}
// Fetches the kernel `kernelName` of program `programName` from the context returned by
// GPU::getContext() and wraps it in a Kernel.
// NOTE: the context is only validated through assert(), so the check disappears when
// assertions are compiled out.
Kernel Kernel::get(std::string programName, std::string kernelName) {
  const auto& context = GPU::getContext();
  assert(context.ok());
  CLKernel& clKernel = context.value().getKernel(programName, kernelName);
  return Kernel{clKernel};
}
// Prepares a 1D execution of this kernel on `stream`.
// The global size is `totalSize` rounded up to a multiple of CEIL; the local size is passed as 0
// (presumably meaning "no explicit block size" to KernelExecution -- TODO confirm).
KernelExecution<1> Kernel::setup1D(GPU::Stream stream, unsigned totalSize) const {
  const size_t paddedSize = ceilKernelSize(static_cast<size_t>(totalSize), CEIL);
  const size_t unspecifiedLocal = 0;
  return KernelExecution<1>{kernel, stream, {{paddedSize}}, {{unspecifiedLocal}}};
}
// Prepares a 1D execution of this kernel on `stream` with an explicit block size.
// The global size is `totalSize` rounded up to a multiple of `blockSize`; the local size is
// `blockSize` itself.
KernelExecution<1> Kernel::setup1D(GPU::Stream stream, unsigned totalSize, unsigned blockSize) const {
  const size_t paddedSize = ceilKernelSize(static_cast<size_t>(totalSize), blockSize);
  const size_t localSize = static_cast<size_t>(blockSize);
  return KernelExecution<1>{kernel, stream, {{paddedSize}}, {{localSize}}};
}
// Prepares a 2D execution of this kernel on `stream`.
// Width and height are each rounded up to a multiple of CEIL to form the global size; both local
// sizes are passed as 0 (presumably meaning "no explicit block size" -- TODO confirm).
KernelExecution<2> Kernel::setup2D(GPU::Stream stream, unsigned totalWidth, unsigned totalHeight) const {
  const size_t paddedWidth = ceilKernelSize(static_cast<size_t>(totalWidth), CEIL);
  const size_t paddedHeight = ceilKernelSize(static_cast<size_t>(totalHeight), CEIL);
  std::array<size_t, 2> globalSize{{paddedWidth, paddedHeight}};
  std::array<size_t, 2> localSize{{0, 0}};
  return KernelExecution<2>{kernel, stream, globalSize, localSize};
}
// Prepares a 2D execution of this kernel on `stream` with explicit block sizes.
// Width is rounded up to a multiple of `blockSizeX` and height to a multiple of `blockSizeY`
// to form the global size; the local size is {blockSizeX, blockSizeY}.
KernelExecution<2> Kernel::setup2D(GPU::Stream stream, unsigned totalWidth, unsigned totalHeight, unsigned blockSizeX,
                                   unsigned blockSizeY) const {
  const size_t paddedWidth = ceilKernelSize(static_cast<size_t>(totalWidth), blockSizeX);
  const size_t paddedHeight = ceilKernelSize(static_cast<size_t>(totalHeight), blockSizeY);
  std::array<size_t, 2> globalSize{{paddedWidth, paddedHeight}};
  std::array<size_t, 2> localSize{{static_cast<size_t>(blockSizeX), static_cast<size_t>(blockSizeY)}};
  return KernelExecution<2>{kernel, stream, globalSize, localSize};
}
} // namespace GPU
} // namespace VideoStitch