// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm

#ifndef UNROLLED_GAUSSIAN_KERNEL_HPP_
#define UNROLLED_GAUSSIAN_KERNEL_HPP_

#include "backend/common/imageOps.hpp"
#include "backend/common/vectorOps.hpp"

namespace VideoStitch {
namespace Image {

/**
 * Consumes one tap (four consecutive int32_t values) from @a argb and adds its
 * weighted contribution to the running sums.
 *
 * The first value read is multiplied by @a weight; the product is used both as
 * the factor applied to the following three values and as the amount added to
 * @a acc. Going by the parameter names, the four values are presumably a
 * solidity/alpha flag followed by red, green and blue -- confirm against the
 * code that fills the buffer.
 *
 * @param argb   In/out cursor into the tap buffer. Advanced by four elements.
 * @param tr     Running weighted sum of the second value of each tap (red).
 * @param tg     Running weighted sum of the third value of each tap (green).
 * @param tb     Running weighted sum of the fourth value of each tap (blue).
 * @param acc    Running sum of (first value * weight), i.e. the normalisation factor.
 * @param weight Filter coefficient of this tap.
 */
inline __device__ void addToAccumulatorsWeightedRGBA(const int32_t *&argb, int32_t &tr, int32_t &tg, int32_t &tb,
                                                     int32_t &acc, int32_t weight) {
  int32_t isSolid = (*argb++) * weight;
  tr += isSolid * (*argb++);
  tg += isSolid * (*argb++);
  tb += isSolid * (*argb++);
  acc += isSolid;
}

/**
 * Normalises the three channel sums by @a acc and packs them with RGB210::pack.
 *
 * Each channel is divided by its own accumulator total. When @a acc is zero
 * (no tap contributed) the sums are divided by 1 instead, so that no integer
 * division by zero is executed on the device; if the per-tap solidity values
 * are non-negative the sums are zero as well in that case.
 *
 * @param tr      Weighted red sum.
 * @param tg      Weighted green sum.
 * @param tb      Weighted blue sum.
 * @param acc     Sum of the effective weights.
 * @param isSolid Value forwarded unchanged as the fourth argument of RGB210::pack.
 * @return The value returned by RGB210::pack.
 */
inline __device__ uint32_t packUnrolledGaussianResult(int32_t tr, int32_t tg, int32_t tb, int32_t acc,
                                                      int32_t isSolid) {
  const int32_t divisor = (acc != 0) ? acc : 1;
  return RGB210::pack(tr / divisor, tg / divisor, tb / divisor, isSolid);
}

/**
 * Radius-1 filter: 3 taps weighted 1 2 1 (binomial coefficients).
 *
 * @param col Pointer to 3 taps of four int32_t values each (12 values are read).
 * @return Packed pixel; its fourth pack() argument is the first value of the centre tap.
 */
__device__ inline uint32_t unrolledGaussianKernel1(const int32_t *col) {
  int32_t tr = 0;
  int32_t tg = 0;
  int32_t tb = 0;
  int32_t acc = 0;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  // col now points at the centre tap: remember its first value before consuming it.
  int32_t isSolid = *col;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 2);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  return packUnrolledGaussianResult(tr, tg, tb, acc, isSolid);
}

/**
 * Radius-2 filter: 5 taps weighted 1 4 6 4 1 (binomial coefficients).
 *
 * @param col Pointer to 5 taps of four int32_t values each (20 values are read).
 * @return Packed pixel; its fourth pack() argument is the first value of the centre tap.
 */
__device__ inline uint32_t unrolledGaussianKernel2(const int32_t *col) {
  int32_t tr = 0;
  int32_t tg = 0;
  int32_t tb = 0;
  int32_t acc = 0;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 4);
  // col now points at the centre tap: remember its first value before consuming it.
  int32_t isSolid = *col;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 6);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 4);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  return packUnrolledGaussianResult(tr, tg, tb, acc, isSolid);
}

/**
 * Radius-3 filter: 7 taps weighted 1 6 15 20 15 6 1 (binomial coefficients).
 *
 * @param col Pointer to 7 taps of four int32_t values each (28 values are read).
 * @return Packed pixel; its fourth pack() argument is the first value of the centre tap.
 */
__device__ inline uint32_t unrolledGaussianKernel3(const int32_t *col) {
  int32_t tr = 0;
  int32_t tg = 0;
  int32_t tb = 0;
  int32_t acc = 0;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 6);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 15);
  // col now points at the centre tap: remember its first value before consuming it.
  int32_t isSolid = *col;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 20);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 15);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 6);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  return packUnrolledGaussianResult(tr, tg, tb, acc, isSolid);
}

/**
 * Radius-4 filter: 9 taps weighted 1 8 28 56 70 56 28 8 1 (binomial coefficients).
 *
 * @param col Pointer to 9 taps of four int32_t values each (36 values are read).
 * @return Packed pixel; its fourth pack() argument is the first value of the centre tap.
 */
__device__ inline uint32_t unrolledGaussianKernel4(const int32_t *col) {
  int32_t tr = 0;
  int32_t tg = 0;
  int32_t tb = 0;
  int32_t acc = 0;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 8);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 28);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 56);
  // col now points at the centre tap: remember its first value before consuming it.
  int32_t isSolid = *col;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 70);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 56);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 28);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 8);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  return packUnrolledGaussianResult(tr, tg, tb, acc, isSolid);
}

/**
 * Radius-5 filter: 11 taps weighted 1 10 45 120 210 252 210 120 45 10 1
 * (binomial coefficients).
 *
 * @param col Pointer to 11 taps of four int32_t values each (44 values are read).
 * @return Packed pixel; its fourth pack() argument is the first value of the centre tap.
 */
__device__ inline uint32_t unrolledGaussianKernel5(const int32_t *col) {
  int32_t tr = 0;
  int32_t tg = 0;
  int32_t tb = 0;
  int32_t acc = 0;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 10);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 45);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 120);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 210);
  // col now points at the centre tap: remember its first value before consuming it.
  int32_t isSolid = *col;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 252);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 210);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 120);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 45);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 10);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  return packUnrolledGaussianResult(tr, tg, tb, acc, isSolid);
}

/**
 * Radius-6 filter: 13 taps weighted 1 12 66 220 495 792 924 792 495 220 66 12 1
 * (binomial coefficients).
 *
 * @param col Pointer to 13 taps of four int32_t values each (52 values are read).
 * @return Packed pixel; its fourth pack() argument is the first value of the centre tap.
 */
__device__ inline uint32_t unrolledGaussianKernel6(const int32_t *col) {
  int32_t tr = 0;
  int32_t tg = 0;
  int32_t tb = 0;
  int32_t acc = 0;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 12);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 66);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 220);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 495);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 792);
  // col now points at the centre tap: remember its first value before consuming it.
  int32_t isSolid = *col;
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 924);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 792);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 495);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 220);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 66);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 12);
  addToAccumulatorsWeightedRGBA(col, tr, tg, tb, acc, 1);
  return packUnrolledGaussianResult(tr, tg, tb, acc, isSolid);
}

}  // namespace Image
}  // namespace VideoStitch

#endif