// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm // // Basic input unpacking tests. #include "gpu/testing.hpp" #include "gpu/util.hpp" #include "libvideostitch/context.hpp" #include "gpu/allocator.hpp" #include "gpu/2dBuffer.hpp" #include #include "libvideostitch/profile.hpp" #include "libvideostitch/logging.hpp" // Uncomment this to compare with opencv. You need to have OpenCV buit with GPU support, and add opencv_gpu to your // libs. #define WITH_OPENCV_CMP #ifdef WITH_OPENCV_CMP #include #include #include #endif #include #ifdef DUMP_IMAGES #include "util/debugUtils.hpp" #endif namespace { struct NoProfiler { NoProfiler(const char*, bool, VideoStitch::ThreadSafeOstream&) {} }; } // namespace using namespace VideoStitch::Image; namespace VideoStitch { namespace Testing { template void testOutOfPlaceRGB(unsigned width, unsigned height) { // Generate data std::vector input; for (unsigned i = 0; i < width * height; ++i) { input.push_back((unsigned char)((33 * i) % 235)); input.push_back((unsigned char)((31 * i) % 83)); input.push_back((unsigned char)((42 * i) % 57)); } DeviceBuffer inputBuffer(width, height); inputBuffer.fillData(input); auto potUniqueStream = GPU::UniqueStream::create(); ENSURE(potUniqueStream.ok()); auto stream = potUniqueStream.ref().borrow(); DeviceBuffer rgb210Buffer(width, height); { ENSURE(stream.synchronize()); ProfilerT p("RGB pack", false, Logger::get(Logger::Info)); // packUnpack(PixelArray(width, height, rgb210Buffer.gpuBuf()), // PixelArray(width, height, inputBuffer.gpuBuf()), stream); ENSURE(stream.synchronize()); } DeviceBuffer outputBuffer(width, height); { ENSURE(stream.synchronize()); ProfilerT p("RGB unpack", false, Logger::get(Logger::Info)); // packUnpack(PixelArray(width, height, outputBuffer.gpuBuf()), // PixelArray(width, height, rgb210Buffer.gpuBuf()), stream); ENSURE(stream.synchronize()); } std::vector output; outputBuffer.readbackData(output); for (unsigned i = 0; i < width * height; ++i) { ENSURE_EQ((int)input[3 * i + 0], (int)output[3 * i + 0]); ENSURE_EQ((int)input[3 * i + 1], (int)output[3 * i + 1]); ENSURE_EQ((int)input[3 * i + 2], (int)output[3 * i + 2]); } } template void testYV12Minimal(unsigned width, unsigned height) { ENSURE(!(width & 1), "width must be even"); ENSURE(!(height & 1), "height must be even"); // Generate YV12 data unsigned char* input = new unsigned char[3 * width * height / 2]; for (unsigned i = 0; i < width * height; ++i) { input[i] = 124; } for (unsigned i = 0; i < width * height / 4; ++i) { input[width * height + i] = 94; } for (unsigned i = 0; i < width * height / 4; ++i) { input[width * height + width * height / 4 + i] = 196; } auto buffer = GPU::uniqueBuffer(width * height * 3 / 2, "unpackTest"); ENSURE(buffer.status(), "no device memory"); ENSURE(GPU::memcpyBlocking(buffer.borrow(), input), "transfer error"); // to internal format auto surf = Core::OffscreenAllocator::createSourceSurface(width, height, "testYV12Minimal"); ENSURE(surf.ok()); convertYV12ToRGBA(*surf->pimpl->surface, buffer.borrow(), width, height, GPU::Stream::getDefault()); // from internal format auto yOut = GPU::Buffer2D::allocate(width, height, "YV12 Y plane").value(); auto uOut = GPU::Buffer2D::allocate(width / 2, height / 2, "YV12 U plane").value(); auto vOut = GPU::Buffer2D::allocate(width / 2, height / 2, "YV12 V plane").value(); unpackYV12(yOut, uOut, vOut, *surf->pimpl->surface, width, height, GPU::Stream::getDefault()); // Check result of conversion std::vector outputY(width * height); ENSURE(GPU::memcpyBlocking(outputY.data(), yOut), "transfer error"); std::vector outputU(width * height / 4); ENSURE(GPU::memcpyBlocking(outputU.data(), uOut), "transfer error"); std::vector outputV(width * height / 4); ENSURE(GPU::memcpyBlocking(outputV.data(), vOut), "transfer error"); int eps = 1; for (unsigned i = 0; i < width * height; ++i) { ENSURE_APPROX_EQ(124, (int)outputY[i], eps); } for (unsigned i = 0; i < (width * height) / 4; ++i) { ENSURE_APPROX_EQ(94, (int)outputU[i], eps); } for (unsigned i = 0; i < (width * height) / 4; ++i) { ENSURE_APPROX_EQ(196, (int)outputV[i], eps); } // clean up delete[] input; ENSURE(yOut.release()); ENSURE(uOut.release()); ENSURE(vOut.release()); } template void testNV12Minimal(unsigned width, unsigned height) { ENSURE(!(width & 1), "width must be even"); ENSURE(!(height & 1), "height must be even"); // Generate NV12 data std::vector input(3 * width * height / 2); for (unsigned i = 0; i < width * height; ++i) { input[i] = 80 + i % 48; } for (unsigned i = 0; i < width * height / 2; i += 2) { input[width * height + i] = 90 + (i / width) % 80; input[width * height + i + 1] = 80 + (i / height) % 100; } // alloc auto buffer = GPU::uniqueBuffer(width * height * 3 / 2, "unpackTest"); ENSURE(buffer.status(), "no device memory"); // transfer ENSURE(GPU::memcpyBlocking(buffer.borrow(), input.data()), "transfer error"); // to internal format auto surf = Core::OffscreenAllocator::createSourceSurface(width, height, "testNV12Minimal"); ENSURE(surf.ok()); convertNV12ToRGBA(*surf->pimpl->surface, buffer.borrow(), width, height, GPU::Stream::getDefault()); #ifdef DUMP_IMAGES GPU::Stream::getDefault().synchronize(); Debug::dumpRGBATexture("/tmp/nv12.png", *surf->pimpl->surface, width, height); #endif // from internal format GPU::Buffer2D yOut = GPU::Buffer2D::allocate(width, height, "NV12 Y plane").value(); GPU::Buffer2D uvOut = GPU::Buffer2D::allocate(width, height / 2, "NV12 UV plane").value(); unpackNV12(yOut, uvOut, *surf->pimpl->surface, width, height, GPU::Stream::getDefault()); // Check result of conversion std::vector outputY(width * height); ENSURE(GPU::memcpyBlocking(outputY.data(), yOut), "transfer error"); std::vector outputUV(width * height / 2); ENSURE(GPU::memcpyBlocking(outputUV.data(), uvOut), "transfer error"); GPU::Stream::getDefault().synchronize(); int eps = 1; for (unsigned i = 0; i < width * height; ++i) { ENSURE_APPROX_EQ((int)(80 + i % 48), (int)outputY[i], eps); } for (unsigned i = 0; i < width * height / 2; i += 2) { ENSURE_APPROX_EQ((int)(90 + (i / width) % 80), (int)outputUV[i], eps); ENSURE_APPROX_EQ((int)(80 + (i / height) % 100), (int)outputUV[i + 1], eps); } // clean up ENSURE(yOut.release()); ENSURE(uvOut.release()); } // TODO_OPENCL_IMPL #ifndef VS_OPENCL void testBayerRGGB() { const int width = 6; const int height = 4; DeviceBuffer bayerFiltered(width, height); const std::vector inputData = { 10, 42, 36, 78, 40, 52, 50, 64, 8, 82, 42, 44, 90, 112, 124, 122, 162, 56, 130, 144, 16, 48, 44, 128, }; bayerFiltered.fill(inputData); PackedDeviceBuffer outputDev(width, height); convertBayerRGGBToRGBA(outputDev.gpuBuf(), bayerFiltered.gpuBufConst(), width, height, GPU::Stream::getDefault()); std::vector actual; outputDev.readback(actual); /* std::cout << "R" << std::endl; for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x) { std::cout << RGBA::r(actual[y * width + x]) << " "; } std::cout << std::endl; } std::cout << "G" << std::endl; for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x) { std::cout << RGBA::g(actual[y * width + x]) << " "; } std::cout << std::endl; } std::cout << "B" << std::endl; for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x) { std::cout << RGBA::b(actual[y * width + x]) << " "; } std::cout << std::endl; } */ // Red: // // 10, , 36, , 40, , 10, 23, 36, // 38, 40, 40, (40) // , , , , , , => => 50, 65, 80, // 90, 101, 101, (XX) // 90, , 124, , 162, , 90, 107, 124, // 143, 162, 162, (162) // , , , , , , 90, 107, 124, // 143, 162, 162, (XX) // (90), (XX),(124), // (XX),(162), (XX), // (162) // Green: // (XX), (50), (XX), (8), (XX), (42), (XX), (XX) (XX), (50), (XX), (8), // (XX), (42), (XX), (XX) // , 42, , 78, , 52, (42), , 42, , 78, , 52, (XX) (42), 46, 42, 34, // 78, 53, 52, (XX) // 50, , 8, , 42, , => (XX), 50, , 8, , 42, , (42) => (XX), 50, 53, 8, // 62, 42, 48, (42) // , 112, , 122, , 56, (112), , 112, , 122, , 56, (XX) (112), 101, 112, 64, // 122, 66, 56, (XX) // 130, , 16, , 44, , (XX), 130, , 16, , 44, , (44) (XX), 130, 92, 16, // 76, 44, 50, (44) // (XX), (XX),(112), (XX),(122), (XX), (56), (XX) (XX), (XX),(112), // (XX),(122), (XX), (56), (XX) // Blue: // (64), (XX), (64), (XX), // (82), (XX), (44), // , , , , , , (XX), 64, 64, 73, // 82, 63, 44, , 64, , 82, , 44, => => (64), // 64, 64, 73, 82, 63, 44, , , , , , , (XX), 104, 104, 84, 65, 75, 86, , 144, // , 48, , 128, (144), 144, 144, 96, 48, 88, // 128, const std::vector expected = { RGBA::pack(10, 46, 64, 255), RGBA::pack(23, 42, 64, 255), RGBA::pack(36, 34, 73, 255), RGBA::pack(38, 78, 82, 255), RGBA::pack(40, 53, 63, 255), RGBA::pack(40, 52, 44, 255), RGBA::pack(50, 50, 64, 255), RGBA::pack(65, 53, 64, 255), RGBA::pack(80, 8, 73, 255), RGBA::pack(90, 62, 82, 255), RGBA::pack(101, 42, 63, 255), RGBA::pack(101, 48, 44, 255), RGBA::pack(90, 101, 104, 255), RGBA::pack(107, 112, 104, 255), RGBA::pack(124, 64, 84, 255), RGBA::pack(143, 122, 65, 255), RGBA::pack(162, 66, 75, 255), RGBA::pack(162, 56, 86, 255), RGBA::pack(90, 130, 144, 255), RGBA::pack(107, 92, 144, 255), RGBA::pack(124, 16, 96, 255), RGBA::pack(143, 76, 48, 255), RGBA::pack(162, 44, 88, 255), RGBA::pack(162, 50, 128, 255), }; ENSURE_RGBA8888_ARRAY_EQ(expected.data(), actual.data(), width, height); } void benchBayerRGGB(const int width, const int height) { std::vector inputData(width * height); for (int i = 0; i < width * height; ++i) { inputData[i] = (unsigned char)((i * 1357) % 255); } { DeviceBuffer bayerFiltered(width, height); { Util::SimpleProfiler p("Debayer bench VS, copy to GPU", false, Logger::get(Logger::Info)); bayerFiltered.fill(inputData); } PackedDeviceBuffer vsOutputDev(width, height); { Util::SimpleProfiler p("Debayer bench VS, debayer", false, Logger::get(Logger::Info)); convertBayerRGGBToRGBA(vsOutputDev.gpuBuf(), bayerFiltered.gpuBufConst(), width, height, GPU::Stream::getDefault()); cudaDeviceSynchronize(); } std::vector vsOutput; { Util::SimpleProfiler p("Debayer bench VS, readback", false, Logger::get(Logger::Info)); vsOutputDev.readback(vsOutput); } } #ifdef WITH_OPENCV_CMP { std::unique_ptr cvSrc; { Util::SimpleProfiler p("Debayer bench OpenCV, create image, copy to GPU", false, std::cout); cvSrc.reset(new cv::gpu::GpuMat(cv::Mat(height, width, CV_8UC1, (void*)inputData.data()))); } cv::gpu::GpuMat cvDevOutput(height, width, CV_8UC4); { Util::SimpleProfiler p("Debayer bench OpenCV, debayer", false, std::cout); cv::gpu::cvtColor(*cvSrc, cvDevOutput, CV_BayerBG2BGR); cudaDeviceSynchronize(); } cv::Mat cvOutput(height, width, CV_8UC4); { Util::SimpleProfiler p("Debayer bench OpenCV, readback", false, std::cout); cvDevOutput.download(cvOutput); cudaDeviceSynchronize(); } } #endif } #ifdef WITH_OPENCV_CMP void testBayerRGGBReal() { const int width = 600; const int height = 480; cv::Mat cvOutput(height, width, CV_8UC4); cv::Mat vsOutput(height, width, CV_8UC4); { DeviceBuffer bayerFiltered(width, height); bayerFiltered.readRawFromFile("data/bayer/mire.gray"); PackedDeviceBuffer vsOutputDev(width, height); { Util::SimpleProfiler p("Debayer real VS, debayer", false, std::cout); convertBayerRGGBToRGBA(vsOutputDev.ptr(), bayerFiltered.ptr(), width, height, 16); cudaDeviceSynchronize(); } std::vector tmp; vsOutputDev.readback(tmp); std::unique_ptr os(Util::PpmWriter::openPpm("debayer-vs.ppm", width, height)); for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { const uint32_t v = tmp[y * width + x]; *os << (char)RGBA::r(v); *os << (char)RGBA::g(v); *os << (char)RGBA::b(v); } } vsOutput = cv::Mat(height, width, CV_8UC4, (void*)tmp.data()).clone(); } std::cout << vsOutput.at(200, 200) << std::endl; { cv::gpu::GpuMat cvSrc(cv::imread("data/bayer/mire.pgm", CV_LOAD_IMAGE_GRAYSCALE)); cv::gpu::GpuMat cvDevOutput(height, width, CV_8UC4); { Util::SimpleProfiler p("Debayer real OpenCV, debayer", false, std::cout); cv::gpu::cvtColor(cvSrc, cvDevOutput, CV_BayerBG2BGR); cudaDeviceSynchronize(); } cvDevOutput.download(cvOutput); } cv::imwrite("debayer-opencv.ppm", cvOutput); // cv::imwrite("debayer-vs.ppm", vsOutput); } #endif #endif // VS_OPENCL } // namespace Testing } // namespace VideoStitch int main(int argc, char** argv) { VideoStitch::Testing::initTest(); VideoStitch::Testing::ENSURE(VideoStitch::GPU::setDefaultBackendDevice(0)); // VideoStitch::Testing::testOutOfPlaceRGB(16, 16); // VideoStitch::Testing::testOutOfPlaceRGB(312, 531); // VideoStitch::Testing::testOutOfPlaceMonoY(16, 16); // VideoStitch::Testing::testOutOfPlaceMonoY(312, 531); VideoStitch::Testing::testYV12Minimal(16, 16); VideoStitch::Testing::testYV12Minimal(312, 532); VideoStitch::Testing::testNV12Minimal(16, 16); VideoStitch::Testing::testNV12Minimal(280, 124); // TODO_OPENCL_IMPL #ifndef VS_OPENCL // VideoStitch::Testing::testBayerRGGB(); #endif #ifdef WITH_OPENCV_CMP VideoStitch::Testing::testBayerRGGBReal(); #endif if (argc > 1 && !strcmp(argv[1], "bench")) { VideoStitch::Testing::testOutOfPlaceRGB(6542, 5322); // TODO_OPENCL_IMPL #ifndef VS_OPENCL VideoStitch::Testing::benchBayerRGGB(6542, 5322); #endif } VideoStitch::GPU::Context::destroy(); return 0; }