// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm #include "deviceBuffer2D.hpp" #include "context.hpp" #include "gpu/2dBuffer.hpp" #include "../common/allocStats.hpp" namespace VideoStitch { namespace GPU { /** * Compute an optimal pitch (= distance in byte between rows in a 2D array) such that each new row begins at an address * that is optimal for coalescing, and then allocates a memory area as large as pitch times the number of rows you * specify. * * The optimal pitch is computed by * (1) getting the base address alignment preference for your card(CL_DEVICE_MEM_BASE_ADDR_ALIGN property * with clGetDeviceInfo : note that the returned value is in bits); let's call this base * (2) find the largest multiple of base that is no less than your natural data pitch (sizeof(type) times number of * columns); this will be the pitch. Then allocate pitch times number of rows bytes, and pass the pitch information to * kernels. */ PotentialValue Buffer2D::allocate(size_t width, size_t height, const char* name) { const auto& potContext = getContext(); FAIL_RETURN(potContext.status()); const auto& ctx = potContext.value(); cl_int base; cl_int err = clGetDeviceInfo(ctx.deviceID(), CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(base), &base, nullptr); if (err != CL_SUCCESS) { assert(false); } assert(base % 8 == 0); size_t mod = width % (base / 8); size_t pitch = width; if (mod > 0) { pitch = width + (base / 8) - mod; } cl_mem buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, pitch * height, nullptr, &err); Status status = CL_ERROR(err); if (status.ok()) { deviceStats.addPtr(name, buf, pitch * height); return PotentialValue(Buffer2D(new DeviceBuffer2D(buf), width, height, pitch)); } else { return PotentialValue({Origin::GPU, ErrType::OutOfResources, "Could not allocate GPU memory. Reduce the project output size and close other " "applications to free up GPU memory."}); } } Status Buffer2D::release() const { if (pimpl && pimpl->image) { deviceStats.deletePtr(pimpl->image); return CL_ERROR(clReleaseMemObject(pimpl->image)); } return Status::OK(); } Buffer2D::~Buffer2D() { delete pimpl; } Buffer2D::Buffer2D(const Buffer2D& other) : pimpl(other.pimpl ? new DeviceBuffer2D(other.pimpl->image) : nullptr), width(other.width), height(other.height), pitch(other.pitch) {} bool Buffer2D::operator==(const Buffer2D& other) const { if (pimpl && other.pimpl) { return *pimpl == *other.pimpl; } return !pimpl && !other.pimpl; } void* Buffer2D::devicePtr() const { if (pimpl) { return pimpl->raw(); } return nullptr; } } // namespace GPU } // namespace VideoStitch