// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm

#include "core1/panoRemapper.hpp"

#include "core/rect.hpp"
#include "core/transformGeoParams.hpp"

#include "backend/common/imageOps.hpp"

#include "backend/cuda/deviceBuffer.hpp"
#include "backend/cuda/deviceStream.hpp"
#include "backend/cuda/surface.hpp"

#include "cuda/error.hpp"
#include "cuda/util.hpp"

#include "gpu/core1/transform.hpp"
#include "gpu/buffer.hpp"
#include "gpu/memcpy.hpp"
#include "gpu/allocator.hpp"

#include "libvideostitch/panoDef.hpp"

#include "backend/cuda/core/transformStack.cu"
#include "core/kernels/withinStack.cu"

namespace VideoStitch {
namespace Core {

template <Convert2D3DFnT toSphere, class OutputCropper>
__global__ void remapKernel(uint32_t* g_odata, cudaTextureObject_t remapTex, int panoWidth, int panoHeight,
                            const float2 inPanoScale, const float2 outPanoScale, const vsfloat3x3 R) {
  unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned y = blockIdx.y * blockDim.y + threadIdx.y;

  if (x < panoWidth && y < panoHeight) {
    if (OutputCropper::isPanoPointVisible(x, y, panoWidth, panoHeight)) {
      float2 uv = make_float2((float)x, (float)y);

      /**
       * The transformations are applied relative to the center of the panorama image
       */
      uv.x -= (panoWidth - 1) / 2.0f;
      uv.y -= (panoHeight - 1) / 2.0f;

      /**
       * Apply transform stack
       */
      uv.x /= outPanoScale.x;
      uv.y /= outPanoScale.y;

      float3 pt = toSphere(uv);

      pt = rotateSphere(pt, R);

      uv = SphereToErect(pt);

      uv.x *= inPanoScale.x;
      uv.y *= inPanoScale.y;

      /**
       * See notes in warp kernel
       * compensate fetching offset with cudaFilterModeLinear by adding 0.5f
       * https://stackoverflow.com/questions/10643790/texture-memory-tex2d-basics
       */
      uv.x += panoWidth / 2.0f;
      uv.y += panoHeight / 2.0f;

      float4 px = tex2D<float4>(remapTex, uv.x, uv.y);
      g_odata[y * panoWidth + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                     __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
    } else {
      g_odata[y * panoWidth + x] = 0;
    }
  }
}

__global__ void remapCubemapKernel(uint32_t* __restrict__ xPositive, uint32_t* __restrict__ xNegative,
                                   uint32_t* __restrict__ yPositive, uint32_t* __restrict__ yNegative,
                                   uint32_t* __restrict__ zPositive, uint32_t* __restrict__ zNegative, int panoWidth,
                                   int panoHeight, cudaTextureObject_t remapTex, int faceDim, const float2 panoScale,
                                   const vsfloat3x3 R) {
  unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned y = blockIdx.y * blockDim.y + threadIdx.y;

  if (x < faceDim && y < faceDim) {
    /* compensate fetching offset with cudaFilterModeLinear by adding 0.5f */
    float2 uv = make_float2(x + 0.5f, y + 0.5f);
    uv = (uv / faceDim) * 2.f - make_float2(1.f, 1.f);

    float3 pt;
    for (unsigned int face = 0; face < 6; face++) {
      // Layer 0 is positive X face
      if (face == 0) {
        pt.x = 1;
        pt.y = -uv.y;
        pt.z = -uv.x;
      }
      // Layer 1 is negative X face
      else if (face == 1) {
        pt.x = -1;
        pt.y = -uv.y;
        pt.z = uv.x;
      }
      // Layer 2 is positive Y face
      else if (face == 2) {
        pt.x = uv.x;
        pt.y = 1;
        pt.z = uv.y;
      }
      // Layer 3 is negative Y face
      else if (face == 3) {
        pt.x = uv.x;
        pt.y = -1;
        pt.z = -uv.y;
      }
      // Layer 4 is positive Z face
      else if (face == 4) {
        pt.x = uv.x;
        pt.y = -uv.y;
        pt.z = 1;
      }
      // Layer 5 is negative Z face
      else if (face == 5) {
        pt.x = -uv.x;
        pt.y = -uv.y;
        pt.z = -1;
      }

      pt = rotateSphere(pt, R);

      float2 xy = SphereToErect(pt);

      xy *= panoScale;

      /**
       * See notes in warp kernel
       */
      xy.x += panoWidth / 2.0f;
      xy.y += panoHeight / 2.0f;

      float4 px = tex2D<float4>(remapTex, xy.x, xy.y);
      if (face == 0) {
        xPositive[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 1) {
        xNegative[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 2) {
        yPositive[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 3) {
        yNegative[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 4) {
        zPositive[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 5) {
        zNegative[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      }
    }
  }
}

template <bool equiangular>
__global__ void rotateCubemapKernel(uint32_t* __restrict__ xPositive, uint32_t* __restrict__ xNegative,
                                    uint32_t* __restrict__ yPositive, uint32_t* __restrict__ yNegative,
                                    uint32_t* __restrict__ zPositive, uint32_t* __restrict__ zNegative, int faceDim,
                                    cudaTextureObject_t remapTex, const vsfloat3x3 R) {
  unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned y = blockIdx.y * blockDim.y + threadIdx.y;

  if (x < faceDim && y < faceDim) {
    float2 uv = make_float2((float)x, (float)y);
    uv = (uv / faceDim) * 2.f - make_float2(1.f, 1.f);

    if (equiangular) {
      uv.x = tanf_vs(uv.x * PI_F_VS / 4.);
      uv.y = tanf_vs(uv.y * PI_F_VS / 4.);
    }

    float3 pt;
    for (unsigned int face = 0; face < 6; face++) {
      // Layer 0 is positive X face
      if (face == 0) {
        pt.x = 1;
        pt.y = -uv.y;
        pt.z = -uv.x;
      }
      // Layer 1 is negative X face
      else if (face == 1) {
        pt.x = -1;
        pt.y = -uv.y;
        pt.z = uv.x;
      }
      // Layer 2 is positive Y face
      else if (face == 2) {
        pt.x = uv.x;
        pt.y = 1;
        pt.z = uv.y;
      }
      // Layer 3 is negative Y face
      else if (face == 3) {
        pt.x = uv.x;
        pt.y = -1;
        pt.z = -uv.y;
      }
      // Layer 4 is positive Z face
      else if (face == 4) {
        pt.x = uv.x;
        pt.y = -uv.y;
        pt.z = 1;
      }
      // Layer 5 is negative Z face
      else if (face == 5) {
        pt.x = -uv.x;
        pt.y = -uv.y;
        pt.z = -1;
      }

      pt = rotateSphere(pt, R);

      if (equiangular) {
        // first normalize with Chebyshev distance to project back on the cube
        float cheb = fmaxf(abs(pt.x), abs(pt.y));
        cheb = fmaxf(cheb, abs(pt.z));
        pt /= cheb;
        // then reinflate the cube
        pt.x = 4. / PI_F_VS * atanf_vs(pt.x);
        pt.y = 4. / PI_F_VS * atanf_vs(pt.y);
        pt.z = 4. / PI_F_VS * atanf_vs(pt.z);
      }

      float4 px = texCubemap<float4>(remapTex, pt.x, pt.y, pt.z);
      if (face == 0) {
        xPositive[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 1) {
        xNegative[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 2) {
        yPositive[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 3) {
        yNegative[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 4) {
        zPositive[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      } else if (face == 5) {
        zNegative[y * faceDim + x] = Image::RGBA::pack(__float2uint_rn(px.x * 255.), __float2uint_rn(px.y * 255.),
                                                       __float2uint_rn(px.z * 255.), __float2uint_rn(px.w * 255.));
      }
    }
  }
}

Status rotateCubemap(const PanoDefinition& pano, GPU::CubemapSurface& cubemapSurface, GPU::Buffer<uint32_t> xPosPbo,
                     GPU::Buffer<uint32_t> xNegPbo, GPU::Buffer<uint32_t> yPosPbo, GPU::Buffer<uint32_t> yNegPbo,
                     GPU::Buffer<uint32_t> zPosPbo, GPU::Buffer<uint32_t> zNegPbo, const Matrix33<double>& perspective,
                     bool equiangular, GPU::Stream stream) {
  vsfloat3x3 rotation;
  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 3; j++) {
      rotation.values[i][j] = (float)perspective(i, j);
    }
  }

  dim3 block(16, 16, 1);
  dim3 grid((unsigned)Cuda::ceilDiv(pano.getLength(), block.x), (unsigned)Cuda::ceilDiv(pano.getLength(), block.y), 1);

  if (equiangular) {
    rotateCubemapKernel<true><<<grid, block, 0, stream.get()>>>(
        xPosPbo.get(), xNegPbo.get(), yPosPbo.get(), yNegPbo.get(), zPosPbo.get(), zNegPbo.get(), (int)pano.getLength(),
        cubemapSurface.get().texture(), rotation);
  } else {
    rotateCubemapKernel<false><<<grid, block, 0, stream.get()>>>(
        xPosPbo.get(), xNegPbo.get(), yPosPbo.get(), yNegPbo.get(), zPosPbo.get(), zNegPbo.get(), (int)pano.getLength(),
        cubemapSurface.get().texture(), rotation);
  }

  return CUDA_STATUS;
}

__device__ float3 positiveX(float2& uv) {
  float3 pt;
  pt.x = 1;
  pt.y = -uv.y;
  pt.z = uv.x;
  return pt;
}
__device__ float3 negativeX(float2& uv) {
  float3 pt;
  pt.x = -1;
  pt.y = -uv.y;
  pt.z = -uv.x;
  return pt;
}
__device__ float3 positiveY(float2& uv) {
  float3 pt;
  pt.x = uv.x;
  pt.y = 1;
  pt.z = -uv.y;
  return pt;
}
__device__ float3 negativeY(float2& uv) {
  float3 pt;
  pt.x = uv.x;
  pt.y = -1;
  pt.z = uv.y;
  return pt;
}
__device__ float3 positiveZ(float2& uv) {
  float3 pt;
  pt.x = uv.x;
  pt.y = -uv.y;
  pt.z = -1;
  return pt;
}
__device__ float3 negativeZ(float2& uv) {
  float3 pt;
  pt.x = -uv.x;
  pt.y = -uv.y;
  pt.z = 1;
  return pt;
}

template <float3 (*project)(float2&), bool equiangular>
__global__ void remapMaskFace(unsigned char* __restrict__ face, int dstOffsetX, int dstOffsetY, int bbWidth,
                              int bbHeight, int panoWidth, int panoHeight, cudaTextureObject_t remapTex, int srcOffsetX,
                              int srcOffsetY, int faceDim, const float2 panoScale) {
  unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned y = blockIdx.y * blockDim.y + threadIdx.y;

  if (x < bbWidth && y < bbHeight) {
    /* compensate fetching offset with cudaFilterModeLinear by adding 0.5f */
    float2 uv = make_float2(x + dstOffsetX + 0.5f, y + dstOffsetY + 0.5f);
    uv = (uv / faceDim) * 2.f - make_float2(1.f, 1.f);

    if (equiangular) {
      uv.x = tanf_vs(uv.x * PI_F_VS / 4.);
      uv.y = tanf_vs(uv.y * PI_F_VS / 4.);
    }

    float3 pt = project(uv);

    float2 xy = SphereToErect(pt);

    xy *= panoScale;

    /**
     * See notes in warp kernel
     */
    xy.x += panoWidth / 2.0f;
    xy.y += panoHeight / 2.0f;

    xy.x -= srcOffsetX;
    xy.y -= srcOffsetY;
    if (xy.x < 0.) {
      xy.x += panoWidth;
    }

    float px = tex2D<float>(remapTex, xy.x, xy.y);
    face[y * bbWidth + x] = __float2uint_rn(px * 255.);
  }
}

Status reprojectAlphaToCubemap(int panoWidth, int panoHeight, int faceLength, GPU::Surface& alphaSurface,
                               Rect equirectBB, GPU::Buffer<unsigned char> xPosAlpha, Rect xPosBB,
                               GPU::Buffer<unsigned char> xNegAlpha, Rect xNegBB, GPU::Buffer<unsigned char> yPosAlpha,
                               Rect yPosBB, GPU::Buffer<unsigned char> yNegAlpha, Rect yNegBB,
                               GPU::Buffer<unsigned char> zPosAlpha, Rect zPosBB, GPU::Buffer<unsigned char> zNegAlpha,
                               Rect zNegBB, bool equiangular, GPU::Stream stream) {
  dim3 block(16, 16, 1);
  float2 panoScale = {TransformGeoParams::computePanoScale(PanoProjection::Equirectangular, panoWidth, 360.f),
                      2 * TransformGeoParams::computePanoScale(PanoProjection::Equirectangular, panoHeight, 360.f)};

  if (!xPosBB.empty()) {
    dim3 gridXPos((unsigned)Cuda::ceilDiv(xPosBB.getWidth(), block.x),
                  (unsigned)Cuda::ceilDiv(xPosBB.getHeight(), block.y), 1);
    if (equiangular) {
      remapMaskFace<positiveX, true><<<gridXPos, block, 0, stream.get()>>>(
          xPosAlpha.get().raw(), (unsigned)xPosBB.left(), (unsigned)xPosBB.top(), (unsigned)xPosBB.getWidth(),
          (unsigned)xPosBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    } else {
      remapMaskFace<positiveX, false><<<gridXPos, block, 0, stream.get()>>>(
          xPosAlpha.get().raw(), (unsigned)xPosBB.left(), (unsigned)xPosBB.top(), (unsigned)xPosBB.getWidth(),
          (unsigned)xPosBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    }
  }
  if (!xNegBB.empty()) {
    dim3 gridXNeg((unsigned)Cuda::ceilDiv(xNegBB.getWidth(), block.x),
                  (unsigned)Cuda::ceilDiv(xNegBB.getHeight(), block.y), 1);
    if (equiangular) {
      remapMaskFace<negativeX, true><<<gridXNeg, block, 0, stream.get()>>>(
          xNegAlpha.get().raw(), (unsigned)xNegBB.left(), (unsigned)xNegBB.top(), (unsigned)xNegBB.getWidth(),
          (unsigned)xNegBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    } else {
      remapMaskFace<negativeX, false><<<gridXNeg, block, 0, stream.get()>>>(
          xNegAlpha.get().raw(), (unsigned)xNegBB.left(), (unsigned)xNegBB.top(), (unsigned)xNegBB.getWidth(),
          (unsigned)xNegBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    }
  }
  if (!yPosBB.empty()) {
    dim3 gridYPos((unsigned)Cuda::ceilDiv(yPosBB.getWidth(), block.x),
                  (unsigned)Cuda::ceilDiv(yPosBB.getHeight(), block.y), 1);
    if (equiangular) {
      remapMaskFace<positiveY, true><<<gridYPos, block, 0, stream.get()>>>(
          yPosAlpha.get().raw(), (unsigned)yPosBB.left(), (unsigned)yPosBB.top(), (unsigned)yPosBB.getWidth(),
          (unsigned)yPosBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    } else {
      remapMaskFace<positiveY, false><<<gridYPos, block, 0, stream.get()>>>(
          yPosAlpha.get().raw(), (unsigned)yPosBB.left(), (unsigned)yPosBB.top(), (unsigned)yPosBB.getWidth(),
          (unsigned)yPosBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    }
  }
  if (!yNegBB.empty()) {
    dim3 gridYNeg((unsigned)Cuda::ceilDiv(yNegBB.getWidth(), block.x),
                  (unsigned)Cuda::ceilDiv(yNegBB.getHeight(), block.y), 1);
    if (equiangular) {
      remapMaskFace<negativeY, true><<<gridYNeg, block, 0, stream.get()>>>(
          yNegAlpha.get().raw(), (unsigned)yNegBB.left(), (unsigned)yNegBB.top(), (unsigned)yNegBB.getWidth(),
          (unsigned)yNegBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    } else {
      remapMaskFace<negativeY, false><<<gridYNeg, block, 0, stream.get()>>>(
          yNegAlpha.get().raw(), (unsigned)yNegBB.left(), (unsigned)yNegBB.top(), (unsigned)yNegBB.getWidth(),
          (unsigned)yNegBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    }
  }
  if (!zPosBB.empty()) {
    dim3 gridZPos((unsigned)Cuda::ceilDiv(zPosBB.getWidth(), block.x),
                  (unsigned)Cuda::ceilDiv(zPosBB.getHeight(), block.y), 1);
    if (equiangular) {
      remapMaskFace<positiveZ, true><<<gridZPos, block, 0, stream.get()>>>(
          zPosAlpha.get().raw(), (unsigned)zPosBB.left(), (unsigned)zPosBB.top(), (unsigned)zPosBB.getWidth(),
          (unsigned)zPosBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    } else {
      remapMaskFace<positiveZ, false><<<gridZPos, block, 0, stream.get()>>>(
          zPosAlpha.get().raw(), (unsigned)zPosBB.left(), (unsigned)zPosBB.top(), (unsigned)zPosBB.getWidth(),
          (unsigned)zPosBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    }
  }
  if (!zNegBB.empty()) {
    dim3 gridZNeg((unsigned)Cuda::ceilDiv(zNegBB.getWidth(), block.x),
                  (unsigned)Cuda::ceilDiv(zNegBB.getHeight(), block.y), 1);
    if (equiangular) {
      remapMaskFace<negativeZ, true><<<gridZNeg, block, 0, stream.get()>>>(
          zNegAlpha.get().raw(), (unsigned)zNegBB.left(), (unsigned)zNegBB.top(), (unsigned)zNegBB.getWidth(),
          (unsigned)zNegBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    } else {
      remapMaskFace<negativeZ, false><<<gridZNeg, block, 0, stream.get()>>>(
          zNegAlpha.get().raw(), (unsigned)zNegBB.left(), (unsigned)zNegBB.top(), (unsigned)zNegBB.getWidth(),
          (unsigned)zNegBB.getHeight(), (unsigned)panoWidth, (unsigned)panoHeight, alphaSurface.get().texture(),
          (unsigned)equirectBB.left(), (unsigned)equirectBB.top(), (unsigned)faceLength, panoScale);
    }
  }
  return CUDA_STATUS;
}

template <Convert2D3DFnT toSphere, class OutputCropper>
Status reprojectPanorama(GPU::Buffer<uint32_t> pbo, float2 dstScale, GPU::Surface& tex, float2 srcScale, unsigned width,
                         unsigned height, const Matrix33<double>& perspective, GPU::Stream stream) {
  dim3 dimBlock(16, 16, 1);
  dim3 dimGrid((unsigned)Cuda::ceilDiv(width, dimBlock.x), (unsigned)Cuda::ceilDiv(height, dimBlock.y), 1);

  vsfloat3x3 rotation;
  for (int i = 0; i < 3; i++) {
    for (int j = 0; j < 3; j++) {
      rotation.values[i][j] = (float)perspective(i, j);
    }
  }

  remapKernel<toSphere, OutputCropper><<<dimGrid, dimBlock, 0, stream.get()>>>(pbo.get(), tex.get().texture(), width,
                                                                               height, srcScale, dstScale, rotation);

  return CUDA_STATUS;
}

Status reprojectRectilinear(GPU::Buffer<uint32_t> pbo, float2 outScale, GPU::Surface& tex, float2 inScale,
                            unsigned width, unsigned height, const Matrix33<double>& perspective, GPU::Stream stream) {
  return reprojectPanorama<RectToSphere, OutputRectCropper>(pbo, outScale, tex, inScale, width, height, perspective,
                                                            stream);
}
Status reprojectEquirectangular(GPU::Buffer<uint32_t> pbo, float2 outScale, GPU::Surface& tex, float2 inScale,
                                unsigned width, unsigned height, const Matrix33<double>& perspective,
                                GPU::Stream stream) {
  return reprojectPanorama<ErectToSphere, OutputRectCropper>(pbo, outScale, tex, inScale, width, height, perspective,
                                                             stream);
}
Status reprojectFullFrameFisheye(GPU::Buffer<uint32_t> pbo, float2 outScale, GPU::Surface& tex, float2 inScale,
                                 unsigned width, unsigned height, const Matrix33<double>& perspective,
                                 GPU::Stream stream) {
  return reprojectPanorama<FisheyeToSphere, OutputRectCropper>(pbo, outScale, tex, inScale, width, height, perspective,
                                                               stream);
}
Status reprojectCircularFisheye(GPU::Buffer<uint32_t> pbo, float2 outScale, GPU::Surface& tex, float2 inScale,
                                unsigned width, unsigned height, const Matrix33<double>& perspective,
                                GPU::Stream stream) {
  return reprojectPanorama<FisheyeToSphere, OutputCircleCropper>(pbo, outScale, tex, inScale, width, height,
                                                                 perspective, stream);
}
Status reprojectStereographic(GPU::Buffer<uint32_t> pbo, float2 outScale, GPU::Surface& tex, float2 inScale,
                              unsigned width, unsigned height, const Matrix33<double>& perspective,
                              GPU::Stream stream) {
  return reprojectPanorama<StereoToSphere, OutputRectCropper>(pbo, outScale, tex, inScale, width, height, perspective,
                                                              stream);
}

}  // namespace Core
}  // namespace VideoStitch