# ----------------------------------------------------------------------------
# CUDA backend options
# ----------------------------------------------------------------------------

# FASTMATH: when ON, --use_fast_math is appended to the nvcc flags.
option(
  FASTMATH
  "Build with cuda fast_math"
  ON
)

# CUDA_KERNEL_PROFILING: when ON, USE_NVTX is defined, the nvToolsExt library
# is looked up and kernels are built with -lineinfo; when OFF, kernels are
# built with --default-stream per-thread instead.
option(
  CUDA_KERNEL_PROFILING
  "Compile to enable additional profiling information"
  OFF
)

# CUDA_LOCAL_ARCH_ONLY: when ON, the architecture flags are taken from
# CUDA_LOCAL_ARCH_FLAGS instead of the built-in -gencode list.
option(
  CUDA_LOCAL_ARCH_ONLY
  "Compile CUDA kernels only for the local CUDA architecture"
  OFF
)

# Locate the CUDA toolkit through the project's helper module. The check
# below relies on CUDA_FOUND, which is presumably set by that module.
include("${CMAKE_SOURCE_DIR}/cmake/getCuda.cmake")

# Abort the configuration when CUDA is missing. The mode keyword has to be
# FATAL_ERROR: an unrecognised keyword is simply printed as part of the
# message text and processing would carry on without CUDA.
if(NOT CUDA_FOUND)
  message(FATAL_ERROR "Required cuda version not found !")
endif()


# ----------------------------------------------------------------------------
# Find CUDA libraries and dependencies
# ----------------------------------------------------------------------------

# TODO: extract find_library + find_package

# Windows: locate GLEW (from the static vcpkg tree), the CUDA driver/runtime,
# NVML and the NVIDIA video codec library, and register the include
# directories they need.
if(WINDOWS)
  # GLEW is searched only in the static vcpkg lib directory. The keyword is
  # PATHS; a bare "PATH" is not a find_library keyword and would be treated
  # as just another directory to search.
  find_library(GLEW "glew32" PATHS "${VCPKG_PATH}\\${VCPKG_TARGET_TRIPLET}-static\\lib" NO_DEFAULT_PATH)

  # Directory-scoped include paths. OPENGL_INCLUDE_DIRS and
  # OPENCV_INCLUDE_DIRS are not set in this file; they are expected to be
  # provided by the including project.
  include_directories("${VCPKG_PATH}\\${VCPKG_TARGET_TRIPLET}-static\\include")
  include_directories(${OPENGL_INCLUDE_DIRS})
  include_directories(${OPENCV_INCLUDE_DIRS})
  include_directories("${NVIDIA_VIDEO_CODEC}\\include")

  # CUDA driver, runtime and NVML are taken exclusively from the toolkit's
  # x64 library directory.
  find_library(CUDA cuda PATHS "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" NO_DEFAULT_PATH)
  find_library(CUDART cudart PATHS "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" NO_DEFAULT_PATH)
  # nvcuvid is searched in the video codec SDK in addition to the default
  # locations (no NO_DEFAULT_PATH here).
  find_library(CUVID nvcuvid PATHS "${NVIDIA_VIDEO_CODEC}\\Lib\\x64")
  find_library(NVML nvml PATHS "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" NO_DEFAULT_PATH)
endif()

# Linux: locate GLEW/OpenGL and the CUDA driver, runtime, NVML and nvcuvid
# libraries. Tegra demo builds use a different lookup than regular builds.
if(LINUX)
  # FindCUDA setting; disabled for every Linux build.
  set(CUDA_PROPAGATE_HOST_FLAGS FALSE)

  if(NOT TEGRA_DEMO)
    find_package(GLEW)
    # NOTE(review): LINUX_CUDA_PATH is not assigned on this path within this
    # file; presumably getCuda.cmake provides it - confirm.
    find_library(CUDA NAMES cuda PATHS "${LINUX_CUDA_PATH}/stubs")
    find_library(CUDART NAMES cudart PATHS ${LINUX_CUDA_PATH})
    find_library(NVML NAMES nvidia-ml PATHS "${LINUX_CUDA_PATH}/stubs")
  else()
    # Tegra demo: CUDA libraries live below the toolkit target directory, and
    # GLEW/GL are searched without re-rooting onto CMAKE_FIND_ROOT_PATH.
    set(LINUX_CUDA_PATH ${CUDA_TOOLKIT_TARGET_DIR}/lib)
    find_library(GLEW_LIBRARIES NAMES GLEW NO_CMAKE_FIND_ROOT_PATH)
    find_library(OPENGL_LIBRARIES NAMES GL NO_CMAKE_FIND_ROOT_PATH)
  endif()

  # Looked up for both Tegra and regular builds, in the default locations.
  find_library(CUVID NAMES nvcuvid)
endif()

# macOS: locate GLEW and the CUDA driver/runtime libraries.
if(APPLE)
  find_package(GLEW)

  # Both CUDA libraries are searched only in MAC_CUDA_PATH.
  find_library(CUDA NAMES cuda PATHS ${MAC_CUDA_PATH} NO_DEFAULT_PATH)
  find_library(CUDART NAMES cudart PATHS ${MAC_CUDA_PATH} NO_DEFAULT_PATH)

  # Rewrite any "/usr/local/cuda/" prefix in CUDA_LIBRARIES so that the
  # entries point into CUDA_TOOLKIT_ROOT_DIR.
  string(REPLACE "/usr/local/cuda/" "${CUDA_TOOLKIT_ROOT_DIR}/" CUDA_LIBRARIES "${CUDA_LIBRARIES}")
endif()

# Report what the library lookup above resolved to, one "<NAME>: <value>"
# status line per variable.
foreach(_cuda_report_var CUDA CUDART CUDA_LIBRARIES)
  message(STATUS "${_cuda_report_var}: ${${_cuda_report_var}}")
endforeach()

# ----------------------------------------------------------------------------
# CUDA compilation options
# ----------------------------------------------------------------------------

# Fast-math code generation, controlled by the FASTMATH option.
if(FASTMATH)
  list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
endif()

if(NOT CUDA_KERNEL_PROFILING)
  # Per-thread default streams are enabled for non-profiling builds only:
  # profiling kernel details failed with default-stream per-thread.
  list(APPEND CUDA_NVCC_FLAGS "--default-stream" "per-thread")
else()
  # Profiling build: keep line information in the kernels and enable the
  # NVTX code paths through the USE_NVTX definition.
  list(APPEND CUDA_NVCC_FLAGS -lineinfo)
  add_definitions(-DUSE_NVTX)
  # NOTE(review): the lookup only searches LINUX_CUDA_PATH, whatever the
  # platform - confirm this is intended for Windows/macOS builds.
  find_library(NVTX NAMES nvToolsExt PATHS ${LINUX_CUDA_PATH} NO_DEFAULT_PATH)
  message(STATUS "NVTX: ${NVTX}")
endif()

# ----------------------------------------------------------------------------
# CUDA compilation OS specific flags
# ----------------------------------------------------------------------------

# Linux and Android: pass -fno-strict-aliasing to the host compiler.
if(LINUX OR ANDROID)
  list(APPEND CUDA_NVCC_FLAGS --compiler-options -fno-strict-aliasing)
endif()

# Linux and macOS: build position-independent code; toolkits older than
# CUDA 8.0 additionally get -Wno-conversion for the host compiler.
if(LINUX OR APPLE)
  list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
  # Test the variable by name instead of expanding it: with an empty
  # CUDA_VERSION, "${CUDA_VERSION} VERSION_LESS 8.0" collapses to an invalid
  # if() expression and aborts the configuration. An unknown version does not
  # get the extra flag.
  if(CUDA_VERSION AND CUDA_VERSION VERSION_LESS 8.0)
    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-conversion)
  endif()
endif()

# With clang as host compiler, silence the unreachable-code warning:
# nvcc/clang complains about code after assert(false).
if(COMPILER_CLANG)
  list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unreachable-code)
endif()


# Every platform except Windows compiles CUDA sources as C++11; Windows
# instead receives a set of preprocessor definitions plus /FS for the host
# compiler.
if(NOT WINDOWS)
  list(APPEND CUDA_NVCC_FLAGS -std=c++11)
else()
  list(APPEND CUDA_NVCC_FLAGS
       -D_USE_MATH_DEFINES
       -DVS_LIB_COMPILATION
       -DNOMINMAX
       -Xcompiler /FS)
endif()

# Android: make nvcc use the configured C++ compiler as its host compiler
# and optimise with -O3.
if(ANDROID)
  list(APPEND CUDA_NVCC_FLAGS -ccbin ${CMAKE_CXX_COMPILER} -O3)
endif()

# Project module; CUDA_LOCAL_ARCH_FLAGS, used when selecting the target
# architectures, is expected to be queried and set in there.
include(CUDAArch)

# ----------------------------------------------------------------------------
# Determine CUDA architectures to generate code for
# ----------------------------------------------------------------------------

# Select the -gencode flags:
#   * local/target architecture requested -> flags queried and set in CUDAArch
#   * Tegra demo                          -> sm_53 and sm_62
#   * otherwise                           -> sm_30 ... sm_61, plus compute_61
#                                            as a code target
if(CUDA_LOCAL_ARCH_ONLY OR CUDA_TARGET_ARCH)
  set(CUDA_NVCC_ARCH_FLAGS ${CUDA_LOCAL_ARCH_FLAGS})
elseif(TEGRA_DEMO)
  set(CUDA_NVCC_ARCH_FLAGS "")
  foreach(_sm 53 62)
    list(APPEND CUDA_NVCC_ARCH_FLAGS -gencode=arch=compute_${_sm},code=sm_${_sm})
  endforeach()
else()
  set(CUDA_NVCC_ARCH_FLAGS "")
  foreach(_sm 30 35 50 52 61)
    list(APPEND CUDA_NVCC_ARCH_FLAGS -gencode=arch=compute_${_sm},code=sm_${_sm})
  endforeach()
  # Last entry uses compute_61 (not sm_61) as the code target.
  list(APPEND CUDA_NVCC_ARCH_FLAGS -gencode=arch=compute_61,code=compute_61)
endif()

message(STATUS "NVCC arch flags: ${CUDA_NVCC_ARCH_FLAGS}")

# The architecture flags go at the end of the accumulated nvcc flags.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${CUDA_NVCC_ARCH_FLAGS})