// Copyright (c) 2012-2017 VideoStitch SAS // Copyright (c) 2018 stitchEm #include "libvideostitch/gpu_device.hpp" #include "gpu/buffer.hpp" #include "cuda/memory.hpp" #include "cuda/error.hpp" #include #include #if !defined(__APPLE__) && !defined(TEGRA_DEMO) #include #endif #include #include #include #if defined(linux) #include #include #elif _MSC_VER #include #endif namespace VideoStitch { #if !defined(__APPLE__) && !defined(TEGRA_DEMO) namespace { void init(); void done(); struct NVML { NVML() { init(); } ~NVML() { done(); } unsigned int device_count = 0; }; NVML nvml; #ifdef linux pid_t pid; #elif _MSC_VER DWORD pid; #endif void init() { nvmlReturn_t result = nvmlInit(); if (NVML_SUCCESS != result) { printf("Failed to initialize NVML: %s\n", nvmlErrorString(result)); return; } result = nvmlDeviceGetCount(&nvml.device_count); if (NVML_SUCCESS != result) { printf("Failed to query device count: %s\n", nvmlErrorString(result)); return; } printf("Found %d device%s\n\n", (int)nvml.device_count, nvml.device_count != 1 ? "s" : ""); #ifdef linux pid = getpid(); #elif _MSC_VER pid = GetCurrentProcessId(); #endif } void done() { nvmlReturn_t result = nvmlInit(); if (NVML_SUCCESS != result) { printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result)); } } } // namespace #endif // __APPLE__ namespace GPU { std::atomic defaultDevice(-1); Status checkDefaultBackendDeviceInitialization() { return CUDA_ERROR(cudaPeekAtLastError()); } Status setDefaultBackendDeviceVS(int vsDevice) { int device; if (!Discovery::getBackendDeviceIndex(vsDevice, device)) { return {Origin::GPU, ErrType::ImplementationError, "[CUDA] trying to set a device that does not exist"}; } return setDefaultBackendDevice(device); } Status getDefaultBackendDeviceContext(void* context) { return CUDA_ERROR(cuCtxGetCurrent((CUcontext*)context)); } Status useDefaultBackendDevice() { if (defaultDevice == -1) { return {Origin::GPU, ErrType::ImplementationError, "[CUDA] default device is not set"}; } return CUDA_ERROR(cudaSetDevice(defaultDevice)); } Status setDefaultBackendDevice(int device) { if (defaultDevice != -1 && device != defaultDevice) { return {Origin::GPU, ErrType::ImplementationError, "[CUDA] changing the default device is not permitted"}; } defaultDevice = device; return useDefaultBackendDevice(); } Status getDefaultBackendDevice(int* device) { if (defaultDevice == -1) { return {Origin::GPU, ErrType::ImplementationError, "[CUDA] default device is not set"}; } *device = defaultDevice; return Status::OK(); } Discovery::Framework getFramework() { return Discovery::Framework::CUDA; } PotentialValue getMemoryUsage() { // add buffers, cached buffers and allocated size on device size_t used_memory = 0; used_memory += getBufferPoolCurrentSize(); // used_memory += getCachedBufferPoolCurrentSize(); used_memory += Cuda::getDevicePoolCurrentSize(); return PotentialValue(used_memory); } PotentialValue > getMemoryUsageByDevices() { std::vector used_memory; #if !defined(__APPLE__) && !defined(TEGRA_DEMO) for (unsigned int i = 0; i < nvml.device_count; i++) { nvmlDevice_t device; nvmlDeviceGetHandleByIndex(i, &device); #ifdef linux unsigned int infoCount = 0; nvmlDeviceGetComputeRunningProcesses(device, &infoCount, nullptr); nvmlProcessInfo_t* infos = new nvmlProcessInfo_t[infoCount]; nvmlDeviceGetComputeRunningProcesses(device, &infoCount, infos); for (unsigned int j = 0; j < infoCount; j++) { if ((unsigned int)pid == infos[j].pid) { used_memory.push_back(infos[j].usedGpuMemory); } } delete[] infos; #elif _MSC_VER nvmlMemory_t memory; nvmlDeviceGetMemoryInfo(device, &memory); used_memory.push_back(memory.used); #endif } #else // add buffers, cached buffers and allocated size on device // initialize with first buffers used_memory = getBufferPoolCurrentSizeByDevices(); // accumulate other buffers // std::transform(used_memory.begin(), used_memory.end(), getCachedBufferPoolCurrentSizeByDevices().begin(), // used_memory.begin(), std::plus()); std::transform(used_memory.begin(), used_memory.end(), Cuda::getDevicePoolCurrentSizeByDevices().begin(), used_memory.begin(), std::plus()); #endif return PotentialValue >(used_memory); } } // namespace GPU } // namespace VideoStitch