1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#include "memory.hpp"
#include "libvideostitch/logging.hpp"
#include "backend/common/allocStats.hpp"
#include <cuda_runtime.h>
#include "error.hpp"
#include <cassert>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <mutex>
// If the following is defined, all cudaMallocXxxVS functions will bin memory allocation in named pools for memory
// debugging In that case, a global lock will be used which makes it possible to share pools between threads without
// caring about thread-safety.
#define USE_VS_MALLOC_POOLS
#undef max
namespace VideoStitch {
namespace Cuda {
#ifdef USE_VS_MALLOC_POOLS
namespace {
// Global allocation trackers for device and pinned-host memory, keyed by pool
// name. Shared by all threads; per the file-level comment above, a global lock
// makes cross-thread sharing safe (see backend/common/allocStats.hpp).
AllocStatsMap deviceStats("Device");
AllocStatsMap hostStats("Host");
} // namespace
#endif // USE_VS_MALLOC_POOLS
// Total number of bytes currently tracked in the device-memory pool.
// Always 0 when pool tracking (USE_VS_MALLOC_POOLS) is compiled out.
std::size_t getDevicePoolCurrentSize(void) {
#ifdef USE_VS_MALLOC_POOLS
  const std::size_t bytes = deviceStats.bytesUsed();
  return bytes;
#else
  return 0;
#endif
}
// Per-device byte counts currently tracked in the device-memory pool.
// Returns an empty vector when pool tracking is compiled out.
std::vector<std::size_t> getDevicePoolCurrentSizeByDevices(void) {
#ifdef USE_VS_MALLOC_POOLS
  return deviceStats.bytesUsedByDevices();
#else
  return {};
#endif
}
// Total number of bytes currently tracked in the pinned-host-memory pool.
// Always 0 when pool tracking (USE_VS_MALLOC_POOLS) is compiled out.
std::size_t getHostPoolCurrentSize(void) {
#ifdef USE_VS_MALLOC_POOLS
  const std::size_t bytes = hostStats.bytesUsed();
  return bytes;
#else
  return 0;
#endif
}
// Per-device byte counts currently tracked in the pinned-host-memory pool.
// Returns an empty vector when pool tracking is compiled out.
std::vector<std::size_t> getHostPoolCurrentSizeByDevices(void) {
#ifdef USE_VS_MALLOC_POOLS
  return hostStats.bytesUsedByDevices();
#else
  return {};
#endif
}
// Dumps the device-memory pool statistics to stdout.
// No-op when pool tracking is compiled out.
void printDevicePool() {
#ifdef USE_VS_MALLOC_POOLS
  std::ostream& out = std::cout;
  deviceStats.print(out);
#endif
}
// Dumps the pinned-host-memory pool statistics to stdout.
// No-op when pool tracking is compiled out.
void printHostPool() {
#ifdef USE_VS_MALLOC_POOLS
  std::ostream& out = std::cout;
  hostStats.print(out);
#endif
}
#ifndef NDEBUG
// Debug builds: PRINT_FILELINE appends the allocation call site (file, line)
// to the Logger::Error stream when `file` is non-null; FILELINE_ARGS declares
// the named parameters the allocators below receive for that purpose.
// NOTE(review): the macro expands to `if (file) { ... };` — a complete
// statement carrying its own trailing semicolon — and call sites invoke it
// WITHOUT a semicolon. Do not rewrap it in do { } while (0): that would
// require a semicolon at every call site and break the NDEBUG build, where
// the macro expands to nothing.
#define PRINT_FILELINE \
if (file) { \
Logger::get(VideoStitch::Logger::Error) << " (at " << file << ", l. " << line << ")" << std::endl; \
};
#define FILELINE_ARGS const char *file, int line
#else
// Release builds: call-site logging compiles away; parameters stay unnamed
// to avoid unused-parameter warnings.
#define PRINT_FILELINE
#define FILELINE_ARGS const char* /*file*/, int /*line*/
#endif
// Allocates `size` bytes of device memory into *buf via cudaMalloc.
// On success, the pointer is registered in the device pool under `name`
// (when USE_VS_MALLOC_POOLS is defined). On failure, logs an Error (with the
// call site in debug builds) and returns an OutOfResources status.
Status __mallocVS(void** buf, size_t size, const char* name, unsigned /*flagsUnused*/, FILELINE_ARGS) {
  if (CUDA_ERROR(cudaMalloc(buf, size)).ok()) {
#ifdef USE_VS_MALLOC_POOLS
    deviceStats.addPtr(name, *buf, size);
#endif
    return Status::OK();
  }
  Logger::get(VideoStitch::Logger::Error) << "Could not allocate " << size << " bytes of GPU memory.";
  PRINT_FILELINE
  Logger::get(VideoStitch::Logger::Error) << std::endl;
  return {Origin::GPU, ErrType::OutOfResources, "Could not allocate GPU memory"};
}
// Releases device memory previously obtained through __mallocVS.
// Passing nullptr is a no-op that returns OK. Pool bookkeeping is updated
// first, mirroring the original ordering, so the entry is removed regardless
// of what cudaFree reports.
Status freeVS(void* buf) {
#ifdef USE_VS_MALLOC_POOLS
  deviceStats.deletePtr(buf);
#endif
  return buf ? CUDA_ERROR(cudaFree(buf)) : Status::OK();
}
// Allocates `size` bytes of pinned (page-locked) host memory into *buf via
// cudaHostAlloc with the given `flags`. A zero-size request is rejected as an
// ImplementationError and *buf is set to nullptr. On success, the pointer is
// registered in the host pool under `name` (when USE_VS_MALLOC_POOLS is
// defined). On failure, logs an Error (with the call site in debug builds)
// and returns an OutOfResources status.
Status __mallocHostVS(void** buf, size_t size, const char* name, unsigned flags, FILELINE_ARGS) {
  if (size == 0) {
    *buf = nullptr;
    return {Origin::GPU, ErrType::ImplementationError, "Cannot allocate 0-size buffer"};
  }
  if (CUDA_ERROR(cudaHostAlloc(buf, size, flags)).ok()) {
#ifdef USE_VS_MALLOC_POOLS
    hostStats.addPtr(name, *buf, size);
#endif
    return Status::OK();
  }
  Logger::get(VideoStitch::Logger::Error) << "Could not allocate " << size << " bytes of pinned CPU memory.";
  PRINT_FILELINE
  Logger::get(VideoStitch::Logger::Error) << std::endl;
  return {Origin::GPU, ErrType::OutOfResources, "Could not allocate pinned memory"};
}
// Releases pinned host memory previously obtained through __mallocHostVS.
// Passing nullptr is a no-op that returns OK. Pool bookkeeping is updated
// first, mirroring the original ordering, so the entry is removed regardless
// of what cudaFreeHost reports.
Status freeHostVS(void* buf) {
#ifdef USE_VS_MALLOC_POOLS
  hostStats.deletePtr(buf);
#endif
  return buf ? CUDA_ERROR(cudaFreeHost(buf)) : Status::OK();
}
// Allocates a CUDA array of width x height elements with the given channel
// format and flags via cudaMallocArray. On failure, logs an Error (with the
// call site in debug builds) and returns an OutOfResources status.
// NOTE(review): unlike __mallocVS/__mallocHostVS, arrays are not registered
// in the allocation pools — `name` is intentionally unused.
Status __mallocArrayVS(struct cudaArray** array, const struct cudaChannelFormatDesc* desc, size_t width, size_t height,
                       unsigned int flags, const char* /*name*/, FILELINE_ARGS) {
  if (CUDA_ERROR(cudaMallocArray(array, desc, width, height, flags)).ok()) {
    return Status::OK();
  }
  Logger::get(VideoStitch::Logger::Error) << "Could not allocate CUDA array of size " << width << " x " << height
                                          << ".";
  PRINT_FILELINE
  Logger::get(VideoStitch::Logger::Error) << std::endl;
  return {Origin::GPU, ErrType::OutOfResources, "Could not allocate CUDA array"};
}
// Releases a CUDA array previously obtained through __mallocArrayVS.
// Passing nullptr is a no-op that returns OK.
Status freeArrayVS(struct cudaArray* array) {
  return array ? CUDA_ERROR(cudaFreeArray(array)) : Status::OK();
}
// Debug allocator: logs the request (size in bytes and truncated MB, plus the
// call site) at Debug level, then performs a raw cudaMalloc into *p.
// Unlike __mallocVS, no pool bookkeeping is done here.
// Fix: previously a failed allocation returned silently; now it is also
// reported at Error level, consistent with the other allocators in this file.
// The returned Status is unchanged in both cases.
Status __mallocPrint(void** p, size_t size, const char* file, const int line) {
  VideoStitch::Logger::get(VideoStitch::Logger::Debug)
      << "Alloc " << size << " CUDA bytes (" << size / (1024 * 1024) << " MB) at " << file << ":" << line << std::endl;
  const Status status = CUDA_ERROR(cudaMalloc(p, size));
  if (!status.ok()) {
    Logger::get(VideoStitch::Logger::Error) << "Could not allocate " << size << " bytes of GPU memory (at " << file
                                            << ", l. " << line << ")" << std::endl;
  }
  return status;
}
} // namespace Cuda
} // namespace VideoStitch