1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm
#pragma once
#include "config.hpp"
#include "status.hpp"
#include <array>
#include <vector>
namespace VideoStitch {
namespace Ptv {
class Value;
}
namespace Audio {
/** Internal audio sample representation */
typedef double audioSample_t;
/**
* Channel mapping mask.
*/
enum ChannelMap : int64_t {
SPEAKER_FRONT_LEFT = 0x1,
SPEAKER_FRONT_RIGHT = 0x2,
SPEAKER_SIDE_LEFT = 0x4,
SPEAKER_SIDE_RIGHT = 0x8,
SPEAKER_FRONT_CENTER = 0x10,
SPEAKER_BACK_CENTER = 0x20,
SPEAKER_LOW_FREQUENCY = 0x40,
SPEAKER_BACK_LEFT = 0x80,
SPEAKER_BACK_RIGHT = 0x100,
SPEAKER_FRONT_LEFT_OF_CENTER = 0x200,
SPEAKER_FRONT_RIGHT_OF_CENTER = 0x400,
SPEAKER_TOP_CENTER = 0x800,
SPEAKER_TOP_FRONT_LEFT = 0x1000,
SPEAKER_TOP_FRONT_CENTER = 0x2000,
SPEAKER_TOP_FRONT_RIGHT = 0x4000,
SPEAKER_TOP_BACK_LEFT = 0x8000,
SPEAKER_TOP_BACK_CENTER = 0x10000,
SPEAKER_TOP_BACK_RIGHT = 0x20000,
SPEAKER_AMB_W = 0x40000,
SPEAKER_AMB_X = 0x80000,
SPEAKER_AMB_Y = 0x100000,
SPEAKER_AMB_Z = 0x200000,
SPEAKER_AMB_R = 0x400000,
SPEAKER_AMB_S = 0x800000,
SPEAKER_AMB_T = 0x1000000,
SPEAKER_AMB_U = 0x2000000,
SPEAKER_AMB_V = 0x4000000,
SPEAKER_AMB_K = 0x8000000,
SPEAKER_AMB_L = 0x10000000,
SPEAKER_AMB_M = 0x20000000,
SPEAKER_AMB_N = 0x40000000,
SPEAKER_AMB_O = 0x80000000,
SPEAKER_AMB_P = 0x100000000,
SPEAKER_AMB_Q = 0x200000000,
NO_SPEAKER = 0x400000000
};
/**
* Sampling rate.
* @brief Defines the sampling rate of the audio signal
*/
enum class SamplingRate : int {
SR_NONE = 0,
SR_22050 = 1,
SR_32000 = 2,
SR_44100 = 3,
SR_48000 = 4,
SR_88200 = 5,
SR_96000 = 6,
SR_176400 = 7,
SR_192000 = 8
};
/**
* Sampling depth.
* @brief Defines the sampling depth of the audio signal
*/
enum class SamplingDepth {
SD_NONE,
// interleaved
UINT8,
INT16,
INT24,
INT32,
FLT,
DBL,
// planar
UINT8_P,
INT16_P,
INT24_P,
INT32_P,
FLT_P,
DBL_P
};
#define MAX_AUDIO_CHANNELS 35
/**
* @brief Defines if the audio signal is planar or interleaved
*/
enum SamplingFormat { FORMAT_UNKNOWN, INTERLEAVED, PLANAR };
/**
* Channel layout.
* @brief Defines the layout of the audio signal
*/
enum ChannelLayout : int64_t {
UNKNOWN,
MONO = SPEAKER_FRONT_LEFT,
STEREO = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT,
_3DUMMY = STEREO | SPEAKER_SIDE_LEFT,
_2POINT1 = STEREO | SPEAKER_LOW_FREQUENCY,
_2_1 = STEREO | SPEAKER_BACK_CENTER,
SURROUND = STEREO | SPEAKER_FRONT_CENTER,
_3POINT1 = SURROUND | SPEAKER_LOW_FREQUENCY,
_4POINT0 = SURROUND | SPEAKER_BACK_CENTER,
_4POINT1 = _4POINT0 | SPEAKER_LOW_FREQUENCY,
_2_2 = STEREO | SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT,
QUAD = STEREO | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
_5POINT0 = SURROUND | SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT,
_5POINT1 = _5POINT0 | SPEAKER_LOW_FREQUENCY,
_5POINT0_BACK = SURROUND | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
_5POINT1_BACK = _5POINT0_BACK | SPEAKER_LOW_FREQUENCY,
_6POINT0 = _5POINT0 | SPEAKER_BACK_CENTER,
_6POINT0_FRONT = _2_2 | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
HEXAGONAL = _5POINT0_BACK | SPEAKER_BACK_CENTER,
_6POINT1 = _5POINT1 | SPEAKER_BACK_CENTER,
_6POINT1_BACK = _5POINT1_BACK | SPEAKER_BACK_CENTER,
_6POINT1_FRONT = _6POINT0_FRONT | SPEAKER_LOW_FREQUENCY,
_7POINT0 = _5POINT0 | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
_7POINT0_FRONT = _5POINT0 | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
_8DUMMY = _6POINT1 | SPEAKER_BACK_LEFT,
_7POINT1 = _5POINT1 | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
_7POINT1_WIDE = _5POINT1 | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
_7POINT1_WIDE_BACK = _5POINT1_BACK | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
OCTAGONAL = _5POINT0 | SPEAKER_BACK_LEFT | SPEAKER_BACK_CENTER | SPEAKER_BACK_RIGHT,
AMBISONICS_WXY = SPEAKER_AMB_W | SPEAKER_AMB_X | SPEAKER_AMB_Y,
AMBISONICS_WXYZ = AMBISONICS_WXY | SPEAKER_AMB_Z,
AMBISONICS_2ND = AMBISONICS_WXYZ | SPEAKER_AMB_R | SPEAKER_AMB_S | SPEAKER_AMB_T | SPEAKER_AMB_U | SPEAKER_AMB_V,
AMBISONICS_3RD = AMBISONICS_2ND | SPEAKER_AMB_K | SPEAKER_AMB_L | SPEAKER_AMB_M | SPEAKER_AMB_N | SPEAKER_AMB_O |
SPEAKER_AMB_P | SPEAKER_AMB_Q
};
/**
* Block size.
* @brief Defines the block size in samples for audio processors.
*/
enum class BlockSize { BS_NONE, BS_32, BS_64, BS_128, BS_256, BS_512, BS_1024, BS_2048, BS_4096 };
static inline double getDefaultSamplingRate() { return 44100.0; }
static inline int getDefaultBlockSize() { return 512; }
static inline std::string getAudioGeneratorId() { return "audiogen"; }
/**
* @brief Determines if the audio signal is planar or interleaved
*/
VS_EXPORT SamplingFormat getSamplingFormatFromSamplingDepth(SamplingDepth samplingDepth);
/**
* @brief Return the size (bytes) of one sample
*/
VS_EXPORT std::size_t getSampleSizeFromSamplingDepth(SamplingDepth samplingDepth);
/**
* @brief Return an index corresponding to the channel map.
*/
VS_EXPORT int getChannelIndexFromChannelMap(ChannelMap speaker);
/**
* @brief Return a channel map from an integer value
*/
VS_EXPORT ChannelMap getChannelMapFromChannelIndex(int i);
/**
* @brief Return an int representation of a block size.
* This can be used when serializing a block size enum.
*/
VS_EXPORT int getIntFromBlockSize(BlockSize bs);
/**
* @brief Return a double representation of a block size.
* This can be used when serializing a block size enum.
*/
VS_EXPORT double getDblFromBlockSize(BlockSize bs);
/**
* @brief Return an int representation of a sampling rate.
* This can be used when serializing a sampling rate enum.
*/
VS_EXPORT int getIntFromSamplingRate(SamplingRate samplingRate);
/**
* @brief Return a double representation of a sampling rate.
* This can be used when serializing a sampling rate enum.
*/
VS_EXPORT double getDblFromSamplingRate(SamplingRate samplingRate);
/**
* @brief Return a sampling rate from an integer value
* This can be used when parsing a sampling rate enum.
*/
VS_EXPORT SamplingRate getSamplingRateFromInt(const int samplingRateInt);
/**
* @brief Return a block size from an integer value
* This can be used when parsing a block size enum.
*/
VS_EXPORT BlockSize getBlockSizeFromInt(const int bs);
/**
* @brief Return a string representation of a sampling depth.
* This can be used when serializing a sampling depth enum.
*/
VS_EXPORT const char* getStringFromSamplingDepth(SamplingDepth samplingDepth);
/**
* @brief Return a sampling rate from a string.
* This can be used when parsing a sampling rate enum.
*/
VS_EXPORT SamplingDepth getSamplingDepthFromString(const char* samplingDepthStr);
/**
* @brief Return a string representation of a channel map.
* This can be used when serializing a channel map.
*/
VS_EXPORT const char* getStringFromChannelType(ChannelMap map);
/**
* @brief Return a string representation of a sampling depth.
* This can be used when serializing a sampling depth enum.
*/
VS_EXPORT const char* getStringFromChannelLayout(ChannelLayout channelLayout);
/**
* @brief Return the number of channels used by this layout.
*/
VS_EXPORT int getNbChannelsFromChannelLayout(ChannelLayout channelLayout);
/**
* @brief Return a channel layout with this number of channels.
*/
VS_EXPORT ChannelLayout getAChannelLayoutFromNbChannels(size_t nbChannels);
/**
* @brief Return a sampling rate from a string.
* This can be used when parsing a sampling rate enum.
*/
VS_EXPORT ChannelLayout getChannelLayoutFromString(const char* channelLayout);
/**
* Where the audio samples channel planes will be written.
* Up to 18 channels passthru is supported.
* Audio channel samples are either interleaved into a sample
* frame or planar, either contiguous or padded (see Spec).
* The memory of each plane is not allocated.
* The ownership is transfered to the stitcher.
*/
class VS_EXPORT Samples {
public:
typedef std::array<uint8_t*, MAX_AUDIO_CHANNELS> data_buffer_t;
typedef void (*deleter)(data_buffer_t&);
/**
* @brief Default constructor. Generate an invalid buffer.
*/
Samples();
/**
* @brief Construct from raw data.
* @param r Sampling rate
* @param d Sample depth
* @param l Channels layout
* @param timestamp First sample timestamp in microseconds
* @param data Raw data. Ownership is transferred to newly created Samples object.
* @param nbSamples Number of samples in the buffer
* @param delete_ Optional deleter functor
*/
Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, data_buffer_t& data,
size_t nbSamples, deleter delete_);
/**
* @brief Construct from raw data.
* @param r Sampling rate
* @param d Sample depth
* @param l Channels layout
* @param timestamp First sample timestamp in microseconds
* @param data Raw data. Ownership is transferred to newly created Samples object.
* @param nbSamples Number of samples in the buffer
*/
Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, data_buffer_t& data,
size_t nbSamples);
/**
* @brief Construct from raw data.
* @param r Sampling rate
* @param d Sample depth
* @param l Channels layout
* @param timestamp First sample timestamp in microseconds
* @param data Raw data. Ownership is transferred to newly created Samples object.
* @param nbSamples Number of samples in the buffer
* @param delete_ Optional deleter functor
* TODO deprecate
*/
Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, uint8_t** data,
size_t nbSamples, deleter delete_);
/**
* @brief Construct from raw data.
* @param r Sampling rate
* @param d Sample depth
* @param l Channels layout
* @param timestamp First sample timestamp in microseconds
* @param data Raw data. Ownership is transferred to newly created Samples object.
* @param nbSamples Number of samples in the buffer
* TODO deprecate
*/
Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, uint8_t** data,
size_t nbSamples);
~Samples();
/**
* @brief Copy constructor with move semantics
* @param s The object to copy
*/
Samples(Samples&& s);
/**
* @brief Copying audio samples is undefined
*/
Samples(const Samples&) = delete;
/**
* @brief Copying audio samples is undefined
*/
Samples& operator=(const Samples&) = delete;
/**
* @brief Assign operator with move semantics
* @param s The object to assign
*/
Samples& operator=(Samples&& s);
/**
* @brief Assign operator with move semantics
* @param s The object to assign
*/
Samples clone() const;
/**
* @brief Get the number of audio samples
* @return The audio samples
*/
size_t getNbOfSamples() const { return nbSamples; }
/**
* @brief Get the audio samples (const)
* @return s The constant reference to the audio samples
*/
const data_buffer_t& getSamples() const { return samples; }
/**
* @brief Get the samples timestamp.
* @return The samples timestamp : in 1 / fps unit (like the video's frameId)
*/
mtime_t getTimestamp() const { return timestamp; }
/**
* @brief Set the samples timestamp.
* @param t The samples timestamp : in 1 / fps unit (like the video's frameId)
*/
void setTimestamp(mtime_t t) { timestamp = t; }
/**
* @brief Drop the @param nb first samples of the buffer
*/
Status drop(size_t nb);
/**
* @brief Append the @param other samples at the end of the buffer
*/
Status append(const Audio::Samples& other);
/**
* @brief Get the sampling rate.
* @return The sampling rate.
*/
SamplingRate getSamplingRate() const { return rate; }
/**
* @brief Get the sampling depth.
* @return The sampling depth.
*/
SamplingDepth getSamplingDepth() const { return depth; }
/**
* @brief Get the channels layout.
* @return The channels layout.
*/
ChannelLayout getChannelLayout() const { return layout; }
/**
* @brief Set the samples deleter.
* @param del The functor
*/
void setDeleter(deleter del) { delete_ = del; }
private:
// TODO use data_buffer_t&
void alloc(uint8_t** data);
void clear();
template <typename Functor>
void mapSamples(const Functor& execFunctor);
data_buffer_t samples;
size_t nbSamples;
mtime_t timestamp;
deleter delete_;
SamplingDepth depth;
SamplingRate rate;
ChannelLayout layout;
};
} // namespace Audio
} // namespace VideoStitch