audio.hpp

// Copyright (c) 2012-2017 VideoStitch SAS
// Copyright (c) 2018 stitchEm

#pragma once

#include "config.hpp"
#include "status.hpp"

#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

namespace VideoStitch {
namespace Ptv {
class Value;
}

namespace Audio {

/**
 * Type used internally to represent a single audio sample
 * (double-precision floating point).
 */
using audioSample_t = double;

/**
 * Channel mapping mask.
 *
 * Every enumerator occupies exactly one bit (bits 0 through 34), so values
 * can be combined with bitwise OR, as done by the ChannelLayout enumerators.
 * The enumerators are written as single-bit shifts to make the bit position
 * explicit.
 */
enum ChannelMap : int64_t {
  // Speaker positions
  SPEAKER_FRONT_LEFT = 1LL << 0,
  SPEAKER_FRONT_RIGHT = 1LL << 1,
  SPEAKER_SIDE_LEFT = 1LL << 2,
  SPEAKER_SIDE_RIGHT = 1LL << 3,
  SPEAKER_FRONT_CENTER = 1LL << 4,
  SPEAKER_BACK_CENTER = 1LL << 5,
  SPEAKER_LOW_FREQUENCY = 1LL << 6,
  SPEAKER_BACK_LEFT = 1LL << 7,
  SPEAKER_BACK_RIGHT = 1LL << 8,
  SPEAKER_FRONT_LEFT_OF_CENTER = 1LL << 9,
  SPEAKER_FRONT_RIGHT_OF_CENTER = 1LL << 10,
  SPEAKER_TOP_CENTER = 1LL << 11,
  SPEAKER_TOP_FRONT_LEFT = 1LL << 12,
  SPEAKER_TOP_FRONT_CENTER = 1LL << 13,
  SPEAKER_TOP_FRONT_RIGHT = 1LL << 14,
  SPEAKER_TOP_BACK_LEFT = 1LL << 15,
  SPEAKER_TOP_BACK_CENTER = 1LL << 16,
  SPEAKER_TOP_BACK_RIGHT = 1LL << 17,
  // Ambisonic components (combined by the AMBISONICS_* channel layouts)
  SPEAKER_AMB_W = 1LL << 18,
  SPEAKER_AMB_X = 1LL << 19,
  SPEAKER_AMB_Y = 1LL << 20,
  SPEAKER_AMB_Z = 1LL << 21,
  SPEAKER_AMB_R = 1LL << 22,
  SPEAKER_AMB_S = 1LL << 23,
  SPEAKER_AMB_T = 1LL << 24,
  SPEAKER_AMB_U = 1LL << 25,
  SPEAKER_AMB_V = 1LL << 26,
  SPEAKER_AMB_K = 1LL << 27,
  SPEAKER_AMB_L = 1LL << 28,
  SPEAKER_AMB_M = 1LL << 29,
  SPEAKER_AMB_N = 1LL << 30,
  SPEAKER_AMB_O = 1LL << 31,
  SPEAKER_AMB_P = 1LL << 32,
  SPEAKER_AMB_Q = 1LL << 33,
  // Highest bit of the mask
  NO_SPEAKER = 1LL << 34
};

/**
 * Sampling rate.
 * @brief Defines the sampling rate of the audio signal
 *
 * The enumerator values are small ordinal codes (0 to 8), not the rate
 * itself. Use getIntFromSamplingRate() / getDblFromSamplingRate() to obtain a
 * numeric representation and getSamplingRateFromInt() for the reverse
 * direction.
 */
enum class SamplingRate : int {
  SR_NONE = 0,  // no sampling rate set
  SR_22050 = 1,
  SR_32000 = 2,
  SR_44100 = 3,
  SR_48000 = 4,
  SR_88200 = 5,
  SR_96000 = 6,
  SR_176400 = 7,
  SR_192000 = 8
};

/**
 * Sampling depth.
 * @brief Defines the sampling depth of the audio signal
 *
 * The enumerators come in two groups: the interleaved formats first, then
 * their planar counterparts (suffix `_P`). See
 * getSamplingFormatFromSamplingDepth() to recover the group and
 * getSampleSizeFromSamplingDepth() for the size in bytes of one sample.
 */
enum class SamplingDepth {
  SD_NONE,  // no sampling depth set
  // interleaved
  UINT8,
  INT16,
  INT24,
  INT32,
  FLT,  // presumably single-precision float — confirm in implementation
  DBL,  // presumably double-precision float — confirm in implementation
  // planar
  UINT8_P,
  INT16_P,
  INT24_P,
  INT32_P,
  FLT_P,
  DBL_P
};

/**
 * Maximum number of channel planes; used as the size of
 * Samples::data_buffer_t.
 * NOTE(review): ChannelMap declares 35 enumerators (including NO_SPEAKER);
 * the two values are presumably meant to stay in sync — confirm.
 */
#define MAX_AUDIO_CHANNELS 35

/**
 * @brief Tells whether the audio signal is stored planar or interleaved
 */
enum SamplingFormat {
  FORMAT_UNKNOWN,  // format not known
  INTERLEAVED,     // channel samples interleaved
  PLANAR           // one plane per channel
};

/**
 * Channel layout.
 * @brief Defines the layout of the audio signal
 *
 * Apart from UNKNOWN (value 0), every layout is a bitwise OR of ChannelMap
 * flags, either spelled out directly or built on top of a smaller layout.
 */
enum ChannelLayout : int64_t {
  UNKNOWN,  // first enumerator, implicitly 0: no channel bit set
  MONO = SPEAKER_FRONT_LEFT,
  STEREO = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT,
  // NOTE(review): looks like a placeholder three-channel layout — confirm its purpose.
  _3DUMMY = STEREO | SPEAKER_SIDE_LEFT,
  _2POINT1 = STEREO | SPEAKER_LOW_FREQUENCY,
  _2_1 = STEREO | SPEAKER_BACK_CENTER,
  SURROUND = STEREO | SPEAKER_FRONT_CENTER,
  _3POINT1 = SURROUND | SPEAKER_LOW_FREQUENCY,
  _4POINT0 = SURROUND | SPEAKER_BACK_CENTER,
  _4POINT1 = _4POINT0 | SPEAKER_LOW_FREQUENCY,
  _2_2 = STEREO | SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT,
  QUAD = STEREO | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
  _5POINT0 = SURROUND | SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT,
  _5POINT1 = _5POINT0 | SPEAKER_LOW_FREQUENCY,
  _5POINT0_BACK = SURROUND | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
  _5POINT1_BACK = _5POINT0_BACK | SPEAKER_LOW_FREQUENCY,
  _6POINT0 = _5POINT0 | SPEAKER_BACK_CENTER,
  _6POINT0_FRONT = _2_2 | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
  HEXAGONAL = _5POINT0_BACK | SPEAKER_BACK_CENTER,
  _6POINT1 = _5POINT1 | SPEAKER_BACK_CENTER,
  _6POINT1_BACK = _5POINT1_BACK | SPEAKER_BACK_CENTER,
  _6POINT1_FRONT = _6POINT0_FRONT | SPEAKER_LOW_FREQUENCY,
  _7POINT0 = _5POINT0 | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
  _7POINT0_FRONT = _5POINT0 | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
  // NOTE(review): looks like a placeholder eight-channel layout — confirm its purpose.
  _8DUMMY = _6POINT1 | SPEAKER_BACK_LEFT,
  _7POINT1 = _5POINT1 | SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT,
  _7POINT1_WIDE = _5POINT1 | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
  _7POINT1_WIDE_BACK = _5POINT1_BACK | SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER,
  OCTAGONAL = _5POINT0 | SPEAKER_BACK_LEFT | SPEAKER_BACK_CENTER | SPEAKER_BACK_RIGHT,
  // Ambisonic layouts: each one extends the previous with further SPEAKER_AMB_* components.
  AMBISONICS_WXY = SPEAKER_AMB_W | SPEAKER_AMB_X | SPEAKER_AMB_Y,
  AMBISONICS_WXYZ = AMBISONICS_WXY | SPEAKER_AMB_Z,
  AMBISONICS_2ND = AMBISONICS_WXYZ | SPEAKER_AMB_R | SPEAKER_AMB_S | SPEAKER_AMB_T | SPEAKER_AMB_U | SPEAKER_AMB_V,
  AMBISONICS_3RD = AMBISONICS_2ND | SPEAKER_AMB_K | SPEAKER_AMB_L | SPEAKER_AMB_M | SPEAKER_AMB_N | SPEAKER_AMB_O |
                   SPEAKER_AMB_P | SPEAKER_AMB_Q
};

/**
 * Block size.
 * @brief Block size, in samples, used by audio processors.
 *
 * The enumerators are consecutive ordinal codes starting at 0 (BS_NONE).
 */
enum class BlockSize {
  BS_NONE,
  BS_32,
  BS_64,
  BS_128,
  BS_256,
  BS_512,
  BS_1024,
  BS_2048,
  BS_4096
};

/**
 * @brief Default sampling rate as a floating-point value (44100.0).
 */
static inline double getDefaultSamplingRate() {
  const double defaultRate = 44100.0;
  return defaultRate;
}
/**
 * @brief Default block size as a plain integer (512).
 */
static inline int getDefaultBlockSize() {
  const int defaultSize = 512;
  return defaultSize;
}
/**
 * @brief Identifier string of the audio generator ("audiogen").
 */
static inline std::string getAudioGeneratorId() {
  return std::string("audiogen");
}

/**
 * @brief Determines if the audio signal is planar or interleaved
 * @param samplingDepth The sampling depth to classify.
 * @return The SamplingFormat corresponding to the given sampling depth.
 */
VS_EXPORT SamplingFormat getSamplingFormatFromSamplingDepth(SamplingDepth samplingDepth);

/**
 * @brief Return the size (bytes) of one sample
 * @param samplingDepth The sampling depth to query.
 * @return The size in bytes of a single sample of that depth.
 */
VS_EXPORT std::size_t getSampleSizeFromSamplingDepth(SamplingDepth samplingDepth);

/**
 * @brief Return an index corresponding to the channel map.
 * @param speaker The channel map value (presumably a single speaker flag
 *                rather than a combination — confirm in implementation).
 * @return The integer index associated with that channel.
 */
VS_EXPORT int getChannelIndexFromChannelMap(ChannelMap speaker);

/**
 * @brief Return a channel map from an integer value
 * @param i The channel index (presumably the inverse of
 *          getChannelIndexFromChannelMap() — confirm in implementation).
 * @return The channel map value associated with that index.
 */
VS_EXPORT ChannelMap getChannelMapFromChannelIndex(int i);

/**
 * @brief Return an int representation of a block size.
 * This can be used when serializing a block size enum.
 * @param bs The block size to convert.
 * @return The block size as an int.
 */
VS_EXPORT int getIntFromBlockSize(BlockSize bs);

/**
 * @brief Return a double representation of a block size.
 * This can be used when serializing a block size enum.
 * @param bs The block size to convert.
 * @return The block size as a double.
 */
VS_EXPORT double getDblFromBlockSize(BlockSize bs);

/**
 * @brief Return an int representation of a sampling rate.
 * This can be used when serializing a sampling rate enum.
 * @param samplingRate The sampling rate to convert.
 * @return The sampling rate as an int.
 */
VS_EXPORT int getIntFromSamplingRate(SamplingRate samplingRate);

/**
 * @brief Return a double representation of a sampling rate.
 * This can be used when serializing a sampling rate enum.
 * @param samplingRate The sampling rate to convert.
 * @return The sampling rate as a double.
 */
VS_EXPORT double getDblFromSamplingRate(SamplingRate samplingRate);

/**
 * @brief Return a sampling rate from an integer value
 * This can be used when parsing a sampling rate enum.
 * @param samplingRateInt The integer representation of the sampling rate.
 * @return The matching SamplingRate. The result for an unsupported value is
 *         decided by the implementation, which is not visible in this header.
 */
VS_EXPORT SamplingRate getSamplingRateFromInt(const int samplingRateInt);

/**
 * @brief Return a block size from an integer value
 * This can be used when parsing a block size enum.
 * @param bs The integer representation of the block size.
 * @return The matching BlockSize. The result for an unsupported value is
 *         decided by the implementation, which is not visible in this header.
 */
VS_EXPORT BlockSize getBlockSizeFromInt(const int bs);

/**
 * @brief Return a string representation of a sampling depth.
 * This can be used when serializing a sampling depth enum.
 * @param samplingDepth The sampling depth to convert.
 * @return A C string naming the sampling depth.
 */
VS_EXPORT const char* getStringFromSamplingDepth(SamplingDepth samplingDepth);

/**
 * @brief Return a sampling depth from a string.
 * This can be used when parsing a sampling depth enum.
 * @param samplingDepthStr The string representation of the sampling depth.
 * @return The matching SamplingDepth. The result for an unrecognized string
 *         is decided by the implementation, which is not visible in this header.
 */
VS_EXPORT SamplingDepth getSamplingDepthFromString(const char* samplingDepthStr);

/**
 * @brief Return a string representation of a channel map.
 * This can be used when serializing a channel map.
 * @param map The channel map value to convert.
 * @return A C string naming the channel.
 */
VS_EXPORT const char* getStringFromChannelType(ChannelMap map);

/**
 * @brief Return a string representation of a channel layout.
 * This can be used when serializing a channel layout enum.
 * @param channelLayout The channel layout to convert.
 * @return A C string naming the channel layout.
 */
VS_EXPORT const char* getStringFromChannelLayout(ChannelLayout channelLayout);

/**
 * @brief Return the number of channels used by this layout.
 * @param channelLayout The channel layout to query.
 * @return The number of channels of that layout.
 */
VS_EXPORT int getNbChannelsFromChannelLayout(ChannelLayout channelLayout);

/**
 * @brief Return a channel layout with this number of channels.
 * Several layouts can share the same channel count; which one is picked is
 * decided by the implementation, which is not visible in this header.
 * @param nbChannels The requested number of channels.
 * @return A channel layout having nbChannels channels.
 */
VS_EXPORT ChannelLayout getAChannelLayoutFromNbChannels(size_t nbChannels);

/**
 * @brief Return a channel layout from a string.
 * This can be used when parsing a channel layout enum.
 * @param channelLayout The string representation of the channel layout.
 * @return The matching ChannelLayout. The result for an unrecognized string
 *         is decided by the implementation, which is not visible in this header.
 */
VS_EXPORT ChannelLayout getChannelLayoutFromString(const char* channelLayout);

/**
 * Buffer of audio samples together with its format description (sampling
 * rate, sampling depth, channel layout) and a timestamp.
 *
 * Where the audio samples channel planes will be written.
 * Up to 18 channels passthru is supported.
 * NOTE(review): the buffer type below holds MAX_AUDIO_CHANNELS plane
 * pointers, which does not match the "18 channels" figure above — confirm
 * which limit actually applies.
 * Audio channel samples are either interleaved into a sample
 * frame or planar, either contiguous or padded (see Spec).
 * The memory of each plane is not allocated.
 * The ownership is transferred to the stitcher.
 *
 * The class is move-only: copy construction and copy assignment are deleted.
 */
class VS_EXPORT Samples {
 public:
  /** One raw byte pointer per channel plane (MAX_AUDIO_CHANNELS entries). */
  typedef std::array<uint8_t*, MAX_AUDIO_CHANNELS> data_buffer_t;

  /** Signature of the function used to release a data buffer. */
  typedef void (*deleter)(data_buffer_t&);

  /**
   * @brief Default constructor. Generate an invalid buffer.
   */
  Samples();

  /**
   * @brief Construct from raw data.
   * @param r Sampling rate
   * @param d Sample depth
   * @param l Channels layout
   * @param timestamp First sample timestamp in microseconds
   * @param data Raw data. Ownership is transferred to newly created Samples object.
   * @param nbSamples Number of samples in the buffer
   * @param delete_ Deleter function for the data buffer (a plain function pointer, see deleter)
   */
  Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, data_buffer_t& data,
          size_t nbSamples, deleter delete_);

  /**
   * @brief Construct from raw data, without specifying a deleter.
   * @param r Sampling rate
   * @param d Sample depth
   * @param l Channels layout
   * @param timestamp First sample timestamp in microseconds
   * @param data Raw data. Ownership is transferred to newly created Samples object.
   * @param nbSamples Number of samples in the buffer
   */
  Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, data_buffer_t& data,
          size_t nbSamples);

  /**
   * @brief Construct from raw data given as an array of plane pointers.
   * @param r Sampling rate
   * @param d Sample depth
   * @param l Channels layout
   * @param timestamp First sample timestamp in microseconds
   * @param data Raw data. Ownership is transferred to newly created Samples object.
   * @param nbSamples Number of samples in the buffer
   * @param delete_ Deleter function for the data buffer (a plain function pointer, see deleter)
   * TODO deprecate
   */
  Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, uint8_t** data,
          size_t nbSamples, deleter delete_);
  /**
   * @brief Construct from raw data given as an array of plane pointers, without specifying a deleter.
   * @param r Sampling rate
   * @param d Sample depth
   * @param l Channels layout
   * @param timestamp First sample timestamp in microseconds
   * @param data Raw data. Ownership is transferred to newly created Samples object.
   * @param nbSamples Number of samples in the buffer
   * TODO deprecate
   */
  Samples(const SamplingRate r, const SamplingDepth d, const ChannelLayout l, mtime_t timestamp, uint8_t** data,
          size_t nbSamples);
  ~Samples();

  /**
   * @brief Move constructor
   * @param s The object to move from
   */
  Samples(Samples&& s);

  /**
   * @brief Copy construction of audio samples is disabled
   */
  Samples(const Samples&) = delete;

  /**
   * @brief Copy assignment of audio samples is disabled
   */
  Samples& operator=(const Samples&) = delete;

  /**
   * @brief Assign operator with move semantics
   * @param s The object to assign
   */
  Samples& operator=(Samples&& s);

  /**
   * @brief Create a new Samples object from this one.
   * Presumably a deep copy of the sample data and format — confirm in the implementation.
   * @return The newly created Samples object
   */
  Samples clone() const;

  /**
   * @brief Get the number of audio samples
   * @return The number of audio samples in the buffer
   */
  size_t getNbOfSamples() const { return nbSamples; }

  /**
   * @brief Get the audio samples (const)
   * @return The constant reference to the audio samples
   */
  const data_buffer_t& getSamples() const { return samples; }

  /**
   * @brief Get the samples timestamp.
   * @return The samples timestamp : in 1 / fps unit (like the video's frameId)
   * NOTE(review): the constructors document the timestamp as microseconds — confirm the unit.
   */
  mtime_t getTimestamp() const { return timestamp; }

  /**
   * @brief Set the samples timestamp.
   * @param t The samples timestamp : in 1 / fps unit (like the video's frameId)
   * NOTE(review): the constructors document the timestamp as microseconds — confirm the unit.
   */
  void setTimestamp(mtime_t t) { timestamp = t; }

  /**
   * @brief Drop the first samples of the buffer
   * @param nb Number of leading samples to drop
   * @return Status of the operation
   */
  Status drop(size_t nb);

  /**
   * @brief Append other samples at the end of the buffer
   * @param other The samples to append
   * @return Status of the operation
   */
  Status append(const Audio::Samples& other);

  /**
   * @brief Get the sampling rate.
   * @return The sampling rate.
   */
  SamplingRate getSamplingRate() const { return rate; }

  /**
   * @brief Get the sampling depth.
   * @return The sampling depth.
   */
  SamplingDepth getSamplingDepth() const { return depth; }

  /**
   * @brief Get the channels layout.
   * @return The channels layout.
   */
  ChannelLayout getChannelLayout() const { return layout; }

  /**
   * @brief Set the samples deleter.
   * @param del The deleter function (replaces the one currently stored)
   */
  void setDeleter(deleter del) { delete_ = del; }

 private:
  // Internal helpers; their definitions are not part of this header.
  // TODO use data_buffer_t&
  void alloc(uint8_t** data);
  void clear();

  template <typename Functor>
  void mapSamples(const Functor& execFunctor);

  data_buffer_t samples;  // one pointer per channel plane
  size_t nbSamples;       // number of samples, as returned by getNbOfSamples()
  mtime_t timestamp;      // see getTimestamp() for the unit caveat
  deleter delete_;        // function used to release the buffer, see setDeleter()

  SamplingDepth depth;
  SamplingRate rate;
  ChannelLayout layout;
};

}  // namespace Audio
}  // namespace VideoStitch