Bug 1271585: Proxy audio data to a separate thread for encoding r=pehrsons
jesup committed May 12, 2016
1 parent f178a14 commit ed49a4b
Showing 3 changed files with 142 additions and 80 deletions.
1 change: 0 additions & 1 deletion media/webrtc/signaling/src/media-conduit/VideoConduit.h
@@ -8,7 +8,6 @@
#include "nsAutoPtr.h"
#include "mozilla/Attributes.h"
#include "mozilla/Atomics.h"
#include "mozilla/SharedThreadPool.h"

#include "MediaConduitInterface.h"
#include "MediaEngineWrapper.h"
219 changes: 140 additions & 79 deletions media/webrtc/signaling/src/mediapipeline/MediaPipeline.cpp
@@ -45,6 +45,7 @@
#include "transportlayerice.h"
#include "runnable_utils.h"
#include "libyuv/convert.h"
#include "mozilla/SharedThreadPool.h"
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
#include "mozilla/PeerIdentity.h"
#include "mozilla/TaskQueue.h"
@@ -61,7 +62,8 @@

#include "logging.h"

// Max size given stereo is 480*2*2 = 1920 (48KHz)
// Max size given stereo is 480*2*2 = 1920 (10ms of 16-bit stereo audio at
// 48KHz)
#define AUDIO_SAMPLE_BUFFER_MAX 480*2*2
static_assert((WEBRTC_DEFAULT_SAMPLE_RATE/100)*sizeof(uint16_t) * 2
<= AUDIO_SAMPLE_BUFFER_MAX,
@@ -463,6 +465,128 @@ class VideoFrameConverter
};
#endif

// An async inserter for audio data, to avoid running audio codec encoders
// on the MSG/input audio thread. Basically just bounces all the audio
// data to a single audio processing/input queue. If we wanted to, we could
// use multiple threads and a TaskQueue.
class AudioProxyThread
{
public:
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AudioProxyThread)

explicit AudioProxyThread(AudioSessionConduit *aConduit)
: mConduit(aConduit)
{
MOZ_ASSERT(mConduit);
MOZ_COUNT_CTOR(AudioProxyThread);

#if !defined(MOZILLA_EXTERNAL_LINKAGE)
// Use only 1 thread; this also forces FIFO operation.
// We could use multiple threads, but that may be dicier with the webrtc.org
// code. If so, we'd need to use TaskQueues like the videoframe converter.
RefPtr<SharedThreadPool> pool =
SharedThreadPool::Get(NS_LITERAL_CSTRING("AudioProxy"), 1);

mThread = pool.get();
#else
nsCOMPtr<nsIThread> thread;
if (!NS_WARN_IF(NS_FAILED(NS_NewNamedThread("AudioProxy", getter_AddRefs(thread))))) {
mThread = thread;
}
#endif
}

// called on mThread
void InternalProcessAudioChunk(
TrackRate rate,
AudioChunk& chunk,
bool enabled) {

// Convert to interleaved, 16-bit integer audio, with a maximum of two
// channels (since the WebRTC.org code below makes the assumption that the
// input audio is either mono or stereo).
uint32_t outputChannels = chunk.ChannelCount() == 1 ? 1 : 2;
const int16_t* samples = nullptr;
UniquePtr<int16_t[]> convertedSamples;

// We take advantage of the fact that in the common case (microphone directly
// to PeerConnection, that is, a normal call) the samples are already 16-bit
// mono, so the interleaved and planar representations are the same, and we
// can just use that.
if (enabled && outputChannels == 1 && chunk.mBufferFormat == AUDIO_FORMAT_S16) {
samples = chunk.ChannelData<int16_t>().Elements()[0];
} else {
convertedSamples = MakeUnique<int16_t[]>(chunk.mDuration * outputChannels);

if (!enabled || chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
PodZero(convertedSamples.get(), chunk.mDuration * outputChannels);
} else if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
DownmixAndInterleave(chunk.ChannelData<float>(),
chunk.mDuration, chunk.mVolume, outputChannels,
convertedSamples.get());
} else if (chunk.mBufferFormat == AUDIO_FORMAT_S16) {
DownmixAndInterleave(chunk.ChannelData<int16_t>(),
chunk.mDuration, chunk.mVolume, outputChannels,
convertedSamples.get());
}
samples = convertedSamples.get();
}

MOZ_ASSERT(!(rate%100)); // rate should be a multiple of 100

// Check if the rate or the number of channels has changed since the last time
// we came through. I realize it may be overkill to check if the rate has
// changed, but I believe it is possible (e.g. if we change sources) and it
// costs us very little to handle this case.

uint32_t audio_10ms = rate / 100;

if (!packetizer_ ||
packetizer_->PacketSize() != audio_10ms ||
packetizer_->Channels() != outputChannels) {
// It's ok to drop the audio still in the packetizer here.
packetizer_ = new AudioPacketizer<int16_t, int16_t>(audio_10ms, outputChannels);
}

packetizer_->Input(samples, chunk.mDuration);

while (packetizer_->PacketsAvailable()) {
uint32_t samplesPerPacket = packetizer_->PacketSize() *
packetizer_->Channels();
// We know that webrtc.org's code is going to copy the samples down the line,
// so we can just use a stack buffer here instead of malloc-ing.
int16_t packet[AUDIO_SAMPLE_BUFFER_MAX];

packetizer_->Output(packet);
mConduit->SendAudioFrame(packet, samplesPerPacket, rate, 0);
}
}

void QueueAudioChunk(TrackRate rate, AudioChunk& chunk, bool enabled)
{
RUN_ON_THREAD(mThread,
WrapRunnable(RefPtr<AudioProxyThread>(this),
&AudioProxyThread::InternalProcessAudioChunk,
rate, chunk, enabled),
NS_DISPATCH_NORMAL);
}

protected:
virtual ~AudioProxyThread()
{
// Conduits must be released on MainThread, and we might have the last reference.
// We don't need to worry about runnables still trying to access the conduit, since
// the runnables hold a ref to AudioProxyThread.
NS_ReleaseOnMainThread(mConduit.forget());
MOZ_COUNT_DTOR(AudioProxyThread);
}

RefPtr<AudioSessionConduit> mConduit;
nsCOMPtr<nsIEventTarget> mThread;
// Only accessed on mThread
nsAutoPtr<AudioPacketizer<int16_t, int16_t>> packetizer_;
};

static char kDTLSExporterLabel[] = "EXTRACTOR-dtls_srtp";

MediaPipeline::MediaPipeline(const std::string& pc,
@@ -1091,8 +1215,7 @@ friend class MediaPipelineTransmit;
track_id_external_(TRACK_INVALID),
active_(false),
enabled_(false),
direct_connect_(false),
packetizer_(nullptr)
direct_connect_(false)
{
}

@@ -1125,6 +1248,13 @@ friend class MediaPipelineTransmit;
void SetActive(bool active) { active_ = active; }
void SetEnabled(bool enabled) { enabled_ = enabled; }

// These are needed since nested classes don't have access to any particular
// instance of the parent
void SetAudioProxy(const RefPtr<AudioProxyThread>& proxy)
{
audio_processing_ = proxy;
}

#if !defined(MOZILLA_EXTERNAL_LINKAGE)
void SetVideoFrameConverter(const RefPtr<VideoFrameConverter>& converter)
{
@@ -1172,10 +1302,8 @@ friend class MediaPipelineTransmit;
StreamTime offset,
const MediaSegment& media);

virtual void ProcessAudioChunk(AudioSessionConduit *conduit,
TrackRate rate, AudioChunk& chunk);

RefPtr<MediaSessionConduit> conduit_;
RefPtr<AudioProxyThread> audio_processing_;
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
RefPtr<VideoFrameConverter> converter_;
#endif
@@ -1195,8 +1323,6 @@ friend class MediaPipelineTransmit;

// Written and read on the MediaStreamGraph thread
bool direct_connect_;

nsAutoPtr<AudioPacketizer<int16_t, int16_t>> packetizer_;
};

#if !defined(MOZILLA_EXTERNAL_LINKAGE)
@@ -1308,8 +1434,12 @@ MediaPipelineTransmit::MediaPipelineTransmit(
video_sink_(new PipelineVideoSink(conduit, listener_)),
domtrack_(domtrack)
{
if (!IsVideo()) {
audio_processing_ = MakeAndAddRef<AudioProxyThread>(static_cast<AudioSessionConduit*>(conduit.get()));
listener_->SetAudioProxy(audio_processing_);
}
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
if (IsVideo()) {
else { // Video
// For video we send frames to an async VideoFrameConverter that calls
// back to a VideoFrameFeeder that feeds I420 frames to VideoConduit.

@@ -1678,83 +1808,14 @@ NewData(MediaStreamGraph* graph,
#else
rate = graph->GraphRate();
#endif
ProcessAudioChunk(static_cast<AudioSessionConduit*>(conduit_.get()),
rate, *iter);
audio_processing_->QueueAudioChunk(rate, *iter, enabled_);
iter.Next();
}
} else {
// Ignore
}
}

void MediaPipelineTransmit::PipelineListener::ProcessAudioChunk(
AudioSessionConduit *conduit,
TrackRate rate,
AudioChunk& chunk) {

// Convert to interleaved, 16-bits integer audio, with a maximum of two
// channels (since the WebRTC.org code below makes the assumption that the
// input audio is either mono or stereo).
uint32_t outputChannels = chunk.ChannelCount() == 1 ? 1 : 2;
const int16_t* samples = nullptr;
UniquePtr<int16_t[]> convertedSamples;

// We take advantage of the fact that the common case (microphone directly to
// PeerConnection, that is, a normal call), the samples are already 16-bits
// mono, so the representation in interleaved and planar is the same, and we
// can just use that.
if (enabled_ && outputChannels == 1 && chunk.mBufferFormat == AUDIO_FORMAT_S16) {
samples = chunk.ChannelData<int16_t>().Elements()[0];
} else {
convertedSamples = MakeUnique<int16_t[]>(chunk.mDuration * outputChannels);

if (!enabled_ || chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
PodZero(convertedSamples.get(), chunk.mDuration * outputChannels);
} else if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
DownmixAndInterleave(chunk.ChannelData<float>(),
chunk.mDuration, chunk.mVolume, outputChannels,
convertedSamples.get());
} else if (chunk.mBufferFormat == AUDIO_FORMAT_S16) {
DownmixAndInterleave(chunk.ChannelData<int16_t>(),
chunk.mDuration, chunk.mVolume, outputChannels,
convertedSamples.get());
}
samples = convertedSamples.get();
}

MOZ_ASSERT(!(rate%100)); // rate should be a multiple of 100

// Check if the rate or the number of channels has changed since the last time
// we came through. I realize it may be overkill to check if the rate has
// changed, but I believe it is possible (e.g. if we change sources) and it
// costs us very little to handle this case.

uint32_t audio_10ms = rate / 100;

if (!packetizer_ ||
packetizer_->PacketSize() != audio_10ms ||
packetizer_->Channels() != outputChannels) {
// It's ok to drop the audio still in the packetizer here.
packetizer_ = new AudioPacketizer<int16_t, int16_t>(audio_10ms, outputChannels);
}

packetizer_->Input(samples, chunk.mDuration);

while (packetizer_->PacketsAvailable()) {
uint32_t samplesPerPacket = packetizer_->PacketSize() *
packetizer_->Channels();

// We know that webrtc.org's code going to copy the samples down the line,
// so we can just use a stack buffer here instead of malloc-ing.
int16_t packet[AUDIO_SAMPLE_BUFFER_MAX];

packetizer_->Output(packet);
conduit->SendAudioFrame(packet,
samplesPerPacket,
rate, 0);
}
}

void MediaPipelineTransmit::PipelineVideoSink::
SetCurrentFrames(const VideoSegment& aSegment)
{
2 changes: 2 additions & 0 deletions media/webrtc/signaling/src/mediapipeline/MediaPipeline.h
@@ -34,6 +34,7 @@ class nsIPrincipal;
namespace mozilla {
class MediaPipelineFilter;
class PeerIdentity;
class AudioProxyThread;
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
class VideoFrameConverter;
#endif
@@ -350,6 +351,7 @@ class MediaPipelineTransmit : public MediaPipeline {

private:
RefPtr<PipelineListener> listener_;
RefPtr<AudioProxyThread> audio_processing_;
#if !defined(MOZILLA_EXTERNAL_LINKAGE)
RefPtr<VideoFrameFeeder> feeder_;
RefPtr<VideoFrameConverter> converter_;
