-
Notifications
You must be signed in to change notification settings - Fork 37
Audio encoding: support custom num_channels
#693
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
52d624b
aad9c7d
2d76a7b
7d643f2
dd85e96
b101939
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,9 @@ class AudioEncoder { | |
// like passing 0, which results in choosing the minimum supported bit rate. | ||
// Passing 44_100 could result in output being 44000 if only 44000 is | ||
// supported. | ||
// | ||
// TODO-ENCODING: bundle the optional params like bitRate, numChannels, etc. | ||
// into an AudioStreamOptions struct, or similar. | ||
AudioEncoder( | ||
const torch::Tensor wf, | ||
// The *output* sample rate. We can't really decide for the user what it | ||
|
@@ -21,20 +24,23 @@ class AudioEncoder { | |
// encoding will still work but audio will be distorted. | ||
int sampleRate, | ||
std::string_view fileName, | ||
std::optional<int64_t> bitRate = std::nullopt); | ||
std::optional<int64_t> bitRate = std::nullopt, | ||
std::optional<int64_t> numChannels = std::nullopt); | ||
AudioEncoder( | ||
const torch::Tensor wf, | ||
int sampleRate, | ||
std::string_view formatName, | ||
std::unique_ptr<AVIOToTensorContext> avioContextHolder, | ||
std::optional<int64_t> bitRate = std::nullopt); | ||
std::optional<int64_t> bitRate = std::nullopt, | ||
std::optional<int64_t> numChannels = std::nullopt); | ||
void encode(); | ||
torch::Tensor encodeToTensor(); | ||
|
||
private: | ||
void initializeEncoder( | ||
int sampleRate, | ||
std::optional<int64_t> bitRate = std::nullopt); | ||
std::optional<int64_t> bitRate = std::nullopt, | ||
std::optional<int64_t> numChannels = std::nullopt); | ||
void encodeInnerLoop( | ||
AutoAVPacket& autoAVPacket, | ||
const UniqueAVFrame& srcAVFrame); | ||
|
@@ -44,6 +50,9 @@ class AudioEncoder { | |
UniqueAVCodecContext avCodecContext_; | ||
int streamIndex_; | ||
UniqueSwrContext swrContext_; | ||
// TODO-ENCODING: desiredNumChannels should just be part of an options struct, | ||
// see other TODO above. | ||
int desiredNumChannels_ = -1; | ||
|
||
const torch::Tensor wf_; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think a comment here that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have a TODO somewhere to rename |
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -88,21 +88,71 @@ void setDefaultChannelLayout( | |
#endif | ||
} | ||
|
||
void setChannelLayout( | ||
UniqueAVFrame& dstAVFrame, | ||
const UniqueAVCodecContext& avCodecContext) { | ||
void setDefaultChannelLayout(UniqueAVFrame& avFrame, int numChannels) { | ||
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4 | ||
auto status = av_channel_layout_copy( | ||
&dstAVFrame->ch_layout, &avCodecContext->ch_layout); | ||
TORCH_CHECK( | ||
status == AVSUCCESS, | ||
"Couldn't copy channel layout to avFrame: ", | ||
getFFMPEGErrorStringFromErrorCode(status)); | ||
AVChannelLayout channel_layout; | ||
av_channel_layout_default(&channel_layout, numChannels); | ||
avFrame->ch_layout = channel_layout; | ||
#else | ||
dstAVFrame->channel_layout = avCodecContext->channel_layout; | ||
dstAVFrame->channels = avCodecContext->channels; | ||
uint64_t channel_layout = av_get_default_channel_layout(numChannels); | ||
avFrame->channel_layout = channel_layout; | ||
avFrame->channels = numChannels; | ||
#endif | ||
} | ||
|
||
void validateNumChannels(const AVCodec& avCodec, int numChannels) { | ||
#if LIBAVFILTER_VERSION_MAJOR > 7 // FFmpeg > 4 | ||
if (avCodec.ch_layouts == nullptr) { | ||
// If we can't validate, we must assume it'll be fine. If not, FFmpeg will | ||
// eventually raise. | ||
return; | ||
} | ||
// FFmpeg docs indicate that the ch_layouts array is terminated by a zeroed | ||
// layout, so checking for nb_channels == 0 should indicate its end. | ||
for (auto i = 0; avCodec.ch_layouts[i].nb_channels != 0; ++i) { | ||
if (numChannels == avCodec.ch_layouts[i].nb_channels) { | ||
return; | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A comment here saying that we've now entered the error path might be helpful - I think this is less obvious because we're in an |
||
// At this point it seems that the encoder doesn't support the requested | ||
// number of channels, so we error out. | ||
std::stringstream supportedNumChannels; | ||
for (auto i = 0; avCodec.ch_layouts[i].nb_channels != 0; ++i) { | ||
if (i > 0) { | ||
supportedNumChannels << ", "; | ||
} | ||
supportedNumChannels << avCodec.ch_layouts[i].nb_channels; | ||
} | ||
#else | ||
if (avCodec.channel_layouts == nullptr) { | ||
// can't validate, same as above. | ||
return; | ||
} | ||
for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) { | ||
if (numChannels == | ||
av_get_channel_layout_nb_channels(avCodec.channel_layouts[i])) { | ||
return; | ||
} | ||
} | ||
// At this point it seems that the encoder doesn't support the requested | ||
// number of channels, so we error out. | ||
std::stringstream supportedNumChannels; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto about error path, partially so both arms have the same structure. |
||
for (auto i = 0; avCodec.channel_layouts[i] != 0; ++i) { | ||
if (i > 0) { | ||
supportedNumChannels << ", "; | ||
} | ||
supportedNumChannels << av_get_channel_layout_nb_channels( | ||
avCodec.channel_layouts[i]); | ||
} | ||
#endif | ||
TORCH_CHECK( | ||
false, | ||
"Desired number of channels (", | ||
numChannels, | ||
") is not supported by the ", | ||
"encoder. Supported number of channels are: ", | ||
supportedNumChannels.str(), | ||
"."); | ||
} | ||
|
||
namespace { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,23 +31,27 @@ def to_file( | |
dest: Union[str, Path], | ||
*, | ||
bit_rate: Optional[int] = None, | ||
num_channels: Optional[int] = None, | ||
) -> None: | ||
_core.encode_audio_to_file( | ||
wf=self._samples, | ||
sample_rate=self._sample_rate, | ||
filename=dest, | ||
bit_rate=bit_rate, | ||
num_channels=num_channels, | ||
) | ||
|
||
def to_tensor( | ||
self, | ||
format: str, | ||
*, | ||
bit_rate: Optional[int] = None, | ||
num_channels: Optional[int] = None, | ||
) -> Tensor: | ||
return _core.encode_audio_to_tensor( | ||
wf=self._samples, | ||
sample_rate=self._sample_rate, | ||
format=format, | ||
bit_rate=bit_rate, | ||
num_channels=num_channels, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are no tests for the public API right now. I will soon migrate most of the existing encoder ops tests into testing the public Python APIs. |
||
) |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had to think about this for a few moments to convince myself it's correct, so it may be worth putting in a comment: the default channel layout should be the channel layout of the provided waveform. The desired channel layout only comes in if we need to do any conversions in the encoding inner loop.