Hi everyone,
I’m working on a project that involves streaming audio over WebSockets, and I need to compress the audio to reduce bandwidth usage. I’m currently using AVAudioEngine to capture and process audio in PCM format (AVAudioPCMBuffer), but I want to compress the buffer into Opus (or another efficient codec) before sending it over the network.
Has anyone worked with compressing an AVAudioPCMBuffer into Opus format within a tap on the inputNode, or could you recommend the best approach for compressing the PCM buffer into a different format? I haven’t been able to find a working solution for this.
Any advice or code examples would be greatly appreciated!
Thanks in advance,
Ondřej
--
My current code without the compression:
// Tap the input node: every captured buffer is forwarded to the websocket and to the level meter.
inputNode.installTap(onBus: 0, bufferSize: 1440, format: nil) { [weak self] buffer, _ in
    guard let self else { return }

    // 1. Send data: convert the buffer into the websocket format, then serialize and send it.
    // TODO: compress it into a different format
    if let converted = buffer.convert(toFormat: Self.websocketInputFormat),
       let payload = converted.convertToData() {
        self.sendAudio(payload)
    }

    // 2. Get sound level (uses the original, unconverted buffer).
    self.visualizeRecorderBuffer(buffer)
}
/// Converts the contents of this buffer to `outputFormat`.
///
/// A new `AVAudioConverter` is created on every call.
/// NOTE(review): because no converter state is carried from one call to the next, sample-rate
/// conversion of consecutive buffers may not be seamless — consider reusing one converter.
///
/// - Parameter outputFormat: The PCM format to convert to.
/// - Returns: A new buffer in `outputFormat`, or `nil` if the source sample rate is not positive,
///   the output buffer or converter cannot be created, or the conversion reports an error.
func convert(toFormat outputFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
    // A zero source rate would make the ratio infinite/NaN and trap in the integer conversion below.
    guard format.sampleRate > 0 else {
        return nil
    }
    let rateRatio = outputFormat.sampleRate / format.sampleRate
    let outputFrameCapacity = AVAudioFrameCount((Double(frameLength) * rateRatio).rounded())

    guard
        let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity),
        let converter = AVAudioConverter(from: format, to: outputFormat)
    else {
        return nil
    }

    // The converter may invoke the input block more than once. Hand this buffer over exactly once;
    // returning it again would feed the same samples into the output a second time.
    var didSupplyInput = false
    var conversionError: NSError?
    let status = converter.convert(to: outputBuffer, error: &conversionError) { _, inputStatus in
        if didSupplyInput {
            inputStatus.pointee = .noDataNow
            return nil
        }
        didSupplyInput = true
        inputStatus.pointee = .haveData
        return self
    }

    guard status != .error, conversionError == nil else {
        return nil
    }
    return outputBuffer
}
/// Target PCM format for audio sent over the websocket:
/// 16-bit signed integer samples, 16 kHz, mono, non-interleaved.
private static let websocketInputFormat: AVAudioFormat = {
    let format = AVAudioFormat(
        commonFormat: .pcmFormatInt16,
        sampleRate: 16000,
        channels: 1,
        interleaved: false
    )
    // The parameters are compile-time constants; a nil result is treated as a programmer error.
    return format!
}()
Hello @LookingForFont, please see below a very basic example to get you started. It taps the engine's input node and uses `AVAudioConverter` to convert back and forth between PCM and Opus. The example uses `InputStream` and `OutputStream`, but it should be straightforward to adapt it to your needs. Note that the example omits checks while unwrapping optionals for simplicity, but an actual implementation should not skip these checks. Please also refer to TN3136 for more information on `AVAudioConverter`.
import AVFAudio
/// Records microphone audio as Opus packets into an `OutputStream` (`record`) and plays Opus
/// packets read from an `InputStream` (`play`), using `AVAudioEngine` and `AVAudioConverter`.
///
/// Wire format: every packet occupies exactly 128 bytes on the stream, with no header or length
/// prefix.
/// NOTE(review): this fixed-size framing only works while the encoder never produces a packet
/// larger than 128 bytes; a larger packet is truncated when written. Confirm the encoder's packet
/// size for the configured bit rate, or move to length-prefixed packets.
class AudioManager {
    let sizeOfFloat = UInt32(MemoryLayout<Float>.size)
    let engine = AVAudioEngine()
    var outBuffer: AVAudioPCMBuffer!
    var playerNode: AVAudioPlayerNode!
    var converter: AVAudioConverter!
    var micFormat: AVAudioFormat!

    /// Mono Opus at 48 kHz with 960 frames per packet.
    let opusFormat: AVAudioFormat = {
        var opusDesc = AudioStreamBasicDescription()
        opusDesc.mSampleRate = 48000
        opusDesc.mFormatID = kAudioFormatOpus
        opusDesc.mChannelsPerFrame = 1
        opusDesc.mFramesPerPacket = 960
        // The description is built from constants; failure here is treated as a programmer error.
        return AVAudioFormat(streamDescription: &opusDesc)!
    }()

    /// Number of bytes each Opus packet occupies on the stream.
    private let opusPacketByteCount = 128

    /// Frame capacity of the PCM buffer that decoded audio is rendered into.
    private let playbackFrameCapacity: AVAudioFrameCount = 1024

    // MARK: - Recording

    /// Returns a buffer that aliases (does not copy) `count` frames of `buffer` starting at frame
    /// `offset`.
    ///
    /// Only the first audio buffer of `buffer` is used and each frame is taken to be one `Float`
    /// wide (`sizeOfFloat` bytes).
    ///
    /// - Returns: The aliasing buffer, or `nil` if the requested range lies outside
    ///   `buffer.frameLength`, the source has no sample data, or the buffer cannot be created.
    func getOffsetBuffer(buffer: AVAudioPCMBuffer, offset: UInt32, count: UInt32) -> AVAudioPCMBuffer? {
        guard
            offset <= buffer.frameLength,
            count <= buffer.frameLength - offset,
            let base = UnsafeMutableAudioBufferListPointer(buffer.mutableAudioBufferList).first?.mData
        else {
            return nil
        }
        let data = base + Int(offset * sizeOfFloat)
        var abl = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(mNumberChannels: 1, mDataByteSize: count * sizeOfFloat, mData: data)
        )
        return AVAudioPCMBuffer(pcmFormat: buffer.format, bufferListNoCopy: &abl)
    }

    /// Starts capturing from the engine's input node and writes Opus packets to `outputStream`.
    ///
    /// Does nothing (apart from logging) if the PCM format or the converter cannot be created.
    func record(outputStream: OutputStream) {
        let sampleRate = engine.outputNode.outputFormat(forBus: 0).sampleRate
        guard
            let micFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1),
            let converter = AVAudioConverter(from: micFormat, to: opusFormat)
        else {
            print("AudioManager: unable to create a PCM-to-Opus converter at \(sampleRate) Hz")
            return
        }
        // Constant bit rate: the fixed-size framing on the stream relies on it.
        converter.bitRateStrategy = AVAudioBitRateStrategy_Constant
        self.micFormat = micFormat
        self.converter = converter

        // Drop any tap left over from an earlier call before installing a new one.
        engine.inputNode.removeTap(onBus: 0)
        // Capture self weakly: the tap block is held by the engine, which self owns.
        engine.inputNode.installTap(onBus: 0, bufferSize: 0, format: micFormat) { [weak self] buffer, _ in
            self?.encode(buffer: buffer, outputStream: outputStream)
        }

        activateAudioSession(sampleRate: sampleRate)
        do {
            try engine.start()
        } catch {
            print("AudioManager: unable to start the audio engine: \(error)")
        }
    }

    /// Encodes `buffer` to Opus, one packet per converter call, and writes each packet to
    /// `outputStream`.
    ///
    /// Stops early if the converter stops producing packets (error, or input exhausted) or if the
    /// stream rejects a write, so the loop cannot spin forever.
    func encode(buffer: AVAudioPCMBuffer, outputStream: OutputStream) {
        guard let converter = converter else {
            return
        }
        let bufferLength = buffer.frameLength
        // Never allocate less than the number of bytes that is written per packet.
        let packetByteCapacity = max(converter.maximumOutputPacketSize, opusPacketByteCount)
        var offset: UInt32 = 0

        repeat {
            let opusBuffer = AVAudioCompressedBuffer(
                format: opusFormat,
                packetCapacity: 1,
                maximumPacketSize: packetByteCapacity
            )
            var conversionError: NSError?
            let outputStatus = converter.convert(to: opusBuffer, error: &conversionError) { packetCount, inputStatus in
                // Hand over at most what is left of `buffer`; report "no data now" once it is used up.
                let count = min(packetCount, bufferLength - offset)
                guard
                    count > 0,
                    let slice = self.getOffsetBuffer(buffer: buffer, offset: offset, count: count)
                else {
                    inputStatus.pointee = .noDataNow
                    return nil
                }
                offset += count
                inputStatus.pointee = .haveData
                return slice
            }

            // Anything other than a complete packet means there is nothing more to write for now.
            guard outputStatus == .haveData else {
                break
            }
            guard writePacket(opusBuffer, to: outputStream) else {
                break
            }
        } while offset < bufferLength
    }

    /// Writes one fixed-size packet from `opusBuffer` to `outputStream`, retrying short writes.
    ///
    /// The caller must have allocated `opusBuffer` with at least `opusPacketByteCount` bytes.
    ///
    /// - Returns: `false` if the stream reported an error or accepted no bytes.
    private func writePacket(_ opusBuffer: AVAudioCompressedBuffer, to outputStream: OutputStream) -> Bool {
        let encodedByteCount = Int(opusBuffer.byteLength)
        guard encodedByteCount > 0 else {
            // Nothing was encoded; skip instead of emitting an empty packet.
            return true
        }
        // Zero the padding so uninitialised memory is never sent when a packet is shorter than the
        // fixed on-stream size.
        if encodedByteCount < opusPacketByteCount {
            (opusBuffer.data + encodedByteCount).initializeMemory(
                as: UInt8.self,
                repeating: 0,
                count: opusPacketByteCount - encodedByteCount
            )
        }

        var written = 0
        while written < opusPacketByteCount {
            let cursor = opusBuffer.data + written
            let result = outputStream.write(cursor, maxLength: opusPacketByteCount - written)
            guard result > 0 else {
                return false
            }
            written += result
        }
        return true
    }

    // MARK: - Playback

    /// Stops recording and starts playing Opus packets read from `inputStream`.
    ///
    /// Does nothing further (apart from logging) if the decoder pipeline cannot be created or the
    /// engine fails to start.
    func play(inputStream: InputStream) {
        engine.stop()
        engine.inputNode.removeTap(onBus: 0)

        let sampleRate = engine.outputNode.outputFormat(forBus: 0).sampleRate
        guard
            let micFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1),
            let converter = AVAudioConverter(from: opusFormat, to: micFormat),
            let outBuffer = AVAudioPCMBuffer(pcmFormat: micFormat, frameCapacity: playbackFrameCapacity)
        else {
            print("AudioManager: unable to create the Opus-to-PCM pipeline at \(sampleRate) Hz")
            return
        }
        self.micFormat = micFormat
        self.converter = converter

        activateAudioSession(sampleRate: sampleRate)

        self.outBuffer = outBuffer
        let playerNode = AVAudioPlayerNode()
        self.playerNode = playerNode
        engine.attach(playerNode)
        engine.connect(playerNode, to: engine.mainMixerNode, format: micFormat)

        do {
            try engine.start()
        } catch {
            // Do not start the player node when the engine is not running.
            print("AudioManager: unable to start the audio engine: \(error)")
            return
        }
        playerNode.play()
        schedule(inputStream: inputStream)
    }

    /// Decodes the next chunk of audio into `outBuffer`, schedules it on the player node and
    /// repeats when the player calls the completion handler.
    ///
    /// The chain ends when decoding yields no frames (end of stream or error).
    func schedule(inputStream: InputStream) {
        decode(inputStream: inputStream)
        guard
            let outBuffer = outBuffer,
            let playerNode = playerNode,
            outBuffer.frameLength > 0
        else {
            return
        }
        // Weak capture: the completion block is held by the player node, which self owns.
        playerNode.scheduleBuffer(outBuffer) { [weak self] in
            self?.schedule(inputStream: inputStream)
        }
    }

    /// Reads Opus packets from `inputStream` and decodes them into `outBuffer`.
    ///
    /// On return `outBuffer.frameLength` is the number of decoded frames; it is 0 when the stream
    /// has ended or the conversion failed.
    func decode(inputStream: InputStream) {
        guard let converter = converter, let outBuffer = outBuffer else {
            return
        }
        // Clear stale contents so a failed conversion cannot replay the previous chunk.
        outBuffer.frameLength = 0

        var decodedFrames: AVAudioFrameCount = 0
        while decodedFrames < outBuffer.frameCapacity {
            var conversionError: NSError?
            let outputStatus = converter.convert(to: outBuffer, error: &conversionError) { _, inputStatus in
                guard let packet = self.readPacket(from: inputStream) else {
                    inputStatus.pointee = .endOfStream
                    return nil
                }
                inputStatus.pointee = .haveData
                return packet
            }

            if outputStatus == .error {
                outBuffer.frameLength = 0
            }
            // Stop unless the converter delivered data; otherwise this loop could never finish.
            guard outputStatus == .haveData, outBuffer.frameLength > 0 else {
                break
            }
            decodedFrames += outBuffer.frameLength
        }
    }

    /// Reads exactly one fixed-size packet from `inputStream` into a new compressed buffer,
    /// retrying short reads.
    ///
    /// - Returns: The filled buffer, or `nil` if the stream ended or failed before a whole packet
    ///   was read, or the buffer has no packet descriptions.
    private func readPacket(from inputStream: InputStream) -> AVAudioCompressedBuffer? {
        let packet = AVAudioCompressedBuffer(
            format: opusFormat,
            packetCapacity: 1,
            maximumPacketSize: opusPacketByteCount
        )

        var filled = 0
        while filled < opusPacketByteCount {
            let cursor = packet.data + filled
            let result = inputStream.read(cursor, maxLength: opusPacketByteCount - filled)
            guard result > 0 else {
                return nil
            }
            filled += result
        }

        packet.packetCount = 1
        packet.byteLength = UInt32(opusPacketByteCount)
        guard let descriptions = packet.packetDescriptions else {
            return nil
        }
        descriptions[0] = AudioStreamPacketDescription(
            mStartOffset: 0,
            mVariableFramesInPacket: 0,
            mDataByteSize: UInt32(opusPacketByteCount)
        )
        return packet
    }

    // MARK: - Audio session

    /// Configures and activates the shared audio session for simultaneous playback and recording.
    ///
    /// Each step is best effort and independent of the others, so a failure in one does not skip
    /// the next.
    private func activateAudioSession(sampleRate: Double) {
        let audioSession = AVAudioSession.sharedInstance()
        try? audioSession.setCategory(.playAndRecord)
        try? audioSession.setPreferredIOBufferDuration(1024.0 / sampleRate)
        try? audioSession.setActive(true)
    }
}