Compressing AVAudioPCMBuffer within AVAudioEngine Tap

Hi everyone,

I’m working on a project that involves streaming audio over WebSockets, and I need to compress the audio to reduce bandwidth usage. I’m currently using AVAudioEngine to capture and process audio in PCM format (AVAudioPCMBuffer), but I want to compress the buffer into Opus (or another efficient codec) before sending it over the network.

Has anyone worked with compressing an AVAudioPCMBuffer into Opus format within a tap on the inputNode, or could you recommend the best approach for compressing the PCM buffer into a different format? I haven’t been able to find a working solution for this.

Any advice or code examples would be greatly appreciated!

Thanks in advance,

Ondřej

--

My current code, without compression:

inputNode.installTap(onBus: .zero, bufferSize: 1440, format: nil) { [weak self] buffer, time in
    guard let self else {
        return
    }
    
    // 1. Send data
    // a) Convert the buffer into the desired format
    if let outputBuffer = buffer.convert(toFormat: Self.websocketInputFormat) {
        // b) Use the converted buffer
        // TODO: compress it into a different format
        if let data = outputBuffer.convertToData() {
            self.sendAudio(data)
        }
    }
    
    // 2. Get sound level
    self.visualizeRecorderBuffer(buffer)
}

    func convert(toFormat outputFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
        let outputFrameCapacity = AVAudioFrameCount(
            round(Double(frameLength) * (outputFormat.sampleRate / format.sampleRate))
        )
        
        guard
            let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: outputFrameCapacity),
            let converter = AVAudioConverter(from: format, to: outputFormat)
        else {
            return nil
        }
        
        // Supply the source buffer exactly once; on subsequent callbacks report
        // that no more data is available so the converter doesn't reuse the same samples.
        var suppliedInput = false
        converter.convert(to: outputBuffer, error: nil) { packetCount, status in
            if suppliedInput {
                status.pointee = .noDataNow
                return nil
            }
            suppliedInput = true
            status.pointee = .haveData
            return self
        }
        
        return outputBuffer
    }

    static private let websocketInputFormat = AVAudioFormat(
        commonFormat: .pcmFormatInt16,
        sampleRate: 16000,
        channels: 1,
        interleaved: false
    )!
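
For reference, convertToData() is not shown above. A minimal sketch of what such a helper might look like for a mono, non-interleaved Int16 buffer (this is an assumption, not the original implementation):

import AVFAudio

extension AVAudioPCMBuffer {
    // Sketch only: packs the single Int16 channel into a Data value for sending.
    func convertToData() -> Data? {
        guard let channels = int16ChannelData else { return nil }
        let byteCount = Int(frameLength) * MemoryLayout<Int16>.size
        return Data(bytes: channels[0], count: byteCount)
    }
}
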
Answered by Engineer in 802967022

Hello @LookingForFont, please see below a very basic example to get you started. It taps the engine's input node and uses AVAudioConverter to convert back and forth between PCM and Opus. The example uses InputStream and OutputStream, but it should be straightforward to adapt it to your needs. Note that the example omits checks while unwrapping optionals for simplicity, but an actual implementation should not skip these checks. Please also refer to TN3136 for more information on AVAudioConverter.

import AVFAudio

class AudioManager {
    let sizeOfFloat = UInt32(MemoryLayout<Float>.size)
    let engine = AVAudioEngine()
    var outBuffer: AVAudioPCMBuffer!
    var playerNode: AVAudioPlayerNode!
    var converter: AVAudioConverter!
    var micFormat: AVAudioFormat!
    
    // Opus output format: 48 kHz, mono, 960 frames (20 ms) per packet.
    let opusFormat: AVAudioFormat = {
        var opusDesc = AudioStreamBasicDescription()
        opusDesc.mSampleRate = 48000
        opusDesc.mFormatID = kAudioFormatOpus
        opusDesc.mChannelsPerFrame = 1
        opusDesc.mFramesPerPacket = 960
        return AVAudioFormat(streamDescription: &opusDesc)!
    }()
    
    // Creates a no-copy view into the tap buffer starting at the given frame offset,
    // so the converter can consume the captured audio one packet's worth at a time.
    func getOffsetBuffer(buffer: AVAudioPCMBuffer, offset: UInt32, count: UInt32) -> AVAudioPCMBuffer? {
        let data = UnsafeMutableAudioBufferListPointer(buffer.mutableAudioBufferList).first!.mData! + UnsafeMutableRawPointer.Stride(offset * sizeOfFloat)
        var abl = AudioBufferList(mNumberBuffers: 1, mBuffers: AudioBuffer(mNumberChannels: 1, mDataByteSize: count * sizeOfFloat, mData: data))
        return AVAudioPCMBuffer(pcmFormat: buffer.format, bufferListNoCopy: &abl)
    }
    
    func record(outputStream: OutputStream) {
        let sampleRate = engine.outputNode.outputFormat(forBus: 0).sampleRate
        micFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1)
        converter = AVAudioConverter(from: micFormat, to: opusFormat)
        converter.bitRateStrategy = AVAudioBitRateStrategy_Constant
        
        engine.inputNode.installTap(onBus: 0, bufferSize: 0, format: micFormat) { buffer, time in
            self.encode(buffer: buffer, outputStream: outputStream)
        }
        
        let audioSession = AVAudioSession.sharedInstance()
        try? audioSession.setCategory(.playAndRecord)
        try? audioSession.setPreferredIOBufferDuration(1024.0 / sampleRate)
        try? audioSession.setActive(true)
        
        try? engine.start()
    }
    
    // Encodes the captured PCM buffer, producing and writing one Opus packet per loop iteration.
    func encode(buffer: AVAudioPCMBuffer, outputStream: OutputStream) {
        let bufferLength = buffer.frameLength
        var offset: UInt32 = 0
        var done = false
        
        while !done {
            let opusBuffer = AVAudioCompressedBuffer(format: opusFormat, packetCapacity: 1, maximumPacketSize: converter.maximumOutputPacketSize)
            
            let outputStatus = converter.convert(to: opusBuffer, error: nil) { packetCount, inputStatus in
                let count = min(packetCount, buffer.frameLength - offset)
                
                if count == 0 {
                    inputStatus.pointee = .noDataNow
                    return nil
                }
                
                let buffer = self.getOffsetBuffer(buffer: buffer, offset: offset, count: count)
                offset += count
                
                inputStatus.pointee = .haveData
                return buffer
            }
            
            if outputStatus == .haveData {
                // The example writes a fixed 128-byte slot per packet to keep the stream framing trivial.
                outputStream.write(opusBuffer.data.assumingMemoryBound(to: UInt8.self), maxLength: 128)
            }
            
            if offset >= bufferLength {
                done = true
            }
        }
    }
    
    func play(inputStream: InputStream) {
        engine.stop()
        engine.inputNode.removeTap(onBus: 0)
        
        let sampleRate = engine.outputNode.outputFormat(forBus: 0).sampleRate
        micFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1)
        converter = AVAudioConverter(from: opusFormat, to: micFormat)
        
        let audioSession = AVAudioSession.sharedInstance()
        try? audioSession.setCategory(.playAndRecord)
        try? audioSession.setPreferredIOBufferDuration(1024.0 / sampleRate)
        try? audioSession.setActive(true)
        
        outBuffer = AVAudioPCMBuffer(pcmFormat: micFormat, frameCapacity: 1024)
        
        playerNode = AVAudioPlayerNode()
        engine.attach(playerNode)
        engine.connect(playerNode, to: engine.mainMixerNode, format: micFormat)
        
        try? engine.start()
        
        playerNode.play()
        schedule(inputStream: inputStream)
    }
    
    func schedule(inputStream: InputStream) {
        decode(inputStream: inputStream)
        
        playerNode.scheduleBuffer(outBuffer) {
            self.schedule(inputStream: inputStream)
        }
    }
    
    // Decodes Opus packets from the stream into outBuffer until roughly 1024 frames have been produced.
    func decode(inputStream: InputStream) {
        var offset: UInt32 = 0
        var done = false
        
        while !done {
            let outputStatus = converter.convert(to: outBuffer, error: nil) { packetCount, inputStatus in
                let opusBuffer = AVAudioCompressedBuffer(format: self.opusFormat, packetCapacity: 1, maximumPacketSize: 128)
                
                inputStream.read(opusBuffer.data.assumingMemoryBound(to: UInt8.self), maxLength: 128)
                
                opusBuffer.packetCount = 1
                opusBuffer.byteLength = 128
                opusBuffer.packetDescriptions![0].mDataByteSize = 128
                inputStatus.pointee = .haveData
                return opusBuffer
            }
            
            if outputStatus == .haveData {
                offset += UInt32(outBuffer.frameLength)
            }
            
            if offset >= 1024 {
                done = true
            }
        }
    }
}
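
One hypothetical way to exercise this AudioManager in a single process is to connect the record and play sides with a pair of bound streams; in a real app the encoded packets would travel over the WebSocket instead. The driver code below is an assumption, not part of the original answer:

import Foundation

// Hypothetical driver: bound streams shuttle the Opus packets in-process.
var readStream: InputStream?
var writeStream: OutputStream?
Stream.getBoundStreams(withBufferSize: 64 * 1024, inputStream: &readStream, outputStream: &writeStream)
readStream?.open()
writeStream?.open()

let manager = AudioManager()
manager.record(outputStream: writeStream!)

// ... capture for a while, then stop recording and play back what was buffered:
// manager.play(inputStream: readStream!)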

Hello @Engineer, I am trying to convert PCM to AAC format; the sample rate and channel count are the same as the PCM source. I am using a modified version of the code above, but when I play the result back, the sound is much too fast. What could be the problem?

        let bufferLength = buffer.frameLength
        var offset: UInt32 = 0
        var done = false
        var data = Data()
        while !done {
            let aacBuffer = AVAudioCompressedBuffer(format: outputFormat, packetCapacity: 1, maximumPacketSize: converter.maximumOutputPacketSize)
            
            converter.convert(to: aacBuffer, error: nil) { packetCount, inputStatus in
                let count = min(packetCount, buffer.frameLength - offset)
                
                if count == 0 {
                    inputStatus.pointee = .noDataNow
                    return nil
                }

                let buffer1 = self.getOffsetBuffer(buffer: buffer, offset: offset, count: count)
                offset += count
                
                inputStatus.pointee = .haveData
                return buffer1
            }
            
            data.append(Data(bytes: aacBuffer.data, count: Int(aacBuffer.byteLength)))
            
            if offset >= bufferLength {
                done = true
            }
        }
        
        return data // TODO: add AAC header
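
As a side note on the trailing TODO: raw AAC packets written to a stream or file usually need a 7-byte ADTS header per packet before most decoders will accept them. A rough sketch for AAC-LC follows; the sample-rate index and channel configuration are placeholders and must match the converter's output format:

// Sketch: build a 7-byte ADTS header for one AAC-LC packet.
// sampleRateIndex: 3 = 48000 Hz, 4 = 44100 Hz (MPEG-4 sampling-frequency table).
func adtsHeader(packetLength: Int, sampleRateIndex: Int, channelConfig: Int) -> Data {
    let frameLength = packetLength + 7                                       // payload plus header
    var header = [UInt8](repeating: 0, count: 7)
    header[0] = 0xFF                                                         // syncword, high bits
    header[1] = 0xF1                                                         // syncword, MPEG-4, layer 0, no CRC
    header[2] = UInt8(0x40 | (sampleRateIndex << 2) | (channelConfig >> 2))  // AAC-LC profile
    header[3] = UInt8(((channelConfig & 3) << 6) | ((frameLength >> 11) & 0x03))
    header[4] = UInt8((frameLength >> 3) & 0xFF)
    header[5] = UInt8(((frameLength & 7) << 5) | 0x1F)                       // buffer fullness, high bits
    header[6] = 0xFC                                                         // buffer fullness, low bits; one raw data block
    return Data(header)
}

// Usage: prepend a header to each packet before appending its payload, e.g.
// data.append(adtsHeader(packetLength: Int(aacBuffer.byteLength), sampleRateIndex: 4, channelConfig: 1))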


