Hi y'all,
After getting mono recording working, I want to differentiate my app from the standard Voice Memos app by allowing stereo recording. I followed this tutorial (https://developer.apple.com/documentation/avfaudio/capturing_stereo_audio_from_built-in_microphones) to get my voice recorder to record stereo audio. However, when I look at the waveform in Audacity, both channels are identical. If I look at the file info after sharing it, it says the file is in stereo. I don't know exactly what's going on here. What I suspect is happening is that the recorder is only using one microphone. Here is the relevant part of my recorder:
// MARK: - Initialization
override init() {
    super.init()
    do {
        try configureAudioSession()
        try enableBuiltInMicrophone()
        try setupAudioRecorder()
    } catch {
        // If any errors occur during initialization,
        // terminate the app with a fatalError.
        fatalError("Error: \(error)")
    }
}
// MARK: - Audio Session and Recorder Configuration
private func enableBuiltInMicrophone() throws {
    let audioSession = AVAudioSession.sharedInstance()
    let availableInputs = audioSession.availableInputs
    guard let builtInMicInput = availableInputs?.first(where: { $0.portType == .builtInMic }) else {
        throw Errors.NoBuiltInMic
    }
    do {
        try audioSession.setPreferredInput(builtInMicInput)
    } catch {
        throw Errors.UnableToSetBuiltInMicrophone
    }
}
private func configureAudioSession() throws {
    let audioSession = AVAudioSession.sharedInstance()
    do {
        try audioSession.setCategory(.record, mode: .default, options: [.allowBluetooth])
        try audioSession.setActive(true)
    } catch {
        throw Errors.FailedToInitSessionError
    }
}
private func setupAudioRecorder() throws {
    let date = Date()
    let dateFormatter = DateFormatter()
    dateFormatter.locale = Locale(identifier: "en_US_POSIX")
    dateFormatter.dateFormat = "yyyy-MM-dd, HH:mm:ss"
    let timestamp = dateFormatter.string(from: date)
    self.recording = Recording(name: timestamp)
    guard let fileURL = recording?.returnURL() else {
        fatalError("Failed to create file URL")
    }
    self.currentURL = fileURL
    print("Recording URL: \(fileURL)")
    do {
        let audioSettings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
            AVLinearPCMIsNonInterleaved: false,
            AVSampleRateKey: 44_100.0,
            AVNumberOfChannelsKey: isStereoSupported ? 2 : 1,
            AVLinearPCMBitDepthKey: 16,
            AVEncoderAudioQualityKey: AVAudioQuality.max.rawValue
        ]
        audioRecorder = try AVAudioRecorder(url: fileURL, settings: audioSettings)
    } catch {
        throw Errors.UnableToCreateAudioRecorder
    }
    audioRecorder.delegate = self
    audioRecorder.prepareToRecord()
}
// MARK: - Update orientation
public func updateOrientation(withDataSourceOrientation orientation: AVAudioSession.Orientation = .front, interfaceOrientation: UIInterfaceOrientation) async throws {
    let session = AVAudioSession.sharedInstance()
    guard let preferredInput = session.preferredInput,
          let dataSources = preferredInput.dataSources,
          let newDataSource = dataSources.first(where: { $0.orientation == orientation }),
          let supportedPolarPatterns = newDataSource.supportedPolarPatterns else {
        return
    }
    isStereoSupported = supportedPolarPatterns.contains(.stereo)
    if isStereoSupported {
        try newDataSource.setPreferredPolarPattern(.stereo)
    }
    try preferredInput.setPreferredDataSource(newDataSource)
    try session.setPreferredInputOrientation(interfaceOrientation.inputOrientation)
}
Here is the relevant part of my SwiftUI view:
RecordView()
    .onAppear {
        Task {
            if await AVAudioApplication.requestRecordPermission() {
                // The user grants access. Present recording interface.
                print("Permission granted")
            } else {
                // The user denies access. Present a message that indicates
                // that they can change their permission settings in the
                // Privacy & Security section of the Settings app.
                model.showAlert.toggle()
            }
            try await recorder.updateOrientation(interfaceOrientation: deviceOrientation)
        }
    }
    .onReceive(NotificationCenter.default.publisher(for: UIDevice.orientationDidChangeNotification)) { _ in
        if let windowScene = UIApplication.shared.connectedScenes.first as? UIWindowScene,
           let orientation = windowScene.windows.first?.windowScene?.interfaceOrientation {
            deviceOrientation = orientation
            Task {
                do {
                    try await recorder.updateOrientation(interfaceOrientation: deviceOrientation)
                } catch {
                    throw Errors.UnableToUpdateOrientation
                }
            }
        }
    }
Here is the full repo: https://github.com/aabagdi/MemoMan/tree/MemoManStereo
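A quick way to confirm whether the two channels in a finished file really are identical is to read it back with AVAudioFile and compare the channel buffers. A minimal sketch (the function name is mine and the URL is a placeholder for the recording to inspect):

import AVFoundation

/// Reads a finished recording and reports whether its two channels contain
/// identical samples. The URL is a placeholder for the recording to inspect.
func channelsAreIdentical(at url: URL) throws -> Bool {
    let file = try AVAudioFile(forReading: url)
    guard file.processingFormat.channelCount == 2,
          let buffer = AVAudioPCMBuffer(pcmFormat: file.processingFormat,
                                        frameCapacity: AVAudioFrameCount(file.length)) else {
        return true // mono file (or allocation failure): nothing to compare
    }
    try file.read(into: buffer)
    guard let channels = buffer.floatChannelData else { return true }

    let frames = Int(buffer.frameLength)
    let left = UnsafeBufferPointer(start: channels[0], count: frames)
    let right = UnsafeBufferPointer(start: channels[1], count: frames)
    return zip(left, right).allSatisfy { $0.0 == $0.1 }
}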
Thanks for any leads!
Audio: Dive into the technical aspects of audio on your device, including codecs, format support, and customization options.
At a 48 kHz sample rate, Vision Pro cannot capture recorded content in the 16 kHz to 24 kHz range. Why is that? Or can you tell me how to configure it?
With AudioToolbox's AudioServicesPlaySystemSound function, you can pass a SystemSoundID to play one of the sound effects that come with the system. However, I can't tell which sound effect each number corresponds to, so I'd like to know all the sound effects in visionOS and their corresponding SystemSoundIDs.
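As far as I know there is no official table of these IDs. One way to audition them is to step through a range of SystemSoundIDs and note what each one plays. A minimal sketch (the function name and the 1000...1350 range are my guesses, not a documented list; many IDs in that range may be silent):

import AudioToolbox
import Foundation

/// Plays each SystemSoundID in a range, pausing between them, so you can note
/// which IDs map to which effects.
func auditionSystemSounds(in range: ClosedRange<SystemSoundID> = 1000...1350) {
    for soundID in range {
        print("Playing SystemSoundID \(soundID)")
        AudioServicesPlaySystemSound(soundID)
        Thread.sleep(forTimeInterval: 1.0) // crude pacing; fine for a scratch test
    }
}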
I am trying to use AVAssetExportSession to export the audio from a video, but every time I try it, it fails and I don't know why.
This is the code:
import AVFoundation

protocol AudioExtractionProtocol {
    func extractAudio(from fileUrl: URL, to outputUrl: URL)
}

final class AudioExtraction {
    private var avAsset: AVAsset?
    private var avAssetExportSession: AVAssetExportSession?

    init() {}
}

// MARK: - AudioExtraction conforms to AudioExtractionProtocol
extension AudioExtraction: AudioExtractionProtocol {
    func extractAudio(from fileUrl: URL, to outputUrl: URL) {
        createAVAsset(for: fileUrl)
        createAVAssetExportSession(for: outputUrl)
        exportAudio()
    }
}

// MARK: - Private Methods
extension AudioExtraction {
    private func createAVAsset(for fileUrl: URL) {
        avAsset = AVAsset(url: fileUrl)
    }

    private func createAVAssetExportSession(for outputUrl: URL) {
        guard let avAsset else { return }
        avAssetExportSession = AVAssetExportSession(asset: avAsset, presetName: AVAssetExportPresetAppleM4A)
        avAssetExportSession?.outputURL = outputUrl
    }

    private func exportAudio() {
        guard let avAssetExportSession else { return }
        print("I am here \n")
        avAssetExportSession.exportAsynchronously {
            if avAssetExportSession.status == .failed {
                print("\(avAssetExportSession.status)\n")
            }
        }
    }
}

func test_AudioExtraction_extractAudioAndWriteItToFile() {
    let videoUrl = URL(string: "https://storage.googleapis.com/gtv-videos-bucket/sample/ForBiggerMeltdowns.mp4")!
    let audioExtraction: AudioExtractionProtocol = AudioExtraction()
    audioExtraction.extractAudio(from: videoUrl, to: FileMangerTest.audioFile)
    FileMangerTest.tearDown()
}

class FileMangerTest {
    private static let fileManger = FileManager.default

    private static var directoryUrl: URL {
        fileManger.urls(for: .cachesDirectory, in: .userDomainMask).first!
    }

    static var audioFile: URL {
        directoryUrl.appendingPathComponent("audio", conformingTo: .mpeg4Audio)
    }

    static func tearDown() {
        try? fileManger.removeItem(at: audioFile)
    }

    static func contant(at url: URL) -> Data? {
        return fileManger.contents(atPath: url.absoluteString)
    }
}
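One detail that stands out in the code above (an observation, not a confirmed diagnosis): the export session is never given an outputFileType, and AVAssetExportSession generally will not export without one. A minimal sketch of the session setup with that property added (the helper name is mine):

import AVFoundation

/// Minimal sketch of an audio-only export with the output file type set.
func makeAudioExportSession(for asset: AVAsset, outputURL: URL) -> AVAssetExportSession? {
    guard let session = AVAssetExportSession(asset: asset,
                                             presetName: AVAssetExportPresetAppleM4A) else {
        return nil
    }
    session.outputURL = outputURL
    session.outputFileType = .m4a // exports generally fail if this is left nil
    return session
}

Logging avAssetExportSession.error in the completion handler, rather than just the status, also usually shows the underlying reason for a failure.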
I have an app (iPhone/iPad) that currently uses miniaudio, and I'd like to transition to a pure Core Audio solution, but I cannot for the life of me get it to work.
I want to set up a RemoteIO unit with microphone and speaker callbacks, so that I get a callback when the microphone (USB microphone at 384 kHz, mono) has samples for me, and a callback when the speakers (48 kHz, stereo) need more samples. It should be insanely simple. It's Objective-C, as I never got round to Swift and can't see it happening this late in life, but that shouldn't change things.
So if anyone can tell me what I'm doing wrong here it would be GREATLY appreciated. My playbackCallback is never fired, only the recordCallback.
- (void)launchRemoteIOandSleep
{
    NSError *error;
    [[AVAudioSession sharedInstance] setPreferredIOBufferDuration:(1024.0f / 48000.0f) error:&error];

    OSStatus status;
    AudioComponentInstance audioUnit;

    // set up callback structures - different sanity clauses to identify in breakpoints
    renderCallBackHandle sanityClause666;
    sanityClause666.remoteIO = audioUnit;
    sanityClause666.sanityCheck666 = 666;
    renderCallBackHandle sanityClause667;
    sanityClause667.remoteIO = audioUnit;
    sanityClause667.sanityCheck666 = 667;

    // set up audio formats
    AudioStreamBasicDescription audioFormatInput;
    FillOutASBDForLPCM(audioFormatInput, 384000.0, 1, 16, 16, 0, 0, 0);
    AudioStreamBasicDescription audioFormatOutput;
    FillOutASBDForLPCM(audioFormatOutput, 48000.0, 2, 16, 16, 0, 0, 0);

    // set up callback structs
    AURenderCallbackStruct callbackStructRender;
    callbackStructRender.inputProc = playbackCallback;
    callbackStructRender.inputProcRefCon = &sanityClause666;
    AURenderCallbackStruct callbackStructRecord;
    callbackStructRecord.inputProc = recordCallback;
    callbackStructRecord.inputProcRefCon = &sanityClause667;

    // grab remoteIO
    AudioComponentDescription desc;
    desc.componentType = kAudioUnitType_Output;
    desc.componentSubType = kAudioUnitSubType_RemoteIO;
    desc.componentManufacturer = kAudioUnitManufacturer_Apple;
    desc.componentFlags = 0;
    desc.componentFlagsMask = 0;
    AudioComponent component = AudioComponentFindNext(NULL, &desc);

    // Get audio unit
    status = AudioComponentInstanceNew(component, &audioUnit);
    checkStatus(status);

    // Enable IO for both recording and playback
    // this enables the OUTPUT side of the OUTPUT bus which is the speaker (I think ...)
    UInt32 flag = 1;
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioOutputUnitProperty_EnableIO,
                                  kAudioUnitScope_Output,
                                  kOutputBus,
                                  &flag,
                                  sizeof(flag));
    checkStatus(status);

    // this enables the INPUT side of the INPUT bus which is the mic (I think ...)
    flag = 1;
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioOutputUnitProperty_EnableIO,
                                  kAudioUnitScope_Input,
                                  kInputBus,
                                  &flag,
                                  sizeof(flag));
    checkStatus(status);

    // Apply format - INPUT bus of OUTPUT SCOPE which is my samples into remoteIO
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioUnitProperty_StreamFormat,
                                  kAudioUnitScope_Output,
                                  kInputBus,
                                  &audioFormatOutput,
                                  sizeof(audioFormatOutput));
    checkStatus(status);

    // Apply format - OUTPUT bus of INPUT SCOPE which is where I pick up my samples from mic
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioUnitProperty_StreamFormat,
                                  kAudioUnitScope_Input,
                                  kOutputBus,
                                  &audioFormatInput,
                                  sizeof(audioFormatInput));
    checkStatus(status);

    // set output callback
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioUnitProperty_SetRenderCallback,
                                  kAudioUnitScope_Output,
                                  kInputBus,
                                  &callbackStructRender,
                                  sizeof(callbackStructRender));
    checkStatus(status);

    // Set input callback
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioOutputUnitProperty_SetInputCallback,
                                  kAudioUnitScope_Input,
                                  kOutputBus,
                                  &callbackStructRecord,
                                  sizeof(callbackStructRecord));
    checkStatus(status);

    // Disable buffer allocation for the recorder
    flag = 0;
    status = AudioUnitSetProperty(audioUnit,
                                  kAudioUnitProperty_ShouldAllocateBuffer,
                                  kAudioUnitScope_Input,
                                  kInputBus,
                                  &flag,
                                  sizeof(flag));

    // Initialise
    status = AudioUnitInitialize(audioUnit);
    checkStatus(status);
    status = AudioOutputUnitStart(audioUnit);
    checkStatus(status);

    [self waitForAudioStabilisation];
    while (1) sleep(2);
}
I am working on a VoIP-based PTT app that uses the 'voip' APNs notification type to learn about new incoming PTT calls.
When my app receives a PTT call, it plays audio, but the call audio cannot be heard. When checking the phone volume, the API [[AVAudioSession sharedInstance] outputVolume] returns 0, yet the phone volume is clearly not zero: checking it with the side volume buttons shows it above 50%.
This behavior is observed with the app in both the foreground and the background.
Why does the API return a zero volume level? Is there any other reason why the app's audio is not heard?
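For reference, outputVolume is intended to be queried while the audio session is active, so a zero reading often just means the session hasn't been activated yet. A minimal sketch of reading it after activation (the category, mode, and options here are assumptions, not necessarily what a PTT app needs):

import AVFoundation

/// Minimal sketch: activate the session, then read the volume.
func currentOutputVolume() throws -> Float {
    let session = AVAudioSession.sharedInstance()
    try session.setCategory(.playAndRecord, mode: .voiceChat, options: [.allowBluetooth])
    try session.setActive(true)
    return session.outputVolume // query this only while the session is active
}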
So I just got some AirPods Pro, and they won't connect to my phone. I'm wondering if it is because my phone is running iOS 18, and I would like advice on how to fix this. (My phone is running smoothly with no other noticeable issues, and it is my primary device.)
In my iOS app, I have functionality to record audio and video using the AVFoundation framework. While audio recording works smoothly on some devices, such as iPads and certain others, I'm encountering issues on newer models like the iPhone 14, 14 Pro, and 15 series. Specifically, when attempting to start an audio recording by tapping the microphone icon, the interface becomes unresponsive and remains static.
This issue surfaced following an update to iOS 17.2.1. It seems to affect only a subset of devices, even though video recording works correctly across all of them.
Hi,
I'm looking to implement PHASEStreamNode in Unity, but the PHASE library currently provided for Unity doesn't contain this new type of node yet.
https://developer.apple.com/documentation/phase/phasestreamnode
When will you be looking into releasing a beta of the Unity plug-ins as well?
This is very important for spatial audio in Unity to be consistent with Apple's standards.
Best,
Antonio
I'm trying to use HLS on iOS to play an H.265 video with Opus audio, with the media segment files in fMP4 format. On an iPhone 13 running iOS 17.5.1 it plays fine, but on an iPhone X running iOS 16.5.1 it doesn't play. Is it because some iOS versions don't support the Opus audio format in HLS?
Dear Sirs,
I've written an audio driver based on IOUserAudioDevice. In my IOOperationHandler I can receive and send the audio samples as expected. Is there any way to configure the number of samples transferred in each call? Currently it seems to be around 512 samples per call, which corresponds to about 10.7 milliseconds at a 48 kHz sample rate. I'd like to achieve something like 48 or 96 samples per call. I did some experiments and tried calls to SetOutputLatency() etc., but so far I haven't found the right way to change the in_io_buffer_frame_size in the callback. I'd like to do this because smaller buffer sizes would allow lower latencies for the subsequent audio processing.
Thanks and best regards,
Johannes
I'm a newbie at tvOS and want to know whether it is possible to override some system settings.
In particular, I want to override the output to audio devices.
At the moment (using Apple TV 4K and tvOS 17) you can only select one output device (TV, eARC, AirPods), and I need a way to set simultaneous output to AirPods and the TV or eARC.
Is such programming possible?
In our app, we open an H5 (web) page and use the web player to play a video inside it.
Then, when we switch to the app's own player to play a video or audio file, we cannot grab the audio focus: the sound plays for a moment and then stops, while the player keeps running.
Calling setCategory and setActive before each playback in the app does not help either.
This problem exists in beta 1 through beta 3.
What does the WebKit player do that keeps the audio focus locked, and what does the app need to do to take the focus back?
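One way to see what is actually taking the session away (a diagnostic sketch, not a fix; the class name is mine) is to log AVAudioSession interruption and route-change notifications while the web player and the native player compete for the session:

import AVFoundation

/// Diagnostic sketch: log interruptions and route changes to see when and why
/// the app's session loses the audio focus to the web player.
final class AudioFocusLogger {
    private var observers: [NSObjectProtocol] = []

    func start() {
        let center = NotificationCenter.default
        let names: [Notification.Name] = [AVAudioSession.interruptionNotification,
                                          AVAudioSession.routeChangeNotification]
        for name in names {
            let token = center.addObserver(forName: name, object: nil, queue: .main) { note in
                print("\(name.rawValue):", note.userInfo ?? [:])
            }
            observers.append(token)
        }
    }

    deinit {
        observers.forEach { NotificationCenter.default.removeObserver($0) }
    }
}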
AVAudioFormat has no Swift concurrency annotations but the documentation states "Instances of this class are immutable."
This made me always assume it was safe to pass AVAudioFormat instances around. Is this the case? If so can it be marked as Sendable? Am I missing something?
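I don't have an authoritative answer, but since the class is documented as immutable, a common workaround is to declare an unchecked conformance yourself. This is an assumption on the caller's side, not an Apple guarantee, and under Swift 6 the compiler may also want the @retroactive attribute:

import AVFoundation

// Workaround sketch: assert Sendable ourselves, relying on the documented
// immutability of AVAudioFormat. This is an assumption, not an Apple guarantee.
extension AVAudioFormat: @unchecked Sendable {}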
I have a 12.9-inch iPad Pro. I am looking to make an app that records audio from two different microphones simultaneously. I want to be able to specify which of the five built-in microphones each audio stream should use; ideally one should be the microphone on the left side of the iPad, and the other should be one of the mics at the top of the iPad. Is this possible to achieve with the API?
The end goal here is to use the two audio streams and do some DSP on the recordings to determine the approximate direction a particular sound comes from.
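As far as I know, AVAudioSession only lets you select one preferred input and one preferred data source at a time, so two fully independent streams from specific built-in mics may not be possible; enumerating what the session exposes is a reasonable first step. A sketch using public AVAudioSession APIs (the function name is mine):

import AVFoundation

/// Sketch: list the built-in microphone's data sources (front, back, bottom, ...)
/// and their supported polar patterns, to see what the session actually exposes.
func listBuiltInMicDataSources() {
    let session = AVAudioSession.sharedInstance()
    guard let builtInMic = session.availableInputs?.first(where: { $0.portType == .builtInMic }) else {
        print("No built-in microphone available")
        return
    }
    for source in builtInMic.dataSources ?? [] {
        let patterns = source.supportedPolarPatterns?.map(\.rawValue) ?? []
        print("\(source.dataSourceName) @ \(source.orientation?.rawValue ?? "unknown"): \(patterns)")
    }
}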
Hello,
I hope this message finds you well. I am currently working on a Unity-based iOS application that requires continuous microphone input while also producing sound output. For this we need iOS echo cancellation, so some sounds need to be played via the iOS layer with echo cancellation, and I am manually setting up the audio session after the app starts, using the .playAndRecord category of AVAudioSession. However, I am facing an issue where the volume of the sound output is inconsistent across different iOS devices and scenarios.
The process is quite simple: for each AudioClip we are about to play via Unity, we copy the buffer data to our iOS Swift layer, which then does all the processing and plays the audio via the native layer.
Here are the specific issues I am encountering:
The volume level of the game sound effects fluctuates between a normal, audible volume and a very low volume.
The sound output behaves differently depending on whether the app is launched with the device at full volume or muted, and on whether the app is put into the background and then brought back to the foreground.
The volume inconsistency affects my game negatively, as some audio is very hard to hear regardless of the device or its initial volume state. I have followed the basic setup for AVAudioSession as per the documentation, but the inconsistencies persist.
I'm also aware that Unity uses FMOD to set up the audio routing on iOS; we configure our custom routing after that.
We tried tweaking the output volume prior to playing audio so there isn't much discrepancy, and this seems to align the output volume, but there are still places where the volume is very low. I've looked at the waveforms in Unity and they all seem consistent; there is no reason why the volume would dip.
private var audioPlayer = AVAudioPlayerNode()

@objc public func Play() {
    audioPlayer.volume = AVAudioSession.sharedInstance().outputVolume * 0.25
    audioPlayer.play()
}
We also explored changing the audio session options to see if we had any luck but unfortunately nothing has changed.
private func ConfigAudioSession() {
    let audioSession = AVAudioSession.sharedInstance()
    do {
        try audioSession.setCategory(.playAndRecord, options: [.mixWithOthers, .allowBluetooth, .defaultToSpeaker])
        try audioSession.setMode(.spokenAudio)
        try audioSession.setActive(true)
    } catch {
        // Treat error
    }
}
Could anyone provide guidance or suggest best practices to ensure a stable and consistent volume output in this scenario? Any advice on this issue would be greatly appreciated.
Thank you in advance for your help!
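Not a definitive fix, but one variable worth isolating (an assumption on my part, not something verified against the Unity/FMOD setup): the session mode. The .voiceChat mode enables the system's voice processing (echo cancellation) on the I/O path and applies different gain behavior than .spokenAudio. A minimal sketch:

import AVFoundation

/// Sketch: configure the session for simultaneous playback and capture with
/// voice processing (echo cancellation). Whether this removes the volume dips
/// in the Unity/FMOD setup is untested here.
func configureVoiceChatSession() throws {
    let session = AVAudioSession.sharedInstance()
    try session.setCategory(.playAndRecord,
                            mode: .voiceChat,
                            options: [.mixWithOthers, .allowBluetooth, .defaultToSpeaker])
    try session.setActive(true)
}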
Hello, I am building a new iOS app which uses AVSpeechSynthesizer and should be able to mix audio nicely with audio from other apps. AVSpeechSynthesizer seems to handle setting the AVAudioSession to active on its own, but it does not deactivate the audio session. This leads to issues, namely that other audio sources remain "ducked" after AVSpeechSynthesizer is done speaking.
I have implemented deactivating the audio session myself, which "works", in that it allows other audio sources to become "un-ducked", but it throws this exception each time even though it appears successful.
Error Domain=NSOSStatusErrorDomain Code=560030580 "Session deactivation failed" UserInfo={NSLocalizedDescription=Session deactivation failed}
It appears to be a bug with how AVSpeechSynthesizer handles activating/deactivating the audio session.
Below is a minimal example which illustrates the problem. It has two buttons: one manually deactivates the audio session, which throws the exception but otherwise works; the other leaves audio session management to the AVSpeechSynthesizer but does not "un-duck" other audio.
If you play some audio from another app (ex: Music), you'll see the button which throws/catches an exception successfully ducks/un-ducks the audio, while the one without attempting to deactivate the session ducks but does not un-duck the audio.
import SwiftUI
import AVFoundation

struct ContentView: View {
    let workingSynthesizer = UnduckingSpeechSynthesizer()
    let brokenSynthesizer = BrokenSpeechSynthesizer()

    init() {
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.playback, mode: .voicePrompt, options: [.duckOthers])
        } catch {
            print("Setup error info: \(error)")
        }
    }

    var body: some View {
        VStack {
            Button("Works Correctly") {
                workingSynthesizer.speak(text: "Hello planet")
            }
            Text("-------")
            Button("Does not work") {
                brokenSynthesizer.speak(text: "Hello planet")
            }
        }
        .padding()
    }
}

class UnduckingSpeechSynthesizer: NSObject {
    var synth = AVSpeechSynthesizer()
    let audioSession = AVAudioSession.sharedInstance()

    override init() {
        super.init()
        synth.delegate = self
    }

    func speak(text: String) {
        let utterance = AVSpeechUtterance(string: text)
        synth.speak(utterance)
    }
}

extension UnduckingSpeechSynthesizer: AVSpeechSynthesizerDelegate {
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        do {
            try audioSession.setActive(false, options: .notifyOthersOnDeactivation)
        } catch {
            // always throws an error
            // Error Domain=NSOSStatusErrorDomain Code=560030580 "Session deactivation failed" UserInfo={NSLocalizedDescription=Session deactivation failed}
            print("Deactivate error info: \(error)")
        }
    }
}

class BrokenSpeechSynthesizer {
    var synth = AVSpeechSynthesizer()
    let audioSession = AVAudioSession.sharedInstance()

    func speak(text: String) {
        let utterance = AVSpeechUtterance(string: text)
        synth.speak(utterance)
    }
}
(I have a separate issue where the first speech attempt takes a few seconds but I don't think it's related)
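One more avenue that might be worth trying (untested against this exact ducking issue): AVSpeechSynthesizer has a usesApplicationAudioSession property, and setting it to true makes the synthesizer use the session the app manages rather than activating its own, so activation and deactivation stay under the app's control. A minimal sketch, with the class name being mine:

import AVFoundation

/// Sketch: let the app own the audio session instead of AVSpeechSynthesizer.
/// Activation/deactivation (and ducking) then stay entirely under app control.
final class AppManagedSpeechSynthesizer {
    private let synth = AVSpeechSynthesizer()

    init() {
        synth.usesApplicationAudioSession = true // don't let the synthesizer activate its own session
    }

    func speak(_ text: String) {
        let session = AVAudioSession.sharedInstance()
        try? session.setCategory(.playback, mode: .voicePrompt, options: [.duckOthers])
        try? session.setActive(true)
        synth.speak(AVSpeechUtterance(string: text))
    }
}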
Hello Apple Community,
I am developing an iOS app and would like to add a feature that allows users to play and organize Audible.com files within the app. Does Audible or the App Store provide any API or SDK for third-party apps to access and manage Audible content? If so, could you please provide some guidance on how to integrate it into my app?
Thank you for your assistance!
Best regards,
Yes it labs