Recently, deep learning projects have been getting larger, and loading models has sometimes become a bottleneck. I download a Core ML model in .mlpackage format from the internet, use compileModelAtURL to compile the .mlpackage into an .mlmodelc, and then call modelWithContentsOfURL to turn the .mlmodelc into a model handle. In general, creating the handle with modelWithContentsOfURL is very slow. I noticed from WWDC 2023 that it is possible to cache the compiled results (see https://developer.apple.com/videos/play/wwdc2023/10049/?time=677, which states: "This compilation includes further optimizations for the specific compute device and outputs an artifact that the compute device can run. Once complete, Core ML caches these artifacts to be used for subsequent model loads."). However, I couldn't find how to control this caching anywhere in the documentation.
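In the meantime, this is the manual workaround I'm considering: compile once, keep the resulting .mlmodelc under the Caches directory myself, and only recompile when it's missing. A minimal sketch (the file names and URLs are placeholders):

import CoreML

// Compile the downloaded .mlpackage once and reuse the compiled .mlmodelc on later launches.
// Names and locations are placeholders.
func loadCachedModel(packageURL: URL) async throws -> MLModel {
    let cachesDir = try FileManager.default.url(for: .cachesDirectory,
                                                in: .userDomainMask,
                                                appropriateFor: nil,
                                                create: true)
    let cachedModelURL = cachesDir.appendingPathComponent("MyModel.mlmodelc")

    if !FileManager.default.fileExists(atPath: cachedModelURL.path) {
        // compileModel(at:) writes the compiled model to a temporary location,
        // so move it somewhere permanent before the system cleans it up.
        let tempCompiledURL = try await MLModel.compileModel(at: packageURL)
        _ = try FileManager.default.replaceItemAt(cachedModelURL, withItemAt: tempCompiledURL)
    }

    let config = MLModelConfiguration()
    return try MLModel(contentsOf: cachedModelURL, configuration: config)
}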
We have code that crashed. The crash stack is as follows:
Thread 26 Crashed:
0 CoreFoundation 0x0000000198b0569c CFRelease + 44
1 CoreFoundation 0x0000000198b12334 __CFBasicHashRehash + 1172
2 CoreFoundation 0x0000000198b015dc __CFBasicHashAddValue + 100
3 CoreFoundation 0x0000000198b232e4 CFDictionarySetValue + 208
4 Foundation 0x00000001979b0378 _getStringAtMarker + 464
5 Foundation 0x00000001979b016c _NSXPCSerializationStringForObject + 56
6 Foundation 0x00000001979cec4c __44-[NSXPCDecoder _decodeArrayOfObjectsForKey:]_block_invoke + 52
7 Foundation 0x00000001979ceb90 _NSXPCSerializationIterateArrayObject + 208
8 Foundation 0x00000001979cda7c -[NSXPCDecoder _decodeArrayOfObjectsForKey:] + 240
9 Foundation 0x00000001979cd1bc -[NSDictionary(NSDictionary) initWithCoder:] + 176
10 Foundation 0x00000001979ae6e8 _decodeObject + 1264
11 Foundation 0x00000001979cec4c __44-[NSXPCDecoder _decodeArrayOfObjectsForKey:]_block_invoke + 52
12 Foundation 0x00000001979ceb90 _NSXPCSerializationIterateArrayObject + 208
13 Foundation 0x00000001979cda7c -[NSXPCDecoder _decodeArrayOfObjectsForKey:] + 240
14 Foundation 0x00000001979cd1a4 -[NSDictionary(NSDictionary) initWithCoder:] + 152
15 Foundation 0x00000001979ae6e8 _decodeObject + 1264
16 Foundation 0x00000001979ad030 -[NSXPCDecoder _decodeObjectOfClasses:atObject:] + 148
17 Foundation 0x0000000197a0a7f0 _NSXPCSerializationDecodeTypedObjCValuesFromArray + 892
18 Foundation 0x0000000197a0a1f8 _NSXPCSerializationDecodeInvocationArgumentArray + 412
19 Foundation 0x0000000197a0866c -[NSXPCDecoder __decodeXPCObject:allowingSimpleMessageSend:outInvocation:outArguments:outArgumentsMaxCount:outMethodSignature:outSelector:isReply:replySelector:] + 700
20 Foundation 0x0000000197a61078 -[NSXPCDecoder _decodeReplyFromXPCObject:forSelector:] + 76
21 Foundation 0x0000000197a5f690 -[NSXPCConnection _decodeAndInvokeReplyBlockWithEvent:sequence:replyInfo:] + 252
22 Foundation 0x0000000197a63664 __88-[NSXPCConnection _sendInvocation:orArguments:count:methodSignature:selector:withProxy:]_block_invoke_5 + 188
23 Foundation 0x0000000197a08058 -[NSXPCConnection _sendInvocation:orArguments:count:methodSignature:selector:withProxy:] + 2244
24 CoreFoundation 0x0000000198b19d88 ___forwarding___ + 1016
25 CoreFoundation 0x0000000198b198d0 _CF_forwarding_prep_0 + 96
26 AppleNeuralEngine 0x00000001e912ab1c -[_ANEDaemonConnection loadModel:sandboxExtension:options:qos:withReply:] + 332
27 AppleNeuralEngine 0x00000001e912a674 __44-[_ANEClient doLoadModel:options:qos:error:]_block_invoke + 360
28 libdispatch.dylib 0x00000001a0a21dd4 _dispatch_client_callout + 20
29 libdispatch.dylib 0x00000001a0a312c4 _dispatch_lane_barrier_sync_invoke_and_complete + 56
30 AppleNeuralEngine 0x00000001e9129ef0 -[_ANEClient doLoadModel:options:qos:error:] + 500
31 Espresso 0x00000001a7e02034 Espresso::ANERuntimeEngine::compiler::build_segment(std::__1::shared_ptr<Espresso::abstract_batch> const&, int, Espresso::net_compiler_segment_based::segment_t const&) + 3736
32 Espresso 0x00000001a7e010cc Espresso::net_compiler_segment_based::build(std::__1::shared_ptr<Espresso::abstract_batch> const&, int, int) + 384
33 Espresso 0x00000001a7df02a4 Espresso::ANERuntimeEngine::compiler::build(std::__1::shared_ptr<Espresso::abstract_batch> const&, int, int) + 120
34 Espresso 0x00000001a7e1b3a4 Espresso::net::__build(std::__1::shared_ptr<Espresso::abstract_batch> const&, int, int) + 360
35 Espresso 0x00000001a7e178e0 Espresso::abstract_context::compute_batch_sync(void (std::__1::shared_ptr<Espresso::abstract_batch> const&) block_pointer) + 112
36 Espresso 0x00000001a7e198b8 EspressoLight::espresso_plan::prepare_compiler_if_needed() + 3208
37 Espresso 0x00000001a7e183f4 EspressoLight::espresso_plan::prepare() + 1712
38 Espresso 0x00000001a7da8e78 espresso_plan_build_with_options + 300
39 Espresso 0x00000001a7da8d30 espresso_plan_build + 44
40 CoreML 0x00000001b346645c -[MLNeuralNetworkEngine rebuildPlan:error:] + 536
41 CoreML 0x00000001b3464294 -[MLNeuralNetworkEngine _setupContextAndPlanWithConfiguration:usingCPU:reshapeWithContainer:error:] + 3132
42 CoreML 0x00000001b34797a0 -[MLNeuralNetworkEngine initWithContainer:configuration:error:] + 196
43 CoreML 0x00000001b347962c +[MLNeuralNetworkEngine loadModelFromCompiledArchive:modelVersionInfo:compilerVersionInfo:configuration:error:] + 164
44 CoreML 0x00000001b34792a0 +[MLLoader _loadModelWithClass:fromArchive:modelVersionInfo:compilerVersionInfo:configuration:error:] + 144
45 CoreML 0x00000001b3478c64 +[MLLoader _loadModelFromArchive:configuration:modelVersion:compilerVersion:loaderEvent:useUpdatableModelLoaders:loadingClasses:error:] + 532
46 CoreML 0x00000001b34650c8 +[MLLoader _loadWithModelLoaderFromArchive:configuration:loaderEvent:useUpdatableModelLoaders:error:] + 424
47 CoreML 0x00000001b3474bc8 +[MLLoader _loadModelFromArchive:configuration:loaderEvent:useUpdatableModelLoaders:error:] + 460
48 CoreML 0x00000001b347a024 +[MLLoader _loadModelFromAssetAtURL:configuration:loaderEvent:error:] + 244
49 CoreML 0x00000001b3479cbc +[MLLoader loadModelFromAssetAtURL:configuration:error:] + 104
50 CoreML 0x00000001b347ac2c -[MLModelAsset load:] + 564
51 CoreML 0x00000001b347a9c4 -[MLModelAsset modelWithError:] + 24
52 CoreML 0x00000001b347a7b4 +[MLModel modelWithContentsOfURL:configuration:error:] + 172
53 CoreML 0x00000001b37afbc4 +[MLModel modelWithContentsOfURL:error:] + 76
Core code
MLModel* model = nil;
NSError *error = nil;
@try
{
    model = [MLModel modelWithContentsOfURL:modelURL error:&error];
}
@catch (NSException *exception)
{
    model = nil;
    return Ret_OperationErr_InvalidInit;
}
Two questions:
What does this stack mean?
I added @try/@catch, so why does it still crash?
When I use Core ML to run inference with a w8a8 (weight and activation quantized) model on an iPhone 15 (iOS 18 beta 8), the model runs on the CPU instead of the ANE, which results in slower inference. The model I am using is from the coremltools documentation, which indicates that on iOS 17 quantized models can run properly on the ANE and achieve faster speeds. How can I make the quantized model run correctly on the ANE to achieve the expected inference speed?
To reproduce this issue, you can download the Weight & Activation quantized model from the following link: https://apple.github.io/coremltools/docs-guides/source/opt-quantization-perf.html.
Xcode Version: Version 15.2 (15C500b)
com.github.apple.coremltools.source: torch==1.12.1
com.github.apple.coremltools.version: 7.2
Compute: Mixed (Float16, Int32)
Storage: Float16
The input to the mlpackage is MultiArray (Float16 1 × 1 × 544 × 960)
The flexibility is: 1 × 1 × 544 × 960 | 1 × 1 × 384 × 640 | 1 × 1 × 736 × 1280 | 1 × 1 × 1088 × 1920
I tested this on iPhone XR, iPhone 11, iPhone 12, iPhone 13, and iPhone 14. On all devices except the iPhone 11, the model runs correctly on the NPU. However, on the iPhone 11, the model runs on the CPU instead.
Here is the CoreMLTools conversion code I used:
import numpy as np
import coremltools as ct

mlmodel = ct.convert(trace,
                     inputs=[ct.TensorType(shape=input_shape, name="input", dtype=np.float16)],
                     outputs=[ct.TensorType(name="output", dtype=np.float16, shape=output_shape)],
                     convert_to='mlprogram',
                     minimum_deployment_target=ct.target.iOS16
                     )
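For reference, one variant I'm considering (a sketch, not a confirmed fix) declares the four supported sizes as enumerated shapes, which the coremltools flexible-input-shapes guide describes as friendlier to the Neural Engine than range-based flexibility. The trace variable comes from the snippet above:

# Sketch only: same conversion, but with the four supported input sizes declared as enumerated shapes.
import numpy as np
import coremltools as ct

enumerated = ct.EnumeratedShapes(
    shapes=[(1, 1, 384, 640), (1, 1, 544, 960), (1, 1, 736, 1280), (1, 1, 1088, 1920)],
    default=(1, 1, 544, 960),
)

mlmodel = ct.convert(
    trace,
    inputs=[ct.TensorType(shape=enumerated, name="input", dtype=np.float16)],
    outputs=[ct.TensorType(name="output", dtype=np.float16)],  # output shape left flexible to follow the input
    convert_to="mlprogram",
    minimum_deployment_target=ct.target.iOS16,
)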
We have an application that receives a message (through MQTT) from an external system to snap a photo, runs a Core ML Vision request on the image, and then sends the results back. The customer has hundreds of devices, and recently on a couple of those devices (iPhone 13 Pros) the customer encountered an issue in which the devices were not responding in time. There was no crash; just some individual inferences slowed down. The device performs thousands of requests per day. Upon further evaluation of the device logs before and after the request, I noticed the following system log lines:
default 2024-09-04 13:18:31.310401 -0400 ProcessName Processing image for reference: ***
default 2024-09-04 13:18:31.403606 -0400 ProcessName Found matching service: H1xANELoadBalancer
default 2024-09-04 13:18:31.403646 -0400 ProcessName Found matching service: H11ANEIn
default 2024-09-04 13:18:31.403661 -0400 ProcessName Found ANE device :1
default 2024-09-04 13:18:31.403681 -0400 ProcessName Total num of devices 1
default 2024-09-04 13:18:31.403681 -0400 ProcessName (Single-ANE System) Opening H11ANE device at index 0
default 2024-09-04 13:18:31.403681 -0400 ProcessName H11ANEDevice::H11ANEDeviceOpen, usage type: 1
In a good scenario (above), these actions are performed very quickly (in a split second). The app doesn't do anything until the Core ML inference result is returned. In the bad scenario (below), there is a delay of about 4 seconds from the app handing control to the Vision request to getting the response back, leading to timeouts for the customer.
default 2024-09-04 13:19:08.777468 -0400 ProcessName Processing image for reference: ZZZ
default 2024-09-04 13:19:12.199758 -0400 ProcessName Found matching service: H1xANELoadBalancer
default 2024-09-04 13:19:12.199800 -0400 ProcessName Found matching service: H11ANEIn
default 2024-09-04 13:19:12.199812 -0400 ProcessName Found ANE device :1
default 2024-09-04 13:19:12.199832 -0400 ProcessName Total num of devices 1
default 2024-09-04 13:19:12.199834 -0400 ProcessName (Single-ANE System) Opening H11ANE device at index 0
default 2024-09-04 13:19:12.199834 -0400 ProcessName H11ANEDevice::H11ANEDeviceOpen, usage type: 1
The logs are in order; I haven't removed anything. The code is fairly simple, it just runs a Vision request without doing much (roughly like the sketch below). Has anyone encountered this before?
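For context, a minimal sketch of the kind of per-image work involved (identifiers are placeholders, not the actual app code); the VNCoreMLModel and VNCoreMLRequest are created once and reused for every MQTT-triggered photo:

import Vision
import CoreML

final class Classifier {
    private let request: VNCoreMLRequest

    init(model: MLModel) throws {
        let vnModel = try VNCoreMLModel(for: model)
        request = VNCoreMLRequest(model: vnModel)
    }

    func classify(_ pixelBuffer: CVPixelBuffer) throws -> [VNClassificationObservation] {
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        try handler.perform([request])   // this is the call that occasionally stalls for ~4 s
        return request.results as? [VNClassificationObservation] ?? []
    }
}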
Hello everyone,
I have a PyTorch model that outputs an image. I converted this model to CoreML using coremltools, and the resulting CoreML model can be used in my iOS project to perform inference using the MLModel's prediction function, which returns a result of type CVPixelBuffer.
I want to avoid allocating memory every time I call the prediction function. Instead, I would like to use a pre-allocated buffer. I noticed that MLModel provides an overloaded prediction function that accepts an MLPredictionOptions object. This object has an outputBackings member, which allows me to pass a pre-allocated CVPixelBuffer.
However, when I attempt to do this, I encounter the following error:
Copy from tensor to pixel buffer (pixel_format_type: BGRA, image_pixel_type: BGR8, component_dtype: INT, component_pack: FMT_32) is not supported.
Could someone point out what I might be doing wrong? How can I make MLModel use my pre-allocated CVPixelBuffer instead of creating a new one each time?
Here is the Python code I used to convert the PyTorch model to CoreML, where I specified the color_layout as coremltools.colorlayout.BGR:
import torch
import coremltools as ct

def export_ml(model, resolution="640x360"):
    ml_path = "model.mlpackage"
    print("exporting ml model")
    width, height = map(int, resolution.split('x'))
    img0 = torch.randn(1, 3, height, width)
    img1 = torch.randn(1, 3, height, width)
    traced_model = torch.jit.trace(model, (img0, img1))
    input_shape = ct.Shape(shape=(1, 3, height, width))
    output_type_img = ct.ImageType(name="out", scale=1.0, bias=[0, 0, 0], color_layout=ct.colorlayout.BGR)
    ml_model = ct.convert(
        traced_model,
        inputs=[input_type_img0, input_type_img1],  # image input types, defined elsewhere in the original script
        outputs=[output_type_img]
    )
    ml_model.save(ml_path)
Here is the Swift code in my iOS project that calls the MLModel's prediction function:
func prediction(image1: CVPixelBuffer, image2: CVPixelBuffer, model: MLModel) -> CVPixelBuffer? {
    let options = MLPredictionOptions()
    guard let outputBuffer = outputBacking else {
        fatalError("Failed to create CVPixelBuffer.")
    }
    options.outputBackings = ["out": outputBuffer]

    // Perform the prediction
    guard let prediction = try? model.prediction(from: RifeInput(img0: image1, img1: image2), options: options) else {
        Log.i("Failed to perform prediction")
        return nil
    }

    // Extract the result
    guard let cvPixelBuffer = prediction.featureValue(for: "out")?.imageBufferValue else {
        Log.i("Failed to get results from the model")
        return nil
    }
    return cvPixelBuffer
}
Here is the code I used to create the outputBacking:
var outputBacking: CVPixelBuffer?  // pre-allocated buffer reused across predictions

let attributes: [String: Any] = [
    kCVPixelBufferCGImageCompatibilityKey as String: true,
    kCVPixelBufferCGBitmapContextCompatibilityKey as String: true,
    kCVPixelBufferWidthKey as String: Int(640),
    kCVPixelBufferHeightKey as String: Int(360),
    kCVPixelBufferIOSurfacePropertiesKey as String: [:]
]
let status = CVPixelBufferCreate(kCFAllocatorDefault, 640, 360, kCVPixelFormatType_32BGRA, attributes as CFDictionary, &outputBacking)
guard let outputBuffer = outputBacking else {
    fatalError("Failed to create CVPixelBuffer.")
}
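One direction I'm experimenting with (a sketch only, using the model and the "out" output from the snippets above) is reading the pixel format the model actually expects and allocating the backing with that instead of hard-coding 32BGRA:

// Sketch: allocate the output backing with the pixel format and size the model declares for "out".
if let constraint = model.modelDescription
    .outputDescriptionsByName["out"]?
    .imageConstraint {
    var buffer: CVPixelBuffer?
    let status = CVPixelBufferCreate(kCFAllocatorDefault,
                                     constraint.pixelsWide,
                                     constraint.pixelsHigh,
                                     constraint.pixelFormatType,
                                     attributes as CFDictionary,
                                     &buffer)
    if status == kCVReturnSuccess {
        outputBacking = buffer
    }
}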
Any help or guidance would be greatly appreciated!
Thank you!
When I try to run basically any Core ML model using MLPredictionOptions.outputBackings, inference throws the following error:
2024-09-11 15:36:00.184740-0600 run_demo[4260:64822] [coreml] Unrecognized ANE execution priority (null)
2024-09-11 15:36:00.185380-0600 run_demo[4260:64822] *** Terminating app due to uncaught exception 'NSInvalidArgumentException', reason: 'Unrecognized ANE execution priority (null)'
*** Terminating app due to uncaught exception 'NSInvalidArgumentException', reason: 'Unrecognized ANE execution priority (null)'
*** First throw call stack:
(
0 CoreFoundation 0x000000019812cec0 __exceptionPreprocess + 176
1 libobjc.A.dylib 0x0000000197c12cd8 objc_exception_throw + 88
2 CoreFoundation 0x000000019812cdb0 +[NSException exceptionWithName:reason:userInfo:] + 0
3 CoreML 0x00000001a1bf6504 _ZN12_GLOBAL__N_141espressoPlanPriorityFromPredictionOptionsEP19MLPredictionOptions + 264
4 CoreML 0x00000001a1bf68c0 -[MLNeuralNetworkEngine _matchEngineToOptions:error:] + 236
5 CoreML 0x00000001a1be254c __62-[MLNeuralNetworkEngine predictionFromFeatures:options:error:]_block_invoke + 68
6 libdispatch.dylib 0x0000000197e20658 _dispatch_client_callout + 20
7 libdispatch.dylib 0x0000000197e2fcd8 _dispatch_lane_barrier_sync_invoke_and_complete + 56
8 CoreML 0x00000001a1be2450 -[MLNeuralNetworkEngine predictionFromFeatures:options:error:] + 304
9 CoreML 0x00000001a1c9e118 -[MLDelegateModel _predictionFromFeatures:usingState:options:error:] + 776
10 CoreML 0x00000001a1c9e4a4 -[MLDelegateModel predictionFromFeatures:options:error:] + 136
11 libMLBackend_coreml.dylib 0x00000001002f19f0 _ZN6CoreML8runModelENS_5ModelERNSt3__16vectorIPvNS1_9allocatorIS3_EEEES7_ + 904
12 libMLBackend_coreml.dylib 0x00000001002c56e8 _ZZN8ModelImp9runCoremlEPN2ML7Backend17ModelIoBindingImpEENKUlvE_clEv + 120
13 libMLBackend_coreml.dylib 0x00000001002c1e40 _ZNSt3__110__function6__funcIZN2ML4Util10WorkThread11runInThreadENS_8functionIFvvEEEEUlvE_NS_9allocatorIS8_EES6_EclEv + 40
14 libMLBackend_coreml.dylib 0x00000001002bc3a4 _ZZN2ML4Util10WorkThreadC1EvENKUlvE_clEv + 160
15 libMLBackend_coreml.dylib 0x00000001002bc244 _ZNSt3__114__thread_proxyB7v160006INS_5tupleIJNS_10unique_ptrINS_15__thread_structENS_14default_deleteIS3_EEEEZN2ML4Util10WorkThreadC1EvEUlvE_EEEEEPvSC_ + 52
16 libsystem_pthread.dylib 0x0000000197fd32e4 _pthread_start + 136
17 libsystem_pthread.dylib 0x0000000197fce0fc thread_start + 8
)
libc++abi: terminating due to uncaught exception of type NSException
Interestingly, if I don't use MLPredictionOptions to set pre-allocated output backings, then inference appears to run as expected.
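For reference, the pattern that triggers it is roughly the following sketch (model and feature names are placeholders):

// Sketch of the call that raises the exception (names are placeholders).
let options = MLPredictionOptions()
let backing = try MLMultiArray(shape: [1, 1000], dataType: .float32)
options.outputBackings = ["output": backing]
// Throws "Unrecognized ANE execution priority (null)" on this beta;
// the same prediction call without outputBackings runs as expected.
let result = try model.prediction(from: inputFeatures, options: options)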
A similar issue seems to have been discussed and fixed here: https://developer.apple.com/forums/thread/761649 , however I'm seeing this issue on a beta build that I downloaded today (Sept 11 2024).
Will this be fixed? Any advice would be greatly appreciated.
Thanks
I expect my model's confidence values to behave consistently. When I run real-time camera detection with the ML model on Android, it gives varying confidence values such as 75, 40, 30, 95, not always in the 95 to 100 range. But when I use the same model on iOS, it reports confidence above 95 in every case. What do you think could be the reason?
I believe I am encountering a bug in the MPS backend of Core ML.
There seems to be an invalid conversion of a slice_by_index + gather operation that results in indexing the wrong values during GPU execution.
The following Python program, using the coremltools library, illustrates the issue:
import tempfile

import numpy as np
import torch

import coremltools as ct
from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil.mil import types

dB = 20480
shapeI = (2, dB)
shapeB = (dB, 22)

@mb.program(input_specs=[mb.TensorSpec(shape=shapeI, dtype=types.int32),
                         mb.TensorSpec(shape=shapeB)])
def prog(i, b):
    lslice = mb.slice_by_index(x=i, begin=[0, 0], end=[1, dB], end_mask=[False, True],
                               squeeze_mask=[True, False], name='slice_left')
    rslice = mb.slice_by_index(x=i, begin=[1, 0], end=[2, dB], end_mask=[False, True],
                               squeeze_mask=[True, False], name='slice_right')
    ldata = mb.gather(x=b, indices=lslice)
    rdata = mb.gather(x=b, indices=rslice)  # actual bug in optimization of gather+slice
    x = mb.add(x=ldata, y=rdata)
    # dummy ops to make a bigger graph to run on GPU
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=2.)
    x = mb.mul(x=x, y=.5)
    x = mb.mul(x=x, y=1., name='result')
    return x

input_types = [
    ct.TensorType(name="i", shape=shapeI, dtype=np.int32),
    ct.TensorType(name="b", shape=shapeB, dtype=np.float32),
]

with tempfile.TemporaryDirectory() as tmpdirname:
    model_cpu = ct.convert(prog,
                           inputs=input_types,
                           compute_precision=ct.precision.FLOAT32,
                           compute_units=ct.ComputeUnit.CPU_ONLY,
                           package_dir=tmpdirname + 'model_cpu.mlpackage')
    model_gpu = ct.convert(prog,
                           inputs=input_types,
                           compute_precision=ct.precision.FLOAT32,
                           compute_units=ct.ComputeUnit.CPU_AND_GPU,
                           package_dir=tmpdirname + 'model_gpu.mlpackage')

    inputs = {
        "i": torch.randint(0, shapeB[0], shapeI, dtype=torch.int32),
        "b": torch.rand(shapeB, dtype=torch.float32),
    }

    cpu_output = model_cpu.predict(inputs)
    gpu_output = model_gpu.predict(inputs)

    # equivalent to prog
    expected = inputs["b"][inputs["i"][0]] + inputs["b"][inputs["i"][1]]
    # what actually happens on GPU
    actual = inputs["b"][inputs["i"][0]] + inputs["b"][inputs["i"][0]]

    print(f"diff expected vs cpu: {np.sum(np.absolute(expected - cpu_output['result']))}")
    print(f"diff expected vs gpu: {np.sum(np.absolute(expected - gpu_output['result']))}")
    print(f"diff actual vs gpu: {np.sum(np.absolute(actual - gpu_output['result']))}")
The issue seems to occur in the slice_right + gather operations when executed on the GPU; the wrong items of input "i" are selected.
The program outputs:
diff expected vs cpu: 0.0
diff expected vs gpu: 150104.015625
diff actual vs gpu: 0.0
This behavior was tested on a 14-inch MacBook Pro (2023, M2 Pro) running macOS 14.7, using coremltools 8.0b2 with Python 3.9.19.
Hi everyone,
I’m currently in the process of converting and optimizing the Stable Diffusion XL model for iOS 18. I followed the steps from the WWDC 2024 session on model optimization, specifically the one titled "Bring your machine learning and AI models to Apple Silicon."
I utilized the Stable Diffusion XL model and the tools available in the ml-stable-diffusion GitHub repository and ran the following script to convert the model into an .mlpackage:
python3 -m python_coreml_stable_diffusion.torch2coreml \
--convert-unet \
--convert-vae-decoder \
--convert-text-encoder \
--xl-version \
--model-version stabilityai/stable-diffusion-xl-base-1.0 \
--bundle-resources-for-swift-cli \
--refiner-version stabilityai/stable-diffusion-xl-refiner-1.0 \
--attention-implementation SPLIT_EINSUM \
-o ../PotraitModel/ \
--custom-vae-version madebyollin/sdxl-vae-fp16-fix \
--latent-h 128 \
--latent-w 96 \
--chunk-unet
The model conversion worked without any issues. However, when I proceeded to optimize the model in a Jupyter notebook, following the same process shown in the WWDC session, I encountered an error during the post-training quantization step. Here’s the code I used for that:
import coremltools.optimize.coreml as cto_coreml  # assuming this alias for the optimize.coreml module

op_config = cto_coreml.OpPalettizerConfig(
    nbits=4,
    mode="kmeans",
    granularity="per_grouped_channel",
    group_size=16,
)
config = cto_coreml.OptimizationConfig(op_config)
compressed_model = cto_coreml.palettize_weights(mlmodel, config)
Unfortunately, I received the following error:
AssertionError: The IOS16 only supports per-tensor LUT, but got more than one lut on 0th axis. LUT shape: (80, 1, 1, 1, 16, 1)
It appears that the minimum deployment target of the MLModel is set to iOS 16, which might be causing compatibility issues. How can I update the minimum deployment target to iOS 18? If anyone has encountered this issue or knows a workaround, I would greatly appreciate your guidance!
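If I understand the tooling correctly, the deployment target is fixed when the model is converted rather than something I can change on the saved MLModel, so my current plan is to redo the conversion with minimum_deployment_target=ct.target.iOS18 before palettizing. A sketch (model and input names are placeholders):

# Sketch: re-convert with an iOS 18 minimum deployment target, which per_grouped_channel
# palettization appears to require, then run the same palettization pass.
import coremltools as ct
import coremltools.optimize.coreml as cto_coreml

mlmodel = ct.convert(
    traced_unet,                      # placeholder for the traced SDXL component
    inputs=example_inputs,            # placeholder input specs
    convert_to="mlprogram",
    minimum_deployment_target=ct.target.iOS18,
)

op_config = cto_coreml.OpPalettizerConfig(
    nbits=4,
    mode="kmeans",
    granularity="per_grouped_channel",
    group_size=16,
)
config = cto_coreml.OptimizationConfig(global_config=op_config)
compressed_model = cto_coreml.palettize_weights(mlmodel, config)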
Thanks in advance for any help!
We are experiencing a major issue with the native .version1 of the SoundAnalysis framework in iOS 18, which has left all of our users without recordings. Our core feature relies heavily on sound analysis in the background, and it previously worked flawlessly in prior iOS versions. However, in iOS 18, sound analysis stops working in the background, triggering a critical warning.
Details of the issue:
We are using SoundAnalysis to analyze background sounds and have enabled the necessary background permissions.
We are using the latest Xcode.
A warning now appears, and sound analysis fails in the background. Below is the warning message we are encountering:
Warning Message:
Execution of the command buffer was aborted due to an error during execution. Insufficient Permission (to submit GPU work from background)
[Espresso::handle_ex_plan] exception=Espresso exception: "Generic error": Insufficient Permission (to submit GPU work from background) (00000006:kIOGPUCommandBufferCallbackErrorBackgroundExecutionNotPermitted); code=7 status=-1
Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1).
CoreML prediction failed with Error Domain=com.apple.CoreML Code=0 "Failed to evaluate model 0 in pipeline" UserInfo={NSLocalizedDescription=Failed to evaluate model 0 in pipeline, NSUnderlyingError=0x30330e910 {Error Domain=com.apple.CoreML Code=0 "Failed to evaluate model 1 in pipeline" UserInfo={NSLocalizedDescription=Failed to evaluate model 1 in pipeline, NSUnderlyingError=0x303307840 {Error Domain=com.apple.CoreML Code=0 "Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1)." UserInfo={NSLocalizedDescription=Unable to compute the prediction using a neural network model. It can be an invalid input data or broken/unsupported model (error code: -1).}}}}}
We urgently need guidance or a fix for this, as our application’s main functionality is severely impacted by this background permission error. Please let us know the next steps or if this is a known issue with iOS 18.
Hi all, I'm tuning my app's prediction speed with a Core ML model. I watched and tried the methods in the videos Improve Core ML integration with async prediction and Optimize your Core ML usage. I also used Instruments to find the bottleneck that keeps my prediction speed from being faster.
Below is the Instruments result for my app; its prediction duration is 10.29 ms.
And below is the performance report, which shows an average prediction time of 5.55 ms, about half of my app's prediction time!
Below is part of my Instruments recording. I think the predictions should be considered quite frequent. Could they be faster?
How can I reach the same prediction speed as the performance report? Also, the prediction speed on a MacBook Pro M2 is nearly the same as on a MacBook Air M1!
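One thing I plan to try, in case the gap is per-call overhead rather than the model itself, is batching several inputs into a single Core ML call. A sketch (the inputs array holds instances of the generated model input class, which conform to MLFeatureProvider):

import CoreML

// Sketch: amortize per-prediction overhead by handing Core ML a batch at once.
func predictBatch(model: MLModel, inputs: [MLFeatureProvider]) throws -> MLBatchProvider {
    let batch = MLArrayBatchProvider(array: inputs)
    let options = MLPredictionOptions()
    return try model.predictions(fromBatch: batch, options: options)
}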
I'm experiencing issues with the Core ML Async API, as it doesn't seem to be working correctly. It consistently hangs during the
"03 performInference, after get smallInput, before prediction" part,
as shown in the attached:
log1.txt
log2.txt
Below is my code. Could you please advise on how I should modify it?
private func createFrameAsync(for sampleBuffer: CMSampleBuffer) {
    guard let pixelBuffer = sampleBuffer.imageBuffer else { return }
    Task {
        print("**** createFrameAsync before performInference")
        do {
            try await runModelAsync(on: pixelBuffer)
        } catch {
            print("Error processing frame: \(error)")
        }
        print("**** createFrameAsync after performInference")
    }
}

func runModelAsync(on pixelbuffer: CVPixelBuffer) async
{
    print("01 performInference, before resizeFrame")
    guard let data = metalResizeFrame(sourcePixelFrame: pixelbuffer, targetSize: MTLSize.init(width: InputWidth, height: InputHeight, depth: 1), resizeMode: .scaleToFill) else {
        os_log("Preprocessing failed", type: .error)
        return
    }
    print("02 performInference, after resizeFrame, before get smallInput")
    let input = model_smallInput(input: data)
    print("03 performInference, after get smallInput, before prediction")
    if let prediction = try? await mlmodel!.model.prediction(from: input) {
        print("04 performInference, after prediction, before get result")
        var results: [Float] = []
        let output = prediction.featureValue(for: "output")?.multiArrayValue
        if let bufferPointer = try? UnsafeBufferPointer<Float>(output!) {
            results = Array(bufferPointer)
        }
        print("05 performInference, after get result, before setRenderData")
        let localResults = results
        await MainActor.run {
            ScreenRecorder.shared
                .setRenderDataNormalized(
                    screenImage: pixelbuffer,
                    depthData: localResults
                )
        }
        print("06 performInference, after setRenderData")
    }
}
I'm trying to run a Core ML model.
This is an image classifier generated using:
let parameters = MLImageClassifier.ModelParameters(validation: .dataSource(validationDataSource),
                                                   maxIterations: 25,
                                                   augmentation: [],
                                                   algorithm: .transferLearning(
                                                       featureExtractor: .scenePrint(revision: 2),
                                                       classifier: .logisticRegressor
                                                   ))
let model = try MLImageClassifier(trainingData: .labeledDirectories(at: trainingDir.url), parameters: parameters)
I'm trying to run it with the new async Vision api
let model = try MLModel(contentsOf: modelUrl)
guard let modelContainer = try? CoreMLModelContainer(model: model) else {
    fatalError("The model is missing")
}
let request = CoreMLRequest(model: modelContainer)

let image = NSImage(named: "testImage")!
let cgImage = image.toCGImage()!
let handler = ImageRequestHandler(cgImage)
do {
    let results = try await handler.perform(request)
    print(results)
} catch {
    print("Failed: \(error)")
}
This gives me
Failed: internalError("Error Domain=com.apple.Vision Code=7 "The VNDetectorProcessOption_ScenePrints required option was not found" UserInfo={NSLocalizedDescription=The VNDetectorProcessOption_ScenePrints required option was not found}")
Please help! Am I missing something?
Hi, I found that when continuously predicting with the same Core ML model, running at 120 FPS is faster than at 60 FPS.
I use a MacBook Pro M2 and turn on ProMotion to run Core ML model prediction with a 120 FPS video; the average prediction time is 7.46 ms, as below:
But when I turn off ProMotion, set a 60 Hz refresh rate, and run Core ML model prediction with a 60 FPS video, the average prediction time is 10.91 ms, as below:
What could be the technical explanation for these results? Is there any documentation or technical literature that addresses this behavior?
I'm hitting a limit when trying to train an Image Classifier.
It's at about 16k images (in line with the error info) - and it gives the error:
IOSurface creation failed: e00002be parentID: 00000000 properties: {
IOSurfaceAllocSize = 529984;
IOSurfaceBytesPerElement = 4;
IOSurfaceBytesPerRow = 1472;
IOSurfaceElementHeight = 1;
IOSurfaceElementWidth = 1;
IOSurfaceHeight = 360;
IOSurfaceName = CoreVideo;
IOSurfaceOffset = 0;
IOSurfacePixelFormat = 1111970369;
IOSurfacePlaneComponentBitDepths = (
8,
8,
8,
8
);
IOSurfacePlaneComponentNames = (
4,
3,
2,
1
);
IOSurfacePlaneComponentRanges = (
1,
1,
1,
1
);
IOSurfacePurgeWhenNotInUse = 1;
IOSurfaceSubsampling = 1;
IOSurfaceWidth = 360;
} (likely per client IOSurface limit of 16384 reached)
I feel like I was able to use more images than this before upgrading to Sonoma - but I don't have the receipts....
Is there a way around this?
I have oodles of spare memory on my machine - it's using about 16gb of 64 when it crashes...
code to create the model is
let parameters = MLImageClassifier.ModelParameters(validation: .dataSource(validationDataSource),
                                                   maxIterations: 25,
                                                   augmentation: [],
                                                   algorithm: .transferLearning(
                                                       featureExtractor: .scenePrint(revision: 2),
                                                       classifier: .logisticRegressor
                                                   ))
let model = try MLImageClassifier(trainingData: .labeledDirectories(at: trainingDir.url), parameters: parameters)
I have also tried the same training source in CreateML, it runs through 'extracting features', and crashes at about 16k images processed.
Thank you
Hi, while trying to diagnose why some of my Core ML models run slower with their configuration's compute units set to .CPU_AND_GPU than with .CPU_ONLY, I've been attempting to create Core ML model performance reports in Xcode to identify the operations that are not compatible with the GPU. However, when selecting an iPhone as the connected device and a compute unit of 'All', 'CPU and GPU', or 'CPU and Neural Engine', Xcode displays one of the following two error messages:
“There was an error creating the performance report. The performance report has crashed on device”
"There was an error creating the performance report. Unable to compute the prediction using ML Program. It can be an invalid input data or broken/unsupported model."
The performance reports are successfully generated when selecting the connected device as iPhone with compute unit 'CPU only' or Mac with any combination of compute units.
Some of the models I have found the issue to occur with are stateful; some are not. I have tried to replicate the issue with example models from the Core ML Tools stateful-model guide and the video Bring your machine learning and AI models to Apple silicon. Running the performance report on a model generated from the Simple Accumulator example code succeeds with all compute unit options, but for models from the toy attention and toy attention with kvcache examples, it only succeeds with compute units set to 'CPU only' when choosing iPhone as the device.
Versions I'm currently working with:
Xcode Version 16.0
MacOS Sequoia 15.0.1
Core ML Tools 8.0
iPhone 16 Pro iOS 18.0.1
Is there a way to avoid these errors? Or is there another way to identify which operations within a CoreML model are supported to run on iPhone GPU/Neural engine?
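One alternative I've been looking at (a sketch that assumes the MLComputePlan API available since iOS 17.4 / macOS 14.4; exact property names may differ) is enumerating the operations of the compiled ML program and asking Core ML which compute devices support each one, as a programmatic stand-in for the performance report:

import CoreML

// Sketch: list each ML program operation with its preferred and supported compute devices.
func reportDeviceSupport(for compiledModelURL: URL) async throws {
    let configuration = MLModelConfiguration()
    configuration.computeUnits = .all

    let plan = try await MLComputePlan.load(contentsOf: compiledModelURL, configuration: configuration)
    guard case .program(let program) = plan.modelStructure,
          let mainFunction = program.functions["main"] else {
        print("Not an ML program, or no main function")
        return
    }

    for operation in mainFunction.block.operations {
        guard let usage = plan.deviceUsage(for: operation) else { continue }
        print("\(operation.operatorName): preferred \(usage.preferred), supported \(usage.supported)")
    }
}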
Hello All,
I'm developing a machine learning model for image classification, which requires managing an exceptionally large dataset comprising over 18,000 classes. I've encountered several hurdles while using Create ML, and I would appreciate any insights or advice from those who have faced similar challenges.
Current Issues:
Create ML Failures with Large Datasets:
When using Create ML, the process often fails with errors such as "Failed to create CVPixelBufferPool." This issue appears when handling particularly large volumes of data.
Custom Implementation Struggles:
To bypass some of the limitations of Create ML, I've developed a custom solution leveraging the MLImageClassifier within the CreateML framework in my own SwiftUI MacOS app.
Initially I had similar errors as I did in Create ML, but I discovered I could move beyond the "extracting features" stage without crashing by employing a workaround: using a timer to cancel and restart the job every 30 seconds. This method is the only way I've been able to finish the extraction phase, even with large datasets, but it causes many errors in the console if I allow it to run too long.
Lack of Progress Reporting:
Using MLJob<MLImageClassifier>, I've noticed that progress reporting stalls after the feature extraction phase. Although system resources indicate activity, there is no programmatic feedback on what is occurring.
Things I've Tried:
Data Validation: Ensured that all images in the dataset are valid and non-corrupted, which helps prevent unnecessary issues from faulty data.
Custom Implementation with CreateML Framework: Developed a custom solution using the MLImageClassifier within the CreateML framework to gain more control over the training process.
Timer-Based Workaround: Employed a workaround using a timer to cancel and restart the job every 30 seconds to move past the "extracting features" phase, allowing progress even with larger datasets.
Monitoring System Resources: Observed ongoing system resource usage when process feedback stalled, confirming background processing activity despite the lack of progress reporting.
Subset Testing: Successfully created and tested a model on a subset of the data, which validated the approach worked for smaller datasets and could produce a functioning model.
Router Model Concept: Considered training multiple models for different subsets of data and implementing a "router" model to decide which specialized model to utilize based on input characteristics.
What I Need Help With:
Handling Large Datasets:
I'm seeking strategies or best practices for effectively utilizing Create ML with large datasets.
Any guidance on memory management or alternative methodologies would be immensely helpful.
Improving Progress Reporting:
I'm looking for ways to obtain more consistent and programmatic progress updates during the training and testing phases.
I'm working on a Mac M1 Pro w/ 32GB RAM, with Apple Silicon and am fully integrated within the Apple ecosystem. I am very grateful for any advice or experiences you could share to help overcome these challenges.
Thank you!
I've pasted the relevant code below:
func go() {
    if self.trainingSession == nil {
        self.trainingSession = createTrainingSession()
    }
    if self.startTime == nil {
        self.startTime = Date()
    }
    job = try! MLImageClassifier.resume(self.trainingSession)
    job.phase
        .receive(on: RunLoop.main)
        .sink { phase in
            self.phase = phase
        }
        .store(in: &cancellables)
    job.checkpoints
        .receive(on: RunLoop.main)
        .sink { checkpoint in
            self.state = "\(checkpoint)\n\(self.job.progress)"
            self.progress = self.job.progress.fractionCompleted + 0.2
            self.updateTimeEstimates()
        }
        .store(in: &cancellables)
    job.result
        .receive(on: DispatchQueue.main)
        .sink(receiveCompletion: { completion in
            switch completion {
            case .failure(let error):
                print("Training Failed: \(error.localizedDescription)")
            case .finished:
                print("🎉🎉🎉🎉 TRAINING SESSION FINISHED!!!!")
                self.trainingFinished = true
            }
        }, receiveValue: { classifier in
            Task {
                await self.saveModel(classifier)
            }
        })
        .store(in: &cancellables)
}
private func createTrainingSession() -> MLTrainingSession<MLImageClassifier> {
    do {
        print("Initializing training Data...")
        let trainingData: MLImageClassifier.DataSource = .labeledDirectories(at: trainingDataURL)
        let modelParameters = MLImageClassifier.ModelParameters(
            validation: .split(strategy: .automatic),
            augmentation: self.augmentations,
            algorithm: .transferLearning(
                featureExtractor: .scenePrint(revision: 2),
                classifier: .logisticRegressor
            )
        )
        let sessionParameters = MLTrainingSessionParameters(
            sessionDirectory: self.sessionDirectoryURL,
            reportInterval: 1,
            checkpointInterval: 100,
            iterations: self.numberOfIterations
        )
        print("Initializing training session...")
        let trainingSession: MLTrainingSession<MLImageClassifier>
        if FileManager.default.fileExists(atPath: self.sessionDirectoryURL.path) && isSessionCreated(atPath: self.sessionDirectoryURL.path()) {
            do {
                trainingSession = try MLImageClassifier.restoreTrainingSession(sessionParameters: sessionParameters)
            } catch {
                print("error resuming, exiting.... \(error.localizedDescription)")
                fatalError()
            }
        } else {
            trainingSession = try MLImageClassifier.makeTrainingSession(
                trainingData: trainingData,
                parameters: modelParameters,
                sessionParameters: sessionParameters
            )
        }
        return trainingSession
    } catch {
        print("Failed to initialize training session: \(error.localizedDescription)")
        fatalError()
    }
}
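For completeness, the only extra feedback I've found between checkpoints is observing the job's Progress object directly, in addition to the checkpoint publisher in go() above. A sketch against the code above:

// Sketch: KVO publisher on the training job's Progress; fires whenever fractionCompleted changes,
// even between checkpoint emissions.
job.progress
    .publisher(for: \.fractionCompleted)
    .receive(on: RunLoop.main)
    .sink { fraction in
        self.progress = fraction
    }
    .store(in: &cancellables)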
I have seen a lot of tutorials on converting torchvision models to Core ML, but I have not been able to find any tutorials for torchaudio models. Is converting a torchaudio model to Core ML even possible? Does anybody have links that show how to do it?
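Since torchaudio models are ordinary PyTorch nn.Modules, I assume the standard trace-and-convert path applies to at least the simpler architectures; here's the sketch I had in mind, using torchaudio's Wav2Letter as an example (models relying on STFT or complex ops may need those parts rewritten):

# Sketch: the generic PyTorch -> Core ML path applied to a simple torchaudio model.
import torch
import torchaudio
import coremltools as ct

model = torchaudio.models.Wav2Letter(num_classes=40, input_type="waveform", num_features=1)
model.eval()

example = torch.randn(1, 1, 16000)          # (batch, num_features, time)
traced = torch.jit.trace(model, example)

mlmodel = ct.convert(
    traced,
    inputs=[ct.TensorType(name="waveform", shape=example.shape)],
    convert_to="mlprogram",
)
mlmodel.save("Wav2Letter.mlpackage")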
https://developer.apple.com/machine-learning/models/
Adding the DepthAnythingV2SmallF16.mlpackage to a new project in Xcode 16.1 and invoking the class crashes the app.
Anyone else having the same issue?
I tried Xcode 16.2 beta and it has the same response.
Code
import UIKit
import CoreML

class ViewController: UIViewController {
    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view.
        do {
            // Use a default model configuration.
            let defaultConfig = MLModelConfiguration()
            // app crashes here
            let model = try? DepthAnythingV2SmallF16(
                configuration: defaultConfig
            )
        } catch {
            //
        }
    }
}
Response
/AppleInternal/Library/BuildRoots/4b66fb3c-7dd0-11ef-b4fb-4a83e32a47e1/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphExecutable.mm:129: failed assertion `Error: unhandled platform for MPSGraph serialization'