-
Notifications
You must be signed in to change notification settings - Fork 570
MetalPerformanceShadersGraph macOS xcode27.0 b2
Alex Soto edited this page Jun 23, 2026
·
1 revision
# MetalPerformanceShadersGraph.framework
diff -ruN /Applications/Xcode_27.0.0-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraph.h /Applications/Xcode_27.0.0-beta2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraph.h
--- /Applications/Xcode_27.0.0-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraph.h 2026-05-21 09:32:50
+++ /Applications/Xcode_27.0.0-beta2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraph.h 2026-06-16 03:43:48
@@ -118,8 +118,7 @@
 -(void) disableTypeInference;
/// Turns on Automatic Layout Conversion (for conv like operations) for GPU.
-/// DEPRECATED: Layout conversion is now default, so this function is a no-op.
--(void) convertLayoutToNHWC MPS_AVAILABLE_STARTING(macos(26.4), ios(26.4), tvos(26.4));
+-(void) convertLayoutToNHWC MPS_AVAILABLE_STARTING_BUT_DEPRECATED("Layout Conversion to NHWC is enabled by default on M5 and newer", macos(26.4, 27.0), ios(26.4, 27.0), tvos(26.4, 27.0));
/// Turns off Automatic Layout Conversion (for conv like operations) for GPU.
 -(void) disableAutoLayoutConversion MPS_AVAILABLE_STARTING(macos(27.0), ios(27.0), tvos(27.0));
diff -ruN /Applications/Xcode_27.0.0-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphMatrixMultiplicationOps.h /Applications/Xcode_27.0.0-beta2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphMatrixMultiplicationOps.h
--- /Applications/Xcode_27.0.0-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphMatrixMultiplicationOps.h 2026-05-23 03:56:39
+++ /Applications/Xcode_27.0.0-beta2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphMatrixMultiplicationOps.h 2026-06-13 22:36:00
@@ -18,7 +18,7 @@
/// Use this descriptor with
/// ``MPSGraph/scaledDotProductAttentionWithQueryTensor:keyTensor:valueTensor:descriptor:name:``
/// to specify optional features such as an attention mask, causal masking, and attention sinks.
-MPS_CLASS_AVAILABLE_STARTING(macos(26.0), ios(26.0), macCatalyst(26.0), tvos(26.0))
+MPS_CLASS_AVAILABLE_STARTING(macos(27.0), ios(27.0), macCatalyst(27.0), tvos(27.0))
@interface MPSGraphSDPADescriptor : MPSGraphObject
/// The scale applied to the result of the query–key matrix multiply before softmax.
@@ -28,7 +28,7 @@
/// An optional additive mask tensor applied to the scaled QK^T scores before softmax.
/// Must be broadcast-compatible with shape ``[batch, heads, T_q, T_kv]``.
/// Mutually exclusive with ``isCausal``.
-@property(nonatomic, nullable) MPSGraphTensor *maskTensor;
+@property(nonatomic, retain, nullable) MPSGraphTensor *maskTensor;
/// When YES, a causal (lower-triangular) mask is applied so that each query position
/// attends only to key positions at or before it. Mutually exclusive with ``maskTensor``.
@@ -37,7 +37,7 @@
/// An optional attention-sinks tensor of shape ``[nHeads]``. Each element seeds the
/// online-softmax accumulator for the corresponding query head with a virtual token logit,
/// causing real-token attention weights to sum to less than one.
-@property(nonatomic, nullable) MPSGraphTensor *sinksTensor;
+@property(nonatomic, retain, nullable) MPSGraphTensor *sinksTensor;
/// Creates a descriptor with the given scale and all other properties set to their defaults
/// (no mask, isCausal = NO, no sinks).
@@ -139,7 +139,7 @@
descriptor:(MPSGraphSDPADescriptor *)descriptor
name:(NSString *_Nullable)name
MPS_SWIFT_NAME(scaledDotProductAttention(query:key:value:descriptor:name:))
- MPS_AVAILABLE_STARTING(macos(26.0), ios(26.0), macCatalyst(26.0), tvos(26.0));
+ MPS_AVAILABLE_STARTING(macos(27.0), ios(27.0), macCatalyst(27.0), tvos(27.0));
@end
diff -ruN /Applications/Xcode_27.0.0-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphQuantizationOps.h /Applications/Xcode_27.0.0-beta2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphQuantizationOps.h
--- /Applications/Xcode_27.0.0-beta.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphQuantizationOps.h 2026-05-23 03:56:38
+++ /Applications/Xcode_27.0.0-beta2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/MetalPerformanceShadersGraph.framework/Headers/MPSGraphQuantizationOps.h 2026-06-16 03:43:47
@@ -17,14 +17,16 @@
/// Creates a Quantize operation and returns the result tensor.
///
-/// Convert the float `tensor` to an i8 or u8 tensor by applying a scale + bias transform:
-/// result = (tensor / scale) + zeroPoint
+/// Convert the float `tensor` to a quantized tensor by applying a scale + bias transform:
+/// result = round(tensor / scale) + zeroPoint
///
/// - Parameters:
/// - tensor: Input tensor to be quantized
/// - scale: Scale scalar parameter
/// - zeroPoint: Bias scalar parameter (converted to dataType of resultTensor)
-/// - dataType: Integer data type of the result tensor.
+/// - dataType: Data type of the result tensor. Supports ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, and ``MPSDataTypeFloat8E5M2``. Float8 output requires symmetric
+/// quantization (zeroPoint = 0).
/// - name: The name for the operation.
/// - Returns: A valid MPSGraphTensor array of datatype dataType
 -(MPSGraphTensor*) quantizeTensor:(MPSGraphTensor*)tensor
@@ -37,9 +39,15 @@
/// Creates Dequantize operation and returns the result tensor.
///
-/// Convert the i8 or u8 `tensor` to a float tensor by applying a scale + bias transform:
-/// result = scale(tensor - zeroPoint)
+/// Convert the quantized `tensor` to a float tensor by applying a scale + bias transform:
+/// result = scale * (tensor - zeroPoint)
///
+/// Supported input types: ``MPSDataTypeInt4``, ``MPSDataTypeUInt4``,
+/// ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// The scale type and `dataType` must match (``MPSDataTypeFloat16``, ``MPSDataTypeFloat32``,
+/// or ``MPSDataTypeBFloat16``).
+///
/// - Parameters:
/// - tensor: Input tensor to be dequantized
/// - scale: Scale scalar parameter
@@ -57,14 +65,16 @@
/// Creates a Quantize operation and returns the result tensor.
///
-/// Convert the float `tensor` to an i8 or u8 tensor by applying a scale + bias transform:
-/// result = (tensor / scaleTensor) + zeroPoint
+/// Convert the float `tensor` to a quantized tensor by applying a scale + bias transform:
+/// result = round(tensor / scaleTensor) + zeroPoint
///
/// - Parameters:
/// - tensor: Input tensor to be quantized
/// - scaleTensor: Scale 1D Tensor parameter with size == tensor.shape[axis]
/// - zeroPoint: Bias scalar parameter (converted to dataType of resultTensor)
-/// - dataType: Integer data type of the result tensor.
+/// - dataType: Data type of the result tensor. Supports ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, and ``MPSDataTypeFloat8E5M2``. Float8 output requires symmetric
+/// quantization (zeroPoint = 0).
/// - axis: Axis on which the scale 1D value is being broadcasted
/// - name: The name for the operation.
/// - Returns: A valid MPSGraphTensor array of datatype dataType
@@ -79,9 +89,15 @@
/// Creates Dequantize operation and returns the result tensor.
///
-/// Convert the i8 or u8 `tensor` to a float tensor by applying a scale + bias transform:
-/// result = scaleTensor(tensor - zeroPoint)
+/// Convert the quantized `tensor` to a float tensor by applying a scale + bias transform:
+/// result = scaleTensor * (tensor - zeroPoint)
///
+/// Supported input types: ``MPSDataTypeInt4``, ``MPSDataTypeUInt4``,
+/// ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// The `scaleTensor` type and `dataType` must match (``MPSDataTypeFloat16``,
+/// ``MPSDataTypeFloat32``, or ``MPSDataTypeBFloat16``).
+///
/// - Parameters:
/// - tensor: Input tensor to be dequantized
/// - scaleTensor: Scale scalar or 1D Tensor parameter with size == tensor.shape[axis]
@@ -101,14 +117,16 @@
/// Creates a Quantize operation and returns the result tensor.
///
-/// Convert the float `tensor` to an i8 or u8 tensor by applying a scale + bias transform:
-/// result = (tensor / scaleTensor) + zeroPointTensor
+/// Convert the float `tensor` to a quantized tensor by applying a scale + bias transform:
+/// result = round(tensor / scaleTensor) + zeroPointTensor
///
/// - Parameters:
/// - tensor: Input tensor to be quantized
/// - scaleTensor: Scale scalar or 1D Tensor parameter with size == tensor.shape[axis]
/// - zeroPointTensor: Bias scalar or 1D Tensor parameter with size == tensor.shape[axis]
-/// - dataType: Integer data type of the result tensor.
+/// - dataType: Data type of the result tensor. Supports ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, and ``MPSDataTypeFloat8E5M2``. Float8 output requires symmetric
+/// quantization (zeroPointTensor = 0).
/// - axis: Axis on which the scale 1D value is being broadcasted
/// - name: The name for the operation.
/// - Returns: A valid MPSGraphTensor array of datatype dataType
@@ -123,9 +141,15 @@
/// Creates a dequantize operation and returns the result tensor.
///
-/// Convert the i8 or u8 `tensor` to a float tensor by applying a scale + bias transform:
-/// result = scaleTensor(tensor - zeroPointTensor)
+/// Convert the quantized `tensor` to a float tensor by applying a scale + bias transform:
+/// result = scaleTensor * (tensor - zeroPointTensor)
///
+/// Supported input types: ``MPSDataTypeInt4``, ``MPSDataTypeUInt4``,
+/// ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// The `scaleTensor` type and `dataType` must match (``MPSDataTypeFloat16``,
+/// ``MPSDataTypeFloat32``, or ``MPSDataTypeBFloat16``).
+///
/// - Parameters:
/// - tensor: Input tensor to be dequantized
/// - scaleTensor: Scale scalar or 1D Tensor parameter with size == tensor.shape[axis]
@@ -143,14 +167,31 @@
MPS_AVAILABLE_STARTING(macos(13.1), ios(16.2), tvos(16.2))
MPS_SWIFT_NAME( dequantize(_:scaleTensor:zeroPointTensor:dataType:axis:name:) );
-
/// Creates a dequantize operation and returns the result tensor.
///
-/// Convert the i8, u8, i4 or u4 `tensor` to a float tensor by applying a scale and bias transform:
+/// Convert the quantized `tensor` to a float tensor by applying a scale and bias transform:
/// ```md
-/// result = scaleTensor(tensor - zeroPointTensor).
+/// result = scaleTensor * (tensor - zeroPointTensor)
/// ```
///
+/// The quantization mode is determined by the element type of `scaleTensor`:
+///
+/// **Regular blockwise** (`scaleTensor` type is an MPSGraph float type):
+/// - Supported input types: ``MPSDataTypeInt4``, ``MPSDataTypeUInt4``,
+/// ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// - `zeroPointTensor` type must match the input `tensor` type.
+/// - `scaleTensor` shape: leading dims of `tensor` with the last dim replaced by
+/// `tensor.shape[last] / blockSize`.
+///
+/// **MX mode** (`scaleTensor` type is ``MPSDataTypeFloat8E8M0``):
+/// - Supported input types: ``MPSDataTypeFloat4E2M1``, ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// - Block size 32, symmetric (zero point must be 0).
+///
+/// **F4 with F8E4M3 scale** (`scaleTensor` type is ``MPSDataTypeFloat8E4M3``,
+/// input type is ``MPSDataTypeFloat4E2M1``):
+/// - Block size 16, symmetric (zero point must be 0).
+///
/// - Parameters:
/// - tensor: Input tensor to be dequantized.
/// - scaleTensor: The scale tensor with groups support.
@@ -166,14 +207,31 @@
MPS_AVAILABLE_STARTING(macos(15.0), ios(18.0), macCatalyst(18.0), tvos(18.0))
MPS_SWIFT_NAME( dequantize(_:scaleTensor:zeroPointTensor:dataType:name:) );
-
/// Creates a dequantize operation and returns the result tensor.
///
-/// Converts the i8, u8, i4 or u4 `tensor` to a float tensor by applying a scale and bias transform:
+/// Converts the quantized `tensor` to a float tensor by applying a scale transform
+/// (symmetric — zero point is implicitly 0):
/// ```md
-/// result = scaleTensor * tensor.
+/// result = scaleTensor * tensor
/// ```
///
+/// The quantization mode is determined by the element type of `scaleTensor`:
+///
+/// **Regular blockwise** (`scaleTensor` type is an MPSGraph float type):
+/// - Supported input types: ``MPSDataTypeInt4``, ``MPSDataTypeUInt4``,
+/// ``MPSDataTypeInt8``, ``MPSDataTypeUInt8``,
+/// ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// - `scaleTensor` shape: leading dims of `tensor` with the last dim replaced by
+/// `tensor.shape[last] / blockSize`.
+///
+/// **MX mode** (`scaleTensor` type is ``MPSDataTypeFloat8E8M0``):
+/// - Supported input types: ``MPSDataTypeFloat4E2M1``, ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``.
+/// - Block size 32.
+///
+/// **F4 with F8E4M3 scale** (`scaleTensor` type is ``MPSDataTypeFloat8E4M3``,
+/// input type is ``MPSDataTypeFloat4E2M1``):
+/// - Block size 16.
+///
/// - Parameters:
/// - tensor: Input tensor to be dequantized.
/// - scaleTensor: Scale Tensor parameter with groups support.
@@ -187,17 +245,25 @@
MPS_AVAILABLE_STARTING(macos(15.0), ios(18.0), macCatalyst(18.0), tvos(18.0))
MPS_SWIFT_NAME( dequantize(_:scaleTensor:dataType:name:) );
-/// Creates a lookup-table based quantization operation and returns the result tensor.
+/// Creates a lookup-table based dequantize operation and returns the result tensor.
///
-/// Converts a u8 or u4 `tensor` to a float tensor by applying a lookup operation:
+/// Converts a `tensor` of integer indices to a float tensor by applying a lookup operation:
/// ```md
/// result[i1,...,in] = LUTTensor[i1',...,in',tensor[i1,...,in]].
/// ```
-/// Note: The operation supports LUT groups up to the last 3 dimensions for `tensor`.
///
+/// Supported `tensor` index types and required last-dimension size of `LUTTensor`:
+/// - ``MPSDataTypeUInt4``: 16 entries
+/// - ``MPSDataTypeUInt8``: 256 entries
+///
+/// `LUTTensor` (and result) element types: ``MPSDataTypeFloat16``, ``MPSDataTypeFloat32``,
+/// ``MPSDataTypeBFloat16``, ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``, ``MPSDataTypeInt8``.
+/// The input `tensor` must be a graph constant.
+/// The operation supports LUT groups up to the last 3 dimensions for `tensor`.
+///
/// - Parameters:
-/// - tensor: Input tensor to be dequantized.
-/// - LUTTensor: The lookup table to use - for u4 the last dimension should have 16 elements, and for u8 256 elements.
+/// - tensor: Input constant integer-index tensor to be dequantized.
+/// - LUTTensor: The lookup table to use.
/// - name: The name for the operation.
/// - Returns: A valid ``MPSGraphTensor`` object.
 -(MPSGraphTensor *) dequantizeTensor:(MPSGraphTensor *) tensor
@@ -206,20 +272,29 @@
MPS_SWIFT_NAME( dequantize(_:LUTTensor:name:) )
MPS_AVAILABLE_STARTING(macos(15.0), ios(18.0), macCatalyst(18.0), tvos(18.0));
-/// Creates a vector lookup-table based quantization operation and returns the result tensor.
+/// Creates a vector lookup-table based dequantize operation and returns the result tensor.
///
-/// Converts a u8 or u4 `tensor` to a float tensor by applying a lookup operation, where each
-/// input index defines a vector of values. The operation reads the vector values from the last dimension of the lookup table
-/// tensor and stores them into the dimension defined by `axis` on the result tensor.
+/// Converts a `tensor` of integer indices to a float tensor by applying a lookup operation,
+/// where each input index defines a vector of values. The operation reads the vector values
+/// from the last dimension of the lookup table tensor and stores them into the dimension
+/// defined by `axis` on the result tensor.
/// ```md
/// result[i1, ... , i_axis, ..., in] = LUTTensor[i1', ..., in', tensor[i1, ..., in], i_axis]
/// ```
-/// Note: The operation supports LUT groups up to the last 2 dimensions for `tensor`.
///
+/// Supported `tensor` index types and required second-to-last-dimension size of `LUTTensor`:
+/// - ``MPSDataTypeUInt4``: 16 entries
+/// - ``MPSDataTypeUInt8``: 256 entries
+///
+/// `LUTTensor` (and result) element types: ``MPSDataTypeFloat16``, ``MPSDataTypeFloat32``,
+/// ``MPSDataTypeBFloat16``, ``MPSDataTypeFloat8E4M3``, ``MPSDataTypeFloat8E5M2``, ``MPSDataTypeInt8``.
+/// The input `tensor` must be a graph constant.
+/// The operation supports LUT groups up to the last 2 dimensions for `tensor`.
+///
/// - Parameters:
-/// - tensor: Input tensor to be dequantized.
-/// - LUTTensor: The lookup table to use - for u4 the second to last dimension should have 16 elements, and for u8 256 elements.
-/// - axis: Axis on which the scale 1D value is being broadcasted.
+/// - tensor: Input constant integer-index tensor to be dequantized.
+/// - LUTTensor: The lookup table to use.
+/// - axis: The result axis into which the LUT vector values are written.
/// - name: The name for the operation.
/// - Returns: A valid ``MPSGraphTensor`` object.
 -(MPSGraphTensor *) dequantizeTensor:(MPSGraphTensor *) tensor