From ccfedd3cada8b7bdab0478bb70da7e4ff272b19f Mon Sep 17 00:00:00 2001 From: jethroqti Date: Tue, 16 Jun 2026 01:42:00 -0700 Subject: [PATCH] Qualcomm AI Engine Direct - raise ValueError for 2-bit per-tensor encoding Summary: raise ValueError for 2-bit per-tensor encoding in node_visitor and validators Test plan: python backends/qualcomm/tests/test_qnn_delegate.py TestQNNQuantizedOperator.test_qnn_backend_16a2w_conv2d -b build-android -H ${HOST} -s ${SN} -m SM8850 python backends/qualcomm/tests/test_qnn_delegate.py TestQNNQuantizedOperator.test_qnn_backend_16a2w_linear -b build-android -H ${HOST} -s ${SN} -m SM8850 --- backends/qualcomm/builders/node_visitor.py | 19 ++++++++-------- backends/qualcomm/quantizer/validators.py | 25 +++++++++++++--------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/backends/qualcomm/builders/node_visitor.py b/backends/qualcomm/builders/node_visitor.py index 983a077ad18..60785901c8d 100644 --- a/backends/qualcomm/builders/node_visitor.py +++ b/backends/qualcomm/builders/node_visitor.py @@ -293,16 +293,15 @@ def make_qnn_per_tensor_config(self, quant_attrs: Dict): } # check Qnn_ScaleOffset_t in QNN/include/QnnTypes.h quant_config[QCOM_OFFSET] = -quant_attrs[QCOM_ZERO_POINT] - range_ = quant_config[QCOM_QUANT_MAX] - quant_config[QCOM_QUANT_MIN] - assert range_ > 3, ( - f"2-bit quantization (range={range_}) does not support per-tensor encoding. " - "Use per-channel quantization instead." - ) - # special case for 4 bits - if ( - quant_config[QCOM_DTYPE] == torch.int8 - and quant_config[QCOM_QUANT_MAX] - quant_config[QCOM_QUANT_MIN] <= 15 - ): + # special case for 4-bit / 2-bit integer weights. + quant_range = quant_config[QCOM_QUANT_MAX] - quant_config[QCOM_QUANT_MIN] + if quant_config[QCOM_DTYPE] == torch.int8 and quant_range <= 15: + if quant_range <= 3: + raise ValueError( + f"2-bit quantization (range={quant_range}) " + "does not support per-tensor encoding. Use per-channel quantization instead." + ) + # special case for 4 bits quant_config[QCOM_BITWIDTH] = 4 return ( PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET, diff --git a/backends/qualcomm/quantizer/validators.py b/backends/qualcomm/quantizer/validators.py index e68861bef8e..866e8af89c4 100644 --- a/backends/qualcomm/quantizer/validators.py +++ b/backends/qualcomm/quantizer/validators.py @@ -283,12 +283,21 @@ def _qspec_port_encoding_type(node: Node, qspec: QuantizationSpecBase): qscheme = qspec.qscheme if qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]: - range_ = qspec.quant_max - qspec.quant_min - assert range_ > 3, ( - f"2-bit quantization (range={range_}) does not support per-tensor encoding. " - "Use per-channel quantization instead." - ) - if qspec.dtype == torch.int8 and range_ <= 15: + # quant_max/quant_min are None for non-integer activations (e.g. uint16 in + # 16a2w) whose range is not expressed as a fixed integer bound; skip the + # 4-bit BW_SCALE_OFFSET special-casing for those tensors. + if ( + qspec.dtype == torch.int8 + and qspec.quant_max is not None + and qspec.quant_min is not None + and (quant_range := qspec.quant_max - qspec.quant_min) <= 15 + ): + if quant_range <= 3: + raise ValueError( + f"2-bit quantization (range={quant_range}) " + "does not support per-tensor encoding. " + "Use per-channel quantization instead." + ) encoding_type = ( PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET ) @@ -303,10 +312,6 @@ def _qspec_port_encoding_type(node: Node, qspec: QuantizationSpecBase): encoding_type = ( PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION ) - elif qspec.dtype == torch.int8 and qspec.quant_max - qspec.quant_min <= 3: - encoding_type = ( - PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET - ) elif qspec.dtype == torch.int8 and qspec.quant_max - qspec.quant_min <= 15: encoding_type = ( PyQnnManager.Qnn_QuantizationEncoding_t.QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET