From 0728a4790e00b7c02117b55737cf0dbaef7fadb3 Mon Sep 17 00:00:00 2001
From: Tom Allsop
Date: Fri, 26 Jun 2026 11:16:26 +0100
Subject: [PATCH] Arm backend: Add Qwen3 VL language model MXFP8 test

* Add an E2E TOSA test for the Qwen3 VL language model with linear layers
  converted to MXFP8.

Signed-off-by: Baris Demir
Signed-off-by: Tom Allsop
Co-authored-by: Baris Demir
---
NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. Added lines are re-indented per Python
syntax; context-line indentation is assumed - confirm against the target
file (or apply with `git apply --ignore-whitespace`) before merging.

 .../models/Qwen3_VL/test_qwen3_vl_model.py    | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py b/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py
index ae4ba2dcbd0..b898ca62cf5 100644
--- a/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py
+++ b/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py
@@ -11,10 +11,12 @@
 import pytest
 import torch
 import torch.nn.functional as F
+from executorch.backends.arm.ao_ext import MXFPOpConfig
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.models.Qwen3_VL.qwen3_vl_test_config import (
     get_qwen3_vl_2b_instruct_checkpoint_config,
 )
+from executorch.backends.arm.test.ops.mxfp.common import MXFPTosaPipelineFP
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     VgfPipeline,
@@ -25,6 +27,7 @@
 )
 
 input_t = Tuple[torch.Tensor, ...]
+aten_op_mxfp_linear = "torch.ops.tosa_mxfp.linear.default"
 
 
 def _make_qwen3_vl_2b_instruct_layer_config():
@@ -104,6 +107,10 @@ def _to_bfloat16_model_and_floating_inputs(
     )
 
 
+def _is_linear(module: torch.nn.Module, _fqn: str) -> bool:
+    return isinstance(module, torch.nn.Linear)
+
+
 class TextModelWrapper(Qwen3VLModelTestModule):
     def __init__(self, config) -> None:
         super().__init__()
@@ -300,3 +307,35 @@ def test_qwen3_vl_full_models_vgf_no_quant_bf16(test_case: Qwen3VLModelTestCase)
             tosa_spec="TOSA-1.0+FP+bf16",
         )
         pipeline.run()
+
+
+@pytest.mark.slow
+def test_qwen3_vl_text_model_tosa_mxfp8_bf16():
+    # The Qwen 3 VL FP8 model only quantizes the TextModel
+    model, inputs = TextModelWrapper.prepare_model_and_inputs()
+    model, inputs = _to_bfloat16_model_and_floating_inputs(model, inputs)
+    mxfp_config = MXFPOpConfig(weight_dtype=torch.float8_e4m3fn)
+    with torch.no_grad():
+        pipeline = MXFPTosaPipelineFP[input_t](
+            model,
+            inputs,
+            aten_op=aten_op_mxfp_linear,
+            exir_op=[],
+            filter_fn=_is_linear,
+            frobenius_threshold=0.1,
+            cosine_threshold=0.98,
+            mxfp_config=mxfp_config,
+            tosa_version="1.1",
+            tosa_extensions=["bf16", "mxfp"],
+        )
+        # Check all linear layers are converted to MXFP
+        linear_count = sum(
+            _is_linear(submodule, name) for name, submodule in model.named_modules()
+        )
+        pipeline.add_stage_after(
+            "export",
+            pipeline.tester.check_count,
+            {aten_op_mxfp_linear: linear_count},
+            suffix="mxfp_linear",
+        )
+        pipeline.run()