Merge branch 'main' into add-cosh-decomposition

Sebastian-Larsson · web-flow · commit d836239e9e35 · 2025-08-07T18:31:26.000+02:00
diff --git a/.gitmodules b/.gitmodules
@@ -1,6 +1,9 @@
 [submodule "backends/arm/third-party/ethos-u-core-driver"]
 	path = backends/arm/third-party/ethos-u-core-driver
 	url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
+[submodule "backends/arm/third-party/serialization_lib"]
+	path = backends/arm/third-party/serialization_lib
+	url = https://git.gitlab.arm.com/tosa/tosa-serialization.git
 [submodule "backends/vulkan/third-party/Vulkan-Headers"]
 	path = backends/vulkan/third-party/Vulkan-Headers
 	url = https://github.com/KhronosGroup/Vulkan-Headers
diff --git a/backends/arm/_passes/decompose_grouped_conv.py b/backends/arm/_passes/decompose_grouped_conv.py
@@ -6,7 +6,7 @@
 from copy import copy
 
 import torch
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
@@ -15,9 +15,9 @@
     get_param_tensor,
     is_param_node,
 )
-from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
diff --git a/backends/arm/_passes/fuse_quantized_activation_pass.py b/backends/arm/_passes/fuse_quantized_activation_pass.py
@@ -6,8 +6,8 @@
 # pyre-unsafe
 
 import torch
+from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.backends.arm.constants import Q_OPS
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import Node
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
@@ -9,8 +9,8 @@
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.backends.arm.constants import DQ_OPS, Q_OPS
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch import Tensor
 from torch.fx import GraphModule, Node
diff --git a/backends/arm/_passes/insert_table_ops.py b/backends/arm/_passes/insert_table_ops.py
@@ -10,7 +10,7 @@
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import QuantArgs
+from executorch.backends.arm._passes.quant_args import QuantArgs
 from executorch.exir import ExportedProgram
 
 from executorch.exir.dialects._ops import ops as exir_ops
diff --git a/backends/arm/_passes/quant_args.py b/backends/arm/_passes/quant_args.py
@@ -0,0 +1,125 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Any, cast, NamedTuple
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+
+exir_ops = cast(Any, exir_ops)
+from executorch.backends.arm.constants import PER_CHANNEL_QDQ_OPS, PER_TENSOR_QDQ_OPS
+from torch import Tensor
+
+
+class QuantArgs(NamedTuple):
+    scale: list[float] | float
+    zp: list[int] | int
+    qmin: int
+    qmax: int
+    dtype: torch.dtype
+    axis: int = 0
+    per_channel: bool = False
+
+    def quantize_value(self, x: torch.Tensor | float) -> Tensor:
+        """Quantizes the input tensor or value to a quantized tensor. If the input is
+        not a tensor, it is converted to a tensor first. If self.per_channel is True,
+        the quantization is done per channel, otherwise it is done per tensor.
+        """
+        if not isinstance(x, torch.Tensor):
+            x = torch.Tensor([x])
+        x = x.to(torch.float32)
+        if self.per_channel:
+            q_op = exir_ops.edge.quantized_decomposed.quantize_per_channel.default
+            args = (
+                x,
+                torch.tensor(self.scale),
+                torch.tensor(self.zp),
+                self.axis,
+                self.qmin,
+                self.qmax,
+                self.dtype,
+            )
+        else:
+            q_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+            args = (x, self.scale, self.zp, self.qmin, self.qmax, self.dtype)  # type: ignore[assignment]
+        return q_op(*args)
+
+    def dequantize_value(self, qx: torch.Tensor) -> torch.Tensor:
+        """Dequantizes the quantized input tensor qx to a dequantized tensor. Unlike
+        quantize_value, qx is passed to the op as given. If self.per_channel is True,
+        the dequantization is done per channel, otherwise it is done per tensor.
+        """
+        if self.per_channel:
+            dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
+            args = (
+                qx,
+                torch.tensor(self.scale),
+                torch.tensor(self.zp),
+                self.axis,
+                self.qmin,
+                self.qmax,
+                self.dtype,
+            )
+        else:
+            dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+            args = (qx, self.scale, self.zp, self.qmin, self.qmax, self.dtype)  # type: ignore[assignment]
+        return dq_op(*args)
+
+    @classmethod
+    def from_operator(cls, op, args):
+        if op in PER_TENSOR_QDQ_OPS:
+            return cls(
+                scale=cast(float, args[1]),
+                zp=cast(int, args[2]),
+                qmin=cast(int, args[3]),
+                qmax=cast(int, args[4]),
+                dtype=cast(torch.dtype, args[5]),
+                axis=0,
+                per_channel=False,
+            )
+        elif op in PER_CHANNEL_QDQ_OPS:
+            return cls(
+                scale=cast(list[float], args[1].tolist()),
+                zp=cast(list[int], args[2].tolist()),
+                axis=cast(int, args[3]),
+                qmin=cast(int, args[4]),
+                qmax=cast(int, args[5]),
+                dtype=cast(torch.dtype, args[6]),
+                per_channel=True,
+            )
+        else:
+            # We're only handling per tensor and per channel quantization
+            raise NotImplementedError(f"Unsupported quantization operation: {op}")
+
+    def get_scale_per_tensor(self) -> float:
+        if not isinstance(self.scale, float):
+            raise TypeError(
+                f"Expected scale {self.scale} to be a float but found scale of "
+                f"type {type(self.scale)}"
+            )
+        return self.scale
+
+    def get_zp_per_tensor(self) -> int:
+        if not isinstance(self.zp, int):
+            raise TypeError(
+                f"Expected zero point {self.zp} to be an int but found zp of "
+                f"type {type(self.zp)}"
+            )
+        return self.zp
+
+    def get_scale_per_channel(self) -> list[float]:
+        if not isinstance(self.scale, list):
+            raise TypeError(
+                f"Expected scale {self.scale} to be a list but found scale of "
+                f"type {type(self.scale)}"
+            )
+        return self.scale
+
+    def get_zp_per_channel(self) -> list[int]:
+        if not isinstance(self.zp, list):
+            raise TypeError(
+                f"Expected zero point {self.zp} to be a list but found zp of "
+                f"type {type(self.zp)}"
+            )
+        return self.zp
diff --git a/backends/arm/operator_support/ethos_u55_support.py b/backends/arm/operator_support/ethos_u55_support.py
@@ -149,6 +149,8 @@ class EthosU55NotSupported(OperatorSupportBase):
         exir_ops.edge.aten.ne.Scalar,
         exir_ops.edge.aten.flip.default,  # REVERSE
         exir_ops.edge.aten.grid_sampler_2d,  # GATHER
+        exir_ops.edge.aten.index.Tensor,  # GATHER
+        exir_ops.edge.aten.index_select.default,  # GATHER
         exir_ops.edge.aten.scatter.src,
         exir_ops.edge.aten.scatter.value,
         exir_ops.edge.aten.select_scatter.default,
diff --git a/backends/arm/test/ops/test_index_select.py b/backends/arm/test/ops/test_index_select.py
@@ -12,6 +12,7 @@
 
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
+    OpNotSupportedPipeline,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
@@ -120,6 +121,20 @@ def test_index_select_tosa_INT_rand(test_data: input_params):
     pipeline.run()
 
 
+@pytest.mark.parametrize("test_data", list(test_data.values())[-1:])
+def test_index_select_u55_INT_not_delegated(test_data: input_params):
+    op, test_input = test_data
+
+    pipeline = OpNotSupportedPipeline[input_params](
+        op,
+        test_input,
+        {op.exir_op: 1},
+        quantize=True,
+        u55_subset=True,
+    )
+    pipeline.run()
+
+
 @pytest.mark.parametrize("test_data", list(test_data.values()))
 @common.SkipIfNoModelConverter
 def test_index_select_vgf_FP(test_data: input_params):
diff --git a/backends/arm/test/ops/test_index_tensor.py b/backends/arm/test/ops/test_index_tensor.py
@@ -10,6 +10,7 @@
 import torch
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
+    OpNotSupportedPipeline,
     TosaPipelineFP,
     TosaPipelineINT,
 )
@@ -460,3 +461,18 @@ def test_index_tensor_tosa_INT_none(test_data: input_params):
                 IndexTensorTestCommon.exir_op,
             ).run()
         )
+
+
+@common.parametrize("test_data", IndexTensor.test_data)
+@common.XfailIfNoCorstone300
+def test_index_tensor_u55_INT_not_delegated(test_data: input_params):
+    """Check that quantized index.Tensor is not delegated for the Ethos-U55 subset."""
+    test_input = test_data
+    with torch.no_grad():
+        OpNotSupportedPipeline[input_params](
+            IndexTensor(),
+            test_input,
+            {IndexTensorTestCommon.exir_op: 1},
+            quantize=True,
+            u55_subset=True,
+        ).run()
diff --git a/backends/arm/third-party/serialization_lib b/backends/arm/third-party/serialization_lib
@@ -0,0 +1 @@
+Subproject commit 187af0d41fe75d08d2a7ec84c1b4d24b9b641ed2
diff --git a/backends/arm/tosa_quant_utils.py b/backends/arm/tosa_quant_utils.py
@@ -9,17 +9,14 @@
 
 import math
 
-from typing import Any, cast, NamedTuple, Tuple
+from typing import Any, Tuple
 
 import executorch.backends.arm.tosa_specification as tosa_specification
 
 import torch.fx
 import torch.fx.node
-from executorch.backends.arm.constants import PER_CHANNEL_QDQ_OPS, PER_TENSOR_QDQ_OPS
 
 from executorch.backends.arm.tosa_mapping import TosaArg
-from executorch.exir.dialects._ops import ops as exir_ops
-from torch import Tensor
 from torch.fx import Node
 from tosa.RoundingMode import RoundingMode  # type: ignore
 
@@ -109,122 +106,6 @@ def insert_rescale_op_to_int8(
     )
 
 
-class QuantArgs(NamedTuple):
-    scale: list[float] | float
-    zp: list[int] | int
-    qmin: int
-    qmax: int
-    dtype: torch.dtype
-    axis: int = 0
-    per_channel: bool = False
-
-    def quantize_value(self, x: torch.Tensor | float) -> Tensor:
-        """Quantizes the input tensor or value to a quantized tensor. If the input is
-        not a tensor, it is converted to a tensor first. If self.per_channel is True,
-        the quantization is done per channel, otherwise it is done per tensor.
-        """
-        if not isinstance(x, torch.Tensor):
-            x = torch.Tensor([x])
-        x = x.to(torch.float32)
-        if self.per_channel:
-            q_op = exir_ops.edge.quantized_decomposed.quantize_per_channel.default
-            args = (
-                x,
-                torch.tensor(self.scale),
-                torch.tensor(self.zp),
-                self.axis,
-                self.qmin,
-                self.qmax,
-                self.dtype,
-            )
-        else:
-            q_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
-            args = (x, self.scale, self.zp, self.qmin, self.qmax, self.dtype)  # type: ignore[assignment]
-
-        return q_op(*args)
-
-    def dequantize_value(self, qx: torch.Tensor) -> torch.Tensor:
-        """Dequantizes the input tensor or value to a dequantized tensor  If the input
-        is not a tensor, it is converted to a tensor first. If self.per_channel is True,
-        the dequantization is done per channel, otherwise it is done per tensor.
-        """
-        if self.per_channel:
-            dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
-            args = (
-                qx,
-                torch.tensor(self.scale),
-                torch.tensor(self.zp),
-                self.axis,
-                self.qmin,
-                self.qmax,
-                self.dtype,
-            )
-        else:
-            dq_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
-            args = (qx, self.scale, self.zp, self.qmin, self.qmax, self.dtype)  # type: ignore[assignment]
-
-        return dq_op(*args)
-
-    @classmethod
-    def from_operator(cls, op, args):
-        if op in PER_TENSOR_QDQ_OPS:
-            return cls(
-                scale=cast(float, args[1]),
-                zp=cast(int, args[2]),
-                qmin=cast(int, args[3]),
-                qmax=cast(int, args[4]),
-                dtype=cast(torch.dtype, args[5]),
-                axis=0,
-                per_channel=False,
-            )
-        elif op in PER_CHANNEL_QDQ_OPS:
-            return cls(
-                scale=cast(list[float], args[1].tolist()),
-                zp=cast(list[int], args[2].tolist()),
-                axis=cast(int, args[3]),
-                qmin=cast(int, args[4]),
-                qmax=cast(int, args[5]),
-                dtype=cast(torch.dtype, args[6]),
-                per_channel=True,
-            )
-
-        else:
-            # We're only handling per tensor and per channel quantization
-            raise NotImplementedError(f"Unsupported quantization operation: {op}")
-
-    def get_scale_per_tensor(self) -> float:
-        if not isinstance(self.scale, float):
-            raise TypeError(
-                f"Expected scale {self.scale} to be a float but found scale of "
-                f"type {type(self.scale)}"
-            )
-        return self.scale
-
-    def get_zp_per_tensor(self) -> int:
-        if not isinstance(self.zp, int):
-            raise TypeError(
-                f"Expected zero point {self.zp} to be an int but found zp of "
-                f"type {type(self.zp)}"
-            )
-        return self.zp
-
-    def get_scale_per_channel(self) -> list[float]:
-        if not isinstance(self.scale, list):
-            raise TypeError(
-                f"Expected scale {self.scale} to be a list but found scale of "
-                f"type {type(self.scale)}"
-            )
-        return self.scale
-
-    def get_zp_per_channel(self) -> list[int]:
-        if not isinstance(self.zp, list):
-            raise TypeError(
-                f"Expected zero point {self.zp} to be a list but found zp of "
-                f"type {type(self.zp)}"
-            )
-        return self.zp
-
-
 # TOSA uses the RESCALE operation to scale between values with differing precision.
 # The RESCALE operator is defined using an integer multiply, add, and shift.
 # This utility function is for calculating the multier and shift given a scale.
diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt
diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp
diff --git a/examples/arm/executor_runner/arm_perf_monitor.cpp b/examples/arm/executor_runner/arm_perf_monitor.cpp
diff --git a/examples/arm/executor_runner/arm_perf_monitor.h b/examples/arm/executor_runner/arm_perf_monitor.h