diff --git a/CHANGELOG.md b/CHANGELOG.md
index b79ce714b9..67fdce7e29 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Fix for python error when using python 3.12.11 [#189]( https://github.com/pulp-platform/Deeploy/pull/189)
 - Add support for Operators for Generic target needed in MAGIA [#193]( https://github.com/pulp-platform/Deeploy/pull/193)
 - Fix GAP9 L3 Board Tests: readfs Flash Ordering and Duplicate Input Data [#196](https://github.com/pulp-platform/Deeploy/pull/196)
+- Add support for Operators for Generic target needed in MAGIA (again) [#195]( https://github.com/pulp-platform/Deeploy/pull/195)
 
 ### Added
 - Add many missing docstrings
@@ -29,6 +30,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Added GAP9 Platform Support: Deployer, Bindings, Templates, Tiler, DMA (L3Dma/MchanDma), target library, CI workflows
 - Per-layer microbenchmarking on PULPOpen via `--profileMicrobenchmark`: new `PULPMicrobenchmark` code-transformation pass + `perf_utils.h` helpers report cycles, instructions, stalls and cache misses per layer in `RunNetwork`
 - Add support for the Generic target for the following operators [Ceil](https://onnx.ai/onnx/operators/onnx__Ceil.html), [Floor](https://onnx.ai/onnx/operators/onnx__Floor.html), [Clip](https://onnx.ai/onnx/operators/onnx__Clip.html), [Sub](https://onnx.ai/onnx/operators/onnx__Sub.html), [Exp](https://onnx.ai/onnx/operators/onnx__Exp.html), [Sigmoid](https://onnx.ai/onnx/operators/onnx__Sigmoid.html), [Swish](https://onnx.ai/onnx/operators/onnx__Swish.html), [HardSigmoid](https://onnx.ai/onnx/operators/onnx__HardSigmoid.html), [HardSwish](https://onnx.ai/onnx/operators/onnx__HardSwish.html), [InstanceNormalization](https://onnx.ai/onnx/operators/onnx__InstanceNormalization.html), [GroupNormalization](https://onnx.ai/onnx/operators/onnx__GroupNormalization.html), [AveragePool](https://onnx.ai/onnx/operators/onnx__AveragePool.html), [GlobalAveragePool](https://onnx.ai/onnx/operators/onnx__GlobalAveragePool.html), [GlobalMaxPool](https://onnx.ai/onnx/operators/onnx__GlobalMaxPool.html).
+- Add support for the Generic target for the following operators: [Elu](https://onnx.ai/onnx/operators/onnx__Elu.html), [LeakyRelu](https://onnx.ai/onnx/operators/onnx__LeakyRelu.html), [Selu](https://onnx.ai/onnx/operators/onnx__Selu.html), [Scatter](https://onnx.ai/onnx/operators/onnx__Scatter.html), [ScatterElements](https://onnx.ai/onnx/operators/onnx__ScatterElements.html), [Col2Im](https://onnx.ai/onnx/operators/onnx__Col2Im.html), [Resize](https://onnx.ai/onnx/operators/onnx__Resize.html)
 
 ### Changed
 - Use by default `devel` container for GAP9 CI
@@ -44,6 +46,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Aligned CLI commands across the project
 - Added @runwangdl as a code owner
 - Skip emitting duplicate `testInputVector` data for inputs placed in L3 (loaded at runtime from the readfs hex instead), reducing test binary size
+- Allow ONNX operators with absent optional inputs (represented as empty input names).
 
 ### Fixed
 - Add missing `shell: bash` directive to CI cache generation steps to ensure correct shell execution
@@ -57,6 +60,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Reduce RunNetwork stack usage by scoping per-layer variables with braces and moving tileIdxPtr allocation into per-layer execution blocks
 - Fix invalid escape sequence python error in DeeployTypes.py: appearing when using pytest to launch regressions
 - Fix GAP9 board tests with `--defaultMemLevel L3` reading garbage inputs: place all gapy `--flash-property` options before the positional subcommand and use `image flash run` so the readfs partition (input hex files) is flashed to the device
+- Fix `ConvTranspose` layer: output buffer shape computation.
 
 ### Removed
 - `testDMA.py` was an old test; we now have `test_dmas.py` instead.
diff --git a/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py b/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py
index c70628729b..46a4896c12 100644
--- a/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py
+++ b/Deeploy/CommonExtensions/TypeCheckers/SignPropTypeChecker.py
@@ -39,8 +39,8 @@ def typeInferOutput(self, ctxt: NetworkContext, node: gs.Node,
                         operatorRepresentation: OperatorRepresentation) -> NetworkContext:
         ctxt = super().typeInferOutput(ctxt, node, operatorRepresentation)
 
-        inputs = [ctxt.lookup(inputNode.name) for inputNode in node.inputs]
-        outputs = [ctxt.lookup(outputNode.name) for outputNode in node.outputs]
+        inputs = [ctxt.lookup(inputNode.name) for inputNode in node.inputs if inputNode.name]
+        outputs = [ctxt.lookup(outputNode.name) for outputNode in node.outputs if outputNode.name]
 
         signProp = all([hasattr(_input, "_signed") and hasattr(_input, "nLevels") for _input in inputs])
 
diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py
index 44abe85112..ea9aaff67f 100644
--- a/Deeploy/DeeployTypes.py
+++ b/Deeploy/DeeployTypes.py
@@ -1110,6 +1110,10 @@ def parseInputs(cls, ctxt: NetworkContext, node: gs.Node) -> NetworkContext:
         for inputNode in node.inputs:
             data_in = inputNode.name
 
+            # Skip absent optional inputs (ONNX represents them as empty-name Variables)
+            if not data_in:
+                continue
+
             # Hoist constant inputs
             if type(inputNode) == gs.ir.tensor.Constant and not ctxt.is_global(data_in):
                 ctxt.hoistConstant(inputNode)
@@ -1277,7 +1281,7 @@ def typeInferOutput(self, ctxt: NetworkContext, node: gs.Node,
         """
         newCtxt = ctxt.copy()
 
-        inputs = [ctxt.lookup(inputNode.name) for inputNode in node.inputs]
+        inputs = [ctxt.lookup(inputNode.name) for inputNode in node.inputs if inputNode.name]
         outputNames = [node.name for node in node.outputs]
 
         outputTypes = self.output_types
@@ -1348,7 +1352,7 @@ def annotateDict(self, ctxt: NetworkContext, node: gs.Node, operatorRepresentati
             The NodeParser's operatorRepresentation
 
         """
-        env = [node.name for node in node.inputs + node.outputs]
+        env = [node.name for node in node.inputs + node.outputs if node.name]
         for key, value in operatorRepresentation.items():
             # check if the referenced buffer is in the environment
             if isinstance(value, str) and value in env:
@@ -1903,7 +1907,9 @@ def broadcast(self, ctxt: NetworkContext, default_channels_first: bool = True) -
             broadcast to the target shape
 
         """
-        inputShapes = [ctxt.lookup(node.name).shape for node in self.node.inputs]
+        # Absent optional inputs are represented in ONNX as empty-name Variables; skip them.
+        validInputNodes = [node for node in self.node.inputs if node.name]
+        inputShapes = [ctxt.lookup(node.name).shape for node in validInputNodes]
         outputShapes = [ctxt.lookup(node.name).shape for node in self.node.outputs]
 
         if not "channels_first" in self.mapper.parser.operatorRepresentation:
@@ -1914,7 +1920,7 @@ def broadcast(self, ctxt: NetworkContext, default_channels_first: bool = True) -
         newInputShapes, newOutputShapes = self.computeShapes(inputShapes, outputShapes,
                                                              self.mapper.parser.operatorRepresentation, channels_first)
 
-        for node, newShape in zip(self.node.inputs + self.node.outputs, newInputShapes + newOutputShapes):
+        for node, newShape in zip(validInputNodes + self.node.outputs, newInputShapes + newOutputShapes):
             if ctxt.is_local(node.name):
                 ctxt.localObjects[node.name].shape = newShape
                 # Update shape of tensors in onnx graph
@@ -2103,7 +2109,7 @@ def bind(self, ctxt: NetworkContext) -> Tuple[NetworkContext, bool]:
                     npType = self._broadcastToNpType(ctxt.localObjects[node.name]._type)
                     if npType is not None:
                         node.dtype = npType
-                elif ctxt.is_global(node.name):
+                elif ctxt.is_global(node.name) and hasattr(ctxt.globalObjects[node.name], '_type'):
                     npType = self._broadcastToNpType(ctxt.globalObjects[node.name]._type)
                     if isinstance(ctxt.globalObjects[node.name], ConstantBuffer):
                         if isinstance(node, gs.Constant):
@@ -2954,6 +2960,8 @@ def generateBufferInitializationCode(self) -> str:
         callStack = ''
         for node in ctxt.globalObjects.values():
             if isinstance(node, VariableBuffer) and not isinstance(node, StructBuffer):
+                if not hasattr(node, '_type'):
+                    continue
                 assert issubclass(node._type, Pointer), f"Global VariableBuffer {node.name} is not a Pointer!"
                 if node._deploy:
                     name = node.name
@@ -2999,6 +3007,8 @@ def generateBufferAllocationCode(self) -> str:
 
         for node in ctxt.globalObjects.values():
             if isinstance(node, VariableBuffer) and not isinstance(node, StructBuffer):
+                if not hasattr(node, '_type'):
+                    continue
                 assert issubclass(node._type, Pointer), f"Global VariableBuffer {node.name} is not a Pointer!"
                 if node._deploy:
                     name = node.name
@@ -3535,6 +3545,8 @@ def _printMemorySummary(self):
                 # We do not count structs for now, since they are not properly modeled
                 if isinstance(_buffer, ConstantBuffer) or (isinstance(_buffer, VariableBuffer) and _buffer._deploy):
                     # SCHEREMO: We only
+                    if not hasattr(_buffer, '_type'):
+                        continue
                     if (hasattr(_buffer, "_memoryLevel") and _buffer._memoryLevel == level) or level == "None":
                         staticSize += int((np.prod(_buffer.shape) * _buffer._type.referencedType.typeWidth // 8))
                     else:
diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py
index 21cf01e52a..f5483bf669 100644
--- a/Deeploy/Targets/Generic/Bindings.py
+++ b/Deeploy/Targets/Generic/Bindings.py
@@ -11,23 +11,24 @@
     int8_t, int32_t, uint8_t
 from Deeploy.DeeployTypes import CodeTransformation, NodeBinding
 from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
-from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \
-    ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, \
-    FloatAveragePoolTemplate, FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, FloatDivTemplate, \
-    FloatDWConvTemplate, FloatExpTemplate, FloatFloorTemplate, FloatGELUTemplate, FloatGemmTemplate, \
-    FloatGlobalAveragePoolTemplate, FloatGlobalMaxPoolTemplate, FloatGroupNormTemplate, FloatHardSigmoidTemplate, \
-    FloatHardSwishTemplate, FloatInstanceNormTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \
-    FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatPowTemplate, FloatReduceMeanTemplate, \
-    FloatReluTemplate, FloatSigmoidTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, \
-    FloatSwishTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, \
-    MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \
-    RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, SubTemplate, \
+from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, Col2ImTemplate, ConcatTemplate, \
+    ConvTemplate, ConvTransposeTemplate, DebugPrintTemplate, DequantTemplate, DummyTemplate, DWConvTemplate, \
+    FloatAddTemplate, FloatAveragePoolTemplate, FloatCeilTemplate, FloatClipTemplate, FloatConvTemplate, \
+    FloatDivTemplate, FloatDWConvTemplate, FloatEluTemplate, FloatExpTemplate, FloatFloorTemplate, FloatGELUTemplate, \
+    FloatGemmTemplate, FloatGlobalAveragePoolTemplate, FloatGlobalMaxPoolTemplate, FloatGroupNormTemplate, \
+    FloatHardSigmoidTemplate, FloatHardSwishTemplate, FloatInstanceNormTemplate, FloatLayernormTemplate, \
+    FloatLeakyReluTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, \
+    FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSeluTemplate, FloatSigmoidTemplate, \
+    FloatSoftmaxTemplate, FloatSqrtTemplate, FloatSubTemplate, FloatSwishTemplate, GatherTemplate, GemmTemplate, \
+    IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \
+    PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, \
+    ResizeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, ScatterTemplate, SliceTemplate, SubTemplate, \
     TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
 from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \
     DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \
-    LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \
-    ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, \
-    SoftmaxChecker, TransposeChecker
+    LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, PassThroughTypeChecker, QuantChecker, \
+    ReduceMeanChecker, ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, \
+    SliceChecker, SoftmaxChecker, TransposeChecker
 
 BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])
 
@@ -326,19 +327,35 @@
     for type in FloatDataTypes
 ]
 
-BasicConvTransposeBindings = [
+BasicConvTranspose1DBindings = [
     NodeBinding(
         ConvChecker(
             [PointerClass(type), PointerClass(type), PointerClass(type)],  # input, weight, bias
             [PointerClass(type)]),
-        ConvTransposeTemplate.referenceTemplate,
+        ConvTransposeTemplate.referenceTemplate1D,
         BasicTransformer) for type in FloatDataTypes
 ] + [
     NodeBinding(
         ConvChecker(
             [PointerClass(type), PointerClass(type)],  # input, weight
             [PointerClass(type)]),
-        ConvTransposeTemplate.referenceTemplate,
+        ConvTransposeTemplate.referenceTemplate1D,
+        BasicTransformer) for type in FloatDataTypes
+]
+
+BasicConvTranspose2DBindings = [
+    NodeBinding(
+        ConvChecker(
+            [PointerClass(type), PointerClass(type), PointerClass(type)],  # input, weight, bias
+            [PointerClass(type)]),
+        ConvTransposeTemplate.referenceTemplate2D,
+        BasicTransformer) for type in FloatDataTypes
+] + [
+    NodeBinding(
+        ConvChecker(
+            [PointerClass(type), PointerClass(type)],  # input, weight
+            [PointerClass(type)]),
+        ConvTransposeTemplate.referenceTemplate2D,
         BasicTransformer) for type in FloatDataTypes
 ]
 
@@ -385,6 +402,21 @@
                 FloatHardSwishTemplate.referenceTemplate, BasicTransformer),
 ]
 
+BasicEluBindings = [
+    NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatEluTemplate.referenceTemplate,
+                BasicTransformer),
+]
+
+BasicSeluBindings = [
+    NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), FloatSeluTemplate.referenceTemplate,
+                BasicTransformer),
+]
+
+BasicLeakyReluBindings = [
+    NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
+                FloatLeakyReluTemplate.referenceTemplate, BasicTransformer),
+]
+
 BasicInstanceNormBindings = [
     NodeBinding(
         DummyChecker(
@@ -420,3 +452,22 @@
     NodeBinding(DummyChecker([PointerClass(float32_t)], [PointerClass(float32_t)]),
                 FloatGlobalMaxPoolTemplate.referenceTemplate, BasicTransformer)
 ]
+
+BasicCol2ImBindings = [
+    NodeBinding(
+        PassThroughTypeChecker([PointerClass(type), PointerClass(int32_t),
+                                PointerClass(int32_t)], [PointerClass(type)]), Col2ImTemplate.referenceTemplate,
+        BasicTransformer) for type in (int8_t, uint8_t, float32_t)
+]
+
+BasicScatterBindings = [
+    NodeBinding(
+        PassThroughTypeChecker(
+            [PointerClass(type), PointerClass(int32_t), PointerClass(type)], [PointerClass(type)]),
+        ScatterTemplate.referenceTemplate, BasicTransformer) for type in (int8_t, uint8_t, float32_t)
+]
+
+BasicResizeBindings = [
+    NodeBinding(PassThroughTypeChecker([PointerClass(type)], [PointerClass(type)]), ResizeTemplate.referenceTemplate,
+                BasicTransformer) for type in (int8_t, uint8_t, float32_t)
+]
diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py
index d0a1e1db3c..bb91244447 100644
--- a/Deeploy/Targets/Generic/Layers.py
+++ b/Deeploy/Targets/Generic/Layers.py
@@ -662,45 +662,46 @@ def __init__(self, maps: List[NodeMapper]):
 
     def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorRepresentation,
                       channels_first) -> Tuple[Shape, Shape]:
-        """
-        Infers output shapes for ConvTranspose using only static info.
-        - inputShapes[0]: input tensor shape (e.g., [N, C_in, W] for 1D, [N, C_in, H, W] for 2D)
-        - inputShapes[1]: weight tensor shape (e.g., [C_in, C_out // group, kW] for 1D)
-        - outputShapes[0]: output tensor shape (to be updated)
-        """
         newInputShapes = list(inputShapes)
-        newOutputShapes = list(outputShapes)
+
+        input_shape = inputShapes[0]  # [N, C_in, d0, ...]
+        weight_shape = inputShapes[1]  # [C_in, C_out//group, k0, ...]
         group = operatorRepresentation.get('group', 1)
-        weight_shape = inputShapes[1]
 
-        if newOutputShapes and len(newOutputShapes[0]) >= 2:
-            # For 1D: weight_shape = [C_in, C_out // group, kW]
-            # For 2D: weight_shape = [C_in, C_out // group, kH, kW]
-            ch_out = weight_shape[1] * group
-            if channels_first:
-                newOutputShapes[0][1] = ch_out
-            else:
-                newOutputShapes[0][-1] = ch_out
+        batch = input_shape[0]
+        spatial_in = list(input_shape[2:]) if channels_first else list(input_shape[1:-1])
+        ndim = len(spatial_in)
 
-        return newInputShapes, newOutputShapes
+        kernel_shape = list(weight_shape[2:])
+        C_out = weight_shape[1] * group
 
-    def computeOps(self):
-        opRep = self.mapper.parser.operatorRepresentation
+        strides = operatorRepresentation.get('strides') or [1] * ndim
+        dilations = operatorRepresentation.get('dilations') or [1] * ndim
+        output_padding = operatorRepresentation.get('output_padding') or [0] * ndim
+        pads = operatorRepresentation.get('pads') or [0] * (2 * ndim)
 
-        groups = opRep.get('group', 1)
-        kernel_shape = np.prod(opRep['kernel_shape'])  # es. [3, 3] -> 9
-        ch_in = opRep['ch_im_in']
-        ch_out = opRep['ch_im_out']
+        spatial_out = [(spatial_in[d] - 1) * strides[d] - pads[d] - pads[d + ndim] + dilations[d] *
+                       (kernel_shape[d] - 1) + output_padding[d] + 1 for d in range(ndim)]
 
-        opsPerPx = int(kernel_shape * ch_in * ch_out / groups) * 2
-
-        # ConvTranspose upscales spatial dims, quindi num pixel viene da output
-        if 'dim_im_out_y' in opRep:
-            numPx = opRep['dim_im_out_x'] * opRep['dim_im_out_y']
+        if channels_first:
+            output_shape = [batch, C_out] + spatial_out
         else:
-            numPx = opRep['dim_im_out_x']
+            output_shape = [batch] + spatial_out + [C_out]
 
-        return numPx * opsPerPx
+        return newInputShapes, [output_shape]
+
+    def computeOps(self):
+        rep = self.mapper.parser.operatorRepresentation
+
+        group = rep.get('group', 1)
+        kernel_shape = np.prod(rep['kernel_shape'])  # e.g. [3, 3] -> 9
+        channels = rep['channels']
+        feature_maps = rep['feature_maps']
+
+        ops_per_px = int(kernel_shape * feature_maps * channels // group) * 2
+        num_px = np.prod(rep['output_shape'])
+
+        return num_px * ops_per_px
 
 
 class CeilLayer(SingleOperationPerElementLayer):
@@ -750,6 +751,28 @@ def computeOps(self):
         return self.mapper.parser.operatorRepresentation['size'] * 5
 
 
+class EluLayer(ONNXLayer):
+
+    def computeOps(self):
+        # input > 0 -> y = x (just an assignment)
+        # input <=0 -> y = alpha * (expf(x) - 1): exp, add, mul
+        # consider the worst case, which is 3 ops
+        return self.mapper.parser.operatorRepresentation['size'] * 3
+
+
+class SeluLayer(ONNXLayer):
+
+    def computeOps(self):
+        # input > 0 -> y = gamma * x: mul
+        # input <=0 -> y = gamma * alpha * (expf(x) - 1): exp, add, 2 mul
+        # consider the worst case, which is 4 ops
+        return self.mapper.parser.operatorRepresentation['size'] * 4
+
+
+class LeakyReluLayer(SingleOperationPerElementLayer):
+    pass
+
+
 class InstanceNormLayer(ONNXLayer):
 
     def computeOps(self):
@@ -792,3 +815,42 @@ def computeOps(self):
         opRep = self.mapper.parser.operatorRepresentation
         # (spatial_size - 1) comparisons per output channel
         return int(opRep['batch_size'] * opRep['num_channels'] * (opRep['spatial_size'] - 1))
+
+
+class Col2ImLayer(ONNXLayer):
+
+    def computeOps(self):
+        # Col2Im iterates over every element of the input tensor and adds it
+        # into the corresponding output position. The total number of
+        # accumulations is exactly the number of input elements which is
+        # N × C × block_volume × L
+        rep = self.mapper.parser.operatorRepresentation
+        block_volume = int(np.prod(rep['block_shape']))
+        L = int(np.prod(rep['col_dims']))
+        return rep['batch_size'] * rep['channels'] * block_volume * L
+
+
+class ScatterLayer(ONNXLayer):
+
+    def computeOps(self):
+        opRep = self.mapper.parser.operatorRepresentation
+        if opRep.get('reduction', 'none') == 'none':
+            # no arithmetic operations
+            return 0
+        else:
+            # 1 op per index element
+            return int(np.prod(opRep['indices_shape']))
+
+
+class ResizeLayer(ONNXLayer):
+
+    def computeOps(self):
+        rep = self.mapper.parser.operatorRepresentation
+        size = rep['batch_size'] * rep['channels'] * int(np.prod(rep['output_shape']))
+        spatial_dims: int = rep['spatial_dims']
+        ops = 0  # default: Nearest-neighbour is a pure copy — no arithmetic operations.
+        if rep['mode'] == 'linear':  # 2^spatial_dims multiply-accumulates per output element.
+            ops = size * (1 << spatial_dims)
+        elif rep['mode'] == 'cubic':  # 4^spatial_dims multiply-accumulates per output element.
+            ops = size * (4**spatial_dims)
+        return ops
diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py
index aa8bd8724a..f4b8d5bae4 100644
--- a/Deeploy/Targets/Generic/Parsers.py
+++ b/Deeploy/Targets/Generic/Parsers.py
@@ -2743,127 +2743,190 @@ def __init__(self):
         super().__init__()
 
     def parseNode(self, node: gs.Node) -> bool:
+
+        if not all(
+            [node.op == 'ConvTranspose',
+             len(node.inputs) >= 2 and len(node.inputs) <= 3,
+             len(node.outputs) == 1]):
+            return False
+
         # Extract ONNX attributes with defaults
-        strides = node.attrs.get('strides', [1])
+        auto_pad: str = node.attrs.get('auto_pad', 'NOTSET')
+        group: int = node.attrs.get('group', 1)
 
-        pads = node.attrs.get('pads', [0, 0])
-        kernel_shape = node.attrs.get('kernel_shape', None)
-        dilations = node.attrs.get('dilations', [1])
-        group = node.attrs.get('group', 1)
+        if not all([
+                auto_pad in ('NOTSET', 'SAME_UPPER', 'SAME_LOWER', 'VALID'),
+                group >= 1,
+        ]):
+            return False
 
-        # Check for required attributes
-        wellFormed = (kernel_shape is not None and len(node.outputs) == 1)
-        if wellFormed:
-            self.operatorRepresentation['strides'] = strides
-            self.operatorRepresentation['pads'] = pads
-            self.operatorRepresentation['kernel_shape'] = kernel_shape
-            self.operatorRepresentation['dilations'] = dilations
-            self.operatorRepresentation['group'] = group
-            self.operatorRepresentation['nodeName'] = node.name
-            self.operatorRepresentation['nodeOp'] = node.op
-        return wellFormed
+        self.operatorRepresentation['auto_pad'] = auto_pad
+        self.operatorRepresentation['group'] = group
+
+        self.operatorRepresentation['dilations'] = node.attrs.get('dilations', None)  # default: ones
+        self.operatorRepresentation['kernel_shape'] = node.attrs.get('kernel_shape', None)  # default from weights
+        self.operatorRepresentation['output_padding'] = node.attrs.get('output_padding', None)  # default: zeros
+        self.operatorRepresentation['output_shape'] = node.attrs.get('output_shape', None)  # overwrite pads
+        self.operatorRepresentation['pads'] = node.attrs.get('pads', None)  # default: zeros
+        self.operatorRepresentation['strides'] = node.attrs.get('strides', None)  # default: ones
+
+        self.operatorRepresentation['nodeOp'] = node.op
+        self.operatorRepresentation['nodeName'] = node.name
+
+        return True
 
     def parseNodeCtxt(self, ctxt: NetworkContext, node: gs.Node, channels_first: bool = True):
-        # Register buffer names for codegen
-        self.operatorRepresentation['data_in'] = node.inputs[0].name
-        self.operatorRepresentation['weight'] = node.inputs[1].name
-        self.operatorRepresentation['data_out'] = node.outputs[0].name
+
+        rep = self.operatorRepresentation
+
+        # inputs/outputs
+        data_in: VariableBuffer = ctxt.lookup(node.inputs[0].name)
+        weight: ConstantBuffer = ctxt.lookup(node.inputs[1].name)
+        data_out: VariableBuffer = ctxt.lookup(node.outputs[0].name)
+
+        # check inputs/outputs
+        if not all([
+                len(data_in.shape) == len(data_out.shape) == len(weight.shape),  # same ndims
+                all(s > 0 for s in data_in.shape),  # no empty dim
+                all(s > 0 for s in weight.shape),  # no empty dim
+                all(s > 0 for s in data_out.shape),  # no empty dim
+                data_in.shape[0] == data_out.shape[0],  # same batch size
+                len(data_in.shape) > 2,  # at least batch, channels/feature_maps, one spatial dim
+        ]):
+            return ctxt, False
+
+        # retrieve info from inputs/outputs
+        batch_size, channels = data_in.shape[:2]
+        input_shape = data_in.shape[2:]  # spatial input shape
+        output_shape = data_out.shape[2:]  # spatial output shape
+
+        spatial_dims = len(input_shape)
+
+        kernel_shape = list(weight.shape[2:])
+        feature_maps = weight.shape[1] * rep['group']  # output channels (C_out)
+
+        # optional inputs
         if len(node.inputs) == 3:
-            self.operatorRepresentation['bias'] = node.inputs[2].name
-            self.operatorRepresentation['has_bias'] = "true"
-        else:
-            self.operatorRepresentation['has_bias'] = "false"
-        # Get output shape from context
-        data_out = ctxt.lookup(node.outputs[0].name)
-        out_shape = data_out.shape
-        if len(out_shape) == 3:
-            self.operatorRepresentation['dim_im_out_x'] = out_shape[2]
-        elif len(out_shape) == 4:
-            self.operatorRepresentation['dim_im_out_x'] = out_shape[2]
-            self.operatorRepresentation['dim_im_out_y'] = out_shape[3]
-
-        stride_x, stride_y = 1, 1
-        if "strides" in node.attrs:
-            stride_y = node.attrs["strides"][0]
-            stride_x = node.attrs["strides"][1] if len(node.attrs["strides"]) > 1 else stride_y
-        self.operatorRepresentation["stride_y"] = stride_y
-        self.operatorRepresentation["stride_x"] = stride_x
-
-        if "kernel_shape" in node.attrs:
-            kernel_shape = node.attrs["kernel_shape"]
-            kernel_shape_x = kernel_shape[0]
-            # For 2D, kernel_shape may have two elements
-            kernel_shape_y = kernel_shape[1] if len(kernel_shape) > 1 else kernel_shape_x
-        else:
-            kernel_shape_x = 1
-            kernel_shape_y = 1
+            bias: ConstantBuffer = ctxt.lookup(node.inputs[2].name)
+            if not (len(bias.shape) == 1 and bias.shape[0] == feature_maps):
+                return ctxt, False
+            rep['bias'] = bias.name
+
+        # attributes with possible inconsistencies
+        kernel_shape_attr: list[int] = rep['kernel_shape'] or kernel_shape
+        output_shape_attr: list[int] = rep['output_shape'] or output_shape
+        # check possible inconsistencies
+        if not all([
+                kernel_shape_attr == kernel_shape,
+                output_shape_attr == output_shape,
+        ]):
+            return ctxt, False
+
+        # other attributes
+        dilations: list[int] = rep['dilations'] or [1] * spatial_dims
+        output_padding: list[int] = rep['output_padding'] or [0] * spatial_dims
+        strides: list[int] = rep['strides'] or [1] * spatial_dims
+
+        # auto_pad may override pads
+        if rep['auto_pad'] == 'NOTSET':
+            pads: list[int] = rep['pads'] or [0] * (2 * spatial_dims)
+        elif rep['auto_pad'] == 'VALID':
+            pads = [0] * (2 * spatial_dims)
+        else:  # SAME_UPPER, SAME_LOWER
+            starts = [0] * spatial_dims
+            ends = [0] * spatial_dims
+            for i in range(spatial_dims):
+                total_padding = (strides[i] * (input_shape[i] - 1) + output_padding[i] +
+                                 ((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i])
+                half = total_padding // 2
+                if rep['auto_pad'] == 'SAME_UPPER':
+                    starts[i] = half
+                    ends[i] = total_padding - half
+                else:  # SAME_LOWER
+                    ends[i] = half
+                    starts[i] = total_padding - half
+            pads = starts + ends
+
+        # check other attributes
+        if not all([
+                len(dilations) == spatial_dims,
+                len(output_padding) == spatial_dims,
+                len(pads) == 2 * spatial_dims,
+                len(strides) == spatial_dims,
+                all(d > 0 for d in dilations),
+                all(p >= 0 for p in output_padding),
+                all(p >= 0 for p in pads),
+                all(s > 0 for s in strides),
+        ]):
+            return ctxt, False
+
+        # fill operatorRepresentation
+        rep['data_in'] = data_in.name
+        rep['weight'] = weight.name
+        rep['data_out'] = data_out.name
+        rep['has_bias'] = int('bias' in rep)
+
+        rep['kernel_shape'] = kernel_shape
+        rep['output_shape'] = output_shape
+        rep['pads'] = pads
+        rep['strides'] = strides
+        rep['dilations'] = dilations
+        rep['output_padding'] = output_padding
+
+        rep['batch_size'] = batch_size
+        rep['channels'] = channels
+        rep['feature_maps'] = feature_maps
+        rep['input_shape'] = input_shape
 
-        data_in = ctxt.lookup(node.inputs[0].name)
-        data_out = ctxt.lookup(node.outputs[0].name)
-        in_shape = data_in.shape
-        out_shape = data_out.shape
-
-        self.operatorRepresentation['ch_im_in'] = in_shape[1]
-        self.operatorRepresentation['dim_im_in_y'] = in_shape[2]
-        self.operatorRepresentation['ch_im_out'] = out_shape[1]
-        self.operatorRepresentation['dim_im_out_y'] = out_shape[2]
-
-        self.operatorRepresentation[
-            'batchOffsetIn'] = self.operatorRepresentation['ch_im_in'] * self.operatorRepresentation['dim_im_in_y']
-        self.operatorRepresentation[
-            'batchOffsetOut'] = self.operatorRepresentation['ch_im_out'] * self.operatorRepresentation['dim_im_out_y']
         return ctxt, True
 
 
 class ConvTranspose1DParser(ConvTransposeParser):
 
-    def __init__(self):
-        super().__init__()
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
 
-    def parseNode(self, node: gs.Node) -> bool:
-        # 1D ConvTranspose expects 3D input/output and 3D weight
-        wellFormed = super().parseNode(node)
-        ret = False
-        if wellFormed:
-            ret = all([
-                # Make sure strides are 2D
-                len(node.attrs['strides']) == 1,
-                len(node.attrs['pads']) == 2,
-                len(node.attrs['dilations']) == 1,
-            ])
-        if ret:
+        ctxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+        if not ret:
+            return ctxt, False
 
-            self.operatorRepresentation['kernel_shape'] = node.attrs['kernel_shape']
-            self.operatorRepresentation['dim_kernel_y'] = int(self.operatorRepresentation['kernel_shape'][0])
-            self.operatorRepresentation['dilation_y'] = int(self.operatorRepresentation['dilations'][0])
-            self.operatorRepresentation['padding_y'] = int(self.operatorRepresentation['pads'][0])
-            self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][0])
+        rep = self.operatorRepresentation
+        spatial_dims = len(rep['kernel_shape'])
+        if spatial_dims != 1:
+            return ctxt, False
 
-        return ret
+        rep['input_length'], = rep['input_shape']
+        rep['output_length'], = rep['output_shape']
+        rep['kernel_length'], = rep['kernel_shape']
+        rep['stride'], = rep['strides']
+
+        return ctxt, True
+
+
+class ConvTranspose2DParser(ConvTransposeParser):
 
     def parseNodeCtxt(self,
                       ctxt: NetworkContext,
                       node: gs.Node,
                       channels_first: bool = True) -> Tuple[NetworkContext, bool]:
 
-        newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+        ctxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
+        if not ret:
+            return ctxt, False
 
-        if ret:
-            data_in = newCtxt.lookup(node.inputs[0].name)
-            data_out = newCtxt.lookup(node.outputs[0].name)
-            in_shape = data_in.shape
-            out_shape = data_out.shape
-            self.operatorRepresentation['batch'] = in_shape[0]
-            self.operatorRepresentation['ch_im_in'] = in_shape[1]
-            self.operatorRepresentation['dim_im_in_y'] = in_shape[2]
-            self.operatorRepresentation['ch_im_out'] = out_shape[1]
-            self.operatorRepresentation['dim_im_out_y'] = out_shape[2]
-            self.operatorRepresentation[
-                "batchOffsetIn"] = self.operatorRepresentation["ch_im_in"] * self.operatorRepresentation["dim_im_in_y"]
-            self.operatorRepresentation["batchOffsetOut"] = self.operatorRepresentation[
-                "ch_im_out"] * self.operatorRepresentation["dim_im_out_y"]
-            return newCtxt, True
-        return ctxt, False
+        rep = self.operatorRepresentation
+        spatial_dims = len(rep['kernel_shape'])
+        if spatial_dims != 2:
+            return ctxt, False
+
+        rep['input_height'], rep['input_width'] = rep['input_shape']
+        rep['output_height'], rep['output_width'] = rep['output_shape']
+        rep['kernel_height'], rep['kernel_width'] = rep['kernel_shape']
+        rep['stride_h'], rep['stride_w'] = rep['strides']
+
+        return ctxt, True
 
 
 class SqrtParser(UnaryElementWiseParser):
@@ -2959,6 +3022,34 @@ def parseNode(self, node: gs.Node) -> bool:
         return super().parseNode(node) and node.op == 'HardSwish'
 
 
+class EluParser(UnaryElementWiseParser):
+    """Parser for ``Elu`` nodes; stores ``alpha`` (1.0 when the attribute is absent)."""
+    def parseNode(self, node: gs.Node) -> bool:
+        if not (super().parseNode(node) and node.op == 'Elu'):
+            return False
+        self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 1.0)
+        return True
+
+
+class SeluParser(UnaryElementWiseParser):
+    """Parser for ``Selu`` nodes; stores ``alpha`` and ``gamma``, using the constants below when unset."""
+    def parseNode(self, node: gs.Node) -> bool:
+        if not (super().parseNode(node) and node.op == 'Selu'):
+            return False
+        self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 1.67326319217681884765625)
+        self.operatorRepresentation['gamma'] = node.attrs.get('gamma', 1.05070102214813232421875)
+        return True
+
+
+class LeakyReluParser(UnaryElementWiseParser):
+    """Parser for ``LeakyRelu`` nodes; stores ``alpha`` (0.01 when the attribute is absent)."""
+    def parseNode(self, node: gs.Node) -> bool:
+        if not (super().parseNode(node) and node.op == 'LeakyRelu'):
+            return False
+        self.operatorRepresentation['alpha'] = node.attrs.get('alpha', 0.01)
+        return True
+
+
 class NormalizationParser(NodeParser):
 
     def parseNode(self, node: gs.Node) -> bool:
@@ -3124,3 +3215,236 @@ class GlobalMaxPoolParser(GlobalPoolParser):
 
     def parseNode(self, node: gs.Node) -> bool:
         return super().parseNode(node) and node.op == 'GlobalMaxPool'
+
+
+class ScatterParser(NodeParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        """Accept 3-input/1-output Scatter(Elements) nodes and record ``axis`` and ``reduction``."""
+        if not all([
+                node.op == 'Scatter' or node.op == 'ScatterElements',
+                len(node.inputs) == 3,
+                len(node.outputs) == 1,
+        ]):
+            return False
+
+        axis = node.attrs.get('axis', 0)
+        reduction = node.attrs.get('reduction', 'none')
+
+        if reduction not in ('none', 'add', 'mul', 'max', 'min'):
+            return False
+
+        self.operatorRepresentation['axis'] = axis
+        self.operatorRepresentation['reduction'] = reduction
+
+        return True
+
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
+        """Look up the data, indices, updates and output buffers; record their names and the shapes."""
+        data_in = ctxt.lookup(node.inputs[0].name)
+        indices = ctxt.lookup(node.inputs[1].name)
+        updates = ctxt.lookup(node.inputs[2].name)
+        data_out = ctxt.lookup(node.outputs[0].name)
+        self.operatorRepresentation['data_in'] = data_in.name
+        self.operatorRepresentation['indices'] = indices.name
+        self.operatorRepresentation['updates'] = updates.name
+        self.operatorRepresentation['data_out'] = data_out.name
+
+        self.operatorRepresentation['ndim'] = len(data_in.shape)
+        self.operatorRepresentation['data_shape'] = list(data_in.shape)
+        self.operatorRepresentation['indices_shape'] = list(indices.shape)
+
+        return ctxt, True
+
+
+class Col2ImParser(NodeParser):
+
+    def parseNode(self, node: gs.Node) -> bool:
+        """Validate a Col2Im node with constant image/block shapes and derive ``col_dims``."""
+        if not all([node.op == 'Col2Im', len(node.inputs) == 3, len(node.outputs) == 1]):
+            return False
+
+        # Deeploy is a static ahead-of-time code generator: shape tensors that
+        # appear as C compound literals in the emitted code must be known at
+        # parse time.
+        # image_shape / block_shape are therefore assumed to be constant and
+        # are not supported as variables
+        if not isinstance(node.inputs[1], gs.Constant) or not isinstance(node.inputs[2], gs.Constant):
+            return False
+
+        image_shape = node.inputs[1].values.astype(int).tolist()
+        block_shape = node.inputs[2].values.astype(int).tolist()
+        spatial_dims = len(image_shape)
+        # block_shape is indexed per spatial dim below, so its rank must match image_shape
+        if spatial_dims <= 0 or len(block_shape) != spatial_dims:
+            return False
+
+        dilations = list(node.attrs.get('dilations', [1] * spatial_dims))
+        pads = list(node.attrs.get('pads', [0] * (2 * spatial_dims)))
+        strides = list(node.attrs.get('strides', [1] * spatial_dims))
+
+        if not all([
+                len(dilations) == spatial_dims,
+                len(pads) == 2 * spatial_dims,
+                len(strides) == spatial_dims,
+                all(s > 0 for s in image_shape),
+                all(s > 0 for s in block_shape),
+                all(d > 0 for d in dilations),
+                all(p >= 0 for p in pads),
+                all(s > 0 for s in strides),
+        ]):
+            return False
+
+        col_dims = [(image_shape[p] + pads[p] + pads[p + spatial_dims] - dilations[p] *
+                     (block_shape[p] - 1) - 1) // strides[p] + 1 for p in range(spatial_dims)]
+        if any(d <= 0 for d in col_dims):
+            return False
+
+        self.operatorRepresentation['col_dims'] = col_dims
+        self.operatorRepresentation['image_shape'] = image_shape
+        self.operatorRepresentation['block_shape'] = block_shape
+        self.operatorRepresentation['spatial_dims'] = spatial_dims
+        self.operatorRepresentation['dilations'] = dilations
+        self.operatorRepresentation['pads'] = pads
+        self.operatorRepresentation['strides'] = strides
+
+        return True
+
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
+        """Check input/output buffer shapes against the parsed geometry; record names, N and C."""
+        data_in: VariableBuffer = ctxt.lookup(node.inputs[0].name)
+        data_out: VariableBuffer = ctxt.lookup(node.outputs[0].name)
+
+        image_shape = self.operatorRepresentation['image_shape']
+        block_shape = self.operatorRepresentation['block_shape']
+        col_dims = self.operatorRepresentation['col_dims']
+
+        N, C = data_out.shape[0], data_out.shape[1]
+        block_volume = int(np.prod(block_shape))
+        L = int(np.prod(col_dims))
+
+        if list(data_in.shape) != [N, C * block_volume, L]:
+            return ctxt, False
+
+        if list(data_out.shape) != [N, C] + image_shape:
+            return ctxt, False
+
+        self.operatorRepresentation['data_in'] = data_in.name
+        self.operatorRepresentation['data_out'] = data_out.name
+        self.operatorRepresentation['batch_size'] = N
+        self.operatorRepresentation['channels'] = C
+
+        return ctxt, True
+
+
+class ResizeParser(NodeParser):
+
+    @staticmethod
+    def _is_empty(input: gs.Variable | gs.Constant | None) -> bool:
+        if input is None:
+            return True
+        if isinstance(input, gs.Constant):
+            return input.values.size <= 0
+        if isinstance(input, gs.Variable):
+            return input.shape is None
+        return True
+
+    def parseNode(self, node: gs.Node) -> bool:
+        """Accept Resize nodes whose string attributes are in the allowed sets; record all attributes."""
+        if not all([node.op == 'Resize', len(node.inputs) >= 1, len(node.outputs) == 1]):
+            return False
+
+        antialias = node.attrs.get('antialias', 0)
+        axes = node.attrs.get('axes', None)  # None -> all axes
+        coord_mode = node.attrs.get('coordinate_transformation_mode', 'half_pixel')
+        cubic_coeff_a = node.attrs.get('cubic_coeff_a', -0.75)
+        exclude_outside = node.attrs.get('exclude_outside', 0)
+        extrapolation_value = node.attrs.get('extrapolation_value', 0.0)
+        keep_aspect_ratio_policy = node.attrs.get('keep_aspect_ratio_policy', 'stretch')
+        mode = node.attrs.get('mode', 'nearest')
+        nearest_mode = node.attrs.get('nearest_mode', 'round_prefer_floor')
+
+        if not all([
+                coord_mode in ('half_pixel', 'half_pixel_symmetric', 'pytorch_half_pixel', 'align_corners',
+                               'asymmetric', 'tf_crop_and_resize'),
+                keep_aspect_ratio_policy in ('stretch', 'not_larger', 'not_smaller'),
+                mode in ('nearest', 'linear', 'cubic'),
+                nearest_mode in ('floor', 'ceil', 'round_prefer_floor', 'round_prefer_ceil'),
+        ]):
+            return False
+
+        self.operatorRepresentation['antialias'] = antialias
+        self.operatorRepresentation['axes'] = axes
+        self.operatorRepresentation['coord_mode'] = coord_mode
+        self.operatorRepresentation['cubic_coeff_a'] = cubic_coeff_a
+        self.operatorRepresentation['exclude_outside'] = exclude_outside
+        self.operatorRepresentation['extrapolation_value'] = extrapolation_value
+        self.operatorRepresentation['keep_aspect_ratio_policy'] = keep_aspect_ratio_policy
+        self.operatorRepresentation['mode'] = mode
+        self.operatorRepresentation['nearest_mode'] = nearest_mode
+
+        return True
+
+    def parseNodeCtxt(self,
+                      ctxt: NetworkContext,
+                      node: gs.Node,
+                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
+        """Validate buffer shapes and the roi/scales/sizes inputs, then record shapes and values."""
+        data_in: VariableBuffer = ctxt.lookup(node.inputs[0].name)
+        data_out: VariableBuffer = ctxt.lookup(node.outputs[0].name)
+
+        if not all([
+                len(data_in.shape) == len(data_out.shape),  # same ndims
+                all(s > 0 for s in data_in.shape),  # no empty dim
+                all(s > 0 for s in data_out.shape),  # no empty dim
+                len(data_in.shape) > 2,  # at least batch, channels, one spatial dim
+                list(data_in.shape[:2]) == list(data_out.shape[:2]),  # batch_size, channels unchanged (list/tuple safe)
+        ]):
+            return ctxt, False
+
+        roi: gs.Variable | gs.Constant | None = node.inputs[1] if len(node.inputs) > 1 else None
+        scales: gs.Constant | None = node.inputs[2] if len(node.inputs) > 2 else None
+        sizes: gs.Constant | None = node.inputs[3] if len(node.inputs) > 3 else None
+
+        has_scales = not self._is_empty(scales)
+        has_sizes = not self._is_empty(sizes)
+
+        if any([
+                # ONNX requires exactly one of scales / sizes to be non-empty
+                has_scales and has_sizes,
+                (not has_scales) and (not has_sizes),
+                # scales and sizes assumed constants otherwise output shape
+                # cannot be inferred at parsing time
+                has_scales and not isinstance(scales, gs.Constant),
+                has_sizes and not isinstance(sizes, gs.Constant),
+        ]):
+            return ctxt, False
+
+        if not self._is_empty(roi):
+            if isinstance(roi, gs.Constant):
+                _roi = roi.values.tolist()
+            elif isinstance(roi, gs.Variable):
+                _roi = ctxt.lookup(roi.name).name
+            else:
+                return ctxt, False
+        else:
+            _roi = None
+
+        self.operatorRepresentation['data_in'] = data_in.name
+        self.operatorRepresentation['data_out'] = data_out.name
+        self.operatorRepresentation['batch_size'] = data_in.shape[0]
+        self.operatorRepresentation['channels'] = data_in.shape[1]
+        self.operatorRepresentation['spatial_dims'] = len(data_in.shape[2:])
+        self.operatorRepresentation['input_shape'] = list(data_in.shape[2:])
+        self.operatorRepresentation['output_shape'] = list(data_out.shape[2:])
+        self.operatorRepresentation['roi'] = _roi
+        self.operatorRepresentation['scales'] = scales.values.tolist() if has_scales else None
+        self.operatorRepresentation['sizes'] = sizes.values.tolist() if has_sizes else None
+
+        return ctxt, True
diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py
index 2aa1ef1c38..413dd6f878 100644
--- a/Deeploy/Targets/Generic/Platform.py
+++ b/Deeploy/Targets/Generic/Platform.py
@@ -7,32 +7,35 @@
 from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \
     StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer
 from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicAveragePool1DBindings, BasicAveragePool2DBindings, \
-    BasicBatchNormBindings, BasicCeilBindings, BasicClipBindings, BasicConcatBindings, BasicConv1DBindings, \
-    BasicConv2DBindings, BasicConvTransposeBindings, BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, \
-    BasicDWConv1DBinding, BasicDWConv2DBindings, BasicExpBindings, BasicFloorBindings, BasicGatherBindings, \
-    BasicGELUBindings, BasicGEMMBindings, BasicGlobalAveragePoolBindings, BasicGlobalMaxPoolBindings, \
-    BasicGroupNormBindings, BasicHardSigmoidBindings, BasicHardSwishBindings, BasicInstanceNormBindings, \
-    BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, BasicLayerNormBindings, BasicMatMulBindings, \
-    BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \
-    BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \
-    BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSigmoidBindings, \
-    BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicSubBindings, BasicSwishBindings, \
-    BasicTransposeBindings, DummyBinding
+    BasicBatchNormBindings, BasicCeilBindings, BasicClipBindings, BasicCol2ImBindings, BasicConcatBindings, \
+    BasicConv1DBindings, BasicConv2DBindings, BasicConvTranspose1DBindings, BasicConvTranspose2DBindings, \
+    BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \
+    BasicEluBindings, BasicExpBindings, BasicFloorBindings, BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, \
+    BasicGlobalAveragePoolBindings, BasicGlobalMaxPoolBindings, BasicGroupNormBindings, BasicHardSigmoidBindings, \
+    BasicHardSwishBindings, BasicInstanceNormBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \
+    BasicLayerNormBindings, BasicLeakyReluBindings, BasicMatMulBindings, BasicMaxPool1DBindings, \
+    BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, \
+    BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \
+    BasicResizeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicScatterBindings, \
+    BasicSeluBindings, BasicSigmoidBindings, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, \
+    BasicSubBindings, BasicSwishBindings, BasicTransposeBindings, DummyBinding
 from Deeploy.Targets.Generic.Layers import AddLayer, AveragePoolLayer, BatchNormalizationLayer, CeilLayer, ClipLayer, \
-    ConcatLayer, ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, ExpLayer, FloorLayer, \
-    GatherLayer, GELULayer, GEMMLayer, GlobalAveragePoolLayer, GlobalMaxPoolLayer, GroupNormLayer, InstanceNormLayer, \
-    ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \
-    ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SigmoidLayer, \
-    SliceLayer, SoftmaxLayer, SqrtLayer, SubLayer, SwishLayer, TransposeLayer
+    Col2ImLayer, ConcatLayer, ConvLayer, ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, EluLayer, \
+    ExpLayer, FloorLayer, GatherLayer, GELULayer, GEMMLayer, GlobalAveragePoolLayer, GlobalMaxPoolLayer, \
+    GroupNormLayer, InstanceNormLayer, ITAMaxLayer, LayerNormLayer, LeakyReluLayer, MatMulLayer, MaxPoolLayer, \
+    MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, \
+    ReshapeLayer, ResizeLayer, RQIntegerDivLayer, RQSiGELULayer, ScatterLayer, SeluLayer, SigmoidLayer, SliceLayer, \
+    SoftmaxLayer, SqrtLayer, SubLayer, SwishLayer, TransposeLayer
 from Deeploy.Targets.Generic.Parsers import AddParser, AveragePool1DParser, AveragePool2DParser, BatchNormParser, \
-    CeilParser, ClipParser, ConcatParser, ConvTranspose1DParser, DebugParser, DequantParser, DivParser, DummyParser, \
-    ExpParser, FlattenParser, FloorParser, GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, \
-    GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, GlobalAveragePoolParser, \
-    GlobalMaxPoolParser, GroupNormParser, HardSigmoidParser, HardSwishParser, InstanceNormParser, IntegerDivParser, \
-    ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, \
-    Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, \
-    ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SigmoidParser, SliceParser, SoftmaxParser, SqrtParser, \
-    SubParser, SwishParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser
+    CeilParser, ClipParser, Col2ImParser, ConcatParser, ConvTranspose1DParser, ConvTranspose2DParser, DebugParser, \
+    DequantParser, DivParser, DummyParser, EluParser, ExpParser, FlattenParser, FloorParser, GatherParser, GELUParser, \
+    GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, \
+    GenericMaxPool2DParser, GlobalAveragePoolParser, GlobalMaxPoolParser, GroupNormParser, HardSigmoidParser, \
+    HardSwishParser, InstanceNormParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \
+    LeakyReluParser, MatMulParser, MaxPool1DParser, MulParser, Pad1DParser, Pad2DParser, PowParser, QuantParser, \
+    ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, ResizeParser, \
+    RQIntegerDivParser, RQSiGELUParser, ScatterParser, SeluParser, SigmoidParser, SliceParser, SoftmaxParser, \
+    SqrtParser, SubParser, SwishParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser
 from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate
 from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \
     ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \
@@ -78,7 +81,8 @@
 QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings)
 DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings)
 BatchNormalizationMapper = NodeMapper(BatchNormParser(), BasicBatchNormBindings)
-ConvTransposeMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTransposeBindings)
+ConvTranspose1DMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTranspose1DBindings)
+ConvTranspose2DMapper = NodeMapper(ConvTranspose2DParser(), BasicConvTranspose2DBindings)
 SliceMapper = NodeMapper(SliceParser(), BasicSliceBindings)
 CeilMapper = NodeMapper(CeilParser(), BasicCeilBindings)
 FloorMapper = NodeMapper(FloorParser(), BasicFloorBindings)
@@ -88,12 +92,18 @@
 SwishMapper = NodeMapper(SwishParser(), BasicSwishBindings)
 HardSigmoidMapper = NodeMapper(HardSigmoidParser(), BasicHardSigmoidBindings)
 HardSwishMapper = NodeMapper(HardSwishParser(), BasicHardSwishBindings)
+EluMapper = NodeMapper(EluParser(), BasicEluBindings)
+SeluMapper = NodeMapper(SeluParser(), BasicSeluBindings)
+LeakyReluMapper = NodeMapper(LeakyReluParser(), BasicLeakyReluBindings)
 InstanceNormMapper = NodeMapper(InstanceNormParser(), BasicInstanceNormBindings)
 GroupNormMapper = NodeMapper(GroupNormParser(), BasicGroupNormBindings)
 AveragePool1DMapper = NodeMapper(AveragePool1DParser(), BasicAveragePool1DBindings)
 AveragePool2DMapper = NodeMapper(AveragePool2DParser(), BasicAveragePool2DBindings)
 GlobalAveragePoolMapper = NodeMapper(GlobalAveragePoolParser(), BasicGlobalAveragePoolBindings)
 GlobalMaxPoolMapper = NodeMapper(GlobalMaxPoolParser(), BasicGlobalMaxPoolBindings)
+ScatterMapper = NodeMapper(ScatterParser(), BasicScatterBindings)
+Col2ImMapper = NodeMapper(Col2ImParser(), BasicCol2ImBindings)
+ResizeMapper = NodeMapper(ResizeParser(), BasicResizeBindings)
 
 # Dummy nodes are intended for development purposes only!
 # They should always generate compiler errors to not accidentally end up in production code
@@ -106,12 +116,14 @@
     'Concat': ConcatLayer([ConcatMapper]),
     'DebugPrint': DebugPrintLayer([DebugMapper]),
     'Div': DivLayer([DivMapper]),
+    'Elu': EluLayer([EluMapper]),
     'Flatten': ReshapeLayer([FlattenMapper]),
     'Gather': GatherLayer([GatherMapper]),
     'Gemm': GEMMLayer([GEMMMapper]),
     'iGELU': GELULayer([GELUMapper]),
     'Gelu': GELULayer([GELUMapper]),
     'LayerNormalization': LayerNormLayer([LayerNormMapper]),
+    'LeakyRelu': LeakyReluLayer([LeakyReluMapper]),
     'iLayerNorm': LayerNormLayer([iLayerNormMapper]),
     'IntegerDiv': DivLayer([IntegerDivMapper]),
     'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]),
@@ -140,11 +152,12 @@
     'Quant': QuantLayer([QuantMapper]),
     'Dequant': DequantLayer([DequantMapper]),
     'BatchNormalization': BatchNormalizationLayer([BatchNormalizationMapper]),
-    'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]),
+    'ConvTranspose': ConvTransposeLayer([ConvTranspose1DMapper, ConvTranspose2DMapper]),
     'Ceil': CeilLayer([CeilMapper]),
     'Floor': FloorLayer([FloorMapper]),
     'Clip': ClipLayer([ClipMapper]),
     'Exp': ExpLayer([ExpMapper]),
+    'Selu': SeluLayer([SeluMapper]),
     'Sigmoid': SigmoidLayer([SigmoidMapper]),
     'Swish': SwishLayer([SwishMapper]),
     'HardSigmoid': SigmoidLayer([HardSigmoidMapper]),
@@ -154,6 +167,10 @@
     'AveragePool': AveragePoolLayer([AveragePool1DMapper, AveragePool2DMapper]),
     'GlobalAveragePool': GlobalAveragePoolLayer([GlobalAveragePoolMapper]),
     'GlobalMaxPool': GlobalMaxPoolLayer([GlobalMaxPoolMapper]),
+    'Resize': ResizeLayer([ResizeMapper]),
+    'Scatter': ScatterLayer([ScatterMapper]),
+    'ScatterElements': ScatterLayer([ScatterMapper]),
+    'Col2Im': Col2ImLayer([Col2ImMapper]),
     # # For example, you can use the DummpyMapper, in case you want to test
     # # deployment or optimizations with GlobalAveragePool nodes but did not yet
     # # implement the corresponding kernel
diff --git a/Deeploy/Targets/Generic/Templates/Col2ImTemplate.py b/Deeploy/Targets/Generic/Templates/Col2ImTemplate.py
new file mode 100644
index 0000000000..11d10988fe
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/Col2ImTemplate.py
@@ -0,0 +1,42 @@
+# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Dict, List, Tuple
+
+from Deeploy.AbstractDataTypes import FloatImmediate
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+
+def _typeSuffix(ref_type) -> str:
+    """Return ``fp<width>`` for FloatImmediate subclasses, else ``s<width>``/``u<width>`` by signedness."""
+    if issubclass(ref_type, FloatImmediate):
+        prefix = 'fp'
+    else:
+        prefix = 's' if ref_type.signed else 'u'
+    return f'{prefix}{ref_type.typeWidth}'
+
+
+class _Col2ImTemplate(NodeTemplate):
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
+        """Set ``type_suffix`` from the referenced type of the ``data_in`` buffer."""
+        data_in = ctxt.lookup(operatorRepresentation['data_in'])
+        operatorRepresentation['type_suffix'] = _typeSuffix(data_in._type.referencedType)
+
+        return ctxt, operatorRepresentation, []
+
+
+referenceTemplate = _Col2ImTemplate("""
+// Col2Im (Name: ${nodeName}, Op: ${nodeOp})
+Col2Im_${type_suffix}(
+    ${data_in}, ${data_out},
+    ${batch_size}, ${channels}, ${spatial_dims},
+    (int32_t[]){${', '.join(str(s) for s in image_shape)}},
+    (int32_t[]){${', '.join(str(s) for s in block_shape)}},
+    (int32_t[]){${', '.join(str(s) for s in dilations)}},
+    (int32_t[]){${', '.join(str(s) for s in pads)}},
+    (int32_t[]){${', '.join(str(s) for s in strides)}}
+);
+""")
diff --git a/Deeploy/Targets/Generic/Templates/ConvTransposeTemplate.py b/Deeploy/Targets/Generic/Templates/ConvTransposeTemplate.py
index 9bf864c91f..0461283dec 100644
--- a/Deeploy/Targets/Generic/Templates/ConvTransposeTemplate.py
+++ b/Deeploy/Targets/Generic/Templates/ConvTransposeTemplate.py
@@ -4,10 +4,35 @@
 
 from Deeploy.DeeployTypes import NodeTemplate
 
-referenceTemplate = NodeTemplate("""
+referenceTemplate2D = NodeTemplate("""
 <%
-batchOffsetIn = ch_im_in * dim_im_in_y
-batchOffsetOut = ch_im_out * dim_im_out_y
+batch_stride_input = channels * input_height * input_width
+batch_stride_output = feature_maps * output_height * output_width
+%>
+
+// 2D Transposed Conv (Name: ${nodeName}, Op: ${nodeOp})
+BEGIN_SINGLE_CORE
+    ${data_in_type.typeName} ref_${data_out}_${data_in} = ${data_in};
+    ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out};
+
+    for (uint32_t n=0; n<${batch_size}; ++n) {
+        ConvTranspose2d_fp32(
+            ref_${data_out}_${data_in}, ${channels}, ${input_height},
+            ${input_width}, ${weight}, ${feature_maps}, ${kernel_height},
+            ${kernel_width}, ${stride_h}, ${stride_w}, ${bias}, ${has_bias},
+            ref_${data_out}_${data_out}, ${output_height}, ${output_width}
+        );
+
+        ref_${data_out}_${data_in} += ${batch_stride_input};
+        ref_${data_out}_${data_out} += ${batch_stride_output};
+    }
+END_SINGLE_CORE
+""")
+
+referenceTemplate1D = NodeTemplate("""
+<%
+batch_stride_input = channels * input_length
+batch_stride_output = feature_maps * output_length
 %>
 
 // 1D Transposed Conv (Name: ${nodeName}, Op: ${nodeOp})
@@ -15,17 +40,15 @@
     ${data_in_type.typeName} ref_${data_out}_${data_in} = ${data_in};
     ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out};
 
-    for (uint32_t n=0; n<${batch}; ++n) {
+    for (uint32_t n=0; n<${batch_size}; ++n) {
         ConvTranspose1d_fp32(
-            ref_${data_out}_${data_in}, ${ch_im_in}, ${dim_im_in_y},
-            ${weight}, ${ch_im_out}, ${dim_kernel_y},
-            ${stride_y},
-            ${bias}, ${has_bias},
-            ref_${data_out}_${data_out}, ${dim_im_out_y}
+            ref_${data_out}_${data_in}, ${channels}, ${input_length}, ${weight},
+            ${feature_maps}, ${kernel_length}, ${stride}, ${bias}, ${has_bias},
+            ref_${data_out}_${data_out}, ${output_length}
         );
 
-        ref_${data_out}_${data_in} += ${batchOffsetIn};
-        ref_${data_out}_${data_out} += ${batchOffsetOut};
+        ref_${data_out}_${data_in} += ${batch_stride_input};
+        ref_${data_out}_${data_out} += ${batch_stride_output};
     }
 END_SINGLE_CORE
 """)
diff --git a/Deeploy/Targets/Generic/Templates/FloatEluTemplate.py b/Deeploy/Targets/Generic/Templates/FloatEluTemplate.py
new file mode 100644
index 0000000000..fc7a1886ae
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/FloatEluTemplate.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+import numpy as np
+
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+
+class _EluTemplate(NodeTemplate):
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]:
+        """Add ``size`` (product of the ``data_in`` shape) and ``type_width`` for the template."""
+        data_in = ctxt.lookup(operatorRepresentation['data_in'])
+        operatorRepresentation['size'] = int(np.prod(data_in.shape))
+        operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth
+        return ctxt, operatorRepresentation, []
+
+
+referenceTemplate = _EluTemplate("""
+// ELU (Name: ${nodeName}, Op: ${nodeOp})
+Elu_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}, ${alpha});
+""")
diff --git a/Deeploy/Targets/Generic/Templates/FloatLeakyReluTemplate.py b/Deeploy/Targets/Generic/Templates/FloatLeakyReluTemplate.py
new file mode 100644
index 0000000000..35804bd3d7
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/FloatLeakyReluTemplate.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+import numpy as np
+
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+
+class _LeakyReluTemplate(NodeTemplate):
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]:
+        """Add 'size' and 'type_width', both derived from the data_in buffer, to the operator representation."""
+        data_in = ctxt.lookup(operatorRepresentation['data_in'])
+        operatorRepresentation['size'] = int(np.prod(data_in.shape))  # product of all dimensions of data_in
+        operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth  # selects the fp<W> kernel
+        return ctxt, operatorRepresentation, []
+
+
+referenceTemplate = _LeakyReluTemplate("""
+// LeakyRelu (Name: ${nodeName}, Op: ${nodeOp})
+LeakyRelu_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}, ${alpha});
+""")  # emits one LeakyRelu_fp<W>_fp<W> call; 'size' and 'type_width' are filled in by alignToContext
diff --git a/Deeploy/Targets/Generic/Templates/FloatSeluTemplate.py b/Deeploy/Targets/Generic/Templates/FloatSeluTemplate.py
new file mode 100644
index 0000000000..2585a1966d
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/FloatSeluTemplate.py
@@ -0,0 +1,23 @@
+# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+import numpy as np
+
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+
+class _SeluTemplate(NodeTemplate):
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> tuple[NetworkContext, dict, list[str]]:
+        """Add 'size' and 'type_width', both derived from the data_in buffer, to the operator representation."""
+        data_in = ctxt.lookup(operatorRepresentation['data_in'])
+        operatorRepresentation['size'] = int(np.prod(data_in.shape))  # product of all dimensions of data_in
+        operatorRepresentation['type_width'] = data_in._type.referencedType.typeWidth  # selects the fp<W> kernel
+        return ctxt, operatorRepresentation, []
+
+
+referenceTemplate = _SeluTemplate("""
+// SELU (Name: ${nodeName}, Op: ${nodeOp})
+Selu_fp${type_width}_fp${type_width}(${data_in}, ${data_out}, ${size}, ${alpha}, ${gamma});
+""")  # emits one Selu_fp<W>_fp<W> call; 'size' and 'type_width' are filled in by alignToContext
diff --git a/Deeploy/Targets/Generic/Templates/ResizeTemplate.py b/Deeploy/Targets/Generic/Templates/ResizeTemplate.py
new file mode 100644
index 0000000000..3a1ce42acd
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/ResizeTemplate.py
@@ -0,0 +1,98 @@
+# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Dict, List, Tuple
+
+from Deeploy.AbstractDataTypes import FloatImmediate
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+_TYPE_TAG_MAP = {  # _typeSuffix() result -> tag emitted as ${type_tag} in the Resize call
+    'fp32': 'RESIZE_TYPE_FLOAT32',
+    's8': 'RESIZE_TYPE_INT8',
+    'u8': 'RESIZE_TYPE_UINT8',
+    's16': 'RESIZE_TYPE_INT16',
+    'u16': 'RESIZE_TYPE_UINT16',
+    's32': 'RESIZE_TYPE_INT32',
+    'u32': 'RESIZE_TYPE_UINT32',
+}
+
+_MODE_MAP = {  # 'mode' value -> constant emitted as ${mode}
+    "nearest": "RESIZE_MODE_NEAREST",
+    "linear": "RESIZE_MODE_LINEAR",
+    "cubic": "RESIZE_MODE_CUBIC",
+}
+
+_COORD_MAP = {  # 'coord_mode' value -> constant emitted as ${coord_mode}
+    "asymmetric": "RESIZE_COORD_ASYMMETRIC",
+    "half_pixel": "RESIZE_COORD_HALF_PIXEL",
+    "half_pixel_symmetric": "RESIZE_COORD_HALF_PIXEL_SYMMETRIC",
+    "align_corners": "RESIZE_COORD_ALIGN_CORNERS",
+    "pytorch_half_pixel": "RESIZE_COORD_PYTORCH_HALF_PIXEL",
+    "tf_crop_and_resize": "RESIZE_COORD_TF_CROP_AND_RESIZE",
+}
+
+_NEAREST_MAP = {  # 'nearest_mode' value -> constant emitted as ${nearest_mode}
+    "floor": "RESIZE_NEAREST_FLOOR",
+    "ceil": "RESIZE_NEAREST_CEIL",
+    "round_prefer_floor": "RESIZE_NEAREST_ROUND_PREFER_FLOOR",
+    "round_prefer_ceil": "RESIZE_NEAREST_ROUND_PREFER_CEIL",
+}
+
+
+def _typeSuffix(ref_type) -> str:  # 'fp<W>' for FloatImmediate subclasses, else 's<W>'/'u<W>' (W = typeWidth)
+    if issubclass(ref_type, FloatImmediate):
+        return f'fp{ref_type.typeWidth}'
+    elif ref_type.signed:
+        return f's{ref_type.typeWidth}'
+    else:
+        return f'u{ref_type.typeWidth}'
+
+
+class _ResizeTemplate(NodeTemplate):
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
+        """Reject unsupported Resize options (ValueError), then map attributes to the kernel's constant names."""
+        rep = operatorRepresentation
+        # Each guard below raises ValueError for a feature this kernel does not implement.
+        if rep.get('roi', None) is not None:
+            raise ValueError("Resize: 'roi' input is not supported.")
+        if rep.get('scales', None) is not None:
+            raise ValueError("Resize: 'scales' input is not supported; use 'sizes' instead. ")
+        if rep.get('antialias', 0) != 0:
+            raise ValueError(f"Resize: antialias={rep['antialias']} is not supported by this kernel.")
+        if rep.get('exclude_outside', 0) != 0:
+            raise ValueError(f"Resize: exclude_outside={rep['exclude_outside']} is not supported by this kernel.")
+        if rep.get('axes', None) is not None:
+            raise ValueError(f"Resize: axes={rep['axes']} is not supported; all axes must be resized.")
+        if rep.get('keep_aspect_ratio_policy', 'stretch') != 'stretch':
+            raise ValueError(
+                f"Resize: keep_aspect_ratio_policy='{rep['keep_aspect_ratio_policy']}' is not supported by this kernel."
+            )
+        if rep.get('coord_mode', 'half_pixel') == 'tf_crop_and_resize':
+            raise ValueError(
+                "Resize: coordinate_transformation_mode='tf_crop_and_resize' is not supported by this kernel.")
+        if rep.get('mode', 'nearest') == 'cubic':
+            raise ValueError("Resize: mode='cubic' is not supported by this kernel.")
+
+        data_in = ctxt.lookup(rep['data_in'])
+        type_suffix = _typeSuffix(data_in._type.referencedType)
+        rep['type_tag'] = _TYPE_TAG_MAP[type_suffix]
+        rep['mode'] = _MODE_MAP[rep.get('mode', 'nearest')]  # absent attributes use the ONNX Resize defaults
+        rep['coord_mode'] = _COORD_MAP[rep.get('coord_mode', 'half_pixel')]
+        rep['nearest_mode'] = _NEAREST_MAP[rep.get('nearest_mode', 'round_prefer_floor')]
+
+        return ctxt, rep, []
+
+
+referenceTemplate = _ResizeTemplate("""
+// Resize (Name: ${nodeName}, Op: ${nodeOp})
+Resize(
+    ${data_in}, ${data_out}, ${type_tag},
+    ${batch_size}, ${channels}, ${spatial_dims},
+    (int32_t[]){${', '.join(str(s) for s in input_shape)}},
+    (int32_t[]){${', '.join(str(s) for s in output_shape)}},
+    ${mode}, ${coord_mode}, ${nearest_mode}
+);
+""")  # input_shape and output_shape are rendered inline as int32_t array literals
diff --git a/Deeploy/Targets/Generic/Templates/ScatterTemplate.py b/Deeploy/Targets/Generic/Templates/ScatterTemplate.py
new file mode 100644
index 0000000000..71172ab3d6
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/ScatterTemplate.py
@@ -0,0 +1,49 @@
+# SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Dict, List, Tuple
+
+from Deeploy.AbstractDataTypes import FloatImmediate
+from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation
+
+_REDUCTION_MAP = {  # 'reduction' value -> constant stored as 'reduction_c' and emitted in the Scatter call
+    "none": "SCATTER_REDUCTION_NONE",
+    "add": "SCATTER_REDUCTION_ADD",
+    "mul": "SCATTER_REDUCTION_MUL",
+    "min": "SCATTER_REDUCTION_MIN",
+    "max": "SCATTER_REDUCTION_MAX",
+}
+
+
+def _typeSuffix(ref_type) -> str:  # 'fp<W>' for FloatImmediate subclasses, else 's<W>'/'u<W>' (W = typeWidth)
+    if issubclass(ref_type, FloatImmediate):
+        return f'fp{ref_type.typeWidth}'
+    elif ref_type.signed:
+        return f's{ref_type.typeWidth}'
+    else:
+        return f'u{ref_type.typeWidth}'
+
+
+class _ScatterTemplate(NodeTemplate):
+
+    def alignToContext(self, ctxt: NetworkContext,
+                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
+        """Add 'type_suffix' (from data_in's element type) and 'reduction_c' to the operator representation."""
+        data_in = ctxt.lookup(operatorRepresentation['data_in'])
+        operatorRepresentation['type_suffix'] = _typeSuffix(data_in._type.referencedType)
+        operatorRepresentation['reduction_c'] = _REDUCTION_MAP[operatorRepresentation['reduction']]
+
+        return ctxt, operatorRepresentation, []
+
+
+referenceTemplate = _ScatterTemplate("""
+// Scatter (Name: ${nodeName}, Op: ${nodeOp})
+BEGIN_SINGLE_CORE
+Scatter_${type_suffix}(${data_in}, ${indices}, ${updates}, ${data_out}, ${ndim},
+    (int32_t[]){${', '.join(str(s) for s in data_shape)}},
+    (int32_t[]){${', '.join(str(s) for s in indices_shape)}},
+    ${axis}, ${reduction_c}
+);
+END_SINGLE_CORE
+""")  # call is wrapped in BEGIN/END_SINGLE_CORE; both shapes are rendered as inline int32_t array literals
diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py
index c2c8d436f8..4224cba538 100644
--- a/Deeploy/Targets/Generic/TypeCheckers.py
+++ b/Deeploy/Targets/Generic/TypeCheckers.py
@@ -2,59 +2,22 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import List, Optional, Sequence, Type
+from typing import List, Optional
 
 import numpy as np
 
-from Deeploy.AbstractDataTypes import Pointer
 from Deeploy.CommonExtensions.TypeCheckers.SignPropTypeChecker import SignPropTypeChecker
 from Deeploy.DeeployTypes import ConstantBuffer, OperatorRepresentation, VariableBuffer
 
 
-class ConcatChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:
-
-        maxNLevel = max(i.nLevels for i in inputs)
-
-        return [maxNLevel]
-
-    def _inferSignedness(self, inputs: List[VariableBuffer],
-                         operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]:
-        assert (all([_inp._signed == True for _inp in inputs]) or all(
-            [[_inp._signed == False for _inp in inputs]])), "Some inputs in concat operation have different signs!"
-
-        if inputs[0]._signed:
-            return [True]
-        else:
-            return [False]
-
-
-class SliceChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
+class DummyChecker(SignPropTypeChecker):
 
     def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:
-        return [inputs[0].nLevels]
-
-    def _inferSignedness(self, inputs: List[VariableBuffer],
-                         operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]:
-        if inputs[0]._signed:
-            return [True]
-        else:
-            return [False]
-
+                        operatorRepresentation: OperatorRepresentation) -> List[int]:
+        return [2**(self.input_types[0].referencedType.typeWidth)]  # ignores inputs: 2^width of first input type
 
-class TransposeChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
+class PassThroughTypeChecker(SignPropTypeChecker):
 
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:
@@ -62,75 +25,54 @@ def _inferNumLevels(self, inputs: List[VariableBuffer],
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]:
-        if inputs[0]._signed:
-            return [True]
-        else:
-            return [False]
+        return [bool(inputs[0]._signed)]
 
 
-class PadChecker(SignPropTypeChecker):
+SliceChecker = PassThroughTypeChecker  # these names stay defined as aliases of the one shared class
+TransposeChecker = PassThroughTypeChecker
+PadChecker = PassThroughTypeChecker
+GatherChecker = PassThroughTypeChecker
+ReshapeChecker = PassThroughTypeChecker
+MaxPoolChecker = PassThroughTypeChecker
+DebugPrintChecker = PassThroughTypeChecker
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
 
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [inputs[0].nLevels]
+class SignedOutputTypeChecker(SignPropTypeChecker):  # reports a single signed output regardless of the inputs
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
-        if inputs[0]._signed:
-            return [True]
-        else:
-            return [False]
-
+        return [True]
 
-class AddChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
+class ConcatChecker(SignPropTypeChecker):
 
     def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [inputs[0].nLevels + inputs[1].nLevels]
-
-    def _inferSignedness(self, inputs: List[VariableBuffer],
-                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
-        if inputs[0]._signed or isinstance(inputs[1], ConstantBuffer):
-            return [True]
-        else:
-            return [False]
-
-
-class GatherChecker(SignPropTypeChecker):
+                        operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
+        maxNLevel = max(i.nLevels for i in inputs)  # the largest input level count becomes the output's
 
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [inputs[0].nLevels]
+        return [maxNLevel]
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
-                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
+                         operatorRepresentation: OperatorRepresentation) -> Optional[List[bool]]:
+        # Both all() operands must be flat iterables: a nested list is always truthy and disables the check.
+        assert (all(_inp._signed == True for _inp in inputs) or all(
+            _inp._signed == False for _inp in inputs)), "Some inputs in concat operation have different signs!"
         if inputs[0]._signed:
             return [True]
         else:
             return [False]
 
 
-class ReshapeChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
+class AddChecker(SignPropTypeChecker):
 
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [inputs[0].nLevels]
+        return [inputs[0].nLevels + inputs[1].nLevels]  # level counts of both operands are summed
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
-        if inputs[0]._signed:
+        if inputs[0]._signed or isinstance(inputs[1], ConstantBuffer):  # constant 2nd operand counts as signed
             return [True]
         else:
             return [False]
@@ -138,9 +80,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class MHSAChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [operatorRepresentation['n_levels']]
@@ -150,28 +89,14 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
         return [True]
 
 
-class CLCAChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class CLCAChecker(DummyChecker):  # level count inherited from DummyChecker; output is always signed
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
         return [True]
 
 
-class LinearAttentionChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class LinearAttentionChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
@@ -180,9 +105,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class GEMMChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [
@@ -195,14 +117,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
         return [True]
 
 
-class LayerNormChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class LayerNormChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
@@ -211,9 +126,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class MulChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [2**(self.input_types[1].typeWidth)]
@@ -228,9 +140,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class DivChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [2**(self.output_types[0].referencedType.typeWidth)]
@@ -245,9 +154,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class RQIntegerDivChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [2**(self.output_types[0].referencedType.typeWidth)]
@@ -262,9 +168,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class MatMulChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [np.max(inputs[0].shape) * np.max(inputs[1].shape) * 2**(self.input_types[0].referencedType.typeWidth)]
@@ -281,9 +184,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class RQMatMulChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [operatorRepresentation['n_levels']]
@@ -295,9 +195,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class RQGEMMChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [operatorRepresentation['n_levels']]
@@ -307,14 +204,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
         return [bool(operatorRepresentation["signed"])]
 
 
-class ReduceMeanChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class ReduceMeanChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
@@ -326,9 +216,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class ReduceSumChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [operatorRepresentation['axisLength'] * 2**(self.input_types[0].referencedType.typeWidth)]
@@ -343,9 +230,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class ReluChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs, operatorRepresentation):
         return [2**(self.input_types[0].referencedType.typeWidth)]
 
@@ -354,14 +238,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
         return [False]
 
 
-class SoftmaxChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class SoftmaxChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
@@ -370,9 +247,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class iNoNormChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [2**(4 * self.input_types[0].referencedType.typeWidth)]
@@ -385,14 +259,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
             return [False]
 
 
-class GELUChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class GELUChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
@@ -404,9 +271,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class HardswishChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [2**(4 * self.input_types[0].referencedType.typeWidth)]
@@ -419,31 +283,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
             return [False]
 
 
-class RQHardswishChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
-
-    def _inferSignedness(self, inputs: List[VariableBuffer],
-                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
-        if inputs[0]._signed:
-            return [True]
-        else:
-            return [False]
-
-
-class MaxPoolChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [inputs[0].nLevels]
+class RQHardswishChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
@@ -455,9 +295,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class ConvChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         weight = inputs[1]
@@ -476,9 +313,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class RequantShiftChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [operatorRepresentation['n_levels']]
@@ -488,38 +322,8 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
         return [operatorRepresentation["signed"]]
 
 
-class DummyChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
-
-
-class DebugPrintChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [inputs[0].nLevels]
-
-    def _inferSignedness(self, inputs: List[VariableBuffer],
-                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
-        if inputs[0]._signed:
-            return [True]
-        else:
-            return [False]
-
-
 class RQAddChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [operatorRepresentation['rqsOut_n_levels']]
@@ -540,9 +344,6 @@ def checkOutputType(self, inputs: List[VariableBuffer], operatorRepresentation:
 
 class QuantChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         # Calculate number of levels based on bit_width
@@ -557,9 +358,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class DequantChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[int]:
         return [2**(self.output_types[0].referencedType.typeWidth)]
@@ -571,9 +369,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class SoftmaxCrossEntropyLossChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:
 
@@ -586,9 +381,6 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
 
 class SGDChecker(SignPropTypeChecker):
 
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
     def _inferNumLevels(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> Optional[List[int]]:
         return [2**(self.input_types[0].referencedType.typeWidth)]
@@ -598,14 +390,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
         return [True]
 
 
-class BatchNormChecker(SignPropTypeChecker):
-
-    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
-        super().__init__(input_types, output_types)
-
-    def _inferNumLevels(self, inputs: List[VariableBuffer],
-                        operatorRepresentation: OperatorRepresentation) -> List[int]:
-        return [2**(self.input_types[0].referencedType.typeWidth)]
+class BatchNormChecker(DummyChecker):
 
     def _inferSignedness(self, inputs: List[VariableBuffer],
                          operatorRepresentation: OperatorRepresentation) -> List[bool]:
diff --git a/DeeployTest/Tests/Kernels/FP32/Col2Im/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Col2Im/inputs.npz
new file mode 100644
index 0000000000..9c4058dbcb
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Col2Im/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Col2Im/network.onnx b/DeeployTest/Tests/Kernels/FP32/Col2Im/network.onnx
new file mode 100644
index 0000000000..fc03a38ccf
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Col2Im/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Col2Im/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Col2Im/outputs.npz
new file mode 100644
index 0000000000..e81b80e67f
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Col2Im/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/inputs.npz b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/inputs.npz
new file mode 100644
index 0000000000..f55ba85683
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/network.onnx b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/network.onnx
new file mode 100644
index 0000000000..c4158441d2
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/outputs.npz b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/outputs.npz
new file mode 100644
index 0000000000..532fbdaa5a
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_1D/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/inputs.npz b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/inputs.npz
new file mode 100644
index 0000000000..c46f130fc3
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/network.onnx b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/network.onnx
new file mode 100644
index 0000000000..d9bc804665
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/outputs.npz b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/outputs.npz
new file mode 100644
index 0000000000..fa872f8286
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ConvTranspose/Regular_2D/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Elu/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Elu/inputs.npz
new file mode 100644
index 0000000000..070beaf015
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Elu/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Elu/network.onnx b/DeeployTest/Tests/Kernels/FP32/Elu/network.onnx
new file mode 100644
index 0000000000..123fab1153
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Elu/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Elu/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Elu/outputs.npz
new file mode 100644
index 0000000000..223e5bfc85
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Elu/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/LeakyRelu/inputs.npz b/DeeployTest/Tests/Kernels/FP32/LeakyRelu/inputs.npz
new file mode 100644
index 0000000000..1ae95b34f8
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/LeakyRelu/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/LeakyRelu/network.onnx b/DeeployTest/Tests/Kernels/FP32/LeakyRelu/network.onnx
new file mode 100644
index 0000000000..038a4c7562
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/LeakyRelu/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/LeakyRelu/outputs.npz b/DeeployTest/Tests/Kernels/FP32/LeakyRelu/outputs.npz
new file mode 100644
index 0000000000..3cf5e869a7
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/LeakyRelu/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Resize/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Resize/inputs.npz
new file mode 100644
index 0000000000..12d2c04d5d
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Resize/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Resize/network.onnx b/DeeployTest/Tests/Kernels/FP32/Resize/network.onnx
new file mode 100644
index 0000000000..569b3cf20a
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Resize/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Resize/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Resize/outputs.npz
new file mode 100644
index 0000000000..97ca7332b7
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Resize/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ScatterElements/inputs.npz b/DeeployTest/Tests/Kernels/FP32/ScatterElements/inputs.npz
new file mode 100644
index 0000000000..2efbcd30c9
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ScatterElements/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ScatterElements/network.onnx b/DeeployTest/Tests/Kernels/FP32/ScatterElements/network.onnx
new file mode 100644
index 0000000000..17031f8c19
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ScatterElements/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/ScatterElements/outputs.npz b/DeeployTest/Tests/Kernels/FP32/ScatterElements/outputs.npz
new file mode 100644
index 0000000000..c7dae2412b
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/ScatterElements/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Selu/inputs.npz b/DeeployTest/Tests/Kernels/FP32/Selu/inputs.npz
new file mode 100644
index 0000000000..4565d80b6c
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Selu/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Selu/network.onnx b/DeeployTest/Tests/Kernels/FP32/Selu/network.onnx
new file mode 100644
index 0000000000..55a951a513
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Selu/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/FP32/Selu/outputs.npz b/DeeployTest/Tests/Kernels/FP32/Selu/outputs.npz
new file mode 100644
index 0000000000..0811fb34d1
Binary files /dev/null and b/DeeployTest/Tests/Kernels/FP32/Selu/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Col2Im/inputs.npz b/DeeployTest/Tests/Kernels/Integer/Col2Im/inputs.npz
new file mode 100644
index 0000000000..5cccf345b2
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Col2Im/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Col2Im/network.onnx b/DeeployTest/Tests/Kernels/Integer/Col2Im/network.onnx
new file mode 100644
index 0000000000..9e31c06d27
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Col2Im/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Col2Im/outputs.npz b/DeeployTest/Tests/Kernels/Integer/Col2Im/outputs.npz
new file mode 100644
index 0000000000..a36d20ef99
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Col2Im/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Resize/inputs.npz b/DeeployTest/Tests/Kernels/Integer/Resize/inputs.npz
new file mode 100644
index 0000000000..9076dba614
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Resize/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Resize/network.onnx b/DeeployTest/Tests/Kernels/Integer/Resize/network.onnx
new file mode 100644
index 0000000000..fab06b2c64
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Resize/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/Integer/Resize/outputs.npz b/DeeployTest/Tests/Kernels/Integer/Resize/outputs.npz
new file mode 100644
index 0000000000..9ee4676afc
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/Resize/outputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/ScatterElements/inputs.npz b/DeeployTest/Tests/Kernels/Integer/ScatterElements/inputs.npz
new file mode 100644
index 0000000000..c034759e7c
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/ScatterElements/inputs.npz differ
diff --git a/DeeployTest/Tests/Kernels/Integer/ScatterElements/network.onnx b/DeeployTest/Tests/Kernels/Integer/ScatterElements/network.onnx
new file mode 100644
index 0000000000..77eca65237
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/ScatterElements/network.onnx differ
diff --git a/DeeployTest/Tests/Kernels/Integer/ScatterElements/outputs.npz b/DeeployTest/Tests/Kernels/Integer/ScatterElements/outputs.npz
new file mode 100644
index 0000000000..41dbf1edde
Binary files /dev/null and b/DeeployTest/Tests/Kernels/Integer/ScatterElements/outputs.npz differ
diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py
index eaea3d6400..abe14bfff0 100644
--- a/DeeployTest/test_generic_config.py
+++ b/DeeployTest/test_generic_config.py
@@ -12,6 +12,9 @@
     "Kernels/FP32/AveragePool/Regular_2D",
     "Kernels/FP32/Ceil",
     "Kernels/FP32/Clip",
+    "Kernels/FP32/Col2Im",
+    "Kernels/FP32/ConvTranspose/Regular_1D",
+    "Kernels/FP32/ConvTranspose/Regular_2D",
     "Kernels/FP32/Conv/DW_2D_Bias",
     "Kernels/FP32/Conv/DW_2D_NoBias",
     "Kernels/FP32/Conv/DW_2D_ZeroValuedBias",
@@ -19,6 +22,7 @@
     "Kernels/FP32/Conv/Regular_2D_NoBias",
     "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias",
     "Kernels/FP32/Div",
+    "Kernels/FP32/Elu",
     "Kernels/FP32/Exp",
     "Kernels/FP32/Floor",
     "Kernels/FP32/GEMM/Regular",
@@ -33,6 +37,7 @@
     "Kernels/FP32/MaxPool/Regular_2D",
     "Kernels/FP32/Mul",
     "Kernels/FP32/LayerNorm",
+    "Kernels/FP32/LeakyRelu",
     "Kernels/FP32/RMSNorm",
     "Kernels/FP32/Pow/Scalar",
     "Kernels/FP32/Pow/Vector",
@@ -55,6 +60,9 @@
     "Kernels/FP32/ReduceMean/NoKeepDims/Axis2",
     "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add",
     "Kernels/FP32/Reshape/SkipConnection",
+    "Kernels/FP32/Resize",
+    "Kernels/FP32/ScatterElements",
+    "Kernels/FP32/Selu",
     "Kernels/FP32/Sigmoid",
     "Kernels/FP32/Sqrt",
     "Kernels/FP32/Sub",
@@ -64,6 +72,7 @@
     "Kernels/Integer/Softmax/Regular",
     "Kernels/Integer/Add/MultIO",
     "Kernels/Integer/Add/Regular",
+    "Kernels/Integer/Col2Im",
     "Kernels/Integer/Conv/DW_1D",
     "Kernels/Integer/Conv/Regular_1D",
     "Kernels/Integer/Conv/DW_2D",
@@ -77,6 +86,8 @@
     "Kernels/Integer/Pad/Regular_2D",
     "Kernels/Integer/ReduceMean",
     "Kernels/Integer/ReduceSum",
+    "Kernels/Integer/Resize",
+    "Kernels/Integer/ScatterElements",
     "Kernels/Integer/Slice",
     "Kernels/Integer/Sub",
     # Special test from TinyViT model layers
diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h
index 2023b9e725..36c6e4cd74 100644
--- a/TargetLibraries/Generic/inc/DeeployBasicMath.h
+++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h
@@ -36,10 +36,12 @@
 #include "kernel/BatchNorm.h"
 #include "kernel/Ceil.h"
 #include "kernel/Clip.h"
-#include "kernel/ConvTranspose1d_fp32.h"
+#include "kernel/Col2Im.h"
+#include "kernel/ConvTranspose_fp32.h"
 #include "kernel/Convolution.h"
 #include "kernel/DWConvolution.h"
 #include "kernel/Div.h"
+#include "kernel/Elu.h"
 #include "kernel/Exp.h"
 #include "kernel/Floor.h"
 #include "kernel/GELU.h"
@@ -51,6 +53,7 @@
 #include "kernel/HardSwish.h"
 #include "kernel/InstanceNorm.h"
 #include "kernel/Layernorm.h"
+#include "kernel/LeakyRelu.h"
 #include "kernel/MatMul.h"
 #include "kernel/MaxPool.h"
 #include "kernel/Pow.h"
@@ -60,6 +63,9 @@
 #include "kernel/RQHardswish.h"
 #include "kernel/Relu.h"
 #include "kernel/RequantShift.h"
+#include "kernel/Resize.h"
+#include "kernel/Scatter.h"
+#include "kernel/Selu.h"
 #include "kernel/Sigmoid.h"
 #include "kernel/Softmax.h"
 #include "kernel/Sqrt.h"
diff --git a/TargetLibraries/Generic/inc/kernel/Col2Im.h b/TargetLibraries/Generic/inc/kernel/Col2Im.h
new file mode 100644
index 0000000000..31e31e3180
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Col2Im.h
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef __DEEPLOY_BASIC_MATH_COL2IM_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_COL2IM_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/******************************************************************************/
+/*                               Col2Im                                       */
+/******************************************************************************/
+
+/* Maximum supported number of spatial dimensions. */
+#define COL2IM_MAX_SPATIAL_DIMS 4
+
+/*
+ * DECLARE_COL2IM_FN(SUFFIX, DATA_TYPE)
+ *
+ * Emits a forward declaration for Col2Im_<SUFFIX>.
+ * The matching definition lives in Col2Im.c via DEFINE_COL2IM_FN.
+ *
+ * Implements ONNX Col2Im semantics:
+ *   input  : (N, C * prod(block_shape), L)  — column matrix
+ *   output : (N, C, image_shape[0], ..., image_shape[P-1])
+ *
+ * For each kernel position bk and output block ob the contribution is
+ * accumulated into the corresponding image location (with bounds checking
+ * to handle padding).  The output is zero-initialised before accumulation.
+ *
+ * pads layout: [p_0_begin, ..., p_{P-1}_begin, p_0_end, ..., p_{P-1}_end]
+ */
+#define DECLARE_COL2IM_FN(SUFFIX, DATA_TYPE)                                   \
+  void Col2Im_##SUFFIX(const DATA_TYPE *input, DATA_TYPE *output, int32_t N,   \
+                       int32_t C, int32_t spatial_dims,                        \
+                       const int32_t *image_shape, const int32_t *block_shape, \
+                       const int32_t *dilations, const int32_t *pads,          \
+                       const int32_t *strides)
+
+DECLARE_COL2IM_FN(fp32, float32_t);
+DECLARE_COL2IM_FN(s8, int8_t);
+DECLARE_COL2IM_FN(u8, uint8_t);
+
+#endif //__DEEPLOY_BASIC_MATH_COL2IM_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/ConvTranspose1d_fp32.h b/TargetLibraries/Generic/inc/kernel/ConvTranspose1d_fp32.h
deleted file mode 100644
index 40ef065992..0000000000
--- a/TargetLibraries/Generic/inc/kernel/ConvTranspose1d_fp32.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#ifndef CONV_TRANSPOSE1D_FP32_H
-#define CONV_TRANSPOSE1D_FP32_H
-
-#include <stdbool.h>
-#include <stdint.h>
-
-void ConvTranspose1d_fp32(const float32_t *input, uint32_t C_in, uint32_t W_in,
-                          const float32_t *weight, uint32_t C_out, uint32_t K,
-                          uint32_t stride, const float32_t *bias, bool has_bias,
-                          float32_t *output, uint32_t W_out);
-
-#endif // CONV_TRANSPOSE1D_FP32_H
diff --git a/TargetLibraries/Generic/inc/kernel/ConvTranspose_fp32.h b/TargetLibraries/Generic/inc/kernel/ConvTranspose_fp32.h
new file mode 100644
index 0000000000..7ff06f171e
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/ConvTranspose_fp32.h
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef CONV_TRANSPOSE_FP32_H
+#define CONV_TRANSPOSE_FP32_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+void ConvTranspose1d_fp32(const float32_t *input, uint32_t C_in, uint32_t L_in,
+                          const float32_t *weight, uint32_t C_out, uint32_t K,
+                          uint32_t stride, const float32_t *bias, bool has_bias,
+                          float32_t *output, uint32_t L_out);
+
+void ConvTranspose2d_fp32(const float32_t *input, uint32_t C_in, uint32_t H_in,
+                          uint32_t W_in, const float32_t *weight,
+                          uint32_t C_out, uint32_t kH, uint32_t kW,
+                          uint32_t stride_h, uint32_t stride_w,
+                          const float32_t *bias, bool has_bias,
+                          float32_t *output, uint32_t H_out, uint32_t W_out);
+
+#endif // CONV_TRANSPOSE_FP32_H
diff --git a/TargetLibraries/Generic/inc/kernel/Elu.h b/TargetLibraries/Generic/inc/kernel/Elu.h
new file mode 100644
index 0000000000..ac6d03c4ee
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Elu.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_ELU_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_ELU_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/*
+ * Element-wise Exponential Linear Unit (ELU) function.
+ */
+
+/******************************************************************************/
+/*                              Elu                                          */
+/******************************************************************************/
+void Elu_fp32_fp32(const float32_t *data_in, float32_t *data_out, int32_t size,
+                   float32_t alpha);
+
+#endif //__DEEPLOY_BASIC_MATH_ELU_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/LeakyRelu.h b/TargetLibraries/Generic/inc/kernel/LeakyRelu.h
new file mode 100644
index 0000000000..daa096c2a9
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/LeakyRelu.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_LEAKYRELU_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_LEAKYRELU_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/*
+ * Element-wise LeakyRelu function.
+ */
+
+/******************************************************************************/
+/*                          LeakyRelu                                         */
+/******************************************************************************/
+void LeakyRelu_fp32_fp32(const float32_t *data_in, float32_t *data_out,
+                         int32_t size, float32_t alpha);
+
+#endif //__DEEPLOY_BASIC_MATH_LEAKYRELU_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/Resize.h b/TargetLibraries/Generic/inc/kernel/Resize.h
new file mode 100644
index 0000000000..153831f6bc
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Resize.h
@@ -0,0 +1,67 @@
+// SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef __DEEPLOY_BASIC_MATH_RESIZE_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_RESIZE_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/* Maximum number of spatial dimensions (excludes batch N and channels C). */
+#define RESIZE_MAX_SPATIAL_DIMS 4
+
+/* Element type — passed as a compile-time constant from generated code. */
+typedef enum {
+  RESIZE_TYPE_FLOAT32 = 0,
+  RESIZE_TYPE_INT8,
+  RESIZE_TYPE_UINT8,
+  RESIZE_TYPE_INT16,
+  RESIZE_TYPE_UINT16,
+  RESIZE_TYPE_INT32,
+  RESIZE_TYPE_UINT32,
+} resize_type_t;
+
+/* Interpolation mode (mirrors ONNX Resize `mode` attribute). */
+typedef enum {
+  RESIZE_MODE_NEAREST = 0,
+  RESIZE_MODE_LINEAR,
+  RESIZE_MODE_CUBIC,
+} resize_mode_t;
+
+/* Coordinate transformation mode. */
+typedef enum {
+  RESIZE_COORD_ASYMMETRIC = 0,
+  RESIZE_COORD_HALF_PIXEL,
+  RESIZE_COORD_HALF_PIXEL_SYMMETRIC,
+  RESIZE_COORD_PYTORCH_HALF_PIXEL,
+  RESIZE_COORD_ALIGN_CORNERS,
+  RESIZE_COORD_TF_CROP_AND_RESIZE,
+} resize_coord_mode_t;
+
+/* Nearest-neighbour rounding mode. */
+typedef enum {
+  RESIZE_NEAREST_FLOOR = 0,
+  RESIZE_NEAREST_CEIL,
+  RESIZE_NEAREST_ROUND_PREFER_FLOOR,
+  RESIZE_NEAREST_ROUND_PREFER_CEIL,
+} resize_nearest_mode_t;
+
+/*
+ * Resize — single function for all element types.
+ *
+ *   input / output – NCHW tensors (void* to stay type-agnostic)
+ *   type_tag       – element type; drives element size and float conversion
+ *   N, C           – batch size and number of channels
+ *   spatial_dims   – number of spatial dimensions (1..RESIZE_MAX_SPATIAL_DIMS)
+ *   input_shape    – spatial sizes of the input  [d0, d1, …]
+ *   output_shape   – spatial sizes of the output [d0, d1, …]
+ *   mode           – interpolation mode
+ *   coord_mode     – coordinate transformation mode
+ *   nearest_mode   – rounding mode (only used when mode == RESIZE_MODE_NEAREST)
+ */
+void Resize(const void *input, void *output, resize_type_t type_tag, int32_t N,
+            int32_t C, int32_t spatial_dims, const int32_t *input_shape,
+            const int32_t *output_shape, resize_mode_t mode,
+            resize_coord_mode_t coord_mode, resize_nearest_mode_t nearest_mode);
+
+#endif // __DEEPLOY_BASIC_MATH_RESIZE_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/Scatter.h b/TargetLibraries/Generic/inc/kernel/Scatter.h
new file mode 100644
index 0000000000..8f4d2a41ca
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Scatter.h
@@ -0,0 +1,45 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_SCATTER_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_SCATTER_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/******************************************************************************/
+/*                              Scatter                                       */
+/******************************************************************************/
+
+/* Maximum supported tensor rank. */
+#define SCATTER_MAX_NDIM 8
+
+/* Reduction modes (mirrors ONNX ScatterElements `reduction` attribute). */
+typedef enum {
+  SCATTER_REDUCTION_NONE = 0,
+  SCATTER_REDUCTION_ADD,
+  SCATTER_REDUCTION_MUL,
+  SCATTER_REDUCTION_MIN,
+  SCATTER_REDUCTION_MAX,
+} scatter_reduction_t;
+
+/*
+ * DECLARE_SCATTER_FN(SUFFIX, DATA_TYPE)
+ *
+ * Emits a forward declaration for Scatter_<SUFFIX>.
+ * The matching definition lives in Scatter.c via DEFINE_SCATTER_FN.
+ */
+#define DECLARE_SCATTER_FN(SUFFIX, DATA_TYPE)                                  \
+  void Scatter_##SUFFIX(const DATA_TYPE *data, const int32_t *indices,         \
+                        const DATA_TYPE *updates, DATA_TYPE *output,           \
+                        int32_t ndim, const int32_t *data_shape,               \
+                        const int32_t *indices_shape, int32_t axis,            \
+                        scatter_reduction_t reduction)
+
+DECLARE_SCATTER_FN(fp32, float32_t);
+DECLARE_SCATTER_FN(s8, int8_t);
+DECLARE_SCATTER_FN(u8, uint8_t);
+
+#endif //__DEEPLOY_BASIC_MATH_SCATTER_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/inc/kernel/Selu.h b/TargetLibraries/Generic/inc/kernel/Selu.h
new file mode 100644
index 0000000000..225ec75df8
--- /dev/null
+++ b/TargetLibraries/Generic/inc/kernel/Selu.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __DEEPLOY_BASIC_MATH_SELU_KERNEL_HEADER_
+#define __DEEPLOY_BASIC_MATH_SELU_KERNEL_HEADER_
+
+#include "DeeployBasicMath.h"
+
+/*
+ * Element-wise Scaled Exponential Linear Unit (SELU) function.
+ */
+
+/******************************************************************************/
+/*                             Selu                                          */
+/******************************************************************************/
+void Selu_fp32_fp32(const float32_t *input, float32_t *output, int32_t size,
+                    float32_t alpha, float32_t gamma);
+
+#endif //__DEEPLOY_BASIC_MATH_SELU_KERNEL_HEADER_
diff --git a/TargetLibraries/Generic/src/Col2Im.c b/TargetLibraries/Generic/src/Col2Im.c
new file mode 100644
index 0000000000..94e715e620
--- /dev/null
+++ b/TargetLibraries/Generic/src/Col2Im.c
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "DeeployBasicMath.h"
+
+// clang-format off
+#define DEFINE_COL2IM_FN(SUFFIX, DATA_TYPE)                                    \
+  DECLARE_COL2IM_FN(SUFFIX, DATA_TYPE) {                                       \
+    if (spatial_dims < 1 || spatial_dims > COL2IM_MAX_SPATIAL_DIMS ||          \
+        N <= 0 || C <= 0)                                                      \
+      return;                                                                  \
+    for (int32_t p = 0; p < spatial_dims; p++) {                               \
+      if (image_shape[p] <= 0 || block_shape[p] <= 0 || strides[p] <= 0 ||     \
+          dilations[p] <= 0 || pads[p] < 0 || pads[p + spatial_dims] < 0)      \
+        return;                                                                \
+    }                                                                          \
+    /* Compute per-dim sliding-window sizes, L, block_volume, image_volume. */ \
+    int32_t col_dims[COL2IM_MAX_SPATIAL_DIMS];                                 \
+    int32_t col_strides[COL2IM_MAX_SPATIAL_DIMS];                              \
+    int32_t blk_strides[COL2IM_MAX_SPATIAL_DIMS];                              \
+    int32_t img_strides[COL2IM_MAX_SPATIAL_DIMS];                              \
+    int32_t L = 1, block_volume = 1, image_volume = 1;                         \
+    for (int32_t p = 0; p < spatial_dims; p++) {                               \
+      int32_t span = image_shape[p] + pads[p] + pads[p + spatial_dims]         \
+                     - dilations[p] * (block_shape[p] - 1) - 1;                \
+      if (span < 0) return; /* block does not fit; '/' truncates toward 0 */   \
+      col_dims[p] = span / strides[p] + 1;                                     \
+      L            *= col_dims[p];                                             \
+      block_volume *= block_shape[p];                                          \
+      image_volume *= image_shape[p];                                          \
+    }                                                                          \
+    /* Row-major strides for flat-index decomposition. */                      \
+    col_strides[spatial_dims - 1] = 1;                                         \
+    blk_strides[spatial_dims - 1] = 1;                                         \
+    img_strides[spatial_dims - 1] = 1;                                         \
+    for (int32_t p = spatial_dims - 2; p >= 0; p--) {                          \
+      col_strides[p] = col_strides[p + 1] * col_dims[p + 1];                   \
+      blk_strides[p] = blk_strides[p + 1] * block_shape[p + 1];                \
+      img_strides[p] = img_strides[p + 1] * image_shape[p + 1];                \
+    }                                                                          \
+    /* Zero-initialise output. */                                              \
+    memset(output, 0, (size_t)(N * C * image_volume) * sizeof(DATA_TYPE));     \
+    /* Accumulate each column entry into its image position. */                \
+    for (int32_t n = 0; n < N; n++) {                                          \
+      for (int32_t c = 0; c < C; c++) {                                        \
+        for (int32_t bk = 0; bk < block_volume; bk++) {                        \
+          /* Decompose kernel flat index once per bk. */                       \
+          int32_t k_coords[COL2IM_MAX_SPATIAL_DIMS];                           \
+          int32_t bk_rem = bk;                                                 \
+          for (int32_t p = 0; p < spatial_dims; p++) {                         \
+            k_coords[p] = bk_rem / blk_strides[p];                             \
+            bk_rem     -= k_coords[p] * blk_strides[p];                        \
+          }                                                                    \
+          for (int32_t ob = 0; ob < L; ob++) {                                 \
+            /* Decompose output-block flat index and map to image coords. */   \
+            int32_t ob_rem = ob, img_flat = 0, in_bounds = 1;                  \
+            for (int32_t p = 0; p < spatial_dims; p++) {                       \
+              int32_t o_coord = ob_rem / col_strides[p];                       \
+              ob_rem -= o_coord * col_strides[p];                              \
+              int32_t h = o_coord * strides[p] - pads[p]                       \
+                          + k_coords[p] * dilations[p];                        \
+              if (h < 0 || h >= image_shape[p]) { in_bounds = 0; break; }      \
+              img_flat += h * img_strides[p];                                  \
+            }                                                                  \
+            if (in_bounds) {                                                   \
+              int32_t in_flat = (n * C * block_volume                          \
+                                 + c * block_volume + bk) * L + ob;            \
+              output[(n * C + c) * image_volume + img_flat] += input[in_flat]; \
+            }                                                                  \
+          }                                                                    \
+        }                                                                      \
+      }                                                                        \
+    }                                                                          \
+  }
+// clang-format on
+
+DEFINE_COL2IM_FN(fp32, float32_t)
+DEFINE_COL2IM_FN(s8, int8_t)
+DEFINE_COL2IM_FN(u8, uint8_t)
diff --git a/TargetLibraries/Generic/src/ConvTranspose1d_fp32.c b/TargetLibraries/Generic/src/ConvTranspose1d_fp32.c
deleted file mode 100644
index 362058734e..0000000000
--- a/TargetLibraries/Generic/src/ConvTranspose1d_fp32.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "DeeployBasicMath.h"
-
-void ConvTranspose1d_fp32(const float32_t *input, uint32_t C_in, uint32_t W_in,
-                          const float32_t *weight, uint32_t C_out, uint32_t K,
-                          uint32_t stride, const float32_t *bias, bool has_bias,
-                          float32_t *output, uint32_t W_out) {
-  /*
-  input:       [C_in, W_in]
-  weight:      [C_in, C_out, K]
-  output:      [C_out, W_out]
-  bias:        [C_out] optionally
-
-  */
-
-  // Output initialization
-  for (uint32_t c = 0; c < C_out; ++c) {
-    for (uint32_t w = 0; w < W_out; ++w) {
-      output[c * W_out + w] = 0.0f;
-    }
-  }
-
-  // For each output channel
-  for (uint32_t cout = 0; cout < C_out; ++cout) {
-    // For each input channel
-    for (uint32_t cin = 0; cin < C_in; ++cin) {
-      // For each input width
-      for (uint32_t w_in = 0; w_in < W_in; ++w_in) {
-        float32_t val = input[cin * W_in + w_in];
-        // Transposed convolution: output width is calculated based on stride
-        for (uint32_t k = 0; k < K; ++k) {
-          uint32_t w_out = w_in * stride + k;
-          if (w_out < W_out) {
-            // weight indexing: weight[cin, cout, k]
-            float32_t wgt = weight[cin * (C_out * K) + cout * K + k];
-            output[cout * W_out + w_out] += val * wgt;
-          }
-        }
-      }
-    }
-    if (has_bias) {
-      for (uint32_t w = 0; w < W_out; ++w) {
-        output[cout * W_out + w] += bias[cout];
-      }
-    }
-  }
-}
diff --git a/TargetLibraries/Generic/src/ConvTranspose_fp32.c b/TargetLibraries/Generic/src/ConvTranspose_fp32.c
new file mode 100644
index 0000000000..a64dcdb582
--- /dev/null
+++ b/TargetLibraries/Generic/src/ConvTranspose_fp32.c
@@ -0,0 +1,82 @@
+// SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "DeeployBasicMath.h"
+
+/* Single-sample 1-D transposed convolution without padding or dilation.
+ * input [C_in, L_in], weight [C_in, C_out, K], output [C_out, L_out] and the
+ * optional bias [C_out] (read only when has_bias is true) are dense row-major.
+ * Input element l_in contributes to outputs l_in * stride + k for k in [0, K);
+ * taps that would land at or beyond L_out are dropped instead of written. */
+void ConvTranspose1d_fp32(const float32_t *input, uint32_t C_in, uint32_t L_in,
+                          const float32_t *weight, uint32_t C_out, uint32_t K,
+                          uint32_t stride, const float32_t *bias, bool has_bias,
+                          float32_t *output, uint32_t L_out) {
+  for (uint32_t cout = 0; cout < C_out; ++cout) {
+    float32_t *out_row = output + cout * L_out;
+    // Seed the row with the bias (or zero); every tap below accumulates.
+    const float32_t b = has_bias ? bias[cout] : 0.0f;
+    for (uint32_t l = 0; l < L_out; ++l)
+      out_row[l] = b;
+
+    for (uint32_t cin = 0; cin < C_in; ++cin) {
+      const float32_t *in_row = input + cin * L_in;
+      const float32_t *wgt_row = weight + cin * (C_out * K) + cout * K;
+      for (uint32_t l_in = 0; l_in < L_in; ++l_in) {
+        const uint32_t base = l_in * stride;
+        if (base >= L_out)
+          break; // base is non-decreasing in l_in: nothing further fits
+        // Clip the kernel so no tap is written past the end of the row.
+        const uint32_t k_end = (K < L_out - base) ? K : L_out - base;
+        const float32_t val = in_row[l_in];
+        for (uint32_t k = 0; k < k_end; ++k)
+          out_row[base + k] += val * wgt_row[k];
+      }
+    }
+  }
+}
+
+/* Single-sample 2-D transposed convolution without padding or dilation.
+ * input [C_in, H_in, W_in], weight [C_in, C_out, kH, kW], output
+ * [C_out, H_out, W_out] and the optional bias [C_out] (read only when has_bias
+ * is true) are dense row-major. Taps that would land outside the
+ * H_out x W_out output plane are dropped instead of written. */
+void ConvTranspose2d_fp32(const float32_t *input, uint32_t C_in, uint32_t H_in,
+                          uint32_t W_in, const float32_t *weight,
+                          uint32_t C_out, uint32_t kH, uint32_t kW,
+                          uint32_t stride_h, uint32_t stride_w,
+                          const float32_t *bias, bool has_bias,
+                          float32_t *output, uint32_t H_out, uint32_t W_out) {
+  for (uint32_t cout = 0; cout < C_out; ++cout) {
+    float32_t *out_ch = output + cout * H_out * W_out;
+    const float32_t b = has_bias ? bias[cout] : 0.0f; // accumulation seed
+    for (uint32_t i = 0; i < H_out * W_out; ++i)
+      out_ch[i] = b;
+    for (uint32_t cin = 0; cin < C_in; ++cin) {
+      const float32_t *in_ch = input + cin * H_in * W_in;
+      const float32_t *wgt_ch =
+          weight + cin * (C_out * kH * kW) + cout * (kH * kW);
+      for (uint32_t h_in = 0; h_in < H_in; ++h_in) {
+        const float32_t *in_row = in_ch + h_in * W_in;
+        const uint32_t h_base = h_in * stride_h;
+        if (h_base >= H_out)
+          break; // h_base is non-decreasing in h_in: nothing further fits
+        const uint32_t kh_end = (kH < H_out - h_base) ? kH : H_out - h_base;
+        for (uint32_t w_in = 0; w_in < W_in; ++w_in) {
+          const uint32_t w_base = w_in * stride_w;
+          if (w_base >= W_out)
+            break; // same argument along the width
+          const uint32_t kw_end = (kW < W_out - w_base) ? kW : W_out - w_base;
+          const float32_t val = in_row[w_in];
+          for (uint32_t kh = 0; kh < kh_end; ++kh) {
+            float32_t *out_row = out_ch + (h_base + kh) * W_out + w_base;
+            const float32_t *wgt_krow = wgt_ch + kh * kW;
+            for (uint32_t kw = 0; kw < kw_end; ++kw)
+              out_row[kw] += val * wgt_krow[kw];
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/TargetLibraries/Generic/src/Elu_fp32.c b/TargetLibraries/Generic/src/Elu_fp32.c
new file mode 100644
index 0000000000..c71a1c9282
--- /dev/null
+++ b/TargetLibraries/Generic/src/Elu_fp32.c
@@ -0,0 +1,20 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* Elementwise ELU over `size` values.
+ * Non-negative inputs pass through unchanged; every other input (negative, or
+ * NaN since it fails the >= 0 test) maps to alpha * (expf(x) - 1). Nothing is
+ * processed when size <= 0. */
+void Elu_fp32_fp32(const float32_t *input, float32_t *output, int32_t size,
+                   float32_t alpha) {
+  for (int32_t idx = 0; idx < size; ++idx) {
+    const float32_t x = input[idx];
+    output[idx] = (x >= 0) ? x : alpha * (expf(x) - 1.0f);
+  }
+}
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c
index 907de4bb90..6df133b8fe 100644
--- a/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c
+++ b/TargetLibraries/Generic/src/GlobalAveragePool_fp32.c
@@ -22,7 +22,7 @@ void GlobalAveragePool_fp32_fp32(float32_t const *__restrict__ src,
       for (uint32_t i = 0; i < spatial_size; ++i) {
         sum += x[i];
       }
-      dst[n * C + c] = sum / spatial_size;
+      dst[n * C + c] = sum / (float32_t)spatial_size;
     }
   }
 }
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c b/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c
index 209404494c..92164eaa72 100644
--- a/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c
+++ b/TargetLibraries/Generic/src/GlobalMaxPool_fp32.c
@@ -15,17 +15,13 @@ void GlobalMaxPool_fp32_fp32(float32_t const *__restrict__ src,
   }
   for (uint32_t n = 0; n < N; n++) {
     for (uint32_t c = 0; c < C; c++) {
-
-      float32_t sum = 0.0f;
       const float32_t *x = src + (n * C + c) * spatial_size;
-
       float32_t max = x[0];
       for (uint32_t i = 1; i < spatial_size; i++) {
         if (x[i] > max) {
           max = x[i];
         }
       }
-
       dst[n * C + c] = max;
     }
   }
diff --git a/TargetLibraries/Generic/src/HardSwish_fp32.c b/TargetLibraries/Generic/src/HardSwish_fp32.c
index 4776586fff..632123bc98 100644
--- a/TargetLibraries/Generic/src/HardSwish_fp32.c
+++ b/TargetLibraries/Generic/src/HardSwish_fp32.c
@@ -11,6 +11,6 @@ void HardSwish_fp32_fp32(float32_t *data_in, float32_t *data_out,
                          int32_t size) {
   for (int i = 0; i < size; i++) {
     float32_t x = data_in[i];
-    data_out[i] = x * fmaxf(0, fminf(1, x / 6 + 0.5));
+    data_out[i] = x * fmaxf(0, fminf(1, x / 6.0f + 0.5f));
   }
 }
diff --git a/TargetLibraries/Generic/src/Layernorm_fp32.c b/TargetLibraries/Generic/src/Layernorm_fp32.c
index fb68df8dfe..b0eec7d8df 100644
--- a/TargetLibraries/Generic/src/Layernorm_fp32.c
+++ b/TargetLibraries/Generic/src/Layernorm_fp32.c
@@ -42,7 +42,7 @@ void LayernormGrad_fp32_fp32(float32_t *grad_in, float32_t *data_in,
                              float32_t *bias, float32_t epsilon, int32_t size,
                              int32_t lastDimLength) {
   float32_t mean, variance, std, inv_std;
-  float32_t sum_dy, sum_dy_scaled, sum_dy_scaled_centered;
+  float32_t sum_dy, sum_dy_scaled;
   float32_t centered_input;
 
   for (int i = 0; i < (size / lastDimLength); i++) {
@@ -53,26 +53,26 @@ void LayernormGrad_fp32_fp32(float32_t *grad_in, float32_t *data_in,
     for (int j = 0; j < lastDimLength; j++) {
       mean += data_in[j + i * lastDimLength];
     }
-    mean = mean / lastDimLength;
+    mean = mean / (float32_t)lastDimLength;
 
     for (int j = 0; j < lastDimLength; j++) {
       centered_input = data_in[j + i * lastDimLength] - mean;
       variance += centered_input * centered_input;
     }
-    variance = variance / lastDimLength;
+    variance = variance / (float32_t)lastDimLength;
     variance += epsilon;
     std = sqrtf(variance);
     inv_std = 1.0f / std;
 
     // RW: Step 2: Compute intermediate values needed for gradient calculation
     sum_dy = 0.0f;
-    sum_dy_scaled_centered = 0.0f;
+    sum_dy_scaled = 0.0f;
 
     // RW: Calculate sum(dy) and sum(dy * scale * (x - mean) / std)
     for (int j = 0; j < lastDimLength; j++) {
       sum_dy += grad_in[j + i * lastDimLength];
       centered_input = data_in[j + i * lastDimLength] - mean;
-      sum_dy_scaled_centered +=
+      sum_dy_scaled +=
           grad_in[j + i * lastDimLength] * scale[j] * centered_input * inv_std;
     }
 
@@ -85,9 +85,10 @@ void LayernormGrad_fp32_fp32(float32_t *grad_in, float32_t *data_in,
       // (x-mean)/(N*std^2)*sum(dy*scale*(x-mean)/std))
       grad_out[j + i * lastDimLength] =
           inv_std * scale[j] *
-          (grad_in[j + i * lastDimLength] - (sum_dy / lastDimLength) -
-           (centered_input * inv_std * inv_std / lastDimLength) *
-               sum_dy_scaled_centered);
+          (grad_in[j + i * lastDimLength] -
+           (sum_dy / (float32_t)lastDimLength) -
+           (centered_input * inv_std * inv_std / (float32_t)lastDimLength) *
+               sum_dy_scaled);
     }
   }
 }
diff --git a/TargetLibraries/Generic/src/LeakyRelu_fp32.c b/TargetLibraries/Generic/src/LeakyRelu_fp32.c
new file mode 100644
index 0000000000..3994b98937
--- /dev/null
+++ b/TargetLibraries/Generic/src/LeakyRelu_fp32.c
@@ -0,0 +1,19 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+
+/* Elementwise LeakyReLU over `size` values.
+ * Non-negative inputs pass through unchanged; every other input (negative, or
+ * NaN since it fails the >= 0 test) is multiplied by alpha. Nothing is
+ * processed when size <= 0. */
+void LeakyRelu_fp32_fp32(const float32_t *input, float32_t *output,
+                         int32_t size, float32_t alpha) {
+  for (int32_t idx = 0; idx < size; ++idx) {
+    const float32_t x = input[idx];
+    output[idx] = (x >= 0) ? x : alpha * x;
+  }
+}
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/Resize.c b/TargetLibraries/Generic/src/Resize.c
new file mode 100644
index 0000000000..e0b19791f3
--- /dev/null
+++ b/TargetLibraries/Generic/src/Resize.c
@@ -0,0 +1,257 @@
+// SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <math.h>
+
+#include "DeeployBasicMath.h"
+
+/* Size in bytes of one element with the given type tag. FLOAT32 and the
+ * 16-/32-bit integer tags are matched explicitly; every other tag (INT8,
+ * UINT8, or anything unrecognised) is sized as a single byte. */
+static inline uint32_t _resize_element_size(resize_type_t type_tag) {
+  if (type_tag == RESIZE_TYPE_FLOAT32)
+    return sizeof(float32_t);
+
+  if (type_tag == RESIZE_TYPE_INT16 || type_tag == RESIZE_TYPE_UINT16)
+    return sizeof(int16_t);
+
+  if (type_tag == RESIZE_TYPE_INT32 || type_tag == RESIZE_TYPE_UINT32)
+    return sizeof(int32_t);
+
+  return sizeof(int8_t);
+}
+
+/* Fetch element `idx` of `buf`, interpreted according to `type_tag`, and
+ * widen it to float32_t so the linear-interpolation path can do its
+ * arithmetic in floating point. */
+static inline float32_t _resize_read(const void *buf, int32_t idx,
+                                     resize_type_t type_tag) {
+  if (type_tag == RESIZE_TYPE_FLOAT32)
+    return ((const float32_t *)buf)[idx];
+  if (type_tag == RESIZE_TYPE_INT8)
+    return (float32_t)((const int8_t *)buf)[idx];
+  if (type_tag == RESIZE_TYPE_UINT8)
+    return (float32_t)((const uint8_t *)buf)[idx];
+  if (type_tag == RESIZE_TYPE_INT16)
+    return (float32_t)((const int16_t *)buf)[idx];
+  if (type_tag == RESIZE_TYPE_UINT16)
+    return (float32_t)((const uint16_t *)buf)[idx];
+  if (type_tag == RESIZE_TYPE_INT32)
+    return (float32_t)((const int32_t *)buf)[idx];
+  /* RESIZE_TYPE_UINT32, and the fallback for any tag not matched above. */
+  return (float32_t)((const uint32_t *)buf)[idx];
+}
+
+/* Round to the nearest integer, sending exact ties to the even neighbour
+ * (banker's rounding). This is the tie rule of numpy.round / the ONNX
+ * reference implementation. */
+static inline float32_t _round_half_to_even(float32_t val) {
+  const float32_t below = floorf(val);
+  const float32_t frac = val - below;
+  if (frac != 0.5f)
+    return (frac < 0.5f) ? below : below + 1.0f;
+  /* Exact tie: keep `below` when it is even, otherwise step up to the even
+   * integer directly above it. */
+  const int below_is_even = (fmodf(below, 2.0f) == 0.0f);
+  return below_is_even ? below : below + 1.0f;
+}
+
+/* Store `val` into element `idx` of `buf` using the element's native type.
+ * Float elements are stored as-is. Integer elements are rounded half-to-even
+ * and then saturated to the destination range: converting an out-of-range
+ * float to an integer is undefined behaviour in C, and e.g. INT32_MAX read
+ * back through float32 comes out as 2^31. */
+static inline void _resize_write(void *buf, int32_t idx, float32_t val,
+                                 resize_type_t type_tag) {
+  if (type_tag == RESIZE_TYPE_FLOAT32) {
+    ((float32_t *)buf)[idx] = val;
+    return;
+  }
+
+  /* The 32-bit upper bounds are the largest float32 values that still fit. */
+  const float32_t r = _round_half_to_even(val);
+  if (type_tag == RESIZE_TYPE_INT8)
+    ((int8_t *)buf)[idx] = (int8_t)CLAMP(r, -128.0f, 127.0f);
+  else if (type_tag == RESIZE_TYPE_UINT8)
+    ((uint8_t *)buf)[idx] = (uint8_t)CLAMP(r, 0.0f, 255.0f);
+  else if (type_tag == RESIZE_TYPE_INT16)
+    ((int16_t *)buf)[idx] = (int16_t)CLAMP(r, -32768.0f, 32767.0f);
+  else if (type_tag == RESIZE_TYPE_UINT16)
+    ((uint16_t *)buf)[idx] = (uint16_t)CLAMP(r, 0.0f, 65535.0f);
+  else if (type_tag == RESIZE_TYPE_INT32)
+    ((int32_t *)buf)[idx] = (int32_t)CLAMP(r, -2147483648.0f, 2147483520.0f);
+  else /* RESIZE_TYPE_UINT32 */
+    ((uint32_t *)buf)[idx] = (uint32_t)CLAMP(r, 0.0f, 4294967040.0f);
+}
+
+/* Translate output index `out_idx` along one axis into the (fractional) input
+ * coordinate it samples, for the given coordinate-transformation mode. The
+ * scale is derived from the sizes as out_size / in_size; modes without a
+ * dedicated branch fall back to the asymmetric mapping.
+ * NOTE(review): with that derived scale the half_pixel_symmetric adjustment
+ * equals 1 up to rounding, so the mode tracks half_pixel here - confirm. */
+static float32_t _resize_get_coord(int32_t out_idx, int32_t in_size,
+                                   int32_t out_size,
+                                   resize_coord_mode_t coord_mode) {
+  const float32_t scale = (float32_t)out_size / (float32_t)in_size;
+  const float32_t pos = (float32_t)out_idx;
+
+  if (coord_mode == RESIZE_COORD_HALF_PIXEL)
+    return (pos + 0.5f) / scale - 0.5f;
+  if (coord_mode == RESIZE_COORD_HALF_PIXEL_SYMMETRIC) {
+    const float32_t resized = floorf((float32_t)in_size + 0.5f) * scale;
+    const float32_t adjustment = (float32_t)out_size / resized;
+    return (pos + 0.5f) / scale * adjustment - 0.5f;
+  }
+
+  /* Both remaining special modes pin a single-element output to 0. */
+  if (coord_mode == RESIZE_COORD_ALIGN_CORNERS)
+    return (out_size == 1)
+               ? 0.0f
+               : pos * (float32_t)(in_size - 1) / (float32_t)(out_size - 1);
+  if (coord_mode == RESIZE_COORD_PYTORCH_HALF_PIXEL)
+    return (out_size == 1) ? 0.0f : (pos + 0.5f) / scale - 0.5f;
+
+  return pos / scale; /* RESIZE_COORD_ASYMMETRIC */
+}
+
+/* Pick the input index that source coordinate `x` snaps to under
+ * `nearest_mode`, then clamp it into [0, max_idx]. Modes without a dedicated
+ * branch are treated as RESIZE_NEAREST_FLOOR. */
+static int32_t _resize_nearest_idx(float32_t x, int32_t max_idx,
+                                   resize_nearest_mode_t nearest_mode) {
+  float32_t snapped;
+
+  if (nearest_mode == RESIZE_NEAREST_CEIL) {
+    snapped = ceilf(x);
+  } else if (nearest_mode == RESIZE_NEAREST_ROUND_PREFER_FLOOR) {
+    /* An exact n + 0.5 goes down to n; everything else rounds normally. */
+    const float32_t below = floorf(x);
+    snapped = (x - below == 0.5f) ? below : roundf(x);
+  } else if (nearest_mode == RESIZE_NEAREST_ROUND_PREFER_CEIL) {
+    snapped = roundf(x);
+  } else {
+    snapped = floorf(x);
+  }
+
+  const int32_t in_idx = (int32_t)snapped;
+  return CLAMP(in_idx, 0, max_idx);
+}
+
+/* N-dimensional resize of a tensor laid out as [N, C, spatial...], using
+ * nearest-neighbour copy or N-linear interpolation. input_shape and
+ * output_shape hold the spatial_dims spatial extents only. Returns without
+ * writing output when N or C is non-positive, spatial_dims is outside
+ * [1, RESIZE_MAX_SPATIAL_DIMS], or cubic / tf_crop_and_resize is requested. */
+void Resize(const void *input, void *output, resize_type_t type_tag, int32_t N,
+            int32_t C, int32_t spatial_dims, const int32_t *input_shape,
+            const int32_t *output_shape, resize_mode_t mode,
+            resize_coord_mode_t coord_mode,
+            resize_nearest_mode_t nearest_mode) {
+
+  if (N <= 0 || C <= 0 || spatial_dims < 1 ||
+      spatial_dims > RESIZE_MAX_SPATIAL_DIMS)
+    return;
+
+  /* Cubic mode and tf_crop_and_resize are not implemented. */
+  if (mode == RESIZE_MODE_CUBIC ||
+      coord_mode == RESIZE_COORD_TF_CROP_AND_RESIZE)
+    return;
+
+  uint32_t elem_size = _resize_element_size(type_tag);
+
+  /* Row-major spatial strides; L_in / L_out count elements per (n, c). */
+  int32_t out_strides[RESIZE_MAX_SPATIAL_DIMS];
+  int32_t in_strides[RESIZE_MAX_SPATIAL_DIMS];
+  int32_t L_out = 1, L_in = 1;
+  out_strides[spatial_dims - 1] = 1;
+  in_strides[spatial_dims - 1] = 1;
+  for (int32_t d = spatial_dims - 2; d >= 0; d--) {
+    out_strides[d] = out_strides[d + 1] * output_shape[d + 1];
+    in_strides[d] = in_strides[d + 1] * input_shape[d + 1];
+  }
+  for (int32_t d = 0; d < spatial_dims; d++) {
+    L_out *= output_shape[d];
+    L_in *= input_shape[d];
+  }
+
+  for (int32_t n = 0; n < N; n++) {
+    for (int32_t c = 0; c < C; c++) {
+      int32_t in_base = (n * C + c) * L_in;
+      int32_t out_base = (n * C + c) * L_out;
+
+      for (int32_t oi = 0; oi < L_out; oi++) {
+        int32_t rem = oi;
+
+        if (mode == RESIZE_MODE_NEAREST) {
+          /* Nearest-neighbour: map each spatial coord, then copy raw bytes. */
+          int32_t in_flat = 0;
+          for (int32_t d = 0; d < spatial_dims; d++) {
+            int32_t out_idx = rem / out_strides[d];
+            rem -= out_idx * out_strides[d];
+            float32_t x = _resize_get_coord(out_idx, input_shape[d],
+                                            output_shape[d], coord_mode);
+            in_flat +=
+                _resize_nearest_idx(x, input_shape[d] - 1, nearest_mode) *
+                in_strides[d];
+          }
+          memcpy((char *)output + (uint32_t)(out_base + oi) * elem_size,
+                 (const char *)input +
+                     (uint32_t)(in_base + in_flat) * elem_size,
+                 elem_size);
+
+        } else {
+
+          float32_t x_in[RESIZE_MAX_SPATIAL_DIMS]; // fractional input coord
+          int32_t lo[RESIZE_MAX_SPATIAL_DIMS];     // index lower than x_in
+          int32_t hi[RESIZE_MAX_SPATIAL_DIMS];     // index higher than x_in
+          float32_t w_lo[RESIZE_MAX_SPATIAL_DIMS]; // low interpolation weight
+          float32_t w_hi[RESIZE_MAX_SPATIAL_DIMS]; // high interpolation weight
+
+          /* Set up the N-linear interpolation. For each spatial dimension d:
+           * - peel the per-dimension output coordinate off the flat index oi
+           * - map it to a fractional input coordinate x_in (clamped to range)
+           * - find the bracketing indices lo/hi (hi == lo at the last index)
+           * - derive the weights: w_hi = x_in - lo, w_lo = 1 - w_hi */
+          for (int32_t d = 0; d < spatial_dims; d++) {
+            int32_t out_idx = rem / out_strides[d];
+            rem -= out_idx * out_strides[d]; // what is left indexes inner dims
+            x_in[d] = _resize_get_coord(out_idx, input_shape[d],
+                                        output_shape[d], coord_mode);
+            x_in[d] = CLAMP(x_in[d], 0.0f, (float32_t)(input_shape[d] - 1));
+            lo[d] = (int32_t)floorf(x_in[d]);
+            hi[d] = (lo[d] + 1 < input_shape[d]) ? lo[d] + 1 : lo[d];
+            w_hi[d] = x_in[d] - (float32_t)lo[d];
+            w_lo[d] = 1.0f - w_hi[d];
+          }
+
+          /* N-linear interpolation: weighted sum over the 2^spatial_dims
+           * corners; bit d of `corner` selects hi[d] (set) or lo[d] (clear).
+           * Example for spatial_dims = 2 (bilinear):
+           *   corner 0 (00) -> (lo[0], lo[1]), weight w_lo[0] * w_lo[1]
+           *   corner 1 (01) -> (hi[0], lo[1]), weight w_hi[0] * w_lo[1]
+           *   corner 2 (10) -> (lo[0], hi[1]), weight w_lo[0] * w_hi[1]
+           *   corner 3 (11) -> (hi[0], hi[1]), weight w_hi[0] * w_hi[1] */
+          float32_t result = 0.0f;
+          for (int32_t corner = 0, n_corners = 1 << spatial_dims;
+               corner < n_corners; corner++) {
+            float32_t weight = 1.0f;
+            int32_t in_flat = 0;
+            for (int32_t d = 0; d < spatial_dims; d++) {
+              if ((corner >> d) & 1) {
+                weight *= w_hi[d];
+                in_flat += hi[d] * in_strides[d];
+              } else {
+                weight *= w_lo[d];
+                in_flat += lo[d] * in_strides[d];
+              }
+            }
+            result += weight * _resize_read(input, in_base + in_flat, type_tag);
+          }
+          _resize_write(output, out_base + oi, result, type_tag);
+        }
+      }
+    }
+  }
+}
diff --git a/TargetLibraries/Generic/src/Scatter.c b/TargetLibraries/Generic/src/Scatter.c
new file mode 100644
index 0000000000..f7e5185013
--- /dev/null
+++ b/TargetLibraries/Generic/src/Scatter.c
@@ -0,0 +1,67 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+
+// clang-format off
+/* Scatter kernel body for DATA_TYPE: copies data to output, then applies each
+ * update along axis (negative indices wrap), combined as reduction selects. */
+#define DEFINE_SCATTER_FN(SUFFIX, DATA_TYPE)                                   \
+  DECLARE_SCATTER_FN(SUFFIX, DATA_TYPE) {                                      \
+    /* The stride arrays are fixed-size: reject ranks they cannot hold. */     \
+    if (ndim < 1 || ndim > SCATTER_MAX_NDIM) return;                           \
+    int32_t data_size = 1;                                                     \
+    int32_t indices_size = 1;                                                  \
+    for (int32_t dim = 0; dim < ndim; dim++) {                                 \
+      data_size *= data_shape[dim];                                            \
+      indices_size *= indices_shape[dim];                                      \
+    }                                                                          \
+    memcpy(output, data, (size_t)data_size * sizeof(DATA_TYPE));               \
+    int32_t stride_data[SCATTER_MAX_NDIM];                                     \
+    int32_t stride_idx[SCATTER_MAX_NDIM];                                      \
+    stride_data[ndim - 1] = 1;                                                 \
+    stride_idx[ndim - 1] = 1;                                                  \
+    for (int32_t dim = ndim - 2; dim >= 0; dim--) {                            \
+      stride_data[dim] = stride_data[dim + 1] * data_shape[dim + 1];           \
+      stride_idx[dim] = stride_idx[dim + 1] * indices_shape[dim + 1];          \
+    }                                                                          \
+    for (int32_t fi = 0; fi < indices_size; fi++) {                            \
+      int32_t out_idx = 0;                                                     \
+      int32_t rem = fi;                                                        \
+      for (int32_t dim = 0; dim < ndim; dim++) {                               \
+        int32_t coord = rem / stride_idx[dim];                                 \
+        rem -= coord * stride_idx[dim];                                        \
+        if (dim == axis) {                                                     \
+          coord = indices[fi];                                                 \
+          if (coord < 0) coord += data_shape[dim];                             \
+        }                                                                      \
+        out_idx += coord * stride_data[dim];                                   \
+      }                                                                        \
+      switch (reduction) {                                                     \
+      case SCATTER_REDUCTION_ADD:                                              \
+        output[out_idx] += updates[fi];                                        \
+        break;                                                                 \
+      case SCATTER_REDUCTION_MUL:                                              \
+        output[out_idx] *= updates[fi];                                        \
+        break;                                                                 \
+      case SCATTER_REDUCTION_MIN:                                              \
+        if (updates[fi] < output[out_idx])                                     \
+          output[out_idx] = updates[fi];                                       \
+        break;                                                                 \
+      case SCATTER_REDUCTION_MAX:                                              \
+        if (updates[fi] > output[out_idx])                                     \
+          output[out_idx] = updates[fi];                                       \
+        break;                                                                 \
+      default:                                                                 \
+        output[out_idx] = updates[fi]; break;                                  \
+      }                                                                        \
+    }                                                                          \
+  }
+// clang-format on
+
+DEFINE_SCATTER_FN(fp32, float32_t)
+DEFINE_SCATTER_FN(s8, int8_t)
+DEFINE_SCATTER_FN(u8, uint8_t)
\ No newline at end of file
diff --git a/TargetLibraries/Generic/src/Selu_fp32.c b/TargetLibraries/Generic/src/Selu_fp32.c
new file mode 100644
index 0000000000..ac120c7e55
--- /dev/null
+++ b/TargetLibraries/Generic/src/Selu_fp32.c
@@ -0,0 +1,20 @@
+/*
+ * SPDX-FileCopyrightText: 2026 ETH Zurich and University of Bologna
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "DeeployBasicMath.h"
+#include <math.h>
+
+/* Elementwise SELU over `size` values: output = gamma * f(x), where f(x) is
+ * alpha * (expf(x) - 1) for x < 0 and x itself otherwise (including NaN,
+ * which fails the x < 0 test). Nothing is processed when size <= 0. */
+void Selu_fp32_fp32(const float32_t *input, float32_t *output, int32_t size,
+                    float32_t alpha, float32_t gamma) {
+  for (int32_t idx = 0; idx < size; ++idx) {
+    const float32_t x = input[idx];
+    const float32_t activated = (x < 0) ? alpha * (expf(x) - 1.0f) : x;
+    output[idx] = gamma * activated;
+  }
+}
\ No newline at end of file