From 152a00254d5bc8f2e8efe93e076837bddf57f2e8 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Sun, 19 Apr 2026 16:08:02 -0700 Subject: [PATCH 1/9] [AURON #1745] Add per-version module scaffolding for Spark 3.1-4.1 Introduce empty test modules for Spark 3.1/3.2/3.4/3.5/4.0/4.1 alongside the existing spark33 module. Each module ships only a Maven pom and an empty AuronSparkTestSettings stub so that profile activation and the reflection lookup in common/SparkTestSettings both succeed. Per-area suites (Aggregate/Sort/Parquet/Functions/Expressions) will land in separate follow-up PRs tracked under #2170-#2174. --- auron-spark-tests/pom.xml | 36 +++++ auron-spark-tests/spark31/pom.xml | 146 ++++++++++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 25 +++ auron-spark-tests/spark32/pom.xml | 146 ++++++++++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 25 +++ auron-spark-tests/spark34/pom.xml | 146 ++++++++++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 25 +++ auron-spark-tests/spark35/pom.xml | 146 ++++++++++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 25 +++ auron-spark-tests/spark40/pom.xml | 146 ++++++++++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 25 +++ auron-spark-tests/spark41/pom.xml | 146 ++++++++++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 25 +++ 13 files changed, 1062 insertions(+) create mode 100644 auron-spark-tests/spark31/pom.xml create mode 100644 auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala create mode 100644 auron-spark-tests/spark32/pom.xml create mode 100644 auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala create mode 100644 auron-spark-tests/spark34/pom.xml create mode 100644 auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala create mode 100644 auron-spark-tests/spark35/pom.xml create mode 100644 
auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala create mode 100644 auron-spark-tests/spark40/pom.xml create mode 100644 auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala create mode 100644 auron-spark-tests/spark41/pom.xml create mode 100644 auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala diff --git a/auron-spark-tests/pom.xml b/auron-spark-tests/pom.xml index 75e88fdf8..795f888dc 100644 --- a/auron-spark-tests/pom.xml +++ b/auron-spark-tests/pom.xml @@ -82,11 +82,47 @@ + + spark-3.1 + + spark31 + + + + spark-3.2 + + spark32 + + spark-3.3 spark33 + + spark-3.4 + + spark34 + + + + spark-3.5 + + spark35 + + + + spark-4.0 + + spark40 + + + + spark-4.1 + + spark41 + + diff --git a/auron-spark-tests/spark31/pom.xml b/auron-spark-tests/spark31/pom.xml new file mode 100644 index 000000000..a697653f3 --- /dev/null +++ b/auron-spark-tests/spark31/pom.xml @@ -0,0 +1,146 @@ + + + + 4.0.0 + + + org.apache.auron + auron-spark-tests + ${project.version} + ../pom.xml + + + auron-spark-tests-spark31 + jar + Auron Spark Test for Spark 3.1 + + + + org.apache.auron + spark-extension_${scalaVersion} + ${project.version} + + + org.apache.auron + spark-extension-shims-spark_${scalaVersion} + ${project.version} + + + org.apache.auron + auron-spark-tests-common + ${project.version} + test-jar + + + net.bytebuddy + byte-buddy + + + net.bytebuddy + byte-buddy-agent + + + org.apache.arrow + arrow-memory-unsafe + + + org.apache.spark + spark-core_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-catalyst_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-sql_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-tags_${scalaVersion} + test-jar + test + + + org.scalatestplus + scalatestplus-scalacheck_${scalaVersion} + test + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + net.alchim31.maven + 
scala-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.scalatest + scalatest-maven-plugin + + . + + + + test + + test + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scalaVersion}/classes + target/scala-${scalaVersion}/test-classes + + diff --git a/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala new file mode 100644 index 000000000..052cca5d1 --- /dev/null +++ b/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.utils + +class AuronSparkTestSettings extends SparkTestSettings { + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { + override def getResourceFilePath: String = "" + override def getSupportedSQLQueryTests: Set[String] = Set.empty + override def getOverwriteSQLQueryTests: Set[String] = Set.empty + } +} diff --git a/auron-spark-tests/spark32/pom.xml b/auron-spark-tests/spark32/pom.xml new file mode 100644 index 000000000..cb56fafd0 --- /dev/null +++ b/auron-spark-tests/spark32/pom.xml @@ -0,0 +1,146 @@ + + + + 4.0.0 + + + org.apache.auron + auron-spark-tests + ${project.version} + ../pom.xml + + + auron-spark-tests-spark32 + jar + Auron Spark Test for Spark 3.2 + + + + org.apache.auron + spark-extension_${scalaVersion} + ${project.version} + + + org.apache.auron + spark-extension-shims-spark_${scalaVersion} + ${project.version} + + + org.apache.auron + auron-spark-tests-common + ${project.version} + test-jar + + + net.bytebuddy + byte-buddy + + + net.bytebuddy + byte-buddy-agent + + + org.apache.arrow + arrow-memory-unsafe + + + org.apache.spark + spark-core_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-catalyst_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-sql_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-tags_${scalaVersion} + test-jar + test + + + org.scalatestplus + scalatestplus-scalacheck_${scalaVersion} + test + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.scalatest + scalatest-maven-plugin + + . 
+ + + + test + + test + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scalaVersion}/classes + target/scala-${scalaVersion}/test-classes + + diff --git a/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala new file mode 100644 index 000000000..052cca5d1 --- /dev/null +++ b/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.utils + +class AuronSparkTestSettings extends SparkTestSettings { + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { + override def getResourceFilePath: String = "" + override def getSupportedSQLQueryTests: Set[String] = Set.empty + override def getOverwriteSQLQueryTests: Set[String] = Set.empty + } +} diff --git a/auron-spark-tests/spark34/pom.xml b/auron-spark-tests/spark34/pom.xml new file mode 100644 index 000000000..1f6ef00e9 --- /dev/null +++ b/auron-spark-tests/spark34/pom.xml @@ -0,0 +1,146 @@ + + + + 4.0.0 + + + org.apache.auron + auron-spark-tests + ${project.version} + ../pom.xml + + + auron-spark-tests-spark34 + jar + Auron Spark Test for Spark 3.4 + + + + org.apache.auron + spark-extension_${scalaVersion} + ${project.version} + + + org.apache.auron + spark-extension-shims-spark_${scalaVersion} + ${project.version} + + + org.apache.auron + auron-spark-tests-common + ${project.version} + test-jar + + + net.bytebuddy + byte-buddy + + + net.bytebuddy + byte-buddy-agent + + + org.apache.arrow + arrow-memory-unsafe + + + org.apache.spark + spark-core_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-catalyst_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-sql_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-tags_${scalaVersion} + test-jar + test + + + org.scalatestplus + scalatestplus-scalacheck_${scalaVersion} + test + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.scalatest + scalatest-maven-plugin + + . 
+ + + + test + + test + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scalaVersion}/classes + target/scala-${scalaVersion}/test-classes + + diff --git a/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala new file mode 100644 index 000000000..052cca5d1 --- /dev/null +++ b/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.utils + +class AuronSparkTestSettings extends SparkTestSettings { + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { + override def getResourceFilePath: String = "" + override def getSupportedSQLQueryTests: Set[String] = Set.empty + override def getOverwriteSQLQueryTests: Set[String] = Set.empty + } +} diff --git a/auron-spark-tests/spark35/pom.xml b/auron-spark-tests/spark35/pom.xml new file mode 100644 index 000000000..69f38c485 --- /dev/null +++ b/auron-spark-tests/spark35/pom.xml @@ -0,0 +1,146 @@ + + + + 4.0.0 + + + org.apache.auron + auron-spark-tests + ${project.version} + ../pom.xml + + + auron-spark-tests-spark35 + jar + Auron Spark Test for Spark 3.5 + + + + org.apache.auron + spark-extension_${scalaVersion} + ${project.version} + + + org.apache.auron + spark-extension-shims-spark_${scalaVersion} + ${project.version} + + + org.apache.auron + auron-spark-tests-common + ${project.version} + test-jar + + + net.bytebuddy + byte-buddy + + + net.bytebuddy + byte-buddy-agent + + + org.apache.arrow + arrow-memory-unsafe + + + org.apache.spark + spark-core_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-catalyst_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-sql_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-tags_${scalaVersion} + test-jar + test + + + org.scalatestplus + scalatestplus-scalacheck_${scalaVersion} + test + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.scalatest + scalatest-maven-plugin + + . 
+ + + + test + + test + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scalaVersion}/classes + target/scala-${scalaVersion}/test-classes + + diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala new file mode 100644 index 000000000..052cca5d1 --- /dev/null +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.utils + +class AuronSparkTestSettings extends SparkTestSettings { + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { + override def getResourceFilePath: String = "" + override def getSupportedSQLQueryTests: Set[String] = Set.empty + override def getOverwriteSQLQueryTests: Set[String] = Set.empty + } +} diff --git a/auron-spark-tests/spark40/pom.xml b/auron-spark-tests/spark40/pom.xml new file mode 100644 index 000000000..94d7fa287 --- /dev/null +++ b/auron-spark-tests/spark40/pom.xml @@ -0,0 +1,146 @@ + + + + 4.0.0 + + + org.apache.auron + auron-spark-tests + ${project.version} + ../pom.xml + + + auron-spark-tests-spark40 + jar + Auron Spark Test for Spark 4.0 + + + + org.apache.auron + spark-extension_${scalaVersion} + ${project.version} + + + org.apache.auron + spark-extension-shims-spark_${scalaVersion} + ${project.version} + + + org.apache.auron + auron-spark-tests-common + ${project.version} + test-jar + + + net.bytebuddy + byte-buddy + + + net.bytebuddy + byte-buddy-agent + + + org.apache.arrow + arrow-memory-unsafe + + + org.apache.spark + spark-core_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-catalyst_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-sql_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-tags_${scalaVersion} + test-jar + test + + + org.scalatestplus + scalatestplus-scalacheck_${scalaVersion} + test + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.scalatest + scalatest-maven-plugin + + . 
+ + + + test + + test + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scalaVersion}/classes + target/scala-${scalaVersion}/test-classes + + diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala new file mode 100644 index 000000000..052cca5d1 --- /dev/null +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.utils + +class AuronSparkTestSettings extends SparkTestSettings { + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { + override def getResourceFilePath: String = "" + override def getSupportedSQLQueryTests: Set[String] = Set.empty + override def getOverwriteSQLQueryTests: Set[String] = Set.empty + } +} diff --git a/auron-spark-tests/spark41/pom.xml b/auron-spark-tests/spark41/pom.xml new file mode 100644 index 000000000..7096cb309 --- /dev/null +++ b/auron-spark-tests/spark41/pom.xml @@ -0,0 +1,146 @@ + + + + 4.0.0 + + + org.apache.auron + auron-spark-tests + ${project.version} + ../pom.xml + + + auron-spark-tests-spark41 + jar + Auron Spark Test for Spark 4.1 + + + + org.apache.auron + spark-extension_${scalaVersion} + ${project.version} + + + org.apache.auron + spark-extension-shims-spark_${scalaVersion} + ${project.version} + + + org.apache.auron + auron-spark-tests-common + ${project.version} + test-jar + + + net.bytebuddy + byte-buddy + + + net.bytebuddy + byte-buddy-agent + + + org.apache.arrow + arrow-memory-unsafe + + + org.apache.spark + spark-core_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-catalyst_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-sql_${scalaVersion} + test-jar + test + + + org.apache.spark + spark-tags_${scalaVersion} + test-jar + test + + + org.scalatestplus + scalatestplus-scalacheck_${scalaVersion} + test + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.apache.maven.plugins + maven-checkstyle-plugin + + + org.scalatest + scalatest-maven-plugin + + . 
+ + + + test + + test + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + + test-jar + + test-compile + + + + + target/scala-${scalaVersion}/classes + target/scala-${scalaVersion}/test-classes + + diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala new file mode 100644 index 000000000..052cca5d1 --- /dev/null +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.utils + +class AuronSparkTestSettings extends SparkTestSettings { + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { + override def getResourceFilePath: String = "" + override def getSupportedSQLQueryTests: Set[String] = Set.empty + override def getOverwriteSQLQueryTests: Set[String] = Set.empty + } +} From ba7e4ee4e9313964af581a38ab87dbb42d19d7a6 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Wed, 22 Apr 2026 07:27:13 -0700 Subject: [PATCH 2/9] [AURON #2170] Add matrix CI workflow to run Spark correctness tests across all versions Wire up a new spark-tests.yml workflow that exercises the auron-spark-tests module for every supported Spark profile (3.1/3.2/3.3/3.4/3.5/4.0/4.1) using the JDK+Scala combos already validated in tpcds.yml. Build step installs the Auron extension + spark-tests modules with tests skipped, then a scoped `mvn test` targets only auron-spark-tests/common + the per-version submodule so the job does not redundantly re-run every other module's tests. --- .github/workflows/spark-tests.yml | 130 ++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 .github/workflows/spark-tests.yml diff --git a/.github/workflows/spark-tests.yml b/.github/workflows/spark-tests.yml new file mode 100644 index 000000000..8ca7b1c22 --- /dev/null +++ b/.github/workflows/spark-tests.yml @@ -0,0 +1,130 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Spark Correctness Tests + +on: + workflow_dispatch: + push: + branches: + - master + - branch-* + pull_request: + branches: + - master + - branch-* + +concurrency: + group: spark-tests-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + spark-correctness-tests: + name: Spark ${{ matrix.sparkver }} (JDK ${{ matrix.javaver }}, Scala ${{ matrix.scalaver }}) + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + include: + - sparkver: "3.1" + scalaver: "2.12" + javaver: "8" + - sparkver: "3.2" + scalaver: "2.12" + javaver: "8" + - sparkver: "3.3" + scalaver: "2.12" + javaver: "8" + - sparkver: "3.4" + scalaver: "2.12" + javaver: "11" + - sparkver: "3.5" + scalaver: "2.13" + javaver: "17" + - sparkver: "3.5" + scalaver: "2.13" + javaver: "21" + - sparkver: "4.0" + scalaver: "2.13" + javaver: "21" + - sparkver: "4.1" + scalaver: "2.13" + javaver: "21" + + steps: + - name: Checkout Auron + uses: actions/checkout@v6 + with: + submodules: recursive + + - name: Setup Java and Maven cache + uses: actions/setup-java@v5 + with: + distribution: 'adopt-hotspot' + java-version: ${{ matrix.javaver }} + cache: 'maven' + + - name: Setup protoc + uses: arduino/setup-protoc@v3 + with: + version: "21.7" + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Rust toolchain + uses: ./.github/actions/setup-rust-toolchain + with: + rustflags: --allow warnings -C target-feature=+aes + components: + cargo + + - name: Compute short spark version + id: short + run: | + SHORT="${{ matrix.sparkver }}" 
+ echo "sparkver=${SHORT//./}" >> "$GITHUB_OUTPUT" + + - name: Install Auron + spark-tests modules (skip tests) + run: | + rm -f .build-checksum_*.cache + ./auron-build.sh \ + --pre \ + --sparkver ${{ matrix.sparkver }} \ + --scalaver ${{ matrix.scalaver }} \ + --skiptests true \ + --sparktests true + + - name: Run Auron spark correctness tests + run: > + ./build/mvn -B test + -pl auron-spark-tests/common,auron-spark-tests/spark${{ steps.short.outputs.sparkver }} + -Pspark-tests + -Pspark-${{ matrix.sparkver }} + -Pscala-${{ matrix.scalaver }} + + - name: Upload surefire reports + if: always() + uses: actions/upload-artifact@v7 + with: + name: spark-tests-reports-${{ matrix.sparkver }}_${{ matrix.scalaver }}-jdk-${{ matrix.javaver }} + path: "auron-spark-tests/**/target/surefire-reports/*.xml" + + - name: Upload unit tests log + if: failure() + uses: actions/upload-artifact@v7 + with: + name: spark-tests-logs-${{ matrix.sparkver }}_${{ matrix.scalaver }}-jdk-${{ matrix.javaver }} + path: "auron-spark-tests/**/target/unit-tests.log" From acea7b37017891669615c439e3953387a2c32196 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Thu, 23 Apr 2026 16:06:06 -0700 Subject: [PATCH 3/9] [AURON #2170] Port aggregate correctness tests to Spark 3.1/3.2/3.4/3.5/4.0/4.1 Mirror the three aggregate suites from spark33 (AuronDataFrameAggregateSuite, AuronDatasetAggregatorSuite, AuronTypedImperativeAggregateSuite) and wire them into each per-version AuronSparkTestSettings with the same exclude list (collect functions prefix, SPARK-19471 overridden locally, SPARK-24788) so the matrix CI exercises aggregates on every supported Spark profile. 
--- .../auron/utils/AuronSparkTestSettings.scala | 21 +++++ .../sql/AuronDataFrameAggregateSuite.scala | 78 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++ .../AuronTypedImperativeAggregateSuite.scala | 21 +++++ .../auron/utils/AuronSparkTestSettings.scala | 21 +++++ .../sql/AuronDataFrameAggregateSuite.scala | 78 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++ .../AuronTypedImperativeAggregateSuite.scala | 21 +++++ .../auron/utils/AuronSparkTestSettings.scala | 21 +++++ .../sql/AuronDataFrameAggregateSuite.scala | 78 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++ .../AuronTypedImperativeAggregateSuite.scala | 21 +++++ .../auron/utils/AuronSparkTestSettings.scala | 21 +++++ .../sql/AuronDataFrameAggregateSuite.scala | 78 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++ .../AuronTypedImperativeAggregateSuite.scala | 21 +++++ .../auron/utils/AuronSparkTestSettings.scala | 21 +++++ .../sql/AuronDataFrameAggregateSuite.scala | 78 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++ .../AuronTypedImperativeAggregateSuite.scala | 21 +++++ .../auron/utils/AuronSparkTestSettings.scala | 21 +++++ .../sql/AuronDataFrameAggregateSuite.scala | 78 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++ .../AuronTypedImperativeAggregateSuite.scala | 21 +++++ 24 files changed, 834 insertions(+) create mode 100644 auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala create mode 100644 auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 
auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala create mode 100644 auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala create mode 100644 auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala create mode 100644 auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala create mode 100644 auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala diff --git a/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 052cca5d1..928798af8 100644 --- a/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ 
b/auron-spark-tests/spark31/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -16,7 +16,28 @@ */ package org.apache.auron.utils +import org.apache.spark.sql._ + class AuronSparkTestSettings extends SparkTestSettings { + { + // Use Arrow's unsafe implementation. + System.setProperty("arrow.allocation.manager.type", "Unsafe") + } + + enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 + .excludeByPrefix("collect functions") + // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite + // with modified plan checks for Auron's native aggregates, so we exclude the original here. + .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" override def getSupportedSQLQueryTests: Set[String] = Set.empty diff --git a/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..d1361ab7e --- /dev/null +++ b/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. 
+ case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +} diff --git a/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..a6e3af241 --- /dev/null +++ b/auron-spark-tests/spark31/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase diff --git a/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 052cca5d1..928798af8 100644 --- a/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -16,7 +16,28 @@ */ package org.apache.auron.utils +import org.apache.spark.sql._ + class AuronSparkTestSettings extends SparkTestSettings { + { + // Use Arrow's unsafe implementation. + System.setProperty("arrow.allocation.manager.type", "Unsafe") + } + + enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 + .excludeByPrefix("collect functions") + // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite + // with modified plan checks for Auron's native aggregates, so we exclude the original here. 
+ .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" override def getSupportedSQLQueryTests: Set[String] = Set.empty diff --git a/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..d1361ab7e --- /dev/null +++ b/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. 
+ case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +} diff --git a/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..a6e3af241 --- /dev/null +++ b/auron-spark-tests/spark32/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase diff --git a/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 052cca5d1..928798af8 100644 --- a/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark34/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -16,7 +16,28 @@ */ package org.apache.auron.utils +import org.apache.spark.sql._ + class AuronSparkTestSettings extends SparkTestSettings { + { + // Use Arrow's unsafe implementation. + System.setProperty("arrow.allocation.manager.type", "Unsafe") + } + + enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 + .excludeByPrefix("collect functions") + // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite + // with modified plan checks for Auron's native aggregates, so we exclude the original here. 
+ .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" override def getSupportedSQLQueryTests: Set[String] = Set.empty diff --git a/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..d1361ab7e --- /dev/null +++ b/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. 
+ case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +} diff --git a/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..a6e3af241 --- /dev/null +++ b/auron-spark-tests/spark34/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 052cca5d1..928798af8 100644 --- a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -16,7 +16,28 @@ */ package org.apache.auron.utils +import org.apache.spark.sql._ + class AuronSparkTestSettings extends SparkTestSettings { + { + // Use Arrow's unsafe implementation. + System.setProperty("arrow.allocation.manager.type", "Unsafe") + } + + enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 + .excludeByPrefix("collect functions") + // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite + // with modified plan checks for Auron's native aggregates, so we exclude the original here. 
+ .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" override def getSupportedSQLQueryTests: Set[String] = Set.empty diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..d1361ab7e --- /dev/null +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. 
+ case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +} diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..a6e3af241 --- /dev/null +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 052cca5d1..928798af8 100644 --- a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -16,7 +16,28 @@ */ package org.apache.auron.utils +import org.apache.spark.sql._ + class AuronSparkTestSettings extends SparkTestSettings { + { + // Use Arrow's unsafe implementation. + System.setProperty("arrow.allocation.manager.type", "Unsafe") + } + + enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 + .excludeByPrefix("collect functions") + // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite + // with modified plan checks for Auron's native aggregates, so we exclude the original here. 
+ .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" override def getSupportedSQLQueryTests: Set[String] = Set.empty diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..d1361ab7e --- /dev/null +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. 
+ case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +} diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..a6e3af241 --- /dev/null +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 052cca5d1..928798af8 100644 --- a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -16,7 +16,28 @@ */ package org.apache.auron.utils +import org.apache.spark.sql._ + class AuronSparkTestSettings extends SparkTestSettings { + { + // Use Arrow's unsafe implementation. + System.setProperty("arrow.allocation.manager.type", "Unsafe") + } + + enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 + .excludeByPrefix("collect functions") + // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite + // with modified plan checks for Auron's native aggregates, so we exclude the original here. 
+ .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" override def getSupportedSQLQueryTests: Set[String] = Set.empty diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..d1361ab7e --- /dev/null +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. 
+ case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +} diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..a6e3af241 --- /dev/null +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase From 3d4a554b530cba789539fce1b6827306caf51c09 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Thu, 23 Apr 2026 20:25:30 -0700 Subject: [PATCH 4/9] [AURON #2170] Fix Spark 4.0/4.1 compile errors in auron-spark-tests-common MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The common test module compiles against every Spark version we support, but it called several APIs that were reshaped in Spark 4: * `Column.apply(Expression)` was removed — the classic module now exposes it as `ExpressionUtils.column(expr)`. * `SparkSession.internalCreateDataFrame` lives on `classic.SparkSession` in 4.x and requires the `isStreaming` argument. * `DataFrame.logicalPlan` is no longer on the `api.Dataset` trait, and the `SQLExecution.withSQLConfPropagated` overload now takes a `classic.SparkSession` rather than the abstract `SparkSession`. Wrap the two Spark-4-only calls in `@sparkver` helpers so the right implementation is emitted under each profile, switch to `df.queryExecution.logical` / `df.queryExecution.sparkSession` (both public on `QueryExecution` across every supported version and returning the concrete session type in 4.x), and pull in the `spark-version-annotation-macros` dependency the helpers need. 
--- auron-spark-tests/common/pom.xml | 5 +++ .../spark/sql/SparkExpressionTestsBase.scala | 33 +++++++++++++++++-- .../spark/sql/SparkQueryTestsBase.scala | 7 ++-- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/auron-spark-tests/common/pom.xml b/auron-spark-tests/common/pom.xml index 02eefdcb7..96fca025a 100644 --- a/auron-spark-tests/common/pom.xml +++ b/auron-spark-tests/common/pom.xml @@ -35,6 +35,11 @@ spark-extension_${scalaVersion} ${project.version} + + org.apache.auron + spark-version-annotation-macros_${scalaVersion} + ${project.version} + org.apache.spark spark-core_${scalaVersion} diff --git a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala index 14490aef0..d978b2b04 100644 --- a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala +++ b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.commons.io.FileUtils import org.apache.commons.math3.util.Precision import org.apache.spark.SparkFunSuite +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone import org.apache.spark.sql.catalyst.expressions._ @@ -35,6 +36,8 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.scalactic.TripleEqualsSupport.Spread +import org.apache.auron.sparkver + /** * Base trait for all Spark expression tests. 
*/ @@ -143,7 +146,7 @@ trait SparkExpressionTestsBase val empData = Seq(Row(1)) _spark.createDataFrame(_spark.sparkContext.parallelize(empData), schema) } - val resultDF = df.select(Column(expression)) + val resultDF = df.select(exprToColumn(expression)) val result = resultDF.collect() if (checkDataTypeSupported(expression) && @@ -350,8 +353,34 @@ trait SparkExpressionTestsBase structFieldSeq.append(StructField("n", IntegerType, nullable = true)) } - _spark.internalCreateDataFrame( + internalCreateDF( + _spark, _spark.sparkContext.parallelize(Seq(inputRow)), StructType(structFieldSeq.toSeq)) } + + // Wrap Column construction from an Expression. Spark 4 removed + // `Column.apply(Expression)`; the classic module exposes it via ExpressionUtils. + @sparkver("3.0 / 3.1 / 3.2 / 3.3 / 3.4 / 3.5") + private def exprToColumn(expr: Expression): Column = Column(expr) + + @sparkver("4.0 / 4.1") + private def exprToColumn(expr: Expression): Column = + org.apache.spark.sql.classic.ExpressionUtils.column(expr) + + // Wrap SparkSession.internalCreateDataFrame; in Spark 4+ it lives on classic.SparkSession. 
+ @sparkver("3.0 / 3.1 / 3.2 / 3.3 / 3.4 / 3.5") + private def internalCreateDF( + spark: SparkSession, + rdd: RDD[InternalRow], + schema: StructType): DataFrame = + spark.internalCreateDataFrame(rdd, schema) + + @sparkver("4.0 / 4.1") + private def internalCreateDF( + spark: SparkSession, + rdd: RDD[InternalRow], + schema: StructType): DataFrame = + spark.asInstanceOf[org.apache.spark.sql.classic.SparkSession] + .internalCreateDataFrame(rdd, schema, isStreaming = false) } diff --git a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala index 159cb6174..1c9cabd06 100644 --- a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala +++ b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala @@ -91,9 +91,12 @@ object AuronQueryTestUtil extends Assertions { df: DataFrame, expectedAnswer: Seq[Row], checkToRDD: Boolean = true): Option[String] = { - val isSorted = df.logicalPlan.collect { case s: logical.Sort => s }.nonEmpty + val isSorted = df.queryExecution.logical.collect { case s: logical.Sort => s }.nonEmpty if (checkToRDD) { - SQLExecution.withSQLConfPropagated(df.sparkSession) { + // df.queryExecution.sparkSession returns the concrete session type in every Spark + // version we support (SparkSession pre-4.0, classic.SparkSession since 4.0), matching + // the overload resolved by SQLExecution.withSQLConfPropagated. 
+ SQLExecution.withSQLConfPropagated(df.queryExecution.sparkSession) { df.rdd.count() // Also attempt to deserialize as an RDD [SPARK-15791] } } From e5202003212f8cd606aa812ebb9341d37bfe309d Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Thu, 23 Apr 2026 20:25:45 -0700 Subject: [PATCH 5/9] [AURON #2170] Exclude Spark tests incompatible with Auron native aggregates Three ported DataFrameAggregateSuite tests fail not because of a regression but because they assert on Spark-specific internals that Auron's native aggregation deliberately replaces: * Spark 3.2 SPARK-34837 (`avg` on ANSI intervals) emits invalid Java when Spark's HashAggregate codegen consumes values produced by Auron's native project; later Spark versions avoid this path. * Spark 3.5 SPARK-16484 negative tests assert the thrown error implements `SparkThrowable`, but `SparkUDAFWrapper` surfaces UDAF failures as `RuntimeException`. * Spark 3.5 SPARK-43876 greps for `public class hashAgg_FastHashMap_0` in the WholeStageCodegen output, which never exists when the aggregate runs natively. Exclude these three tests in the relevant per-version `AuronSparkTestSettings`, matching the existing precedent for the SPARK-19471 / SPARK-24788 cases. 
--- .../org/apache/auron/utils/AuronSparkTestSettings.scala | 3 +++ .../org/apache/auron/utils/AuronSparkTestSettings.scala | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 928798af8..4342e16f0 100644 --- a/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark32/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -33,6 +33,9 @@ class AuronSparkTestSettings extends SparkTestSettings { "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + // Spark 3.2's avg(ANSI interval) codegen emits invalid Java when Auron's native project + // feeds the HashAggregate; later Spark versions don't hit this path. 
+ .exclude("SPARK-34837: Support ANSI SQL intervals by the aggregate function `avg`") enableSuite[AuronDatasetAggregatorSuite] diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 928798af8..bd808660c 100644 --- a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -33,6 +33,12 @@ class AuronSparkTestSettings extends SparkTestSettings { "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + // Auron's SparkUDAFWrapper surfaces UDAF errors as RuntimeException, so the negative-path + // assertion that the thrown error is a SparkThrowable fails under native execution. + .exclude("SPARK-16484: hll_*_agg + hll_union negative tests") + // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the + // HashAggregate with a native aggregate so no Spark-generated class is emitted. + .exclude("SPARK-43876: Enable fast hashmap for distinct queries") enableSuite[AuronDatasetAggregatorSuite] From 38743a08f5e8cc57efbf14163dad071574fdf1f7 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Sun, 26 Apr 2026 15:46:51 -0700 Subject: [PATCH 6/9] [AURON #2170] Preserve SparkThrowable when surfacing native errors `AuronCallNativeWrapper.checkError` was unconditionally wrapping the captured throwable in a fresh `RuntimeException`, which discarded the concrete type (e.g. `SparkRuntimeException` -> `SparkThrowable`). 
The native side already wraps the cause in a `RuntimeException`, so the extra wrap was both redundant and lossy: tests that did `intercept[SparkThrowable](...)` saw a plain `RuntimeException` and failed with `ClassCastException`. Rethrow `RuntimeException`/`Error` subtypes directly, falling back to the wrap only for genuinely checked exceptions. This lets the inherited `SPARK-16484: hll_*_agg + hll_union negative tests` from `DataFrameAggregateSuite` pass natively across Spark 3.5/4.0/4.1, so the exclusion just added in spark35 is dropped to keep the test exercised. --- .../org/apache/auron/jni/AuronCallNativeWrapper.java | 9 +++++++++ .../org/apache/auron/utils/AuronSparkTestSettings.scala | 3 --- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java b/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java index 661d8070e..1c03a4e6c 100644 --- a/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java +++ b/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java @@ -163,6 +163,15 @@ protected void checkError() { Throwable throwable = error.getAndSet(null); if (throwable != null) { close(); + // Rethrow RuntimeException / Error subtypes directly so callers can match on + // specific types like SparkThrowable. The native side already wraps the cause + // in a RuntimeException, so the previous unconditional wrap discarded type info. 
+ if (throwable instanceof RuntimeException) { + throw (RuntimeException) throwable; + } + if (throwable instanceof Error) { + throw (Error) throwable; + } throw new RuntimeException(throwable); } } diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index bd808660c..060edbbd8 100644 --- a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -33,9 +33,6 @@ class AuronSparkTestSettings extends SparkTestSettings { "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") - // Auron's SparkUDAFWrapper surfaces UDAF errors as RuntimeException, so the negative-path - // assertion that the thrown error is a SparkThrowable fails under native execution. - .exclude("SPARK-16484: hll_*_agg + hll_union negative tests") // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the // HashAggregate with a native aggregate so no Spark-generated class is emitted. .exclude("SPARK-43876: Enable fast hashmap for distinct queries") From 12baa59125c6b28e738b97a85e8b6a9040f3add9 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Sun, 26 Apr 2026 15:47:07 -0700 Subject: [PATCH 7/9] [AURON #2170] Exclude unsupported aggregate tests on Spark 4.0/4.1 Most of the runtime failures in the newly-enabled Spark 4.x correctness matrix exercise functionality Auron's native engine deliberately replaces or has not implemented yet, mirroring how Spark 3.3-3.5 already handle similar gaps. 
Add per-version exclusions with comments that name the specific reason so future maintainers can tell deliberate skips from real regressions: * SPARK-43876 / SPARK-19471-style codegen-output assertions: Auron uses native aggregates so no Spark-generated class is emitted. * SPARK-47430 GROUP BY MapType: shuffle writer doesn't accept MapType keys. * SPARK-28067 / SPARK-35955: native sum/avg use wrapping decimal arithmetic and don't honour `spark.sql.ansi.enabled` (already excluded for 3.3-3.5). * `dataframe aggregate with object aggregate buffer, no group by`: ObjectHashAggregate fallback hits a RemoteClassLoaderError on the no-group path; out of scope for this PR. * `typed aggregation: complex result type`: Dataset encoder fails on tuple-of-struct results round-tripping through native execution. Spark 4.1 only: * SPARK-16484 positive tests / `hll_union_agg`: when 4.1's `allowDifferentLgConfigK=true` flag is set, the post-Union `NativeHashAggregate` returns the per-side rows instead of merging by id (the plain `sum(count)` column is left unmerged as well). The same test passes on Spark 3.5/4.0; tracking as a follow-up. * SPARK-52407 theta_sketch_*: new UDAF family in 4.1, not registered in SparkUDAFWrapper. * SPARK-52626 / SPARK-52660: 4.1's new TIME type is not in NativeConverters.isTypeSupported.
--- .../auron/utils/AuronSparkTestSettings.scala | 18 +++++++++++ .../auron/utils/AuronSparkTestSettings.scala | 32 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 928798af8..3843c8047 100644 --- a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -33,10 +33,28 @@ class AuronSparkTestSettings extends SparkTestSettings { "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the + // HashAggregate with a native aggregate so no Spark-generated class is emitted. + .exclude("SPARK-43876: Enable fast hashmap for distinct queries") + // MapType isn't a supported shuffle key in Auron's shuffle writer, so grouping by a + // MapType column fails when writing the shuffle index file. + .exclude("SPARK-47430 Support GROUP BY MapType") + // Auron's native sum/avg use wrapping arithmetic on decimals and don't honor + // spark.sql.ansi.enabled, so the ArithmeticException these tests expect is never + // raised. Same exclusion is in place for Spark 3.3-3.5 (under AuronDataFrameSuite there). + .exclude("SPARK-28067: Aggregate sum should not return wrong results for decimal overflow") + .exclude("SPARK-35955: Aggregate avg should not return wrong results for decimal overflow") enableSuite[AuronDatasetAggregatorSuite] + // Dataset encoder fails to materialize tuple-of-struct results that round-trip through + // Auron's native execution; tracked as a follow-up. 
+ .exclude("typed aggregation: complex result type") enableSuite[AuronTypedImperativeAggregateSuite] + // ObjectHashAggregateExec without a grouping falls back to a JVM path whose closure + // can't be deserialized in the executor classloader (RemoteClassLoaderError on + // catalyst.expressions.Object). Investigating this fallback path is out of scope for #2170. + .exclude("dataframe aggregate with object aggregate buffer, no group by") override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 928798af8..17fd51e66 100644 --- a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -33,10 +33,42 @@ class AuronSparkTestSettings extends SparkTestSettings { "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") + // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the + // HashAggregate with a native aggregate so no Spark-generated class is emitted. + .exclude("SPARK-43876: Enable fast hashmap for distinct queries") + // MapType isn't a supported shuffle key in Auron's shuffle writer, so grouping by a + // MapType column fails when writing the shuffle index file. + .exclude("SPARK-47430 Support GROUP BY MapType") + // Auron's native sum/avg use wrapping arithmetic on decimals and don't honor + // spark.sql.ansi.enabled, so the ArithmeticException these tests expect is never + // raised. Same exclusion is in place for Spark 3.3-3.5 (under AuronDataFrameSuite there). 
+ .exclude("SPARK-28067: Aggregate sum should not return wrong results for decimal overflow") + .exclude("SPARK-35955: Aggregate avg should not return wrong results for decimal overflow") + // Spark 4.1 added an allowDifferentLgConfigK=true variant on hll_union_agg. With that + // flag set, the NativeHashAggregate placed after a Union returns the pre-Union per-side + // rows (count=7 and count=8 for the same id) instead of collapsing into one row + // (count=15). Even the plain sum(count) column fails to merge, so this looks like a + // post-Union aggregate bug rather than missing HLL coverage. The same test passes on + // Spark 3.5 and 4.0, so this is a 4.1-specific divergence; tracking as a follow-up. + .exclude("SPARK-16484: hll_*_agg + hll_union + hll_sketch_estimate positive tests") + .exclude("hll_union_agg") + // Spark 4.1 introduced theta_sketch_*; Auron has no native UDAF support and + // SparkUDAFWrapper doesn't recognise the new function family yet. + .excludeByPrefix("SPARK-52407") + // Spark 4.1 introduced the TIME type; not in NativeConverters.isTypeSupported. + .exclude("SPARK-52626: Support group by Time column") + .exclude("SPARK-52660: Support aggregation of Time column when codegen is split") enableSuite[AuronDatasetAggregatorSuite] + // Dataset encoder fails to materialize tuple-of-struct results that round-trip through + // Auron's native execution; tracked as a follow-up. + .exclude("typed aggregation: complex result type") enableSuite[AuronTypedImperativeAggregateSuite] + // ObjectHashAggregateExec without a grouping falls back to a JVM path whose closure + // can't be deserialized in the executor classloader (RemoteClassLoaderError on + // catalyst.expressions.Object). Investigating this fallback path is out of scope for #2170. 
+ .exclude("dataframe aggregate with object aggregate buffer, no group by") override def getSQLQueryTestSettings: SQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = "" From 9877aaeec3e61c8c44f413abcaeb2829fd8a71c8 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Tue, 28 Apr 2026 10:49:37 -0700 Subject: [PATCH 8/9] [AURON #2170] Apply scalafmt to common spark-tests base traits No behavioral changes; reflow the multi-line `sideBySide` argument and the `asInstanceOf[classic.SparkSession].internalCreateDataFrame` chain to match the project's scalafmt config. --- .../apache/spark/sql/SparkExpressionTestsBase.scala | 3 ++- .../org/apache/spark/sql/SparkQueryTestsBase.scala | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala index d978b2b04..435e87f08 100644 --- a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala +++ b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkExpressionTestsBase.scala @@ -381,6 +381,7 @@ trait SparkExpressionTestsBase spark: SparkSession, rdd: RDD[InternalRow], schema: StructType): DataFrame = - spark.asInstanceOf[org.apache.spark.sql.classic.SparkSession] + spark + .asInstanceOf[org.apache.spark.sql.classic.SparkSession] .internalCreateDataFrame(rdd, schema, isStreaming = false) } diff --git a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala index 1c9cabd06..fd91fee1f 100644 --- a/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala +++ b/auron-spark-tests/common/src/test/scala/org/apache/spark/sql/SparkQueryTestsBase.scala @@ -179,12 +179,12 @@ object AuronQueryTestUtil 
extends Assertions { s""" |== Results == |${sideBySide( - s"== Correct Answer - ${expectedAnswer.size} ==" +: - getRowType(expectedAnswer.headOption) +: - prepareAnswer(expectedAnswer, isSorted).map(_.toString()), - s"== Auron Answer - ${sparkAnswer.size} ==" +: - getRowType(sparkAnswer.headOption) +: - prepareAnswer(sparkAnswer, isSorted).map(_.toString())).mkString("\n")} + s"== Correct Answer - ${expectedAnswer.size} ==" +: + getRowType(expectedAnswer.headOption) +: + prepareAnswer(expectedAnswer, isSorted).map(_.toString()), + s"== Auron Answer - ${sparkAnswer.size} ==" +: + getRowType(sparkAnswer.headOption) +: + prepareAnswer(sparkAnswer, isSorted).map(_.toString())).mkString("\n")} """.stripMargin } From 7aa96531b8348b30e030475f2bd8712312da8365 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Tue, 28 Apr 2026 10:49:49 -0700 Subject: [PATCH 9/9] [AURON #2170] Restore SPARK-16484 negative-test exclusion across all versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The native-error-preservation change in 38743a08 was intended to let the inherited `SPARK-16484: hll_*_agg + hll_union negative tests` pass natively, but the failure mode is architectural rather than a JNI wrapping bug: In vanilla Spark, `HllSketchAgg.lgConfigK` is a lazy val accessed during driver-side analysis, so `SparkRuntimeException(HLL_INVALID_LG_K)` reaches the test through the analyzer call chain. Auron defers HLL UDAF evaluation to the native executor via `SparkUDAFWrapper`, so the exception fires inside a task; Spark's `TaskRunner` then wraps any task failure in `SparkException` regardless of whether the inner cause is a `SparkThrowable`. `intercept[SparkRuntimeException] { df.collect() }` sees `SparkException`, not the inner type, so the test fails. 
Revert the `AuronCallNativeWrapper.checkError` change (it doesn't address this case and is out of scope for #2170) and restore the spark35 exclusion with a comment that names the actual cause. Add the same exclusion to spark40 and spark41 — same root cause, same outcome on those versions. --- .../org/apache/auron/jni/AuronCallNativeWrapper.java | 9 --------- .../org/apache/auron/utils/AuronSparkTestSettings.scala | 6 ++++++ .../org/apache/auron/utils/AuronSparkTestSettings.scala | 6 ++++++ .../org/apache/auron/utils/AuronSparkTestSettings.scala | 9 +++++++-- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java b/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java index 1c03a4e6c..661d8070e 100644 --- a/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java +++ b/auron-core/src/main/java/org/apache/auron/jni/AuronCallNativeWrapper.java @@ -163,15 +163,6 @@ protected void checkError() { Throwable throwable = error.getAndSet(null); if (throwable != null) { close(); - // Rethrow RuntimeException / Error subtypes directly so callers can match on - // specific types like SparkThrowable. The native side already wraps the cause - // in a RuntimeException, so the previous unconditional wrap discarded type info. 
- if (throwable instanceof RuntimeException) { - throw (RuntimeException) throwable; - } - if (throwable instanceof Error) { - throw (Error) throwable; - } throw new RuntimeException(throwable); } } diff --git a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 060edbbd8..95edecec8 100644 --- a/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark35/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -36,6 +36,12 @@ class AuronSparkTestSettings extends SparkTestSettings { // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the // HashAggregate with a native aggregate so no Spark-generated class is emitted. .exclude("SPARK-43876: Enable fast hashmap for distinct queries") + // In vanilla Spark, HllSketchAgg.lgConfigK is a lazy val accessed during driver-side + // analysis, so SparkRuntimeException(HLL_INVALID_LG_K) propagates to the test directly. + // Auron defers HLL UDAF evaluation to the native executor via SparkUDAFWrapper, so the + // same exception is thrown inside a task and Spark's TaskRunner wraps it in SparkException + // before it reaches the test. intercept[SparkRuntimeException] no longer matches. 
+ .exclude("SPARK-16484: hll_*_agg + hll_union negative tests") enableSuite[AuronDatasetAggregatorSuite] diff --git a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 3843c8047..bd517db1c 100644 --- a/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark40/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -36,6 +36,12 @@ class AuronSparkTestSettings extends SparkTestSettings { // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the // HashAggregate with a native aggregate so no Spark-generated class is emitted. .exclude("SPARK-43876: Enable fast hashmap for distinct queries") + // In vanilla Spark, HllSketchAgg.lgConfigK is a lazy val accessed during driver-side + // analysis, so SparkRuntimeException(HLL_INVALID_LG_K) propagates to the test directly. + // Auron defers HLL UDAF evaluation to the native executor via SparkUDAFWrapper, so the + // same exception is thrown inside a task and Spark's TaskRunner wraps it in SparkException + // before it reaches the test. intercept[SparkRuntimeException] no longer matches. + .exclude("SPARK-16484: hll_*_agg + hll_union negative tests") // MapType isn't a supported shuffle key in Auron's shuffle writer, so grouping by a // MapType column fails when writing the shuffle index file. 
.exclude("SPARK-47430 Support GROUP BY MapType") diff --git a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 17fd51e66..0cc22aeec 100644 --- a/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark41/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -29,13 +29,18 @@ class AuronSparkTestSettings extends SparkTestSettings { .excludeByPrefix("collect functions") // A custom version of the SPARK-19471 test has been added to AuronDataFrameAggregateSuite // with modified plan checks for Auron's native aggregates, so we exclude the original here. - .exclude( - "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") // The fast-hashmap test asserts on WholeStageCodegen output, but Auron replaces the // HashAggregate with a native aggregate so no Spark-generated class is emitted. .exclude("SPARK-43876: Enable fast hashmap for distinct queries") + // In vanilla Spark, HllSketchAgg.lgConfigK is a lazy val accessed during driver-side + // analysis, so SparkRuntimeException(HLL_INVALID_LG_K) propagates to the test directly. + // Auron defers HLL UDAF evaluation to the native executor via SparkUDAFWrapper, so the + // same exception is thrown inside a task and Spark's TaskRunner wraps it in SparkException + // before it reaches the test. intercept[SparkRuntimeException] no longer matches. 
+ .exclude("SPARK-16484: hll_*_agg + hll_union negative tests") // MapType isn't a supported shuffle key in Auron's shuffle writer, so grouping by a // MapType column fails when writing the shuffle index file. .exclude("SPARK-47430 Support GROUP BY MapType")