diff --git a/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java b/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java index 84d3c5b7ba..d3d8b1b6de 100644 --- a/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java +++ b/parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java @@ -167,9 +167,10 @@ public CapacityByteArrayOutputStream(int initialSlabSize, int maxCapacityHint, B private void addSlab(int minimumSize) { int nextSlabSize; - // check for overflow + // check for overflow using bytesAllocated which is always up to date (unlike bytesUsed which + // is updated after addSlab returns in write()) try { - Math.addExact(bytesUsed, minimumSize); + Math.addExact(bytesAllocated, minimumSize); } catch (ArithmeticException e) { // This is interpreted as a request for a value greater than Integer.MAX_VALUE // We throw OOM because that is what java.io.ByteArrayOutputStream also does @@ -191,6 +192,12 @@ private void addSlab(int minimumSize) { nextSlabSize = minimumSize; } + // Cap nextSlabSize to avoid integer overflow on bytesAllocated + int maxNextSlabSize = Integer.MAX_VALUE - bytesAllocated; + if (nextSlabSize > maxNextSlabSize) { + nextSlabSize = max(minimumSize, maxNextSlabSize); + } + LOG.debug("used {} slabs, adding new slab of size {}", slabs.size(), nextSlabSize); this.currentSlab = allocator.allocate(nextSlabSize); diff --git a/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java b/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java new file mode 100644 index 0000000000..771d9e17b4 --- /dev/null +++ b/parquet-common/src/test/java/org/apache/parquet/bytes/TestCapacityByteArrayOutputStreamOverflow.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.bytes; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +import java.lang.reflect.Field; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for integer overflow handling in {@link CapacityByteArrayOutputStream#addSlab(int)}. + * Verifies the fix for PARQUET-3261. + */ +public class TestCapacityByteArrayOutputStreamOverflow { + + private TrackingByteBufferAllocator allocator; + + @Before + public void initAllocator() { + allocator = TrackingByteBufferAllocator.wrap(new HeapByteBufferAllocator()); + } + + @After + public void closeAllocator() { + allocator.close(); + } + + /** + * Regression test for PARQUET-3261: bytesAllocated overflow in addSlab(). + * Simulates near-overflow by setting bytesAllocated via reflection, then verifying + * that addSlab caps the slab size instead of throwing ArithmeticException. + */ + @Test + public void testAddSlabCapsSlabSizeNearIntegerMaxValue() throws Exception { + int slabSize = 1024; + try (CapacityByteArrayOutputStream cbaos = + new CapacityByteArrayOutputStream(slabSize, Integer.MAX_VALUE, allocator)) { + // Write initial data to set up internal state + byte[] data = new byte[slabSize]; + cbaos.write(data, 0, data.length); + + // Simulate near-overflow by setting bytesAllocated close to Integer.MAX_VALUE + Field bytesAllocatedField = CapacityByteArrayOutputStream.class.getDeclaredField("bytesAllocated"); + bytesAllocatedField.setAccessible(true); + bytesAllocatedField.setInt(cbaos, Integer.MAX_VALUE - 100); + + // Writing 1 byte triggers addSlab with minimumSize=1. + // Without the fix, the doubling strategy would compute nextSlabSize = bytesUsed (1024), + // and bytesAllocated + 1024 would overflow. With the fix, nextSlabSize is capped to 100. + cbaos.write(1); + assertEquals(slabSize + 1, cbaos.size()); + } + } + + /** + * Verify that a true overflow (bytesAllocated + minimumSize > Integer.MAX_VALUE) + * still throws OutOfMemoryError. + */ + @Test + public void testAddSlabThrowsOOMOnTrueOverflow() throws Exception { + int slabSize = 1024; + try (CapacityByteArrayOutputStream cbaos = + new CapacityByteArrayOutputStream(slabSize, Integer.MAX_VALUE, allocator)) { + byte[] data = new byte[slabSize]; + cbaos.write(data, 0, data.length); + + // Set bytesAllocated so that even minimumSize=200 would overflow + Field bytesAllocatedField = CapacityByteArrayOutputStream.class.getDeclaredField("bytesAllocated"); + bytesAllocatedField.setAccessible(true); + bytesAllocatedField.setInt(cbaos, Integer.MAX_VALUE - 50); + + // Writing 200 bytes requires minimumSize=200, but only 50 bytes remain. + // The addExact(bytesAllocated, minimumSize) check should throw OOM. + byte[] tooLarge = new byte[200]; + assertThrows(OutOfMemoryError.class, () -> cbaos.write(tooLarge, 0, tooLarge.length)); + } + } +}