Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
## Unreleased

### Bug Fixes

- **decode:** enforce alloc limit in bytes decode paths — `bytes()`/`bytesPtr()` called the unbounded `readN` directly, so on a stream decode a malicious `bin32`/`str32` header declaring ~4GB forced a ~4GB upfront allocation even with alloc limits enabled (the default). Bytes decoding now routes through `readNInto`, which chunks allocation by `bytesAllocLimit` unless `DisableAllocLimit(true)` is set; on the byte-slice (`Unmarshal`) path the declared length is validated against the remaining input before a single exact-size allocation ([#63](https://github.com/Basekick-Labs/msgpack/issues/63))

### Performance

- **decode:** exact-capacity chunked growth in `readNGrow` — replaces append-based chunk growth with explicit `min(n, max(2*pos, pos+bytesAllocLimit))` sizing: same allocation-ahead-of-data security bound, no capacity overshoot beyond the target, fewer alloc+copy rounds ([#63](https://github.com/Basekick-Labs/msgpack/issues/63)) (4MB string stream decode **-30.5% ns/op**, **-28.5% B/op**)

---

## v6.1.0 (2026-04-27)

### Performance
Expand Down
44 changes: 44 additions & 0 deletions bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,50 @@ func BenchmarkByteSlice(b *testing.B) {
benchmarkEncodeDecode(b, src, &dst)
}

// Stream decodes larger than bytesAllocLimit (1MB) exercise the chunked
// growth path in readNGrow (issue #63).

func BenchmarkDecodeBytesStream4MB(b *testing.B) {
data, err := msgpack.Marshal(make([]byte, 4<<20))
if err != nil {
b.Fatal(err)
}
rd := bytes.NewReader(data)
var out []byte

b.ReportAllocs()
b.ResetTimer()

for i := 0; i < b.N; i++ {
rd.Reset(data)
dec := msgpack.NewDecoder(rd)
out = nil
if err := dec.Decode(&out); err != nil {
b.Fatal(err)
}
}
}

func BenchmarkDecodeStringStream4MB(b *testing.B) {
data, err := msgpack.Marshal(string(make([]byte, 4<<20)))
if err != nil {
b.Fatal(err)
}
rd := bytes.NewReader(data)
var out string

b.ReportAllocs()
b.ResetTimer()

for i := 0; i < b.N; i++ {
rd.Reset(data)
dec := msgpack.NewDecoder(rd)
if err := dec.Decode(&out); err != nil {
b.Fatal(err)
}
}
}

func BenchmarkByteArray(b *testing.B) {
var src [1024]byte
var dst [1024]byte
Expand Down
77 changes: 54 additions & 23 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -761,18 +761,46 @@ func (d *Decoder) readN(n int) ([]byte, error) {
return d.buf, nil
}

// readNInto reads n bytes into b, growing it as needed, and honors the
// decoder's alloc limit: unless DisableAllocLimit is set, allocation for
// a declared length is chunked by bytesAllocLimit so a malicious header
// can't force a huge upfront allocation.
func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

On 32-bit architectures, a large uint32 length (e.g., >= 2^31) decoded from bin32 or str32 headers can overflow when cast to int, resulting in a negative value for n. If n is negative, it will bypass the remaining check and cause a panic in make([]byte, n) or slice bounds out of range. We should add a check for n < 0 at the beginning of readNInto to prevent this.

Suggested change
func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) {
func (d *Decoder) readNInto(b []byte, n int) ([]byte, error) {
if n < 0 {
return nil, fmt.Errorf("msgpack: invalid length %d", n)
}

// Byte-slice reader: the declared length can be validated against the
// data actually present before allocating, so a single exact-size
// allocation is safe regardless of the alloc limit. (Unlike d.readN's
// zero-copy fast path, the result here is caller-owned and must be a
// copy, not a sub-slice of the input.)
if d.bsr.data != nil {
if remaining := len(d.bsr.data) - d.bsr.pos; n > remaining {
// Match io.ReadFull semantics: EOF when no data remains,
// ErrUnexpectedEOF on a partial read.
if remaining == 0 && n > 0 {
return b, io.EOF
}
return b, io.ErrUnexpectedEOF
}
return readN(d.r, b, n)
}
if d.flags&disableAllocLimitFlag != 0 {
return readN(d.r, b, n)
}
return readNGrow(d.r, b, n)
}

func readN(r io.Reader, b []byte, n int) ([]byte, error) {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

To prevent panics or silent data corruption on 32-bit architectures when a large uint32 length overflows to a negative int, we should validate that n >= 0 at the beginning of readN.

func readN(r io.Reader, b []byte, n int) ([]byte, error) {
	if n < 0 {
		return nil, fmt.Errorf("msgpack: invalid length %d", n)
	}

if b == nil {
if n == 0 {
return make([]byte, 0), nil
}
b = make([]byte, 0, n)
}

if n > cap(b) {
b = append(b, make([]byte, n-len(b))...)
b = make([]byte, n)
} else if n <= cap(b) {
b = b[:n]
} else {
// ReadFull overwrites all of b, so allocate exactly n instead of
// growing with append (which would copy the old contents for
// nothing and overshoot the capacity).
b = make([]byte, n)
}

_, err := io.ReadFull(r, b)
Expand All @@ -799,30 +827,33 @@ func readNGrow(r io.Reader, b []byte, n int) ([]byte, error) {
_, err := io.ReadFull(r, b)
return b, err
}
b = b[:cap(b)]

var pos int
for {
alloc := min(n-len(b), bytesAllocLimit)
b = append(b, make([]byte, alloc)...)

_, err := io.ReadFull(r, b[pos:])
if err != nil {
// n exceeds the current capacity. Fill the available capacity first,
// then grow to at most one bytesAllocLimit chunk or 2x the bytes
// already received ahead of the data — whichever is larger — capped
// at the exact target n. A malicious declared length can therefore
// never force allocation more than max(received, bytesAllocLimit)
// ahead of the data actually supplied (the same bound the previous
// chunked append provided), while exact sizing avoids append's
// capacity overshoot beyond n and reduces alloc+copy rounds.
b = b[:cap(b)]
pos := len(b)
if pos > 0 {
if _, err := io.ReadFull(r, b); err != nil {
return b, err
}
}
for pos < n {
newCap := min(max(2*pos, pos+bytesAllocLimit), n)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

On 32-bit architectures, if pos is large (e.g., >= 2^30), 2*pos or pos+bytesAllocLimit can overflow to a negative value. This would cause max to return a negative value or a value smaller than expected, leading to a negative newCap and a panic in make([]byte, newCap). We should calculate newCap in an overflow-safe manner.

		newCap := pos + bytesAllocLimit
		if newCap < pos || newCap < 0 {
			newCap = n
		}
		if double := 2 * pos; double > newCap && double > 0 {
			newCap = double
		}
		if newCap > n || newCap < 0 {
			newCap = n
		}

nb := make([]byte, newCap)
copy(nb, b[:pos])
b = nb

if len(b) == n {
break
if _, err := io.ReadFull(r, b[pos:]); err != nil {
return b, err
}
pos = len(b)
pos = newCap
}

return b, nil
}

func min(a, b int) int { //nolint:unparam
if a <= b {
return a
}
return b
}
4 changes: 2 additions & 2 deletions decode_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (d *Decoder) bytes(c byte, b []byte) ([]byte, error) {
if n == -1 {
return nil, nil
}
return readN(d.r, b, n)
return d.readNInto(b, n)
Comment on lines 92 to +95

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

On 32-bit architectures, a large uint32 length (e.g., 0xffffffff) decoded from bin32 or str32 headers will overflow to -1 when cast to int. Since n == -1 is checked here, it will be incorrectly treated as msgpcode.Nil, returning nil, nil and silently succeeding instead of failing with an error. We should ensure that n == -1 only returns nil, nil if the code is actually msgpcode.Nil, and otherwise fail on negative lengths.

Suggested change
if n == -1 {
return nil, nil
}
return readN(d.r, b, n)
return d.readNInto(b, n)
if n == -1 && c == msgpcode.Nil {
return nil, nil
}
if n < 0 {
return nil, fmt.Errorf("msgpack: invalid length %d", n)
}
return d.readNInto(b, n)

}

func (d *Decoder) decodeStringTemp() (string, error) {
Expand Down Expand Up @@ -139,7 +139,7 @@ func (d *Decoder) bytesPtr(c byte, ptr *[]byte) error {
return nil
}

*ptr, err = readN(d.r, *ptr, n)
*ptr, err = d.readNInto(*ptr, n)
return err
}

Expand Down
64 changes: 64 additions & 0 deletions msgpack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package msgpack_test
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"math"
"reflect"
"testing"
Expand Down Expand Up @@ -75,6 +77,68 @@ func (t *MsgpackTest) TestLargeString() {
t.Equal(dst, src)
}

func (t *MsgpackTest) TestDecodeBytesHugeDeclaredLen() {
// A bin32 header declaring ~4GB with a 1-byte payload must fail with a
// read error after at most one bytesAllocLimit-sized chunk — not attempt
// to allocate the full declared length upfront.
data := []byte{0xc6, 0xff, 0xff, 0xff, 0xff, 'x'}

var out []byte
dec := msgpack.NewDecoder(bytes.NewReader(data))
t.True(errors.Is(dec.Decode(&out), io.ErrUnexpectedEOF))

// Same via DecodeBytes (d.bytes path).
dec = msgpack.NewDecoder(bytes.NewReader(data))
_, err := dec.DecodeBytes()
t.True(errors.Is(err, io.ErrUnexpectedEOF))

// And via a []byte struct field (decodeBytesValue path).
var s struct{ Data []byte }
payload := append([]byte{0x81, 0xa4, 'D', 'a', 't', 'a'}, data...)
dec = msgpack.NewDecoder(bytes.NewReader(payload))
t.True(errors.Is(dec.Decode(&s), io.ErrUnexpectedEOF))
}

func (t *MsgpackTest) TestUnmarshalBytesHugeDeclaredLen() {
// On the byte-slice path the declared length is validated against the
// remaining input before allocating: must fail fast, no huge alloc.
data := []byte{0xc6, 0xff, 0xff, 0xff, 0xff, 'x'}
var out []byte
t.True(errors.Is(msgpack.Unmarshal(data, &out), io.ErrUnexpectedEOF))
}

func (t *MsgpackTest) TestUnmarshalBytesLarge() {
src := bytes.Repeat([]byte{'x'}, 2500*1024)
data, err := msgpack.Marshal(src)
t.Nil(err)

var dst []byte
t.Nil(msgpack.Unmarshal(data, &dst))
t.Equal(src, dst)
// The result must be caller-owned, not an alias of the input buffer.
data[len(data)-1] = 'y'
t.Equal(byte('x'), dst[len(dst)-1])
}

func (t *MsgpackTest) TestDecodeBytesLargeStream() {
// Larger than bytesAllocLimit (1MB) so the chunked grow path is hit.
src := bytes.Repeat([]byte{'x'}, 2500*1024)
data, err := msgpack.Marshal(src)
t.Nil(err)

var dst []byte
dec := msgpack.NewDecoder(bytes.NewReader(data))
t.Nil(dec.Decode(&dst))
t.Equal(src, dst)

// DisableAllocLimit path still works.
dst = nil
dec = msgpack.NewDecoder(bytes.NewReader(data))
dec.DisableAllocLimit(true)
t.Nil(dec.Decode(&dst))
t.Equal(src, dst)
}

func (t *MsgpackTest) TestSliceOfStructs() {
in := []*nameStruct{{"hello"}}
var out []*nameStruct
Expand Down
Loading