Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cmd/benchdiff/benchdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ var benchVars = kong.Vars{
"WarmupCountHelp": `Run benchmarks with -count=n as a warmup`,
"WarmupTimeHelp": `When warmups are run, set -benchtime=n`,
"TagsHelp": `Set the -tags flag on the go test command`,
"StrictDeterministicHelp": `Strictly evaluate deterministic metrics (allocs/op, B/op) for regressions, ignoring statistical significance.`,
"DeterministicToleranceHelp": `The minimum percent change before a deterministic metric (allocs/op, B/op) is considered degraded.`,
}

var groupHelp = kong.Vars{
Expand All @@ -100,6 +102,8 @@ var cli struct {
JSON bool `kong:"help=${JSONHelp},group='x'"`
OnDegrade int `kong:"name=on-degrade,default=0,help=${OnDegradeHelp},group='x'"`
Tolerance float64 `kong:"default='10.0',help=${ToleranceHelp},group='x'"`
StrictDeterministic bool `kong:"help=${StrictDeterministicHelp},group='x'"`
DeterministicTolerance float64 `kong:"name=deterministic-tolerance,default='0.0',help=${DeterministicToleranceHelp},group='x'"`

Bench string `kong:"default='.',help=${BenchHelp},group='gotest'"`
BenchmarkArgs string `kong:"placeholder='args',help=${BenchmarkArgsHelp},group='gotest'"`
Expand Down Expand Up @@ -289,9 +293,11 @@ func main() {
BenchstatFormatter: bStat.OutputFormatter,
OutputFormat: outputFormat,
Tolerance: cli.Tolerance,
StrictDeterministic: cli.StrictDeterministic,
DeterministicTolerance: cli.DeterministicTolerance,
})
kctx.FatalIfErrorf(err)
if result.HasDegradedResult(cli.Tolerance) {
if result.HasDegradedResult(cli.Tolerance, cli.DeterministicTolerance, cli.StrictDeterministic) {
os.Exit(cli.OnDegrade)
}
}
Expand Down
33 changes: 19 additions & 14 deletions cmd/benchdiff/internal/benchdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ type RunResultOutputOptions struct {
BenchstatFormatter benchstatter.OutputFormatter // default benchstatter.TextFormatter(nil)
OutputFormat string // one of json or human. default: human
Tolerance float64
StrictDeterministic bool
DeterministicTolerance float64
}

// WriteOutput outputs the result
Expand All @@ -262,6 +264,8 @@ func (r *RunResult) WriteOutput(w io.Writer, opts *RunResultOutputOptions) error
BenchstatFormatter: benchstatter.TextFormatter(nil),
OutputFormat: "human",
Tolerance: opts.Tolerance,
StrictDeterministic: opts.StrictDeterministic,
DeterministicTolerance: opts.DeterministicTolerance,
}
if opts.BenchstatFormatter != nil {
finalOpts.BenchstatFormatter = opts.BenchstatFormatter
Expand All @@ -281,13 +285,13 @@ func (r *RunResult) WriteOutput(w io.Writer, opts *RunResultOutputOptions) error
case "human":
return r.writeHumanResult(w, benchstatBuf.String())
case "json":
return r.writeJSONResult(w, benchstatBuf.String(), finalOpts.Tolerance)
return r.writeJSONResult(w, benchstatBuf.String(), finalOpts.Tolerance, finalOpts.DeterministicTolerance, finalOpts.StrictDeterministic)
default:
return fmt.Errorf("unknown OutputFormat")
}
}

func (r *RunResult) writeJSONResult(w io.Writer, benchstatResult string, tolerance float64) error {
func (r *RunResult) writeJSONResult(w io.Writer, benchstatResult string, tolerance, detTolerance float64, strictDeterministic bool) error {
type runResultJSON struct {
BenchCommand string `json:"bench_command,omitempty"`
HeadSHA string `json:"head_sha,omitempty"`
Expand All @@ -302,7 +306,7 @@ func (r *RunResult) writeJSONResult(w io.Writer, benchstatResult string, toleran
BenchstatOutput: benchstatResult,
HeadSHA: r.headSHA,
BaseSHA: r.baseSHA,
DegradedResult: r.HasDegradedResult(tolerance),
DegradedResult: r.HasDegradedResult(tolerance, detTolerance, strictDeterministic),
})
}

Expand All @@ -329,23 +333,24 @@ func (r *RunResult) writeHumanResult(w io.Writer, benchstatResult string) error
}

// HasDegradedResult returns true if there are any rows with DegradingChange and PctDelta over tolerance
func (r *RunResult) HasDegradedResult(tolerance float64) bool {
return r.maxDegradedPct() > tolerance
}

func (r *RunResult) maxDegradedPct() float64 {
max := 0.0
func (r *RunResult) HasDegradedResult(tolerance, detTolerance float64, strictDeterministic bool) bool {
for _, table := range r.tables {
isDeterministic := table.Metric == "B/op" || table.Metric == "allocs/op"
thresh := tolerance
if isDeterministic {
thresh = detTolerance
}
for _, row := range table.Rows {
if row.Change != DegradingChange {
continue
isDegraded := row.Change == DegradingChange
if strictDeterministic && isDeterministic && row.PctDelta > 0 {
isDegraded = true
}
if row.PctDelta > max {
max = row.PctDelta
if isDegraded && row.PctDelta > thresh {
return true
}
}
}
return max
return false
}

// BenchmarkChangeType is whether a change is an improvement or degradation
Expand Down
97 changes: 97 additions & 0 deletions cmd/benchdiff/internal/degraded_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package internal

import (
"testing"

"github.com/stretchr/testify/require"
"golang.org/x/perf/benchstat"
)

func TestRunResult_HasDegradedResult(t *testing.T) {
t.Run("time-based degradation", func(t *testing.T) {
r := &RunResult{
tables: []*benchstat.Table{
{
Metric: "ns/op",
Rows: []*benchstat.Row{
{
PctDelta: 15.0,
Change: DegradingChange,
},
},
},
},
}
require.True(t, r.HasDegradedResult(10.0, 10.0, false))
require.False(t, r.HasDegradedResult(20.0, 20.0, false))
})

t.Run("deterministic degradation with strict flag", func(t *testing.T) {
r := &RunResult{
tables: []*benchstat.Table{
{
Metric: "allocs/op",
Rows: []*benchstat.Row{
{
PctDelta: 5.0,
Change: InsignificantChange, // benchstat didn't mark it
},
},
},
},
}
// Without strict flag, it should not be degraded because Change != DegradingChange
require.False(t, r.HasDegradedResult(0.0, 0.0, false))
// With strict flag, it should be degraded because PctDelta > detTolerance (0.0)
require.True(t, r.HasDegradedResult(0.0, 0.0, true))
require.False(t, r.HasDegradedResult(0.0, 10.0, true))
})

t.Run("deterministic degradation with B/op and strict flag", func(t *testing.T) {
r := &RunResult{
tables: []*benchstat.Table{
{
Metric: "B/op",
Rows: []*benchstat.Row{
{
PctDelta: 20.0,
Change: InsignificantChange,
},
},
},
},
}
require.False(t, r.HasDegradedResult(10.0, 10.0, false))
require.True(t, r.HasDegradedResult(10.0, 10.0, true))
require.False(t, r.HasDegradedResult(10.0, 25.0, true))
})

t.Run("mixed metrics and tolerances", func(t *testing.T) {
r := &RunResult{
tables: []*benchstat.Table{
{
Metric: "ns/op",
Rows: []*benchstat.Row{
{
PctDelta: 5.0,
Change: InsignificantChange,
},
},
},
{
Metric: "allocs/op",
Rows: []*benchstat.Row{
{
PctDelta: 15.0,
Change: InsignificantChange,
},
},
},
},
}
// ns/op is below tolerance (10), and allocs/op is below detTolerance (20)
require.False(t, r.HasDegradedResult(10.0, 20.0, true))
// ns/op is below tolerance (10), but allocs/op is ABOVE detTolerance (10)
require.True(t, r.HasDegradedResult(10.0, 10.0, true))
})
}