diff --git a/docs/design/2026_05_31_partial_6e_enable_raft_envelope.md b/docs/design/2026_05_31_partial_6e_enable_raft_envelope.md index da07a089..3bb20a68 100644 --- a/docs/design/2026_05_31_partial_6e_enable_raft_envelope.md +++ b/docs/design/2026_05_31_partial_6e_enable_raft_envelope.md @@ -17,8 +17,10 @@ | 6E-2a — typed cutover sentinel + sidecar `RaftEnvelopeCutoverIndex` apply seam | shipped | (rolled into earlier slices) | | 6E-2b — `ProposeAdmin` sibling on `raftengine.Proposer` (barrier-exempt by interface contract) | shipped | (rolled into earlier slices) | | 6E-2c — Coordinator `dynamicWrappedProposer` + `ShardGroup.raftPayloadWrap` hot-swap + `Proposer()` accessor + Internal.Forward wrap-aware proposer + fail-closed startup guard on active cutover | shipped | #922 (eb371ca6) | -| 6E-2d — §7.1 6-step quiescence barrier on `dynamicWrappedProposer.Propose` + `ShardGroup` barrier forwarders + `CutoverBarrierController` option + state-machine in `EnableRaftEnvelope` handler (gated behind `raftEnvelopeWrapEnabled = false`; flipped in 6E-2f) | shipped | this PR | -| 6E-2e — `main.go` wiring: `OpenConfig.RaftCipher` + `RaftCutoverIndex` → `CutoverBarrierController` implementation fanning out over participating `ShardGroup`s. **BLOCKER (a):** route admin RPCs (RotateDEK, RegisterEncryptionWriter) through the wrap-aware proposer so post-cutover admin entries are wrapped — the raw-engine ProposeAdmin path leaves cleartext admin entries at `index > cutoverIdx` and §6.3 halts the cluster (codex P1 #1 round-2 on PR933). **BLOCKER (b):** auto-install the wrap on every replica's FSM-apply of the cutover marker so a leader failover between cutover commit and `InstallWrap` doesn't admit cleartext writes on the newly-elected leader (codex P1 round-3 on PR933). Both blockers MUST land before 6E-2f flips the gate. | not started | — | +| 6E-2d — §7.1 6-step quiescence barrier on `dynamicWrappedProposer.Propose` + `ShardGroup` barrier forwarders + `CutoverBarrierController` option + state-machine in `EnableRaftEnvelope` handler (gated behind `raftEnvelopeWrapEnabled = false`; flipped in 6E-2f) | shipped | #933 (aa4a7baa) | +| 6E-2e-1 — Applier `WithRaftCutoverWrapInstaller` hook + invocation on fresh-success AND already-active branches of `applyEnableRaftEnvelope`. Closes BLOCKER (b) at the apply layer: every replica's FSM-apply of the cutover marker publishes the wrap closure on this node, so a follower that becomes leader post-cutover already has wrap active. Production wiring of the installer closure lives in 6E-2e-3. | shipped | this PR | +| 6E-2e-2 — Admin RPCs (RotateDEK, RegisterEncryptionWriter) routed through the wrap-aware proposer so post-cutover admin entries are wrapped. Closes BLOCKER (a): the raw-engine ProposeAdmin path leaves cleartext admin entries at `index > cutoverIdx` and §6.3 halts the cluster (codex P1 #1 round-2 on PR933). The cutover marker itself remains on a separate raw-engine reference held by EnableRaftEnvelope. | not started | — | +| 6E-2e-3 — `main.go` wiring: `OpenConfig.RaftCipher` + `RaftCutoverIndex` → `CutoverBarrierController` implementation fanning out over participating `ShardGroup`s + concrete `RaftCutoverWrapInstaller` closure that publishes the §4.2 wrap to every ShardGroup + startup-time install when `sidecar.RaftEnvelopeCutoverIndex != 0`. | not started | — | | 6E-2f — atomic flip of `raftEnvelopeWrapEnabled` to `true` (the §3.3 6E-1b gate release) | not started | — | | 6E-3 — §6C-4 fail-closed guards (`ErrEnvelopeCutoverDivergence`, `ErrEncryptionNotBootstrapped`, `ErrLocalEpochOutOfRange`) | not started | — | diff --git a/internal/encryption/applier.go b/internal/encryption/applier.go index 3e4c7542..09940623 100644 --- a/internal/encryption/applier.go +++ b/internal/encryption/applier.go @@ -112,8 +112,58 @@ type Applier struct { // write-path state — they have no nonce factory and accept the // zero value as the "preserve today's behavior" fallback. localEpoch uint16 + // raftCutoverWrapInstaller is the Stage 6E-2e-1 hook that + // applyEnableRaftEnvelope invokes on every replica's local FSM + // apply of the cutover marker. Production wiring (6E-2e-3: + // main.go) supplies a closure that publishes the wrap closure to + // every participating kv.ShardGroup via SetRaftPayloadWrap, so a + // follower that becomes leader post-cutover already has wrap + // active without needing the EnableRaftEnvelope handler to + // re-run (closes the BLOCKER (b) leader-failover hazard from + // codex P1 round-3 on PR933). + // + // Called from the fresh-success branch AND the already-active + // branch (FSM replay safety: a snapshot that excludes the + // cutover entry replays it on restart; idempotent install is + // expected). NOT called from the stale-DEKID benign-no-op + // branch — that branch leaves RaftEnvelopeCutoverIndex at 0 and + // the cutover has not taken effect. + // + // Errors from the installer halt apply: the sidecar already + // records the cutover but the in-process wrap is missing, so a + // subsequent USER proposal on this node would land cleartext at + // `index > cutoverIdx` and brick the §6.3 strict-`>` unwrap + // hook cluster-wide. Halting forces the operator to investigate + // (typically a misconfigured cipher) before any further apply + // runs. + // + // nil disables the hook — preserves the pre-6E-2e-1 test + // surface (no behavior change for callers that don't opt in). + raftCutoverWrapInstaller RaftCutoverWrapInstaller } +// RaftCutoverWrapInstaller is the Stage 6E-2e-1 callback the Applier +// invokes on every replica's local FSM apply of the +// EnableRaftEnvelope cutover marker to publish the §4.2 raft envelope +// wrap closure on this node. +// +// Contract: +// - cutoverIdx is the Raft index recorded in the sidecar +// (sc.RaftEnvelopeCutoverIndex). Fresh-success apply passes the +// just-stamped value; already-active apply passes the previously- +// recorded value (idempotent re-install). +// - activeRaftDEKID is the sidecar.Active.Raft value at apply time. +// The installer constructs the wrap closure using this DEK so the +// §6.3 strict-`>` apply hook on every replica unwraps with the +// same key. +// - Returns nil on success or an error that halts apply (see the +// raftCutoverWrapInstaller field comment for the rationale). +// +// The installer MUST be idempotent: replayed FSM apply, snapshot +// restore, and the explicit EnableRaftEnvelope handler's InstallWrap +// call all converge on the same wrap closure publication. +type RaftCutoverWrapInstaller func(cutoverIdx uint64, activeRaftDEKID uint32) error + // StateCache mirrors the sidecar fields the storage hot path needs // to consult on every Put. Two requirements drive its existence: // @@ -351,6 +401,19 @@ func WithLocalEpoch(epoch uint16) ApplierOption { return func(a *Applier) { a.localEpoch = epoch } } +// WithRaftCutoverWrapInstaller installs the Stage 6E-2e-1 hook the +// Applier invokes from applyEnableRaftEnvelope to publish the wrap +// closure on this node. nil is a no-op (the option is omitted on the +// test surface and on the pre-6E-2e-1 production posture); a +// non-nil installer is invoked on both fresh-success and +// already-active apply paths but NOT on the stale-DEK no-op branch. +// +// See the RaftCutoverWrapInstaller type comment for the contract; +// production wiring lives in main.go (6E-2e-3). +func WithRaftCutoverWrapInstaller(installer RaftCutoverWrapInstaller) ApplierOption { + return func(a *Applier) { a.raftCutoverWrapInstaller = installer } +} + // NewApplier wires an Applier against the supplied registry store // plus optional KEK / Keystore / sidecar / clock dependencies. // Returns an error if registry is nil so misconfiguration is caught @@ -1102,21 +1165,36 @@ func validateEnableRaftEnvelopePayload(p fsmwire.RotationPayload) error { // writer-registry layout is per-(DEK_id, NodeID) so storage // and raft registrations are independent rows. // -// Outcomes and FSM-level treatment match the storage variant: +// Outcomes and FSM-level treatment match the storage variant. +// The CHECK ORDER differs from the storage variant though +// (codex P1 round-1 on PR944): the raft path tests +// RaftEnvelopeCutoverIndex != 0 BEFORE the stale-DEK check so +// FSM replay of the original cutover marker after a later +// RotateDEK lands on the already-active branch (which republishes +// the wrap closure via raftCutoverWrapInstaller) rather than the +// stale-DEK no-op (which does NOT install). The storage variant +// has no installer hook so the order swap is unnecessary there. // // - Malformed payload — halt-apply via ErrEncryptionApply. -// - Stale DEKID (RotateDEK raced between propose and apply) — -// benign no-op, advance RaftAppliedIndex only. -// - Already active (duplicate cutover entry) — idempotent; -// preserve original RaftEnvelopeCutoverIndex, advance -// RaftAppliedIndex only. +// - Already active (duplicate cutover entry, OR FSM replay of +// the original marker after RotateDEK advanced Active.Raft) — +// idempotent; preserve original RaftEnvelopeCutoverIndex, +// advance RaftAppliedIndex only, invoke installer with +// sc.Active.Raft so the wrap is keyed to the current DEK on +// every replica. +// - Stale DEKID (RotateDEK raced between propose and apply AND +// RaftEnvelopeCutoverIndex == 0, i.e. cutover never took +// effect) — benign no-op, advance RaftAppliedIndex only. +// Installer is NOT invoked: no cutover took effect, so +// publishing a wrap closure would be incorrect. // - Fresh success — register proposer FIRST, then set // RaftEnvelopeCutoverIndex and advance RaftAppliedIndex // inside one WriteSidecar fsync. The registration-before- // sidecar ordering matches the storage variant's // crash-recovery invariant (§4.1 case 2-idempotent re-runs // are safe; the sidecar flip is the last observable -// side-effect). +// side-effect). Installer is invoked with sc.Active.Raft +// after the sidecar write completes. // // Stage 6E-1 deliberately does NOT activate the §6.3 engine // apply-hook unwrap or the coordinator wrap-on-propose switch @@ -1145,25 +1223,43 @@ func (a *Applier) applyEnableRaftEnvelope(raftIdx uint64, p fsmwire.RotationPayl if err != nil { return errors.Wrap(err, "applier: read sidecar for enable-raft-envelope") } - // Stage 6E-1 constraint #3 — DEKID stale at apply (RotateDEK - // raced). Benign no-op: consume the entry without halting - // and without flipping the cutover field. - if p.DEKID != sc.Active.Raft { + // Stage 6E-1 constraint #4 (idempotency) — ordered BEFORE the + // stale-DEK check so an FSM replay of the original cutover + // marker after a successful cutover + later RotateDEK reaches + // the already-active branch and republishes the wrap. With the + // reverse order, a replayed payload whose p.DEKID predates the + // rotation would be treated as a stale-DEK no-op, the wrap + // would never be installed in this process, and a freshly- + // elected leader on this node would admit cleartext writes + // above the cutover index (codex P1 round-1 on PR944). + // + // The already-active branch preserves the original + // RaftEnvelopeCutoverIndex; only RaftAppliedIndex advances so + // the duplicate entry is not replayed again. Non-zero + // RaftEnvelopeCutoverIndex IS the "already-active" signal (no + // separate bool flag). + if sc.RaftEnvelopeCutoverIndex != 0 { advanceRaftAppliedIndex(sc, raftIdx) if err := WriteSidecar(a.sidecarPath, sc); err != nil { - return errors.Wrap(err, "applier: write sidecar for stale-dekid raft-cutover no-op") + return errors.Wrap(err, "applier: write sidecar for already-active raft-cutover no-op") } - return nil + // Installer takes the CURRENT sc.Active.Raft, NOT the + // replayed p.DEKID — the wrap closure must key to the + // active DEK on this node so the §6.3 strict-`>` hook + // unwraps with the same key on every replica (gemini + // medium #1 on PR944). + return a.invokeRaftCutoverWrapInstaller(sc.RaftEnvelopeCutoverIndex, sc.Active.Raft, "already-active replay") } - // Stage 6E-1 constraint #4 — idempotency. Preserve the - // original RaftEnvelopeCutoverIndex; only advance the generic - // RaftAppliedIndex so the duplicate entry is not replayed. - // Non-zero RaftEnvelopeCutoverIndex IS the "already-active" - // signal (no separate bool flag). - if sc.RaftEnvelopeCutoverIndex != 0 { + // Stage 6E-1 constraint #3 — DEKID stale at apply (RotateDEK + // raced between propose and apply, AND the cutover never + // took effect — RaftEnvelopeCutoverIndex==0 is the gate above + // that distinguishes the genuine race from a replay). Benign + // no-op: consume the entry without halting and without + // flipping the cutover field. + if p.DEKID != sc.Active.Raft { advanceRaftAppliedIndex(sc, raftIdx) if err := WriteSidecar(a.sidecarPath, sc); err != nil { - return errors.Wrap(err, "applier: write sidecar for already-active raft-cutover no-op") + return errors.Wrap(err, "applier: write sidecar for stale-dekid raft-cutover no-op") } return nil } @@ -1185,6 +1281,46 @@ func (a *Applier) applyEnableRaftEnvelope(raftIdx uint64, p fsmwire.RotationPayl return errors.Wrap(err, "applier: write sidecar for raft cutover") } a.stateCache.RefreshFromSidecar(sc) + // Stage 6E-2e-1 BLOCKER (b) — publish the wrap closure on every + // replica's local FSM apply so a follower that becomes leader + // post-cutover already has wrap active without needing the + // EnableRaftEnvelope handler to re-run. Without this hook, the + // per-leader InstallWrap call in adapter/encryption_admin.go's + // runRaftEnvelopeCutoverBarrier is the only path that installs + // the wrap — a leader failover between cutover commit and + // InstallWrap would let the new leader admit cleartext writes at + // indexes > cutoverIdx and brick the §6.3 strict-`>` apply hook + // cluster-wide (codex P1 round-3 on PR933). + // + // Ordered AFTER WriteSidecar so a crash between sidecar fsync + // and installer invocation is recoverable: process restart sees + // RaftEnvelopeCutoverIndex != 0 in the sidecar, the startup-time + // install (6E-2e-3 main.go wiring) republishes the wrap, and + // the next apply hits the already-active branch where the + // installer is idempotent. The reverse ordering (installer + // first) would leave the wrap published but the sidecar + // pre-cutover on crash, breaking the equality the §6.3 hook + // relies on cluster-wide. + // Installer takes sc.Active.Raft (which equals p.DEKID here + // because the stale-DEK check above passed) for documentation + // clarity and to match the already-active branch's argument + // shape (gemini medium #2 on PR944). + return a.invokeRaftCutoverWrapInstaller(raftIdx, sc.Active.Raft, "fresh-success apply") +} + +// invokeRaftCutoverWrapInstaller is the Stage 6E-2e-1 dispatch +// shared between fresh-success and already-active branches of +// applyEnableRaftEnvelope. nil installer is a no-op (preserves the +// pre-6E-2e-1 test surface); a non-nil installer's error is wrapped +// with the branch tag so operator logs distinguish a failure on +// fresh apply from one on FSM replay. +func (a *Applier) invokeRaftCutoverWrapInstaller(cutoverIdx uint64, activeRaftDEKID uint32, branchTag string) error { + if a.raftCutoverWrapInstaller == nil { + return nil + } + if err := a.raftCutoverWrapInstaller(cutoverIdx, activeRaftDEKID); err != nil { + return errors.Wrapf(err, "applier: install raft-cutover wrap on %s", branchTag) + } return nil } diff --git a/internal/encryption/applier_test.go b/internal/encryption/applier_test.go index 34b62316..ff7b41c6 100644 --- a/internal/encryption/applier_test.go +++ b/internal/encryption/applier_test.go @@ -3,6 +3,7 @@ package encryption_test import ( "errors" "strconv" + "strings" "testing" "github.com/bootjp/elastickv/internal/encryption" @@ -2502,6 +2503,304 @@ func TestApplyRotation_EnableRaftEnvelope_AlreadyActive_IdempotentNoOp(t *testin } } +// recordingRaftCutoverWrapInstaller captures every invocation so the +// 6E-2e-1 tests can pin (a) how many times the installer fired, and +// (b) the arguments (cutover_idx, active_raft_dek_id) it received +// on each call. errToReturn lets a test exercise the halt-apply +// error propagation contract. +type recordingRaftCutoverWrapInstaller struct { + calls []recordingRaftCutoverWrapInstallerCall + errToReturn error +} + +type recordingRaftCutoverWrapInstallerCall struct { + cutoverIdx uint64 + activeRaftDEKID uint32 +} + +func (r *recordingRaftCutoverWrapInstaller) install(cutoverIdx uint64, activeRaftDEKID uint32) error { + r.calls = append(r.calls, recordingRaftCutoverWrapInstallerCall{ + cutoverIdx: cutoverIdx, + activeRaftDEKID: activeRaftDEKID, + }) + return r.errToReturn +} + +// newCutoverApplierWithInstaller mirrors newCutoverApplier but also +// wires a recording installer through the Stage 6E-2e-1 +// WithRaftCutoverWrapInstaller option. Returns both the Applier and +// the recorder so tests can assert on call shape. +func newCutoverApplierWithInstaller(t *testing.T, sidecarPath string) (*encryption.Applier, *recordingRaftCutoverWrapInstaller) { + t.Helper() + rec := &recordingRaftCutoverWrapInstaller{} + app := newCutoverApplierWithOpts(t, sidecarPath, + encryption.WithRaftCutoverWrapInstaller(rec.install)) + return app, rec +} + +// newCutoverApplierWithOpts returns an Applier wired with the +// shared cutover fixtures (KEK, keystore, sidecar) plus any +// extra options the test supplies. Reuses the same bootstrap +// fixtures the parent newCutoverApplier helper uses. +func newCutoverApplierWithOpts(t *testing.T, sidecarPath string, extra ...encryption.ApplierOption) *encryption.Applier { + t.Helper() + reg := newMapRegistryStore() + ks := encryption.NewKeystore() + opts := []encryption.ApplierOption{ + encryption.WithKEK(&fakeKEK{}), + encryption.WithKeystore(ks), + encryption.WithSidecarPath(sidecarPath), + } + opts = append(opts, extra...) + app, err := encryption.NewApplier(reg, opts...) + if err != nil { + t.Fatalf("NewApplier: %v", err) + } + return app +} + +// TestApplyRotation_EnableRaftEnvelope_FreshSuccess_InvokesInstaller +// pins the BLOCKER (b) contract from codex P1 round-3 on PR933: on +// a fresh-success apply of the cutover marker, the Applier MUST +// invoke the registered wrap installer exactly once with the +// just-stamped cutover index and the active raft DEK ID. The +// installer is what publishes the wrap closure to every +// participating kv.ShardGroup on this replica, so a follower that +// becomes leader post-cutover has wrap active without needing the +// EnableRaftEnvelope handler to re-run. +func TestApplyRotation_EnableRaftEnvelope_FreshSuccess_InvokesInstaller(t *testing.T) { + t.Parallel() + _, sidecarPath := bootstrappedDir(t) + app, rec := newCutoverApplierWithInstaller(t, sidecarPath) + const raftCutoverIdx uint64 = 600 + if err := app.ApplyRotation(raftCutoverIdx, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{ + DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 1, + }, + }); err != nil { + t.Fatalf("ApplyRotation raft-cutover: %v", err) + } + if got, want := len(rec.calls), 1; got != want { + t.Fatalf("installer called %d times on fresh-success, want %d", got, want) + } + if got := rec.calls[0].cutoverIdx; got != raftCutoverIdx { + t.Errorf("installer cutoverIdx = %d, want %d (must equal the just-stamped sidecar value)", + got, raftCutoverIdx) + } + if got, want := rec.calls[0].activeRaftDEKID, cutoverBootstrapRaftDEKID; got != want { + t.Errorf("installer activeRaftDEKID = %d, want %d", got, want) + } +} + +// TestApplyRotation_EnableRaftEnvelope_AlreadyActive_InvokesInstaller +// pins the FSM-replay safety contract: a duplicate cutover apply +// (sidecar.RaftEnvelopeCutoverIndex already non-zero) MUST still +// invoke the installer so a process restart that replays the +// cutover entry through this branch republishes the wrap closure +// in-process. The original cutover index is passed, not the new +// raftIdx — the wrap closure semantics must match the value the +// §6.3 strict-`>` apply hook compares against on every replica. +func TestApplyRotation_EnableRaftEnvelope_AlreadyActive_InvokesInstaller(t *testing.T) { + t.Parallel() + _, sidecarPath := bootstrappedDir(t) + app, rec := newCutoverApplierWithInstaller(t, sidecarPath) + const firstIdx uint64 = 500 + if err := app.ApplyRotation(firstIdx, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 1}, + }); err != nil { + t.Fatalf("first cutover: %v", err) + } + const duplicateIdx uint64 = 501 + if err := app.ApplyRotation(duplicateIdx, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 2}, + }); err != nil { + t.Fatalf("duplicate cutover: %v", err) + } + // One fresh-success + one already-active call = two invocations total. + if got, want := len(rec.calls), 2; got != want { + t.Fatalf("installer called %d times across fresh+duplicate, want %d (already-active branch must also invoke for replay safety)", got, want) + } + if got := rec.calls[1].cutoverIdx; got != firstIdx { + t.Errorf("already-active installer cutoverIdx = %d, want %d (the ORIGINAL cutover index, not the duplicate raftIdx)", + got, firstIdx) + } +} + +// TestApplyRotation_EnableRaftEnvelope_AlreadyActive_StaleDEKReplay_InvokesInstaller +// pins the codex P1 invariant on PR944: an FSM replay of the +// original cutover marker AFTER a successful cutover AND a +// subsequent RotateDEK MUST hit the already-active branch (which +// invokes the installer), NOT the stale-DEK no-op branch. This is +// the exact replay-safety path the installer hook is meant to +// repair. +// +// Scenario: +// 1. Original cutover applied at index N with DEK D1 (Active.Raft=D1). +// Installer fires, RaftEnvelopeCutoverIndex=N. +// 2. RotateDEK advances Active.Raft from D1 to D2. +// 3. Process restarts. Snapshot didn't include the cutover entry, +// so FSM replay re-applies it. Replay carries p.DEKID=D1, but +// sidecar.Active.Raft is now D2. +// 4. Order MUST be: check RaftEnvelopeCutoverIndex != 0 FIRST → hit +// the already-active branch → installer fires with sc.Active.Raft +// (D2). Reversed order (stale-DEK first) short-circuits at +// p.DEKID != D2 and the wrap is never re-installed in this +// process — a freshly-elected leader on this node would admit +// cleartext proposals above the cutover index and brick the §6.3 +// strict-`>` apply hook on every replica. +func TestApplyRotation_EnableRaftEnvelope_AlreadyActive_StaleDEKReplay_InvokesInstaller(t *testing.T) { + t.Parallel() + _, sidecarPath := bootstrappedDir(t) + app, rec := newCutoverApplierWithInstaller(t, sidecarPath) + + const originalCutoverIdx uint64 = 500 + if err := app.ApplyRotation(originalCutoverIdx, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 1}, + }); err != nil { + t.Fatalf("original cutover: %v", err) + } + const newRaftDEKID uint32 = 99 + if err := app.ApplyRotation(600, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubRotateDEK, + DEKID: newRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte("new-raft-dek"), + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: newRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 2}, + }); err != nil { + t.Fatalf("RotateDEK: %v", err) + } + callsBeforeReplay := len(rec.calls) + // FSM replay carries the ORIGINAL payload's DEKID, now stale + // against the post-rotate Active.Raft. + if err := app.ApplyRotation(700, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 3}, + }); err != nil { + t.Fatalf("FSM replay of cutover marker: %v", err) + } + if got := len(rec.calls); got != callsBeforeReplay+1 { + t.Errorf("installer called %d times on replay, want 1 (already-active branch MUST install regardless of replayed payload's DEKID)", + got-callsBeforeReplay) + } + if got := rec.calls[len(rec.calls)-1].activeRaftDEKID; got != newRaftDEKID { + t.Errorf("installer activeRaftDEKID on replay = %d, want %d (sidecar.Active.Raft, NOT replayed p.DEKID)", + got, newRaftDEKID) + } + if got := rec.calls[len(rec.calls)-1].cutoverIdx; got != originalCutoverIdx { + t.Errorf("installer cutoverIdx on replay = %d, want %d (originally-recorded sidecar value)", + got, originalCutoverIdx) + } + sc, err := encryption.ReadSidecar(sidecarPath) + if err != nil { + t.Fatalf("ReadSidecar: %v", err) + } + if sc.RaftEnvelopeCutoverIndex != originalCutoverIdx { + t.Errorf("RaftEnvelopeCutoverIndex = %d after replay, want %d", + sc.RaftEnvelopeCutoverIndex, originalCutoverIdx) + } + if sc.Active.Raft != newRaftDEKID { + t.Errorf("Active.Raft = %d after replay, want %d", sc.Active.Raft, newRaftDEKID) + } +} + +// TestApplyRotation_EnableRaftEnvelope_StaleDEKID_SkipsInstaller +// pins the constraint #3 contract: a stale-DEK race produces a +// benign no-op apply that advances RaftAppliedIndex but does NOT +// activate the cutover. The installer MUST NOT fire — the wrap +// closure has no DEK to bind to at this point (Active.Raft has +// moved on), and installing would publish a closure keyed to the +// wrong DEK that the §6.3 hook would then fail to unwrap. +func TestApplyRotation_EnableRaftEnvelope_StaleDEKID_SkipsInstaller(t *testing.T) { + t.Parallel() + _, sidecarPath := bootstrappedDir(t) + app, rec := newCutoverApplierWithInstaller(t, sidecarPath) + const newRaftDEKID uint32 = 99 + if err := app.ApplyRotation(200, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubRotateDEK, + DEKID: newRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte("new-raft-dek"), + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: newRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 1}, + }); err != nil { + t.Fatalf("ApplyRotation RotateDEK: %v", err) + } + if err := app.ApplyRotation(300, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, // stale + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 2}, + }); err != nil { + t.Fatalf("stale-DEK cutover: %v", err) + } + if got := len(rec.calls); got != 0 { + t.Errorf("installer called %d times on stale-DEK no-op, want 0 (no cutover took effect; wrap installation would key to the wrong DEK)", got) + } +} + +// TestApplyRotation_EnableRaftEnvelope_InstallerError_HaltsApply pins +// the fail-closed contract: if the installer returns an error, the +// applier surfaces it so the FSM apply halts. Sidecar already +// records the cutover (write ordered BEFORE installer call), so +// crash recovery is consistent: process restart sees the cutover +// active, startup-time install (6E-2e-3 main.go wiring) republishes +// the wrap closure, and the next apply hits the already-active +// idempotent branch. The reverse ordering (install first) would +// leave wrap published but sidecar pre-cutover on crash, breaking +// the equality the §6.3 hook relies on cluster-wide. +func TestApplyRotation_EnableRaftEnvelope_InstallerError_HaltsApply(t *testing.T) { + t.Parallel() + _, sidecarPath := bootstrappedDir(t) + rec := &recordingRaftCutoverWrapInstaller{errToReturn: errors.New("synthetic wrap-install failure")} + app := newCutoverApplierWithOpts(t, sidecarPath, + encryption.WithRaftCutoverWrapInstaller(rec.install)) + const raftCutoverIdx uint64 = 600 + err := app.ApplyRotation(raftCutoverIdx, fsmwire.RotationPayload{ + SubTag: fsmwire.RotateSubEnableRaftEnvelope, + DEKID: cutoverBootstrapRaftDEKID, + Purpose: fsmwire.PurposeRaft, + Wrapped: []byte{}, + ProposerRegistration: fsmwire.RegistrationPayload{ + DEKID: cutoverBootstrapRaftDEKID, FullNodeID: 0xBBBB, LocalEpoch: 1, + }, + }) + if err == nil { + t.Fatal("ApplyRotation: want installer-error halt, got nil") + } + if !strings.Contains(err.Error(), "install raft-cutover wrap") { + t.Errorf("error %q does not mention the install path", err) + } + // Sidecar MUST already reflect the cutover (write ordered + // before installer call) so recovery via restart converges. + sc, readErr := encryption.ReadSidecar(sidecarPath) + if readErr != nil { + t.Fatalf("ReadSidecar: %v", readErr) + } + if sc.RaftEnvelopeCutoverIndex != raftCutoverIdx { + t.Errorf("RaftEnvelopeCutoverIndex = %d after installer error, want %d (sidecar write MUST precede installer call for crash-recovery convergence)", + sc.RaftEnvelopeCutoverIndex, raftCutoverIdx) + } +} + // assertRaftCutoverNotApplied verifies a rejected raft-envelope // cutover left the cutover sidecar state untouched. func assertRaftCutoverNotApplied(t *testing.T, sidecarPath string) {