From 212f20a5072a85dee23c620e69e671a790478a1e Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 25 Jun 2026 23:48:16 +0000 Subject: [PATCH] feat(whp): support no-surrogate mode via HYPERLIGHT_MAX_SURROGATES=0 When HYPERLIGHT_MAX_SURROGATES=0, bypass surrogate processes entirely: - Use WHvMapGpaRange (host VA) instead of WHvMapGpaRange2 (surrogate) - Reuse existing WindowsMapping::Anonymous allocation path - RAII guard (stored as WhpVm field) enforces single-VM-per-process - Add dedicated unit test and run existing integration/snapshot tests in CI with per-test verification against silent renames - Add just test-no-surrogate recipe mirroring the CI step Signed-off-by: danbugs --- .github/workflows/dep_build_test.yml | 32 +++ Justfile | 14 ++ .../hypervisor/surrogate_process_manager.rs | 51 ++-- .../src/hypervisor/virtual_machine/whp.rs | 231 ++++++++++++++---- 4 files changed, 258 insertions(+), 70 deletions(-) diff --git a/.github/workflows/dep_build_test.yml b/.github/workflows/dep_build_test.yml index 91ce867aa..0b0f8a1f5 100644 --- a/.github/workflows/dep_build_test.yml +++ b/.github/workflows/dep_build_test.yml @@ -118,6 +118,38 @@ jobs: # with only one driver enabled (kvm/mshv3 features are unix-only, no-op on Windows) just test ${{ inputs.config }} ${{ inputs.hypervisor == 'mshv3' && 'mshv3' || 'kvm' }} + - name: Run no-surrogate VM tests (WHP only) + if: runner.os == 'Windows' + env: + HYPERLIGHT_MAX_SURROGATES: "0" + HYPERLIGHT_INITIAL_SURROGATES: "0" + run: | + PROFILE=${{ inputs.config == 'debug' && 'dev' || inputs.config }} + # Verify each expected test actually ran (guards against silent + # renames where cargo test exits 0 with 0 matches). + # NOTE: keep in sync with `just test-no-surrogate`. + assert_ran() { + local output="$1"; shift + for name in "$@"; do + if ! echo "$output" | grep -q "$name \.\.\. 
ok"; then + echo "::error::Expected test '$name' did not run — may have been renamed" + return 1 + fi + done + } + OUT=$(cargo test -p hyperlight-host --profile=$PROFILE --lib -- no_surrogate_tests --test-threads=1 2>&1) || { echo "$OUT"; exit 1; } + echo "$OUT" + assert_ran "$OUT" single_vm_lifecycle + OUT=$(cargo test -p hyperlight-host --profile=$PROFILE --test integration_test -- guest_malloc guest_panic corrupt_output_size_prefix_rejected --test-threads=1 2>&1) || { echo "$OUT"; exit 1; } + echo "$OUT" + assert_ran "$OUT" guest_malloc guest_panic corrupt_output_size_prefix_rejected + OUT=$(cargo test -p hyperlight-host --profile=$PROFILE --test sandbox_host_tests -- --exact callback_test float_roundtrip --test-threads=1 2>&1) || { echo "$OUT"; exit 1; } + echo "$OUT" + assert_ran "$OUT" callback_test float_roundtrip + OUT=$(cargo test -p hyperlight-host --profile=$PROFILE --lib -- snapshot_evolve_restore_handles_state_correctly restore_from_loaded_snapshot --test-threads=1 2>&1) || { echo "$OUT"; exit 1; } + echo "$OUT" + assert_ran "$OUT" snapshot_evolve_restore_handles_state_correctly restore_from_loaded_snapshot + - name: Run Rust tests with hw-interrupts run: | # with hw-interrupts feature enabled (+ explicit driver on Linux) diff --git a/Justfile b/Justfile index 2bdb842b9..70d63ddfb 100644 --- a/Justfile +++ b/Justfile @@ -104,6 +104,9 @@ test-like-ci config=default-target hypervisor="kvm": @# with hw-interrupts enabled (+ explicit driver on Linux) {{ if os() == "linux" { if hypervisor == "mshv3" { "just test " + config + " mshv3,hw-interrupts" } else { "just test " + config + " kvm,hw-interrupts" } } else { "just test " + config + " hw-interrupts" } }} + @# no-surrogate mode smoke tests (Windows/WHP only) + {{ if os() == "windows" { "just test-no-surrogate " + config } else { "" } }} + @# make sure certain cargo features compile just check @@ -262,6 +265,17 @@ test-compilation-no-default-features target=default-target: {{ if os() == "linux" { cargo-cmd 
+ " check -p hyperlight-host --no-default-features --features kvm" } else { "" } }} {{ target-triple-flag }} {{ if os() == "linux" { cargo-cmd + " check -p hyperlight-host --no-default-features --features mshv3" } else { "" } }} {{ target-triple-flag }} +# runs a subset of existing tests with HYPERLIGHT_MAX_SURROGATES=0 (Windows only). +# Covers: guest calls, host callbacks, in-memory snapshot/restore, and +# save/load snapshot from disk. +# NOTE: if any of the test names below are renamed, update both this +# recipe AND the matching CI step in .github/workflows/dep_build_test.yml. +test-no-surrogate target=default-target: + {{ set-env-command }}HYPERLIGHT_MAX_SURROGATES=0; {{ set-env-command }}HYPERLIGHT_INITIAL_SURROGATES=0; {{ cargo-cmd }} test -p hyperlight-host --profile={{ if target == "debug" { "dev" } else { target } }} --lib -- no_surrogate_tests --test-threads=1 + {{ set-env-command }}HYPERLIGHT_MAX_SURROGATES=0; {{ set-env-command }}HYPERLIGHT_INITIAL_SURROGATES=0; {{ cargo-cmd }} test -p hyperlight-host --profile={{ if target == "debug" { "dev" } else { target } }} --test integration_test -- guest_malloc guest_panic corrupt_output_size_prefix_rejected --test-threads=1 + {{ set-env-command }}HYPERLIGHT_MAX_SURROGATES=0; {{ set-env-command }}HYPERLIGHT_INITIAL_SURROGATES=0; {{ cargo-cmd }} test -p hyperlight-host --profile={{ if target == "debug" { "dev" } else { target } }} --test sandbox_host_tests -- --exact callback_test float_roundtrip --test-threads=1 + {{ set-env-command }}HYPERLIGHT_MAX_SURROGATES=0; {{ set-env-command }}HYPERLIGHT_INITIAL_SURROGATES=0; {{ cargo-cmd }} test -p hyperlight-host --profile={{ if target == "debug" { "dev" } else { target } }} --lib -- snapshot_evolve_restore_handles_state_correctly restore_from_loaded_snapshot --test-threads=1 + # runs tests that exercise gdb debugging test-rust-gdb-debugging target=default-target features="": {{ cargo-cmd }} test --profile={{ if target == "debug" { "dev" } else { target } }} {{ 
target-triple-flag }} --example guest-debugging {{ if features =="" {'--features gdb'} else { "--features gdb," + features } }} diff --git a/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs b/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs index de15fde0b..e4bd03092 100644 --- a/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs +++ b/src/hyperlight_host/src/hypervisor/surrogate_process_manager.rs @@ -19,6 +19,7 @@ use std::fs::File; use std::io::Write; use std::mem::size_of; use std::path::{Path, PathBuf}; +use std::sync::OnceLock; use std::sync::atomic::{AtomicUsize, Ordering}; use crossbeam_channel::{Receiver, Sender, TryRecvError, unbounded}; @@ -86,17 +87,20 @@ fn surrogate_binary_name() -> Result { /// (or `None` when the variable is unset or unparsable). /// /// Resolution order: -/// 1. `max` is clamped to `1..=HARD_MAX_SURROGATE_PROCESSES`, defaulting +/// 1. `max` is clamped to `0..=HARD_MAX_SURROGATE_PROCESSES`, defaulting /// to `HARD_MAX_SURROGATE_PROCESSES` when `None`. -/// 2. `initial` is clamped to `1..=max`, defaulting to `max` when `None`. +/// 2. `initial` is clamped to `0..=max`, defaulting to `max` when `None`. /// This guarantees `initial <= max` without an extra conditional. +/// +/// When `max == 0`, surrogates are disabled entirely and the system +/// falls back to `WHvMapGpaRange` (single-VM-per-process mode). fn compute_surrogate_counts(raw_initial: Option<usize>, raw_max: Option<usize>) -> (usize, usize) { let max = raw_max - .map(|n| n.clamp(1, HARD_MAX_SURROGATE_PROCESSES)) + .map(|n| n.clamp(0, HARD_MAX_SURROGATE_PROCESSES)) .unwrap_or(HARD_MAX_SURROGATE_PROCESSES); - // Clamp initial to 1..=max so it can never exceed the authoritative limit.
+ let initial = raw_initial.map(|n| n.clamp(0, max)).unwrap_or(max); (initial, max) } @@ -104,8 +108,8 @@ fn compute_surrogate_counts(raw_initial: Option<usize>, raw_max: Option<usize>) /// Returns the (initial, max) surrogate process counts from environment /// variables, applying validation and clamping. /// -/// - `HYPERLIGHT_INITIAL_SURROGATES`: clamped to `1..=max`, default `max`. -/// - `HYPERLIGHT_MAX_SURROGATES`: clamped to `1..=512`, default 512. +/// - `HYPERLIGHT_INITIAL_SURROGATES`: clamped to `0..=max`, default `max`. +/// - `HYPERLIGHT_MAX_SURROGATES`: clamped to `0..=512`, default 512. fn surrogate_process_counts() -> (usize, usize) { let raw_initial = std::env::var(INITIAL_SURROGATES_ENV_VAR) .ok() @@ -353,6 +357,21 @@ pub(crate) fn get_surrogate_process_manager() -> Result<&'static SurrogateProces } } +/// Returns `true` when `HYPERLIGHT_MAX_SURROGATES=0`, meaning surrogate +/// processes are disabled and the system should use `WHvMapGpaRange` +/// (single-VM-per-process mode) instead of `WHvMapGpaRange2`. +/// +/// The result is cached on first call — the env var is read only once. +pub(crate) fn surrogates_disabled() -> bool { + static DISABLED: OnceLock<bool> = OnceLock::new(); + *DISABLED.get_or_init(|| { + std::env::var(MAX_SURROGATES_ENV_VAR) + .ok() + .and_then(|v| v.parse::<usize>().ok()) + .is_some_and(|n| n == 0) + }) +} + // Creates a job object that will terminate all the surrogate processes when the struct instance is dropped.
#[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] fn create_job_object() -> Result { @@ -885,9 +904,9 @@ mod tests { "initial should be clamped down to max when it exceeds it" ); - // --- initial below minimum → clamped to 1 --- + // --- initial at zero → allowed (surrogates disabled when max is also 0) --- let (initial, max) = compute_surrogate_counts(Some(0), None); - assert_eq!(initial, 1, "initial should be clamped to minimum of 1"); + assert_eq!(initial, 0, "initial of 0 should be allowed"); assert_eq!( max, HARD_MAX_SURROGATE_PROCESSES, "max should default when unset" @@ -909,10 +928,10 @@ mod tests { "initial should be clamped down to max when it defaults above it" ); - // --- max below minimum → clamped to 1, initial follows --- + // --- max at zero → allowed (surrogates disabled), initial follows --- let (initial, max) = compute_surrogate_counts(None, Some(0)); - assert_eq!(max, 1, "max should be clamped to minimum of 1"); - assert_eq!(initial, 1, "initial should be clamped down to max"); + assert_eq!(max, 0, "max of 0 should be allowed"); + assert_eq!(initial, 0, "initial should be clamped down to max"); // --- max above hard limit → clamped to 512 --- let (initial, max) = compute_surrogate_counts(None, Some(9999)); @@ -947,12 +966,12 @@ mod tests { // gracefully adapts: it only asserts the invariant initial <= max <= 512. 
let (initial, max) = surrogate_process_counts(); assert!( - (1..=HARD_MAX_SURROGATE_PROCESSES).contains(&initial), - "initial {initial} should be in 1..={HARD_MAX_SURROGATE_PROCESSES}" + (0..=HARD_MAX_SURROGATE_PROCESSES).contains(&initial), + "initial {initial} should be in 0..={HARD_MAX_SURROGATE_PROCESSES}" ); assert!( - (1..=HARD_MAX_SURROGATE_PROCESSES).contains(&max), - "max {max} should be in 1..={HARD_MAX_SURROGATE_PROCESSES}" + (0..=HARD_MAX_SURROGATE_PROCESSES).contains(&max), + "max {max} should be in 0..={HARD_MAX_SURROGATE_PROCESSES}" ); assert!(initial <= max, "initial ({initial}) must be <= max ({max})"); } diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 868766eee..6de2b29f1 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -15,6 +15,7 @@ limitations under the License. */ use std::os::raw::c_void; +use std::sync::atomic::{AtomicBool, Ordering}; use hyperlight_common::outb::VmAction; #[cfg(feature = "trace_guest")] @@ -37,7 +38,9 @@ use crate::hypervisor::regs::{ WHP_SREGS_NAMES_LEN, }; use crate::hypervisor::surrogate_process::SurrogateProcess; -use crate::hypervisor::surrogate_process_manager::get_surrogate_process_manager; +use crate::hypervisor::surrogate_process_manager::{ + get_surrogate_process_manager, surrogates_disabled, +}; #[cfg(feature = "hw-interrupts")] use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; use crate::hypervisor::virtual_machine::{ @@ -87,25 +90,62 @@ fn release_file_mapping(view_base: *mut c_void, mapping_handle: HandleWrapper) { } } +/// When surrogates are disabled (`HYPERLIGHT_MAX_SURROGATES=0`), only one WHP +/// partition can exist per process. This flag enforces that constraint and is +/// cleared when the `NoSurrogateGuard` stored on the `WhpVm` is dropped. 
+static NO_SURROGATE_VM_ACTIVE: AtomicBool = AtomicBool::new(false); + +/// RAII guard that sets `NO_SURROGATE_VM_ACTIVE` on creation and clears +/// it on drop. Stored as a field on `WhpVm` so the flag stays set for +/// exactly the lifetime of the VM. +#[derive(Debug)] +struct NoSurrogateGuard; + +impl NoSurrogateGuard { + fn acquire() -> Result<Self, CreateVmError> { + if NO_SURROGATE_VM_ACTIVE.swap(true, Ordering::SeqCst) { + return Err(CreateVmError::SurrogateProcess( + "HYPERLIGHT_MAX_SURROGATES=0 limits the process to a single VM; \ + a VM is already active" + .into(), + )); + } + Ok(Self) + } +} + +impl Drop for NoSurrogateGuard { + fn drop(&mut self) { + NO_SURROGATE_VM_ACTIVE.store(false, Ordering::SeqCst); + } +} + /// A Windows Hypervisor Platform implementation of a single-vcpu VM #[derive(Debug)] pub(crate) struct WhpVm { partition: WHV_PARTITION_HANDLE, - // Surrogate process for memory mapping - surrogate_process: SurrogateProcess, + // Surrogate process for memory mapping. `None` when surrogates are + // disabled (`HYPERLIGHT_MAX_SURROGATES=0`), in which case + // `WHvMapGpaRange` is used instead of `WHvMapGpaRange2`. + surrogate_process: Option<SurrogateProcess>, /// Tracks host-side file mappings (view_base, mapping_handle) for /// cleanup on unmap or drop. Only populated for MappedFile regions. file_mappings: Vec<(HandleWrapper, *mut c_void)>, + /// RAII guard that clears `NO_SURROGATE_VM_ACTIVE` when this VM is + /// dropped. `None` when surrogates are enabled. + _no_surrogate_guard: Option<NoSurrogateGuard>, /// Handle to the background timer (if started). #[cfg(feature = "hw-interrupts")] timer: Option<TimerThread>, } -// Safety: `WhpVm` is !Send because it holds `SurrogateProcess` which contains a raw pointer -// `allocated_address` (*mut c_void). This pointer represents a memory mapped view address -// in the surrogate process. It is never dereferenced, only used for address arithmetic and -// resource management (unmapping).
This is a system resource that is not bound to the creating -// thread and can be safely transferred between threads. +// Safety: `WhpVm` is !Send because it holds `Option<SurrogateProcess>` which +// contains a raw pointer `allocated_address` (*mut c_void). This pointer +// represents a memory mapped view address in the surrogate process. It is +// never dereferenced, only used for address arithmetic and resource management +// (unmapping). This is a system resource that is not bound to the creating +// thread and can be safely transferred between threads. When the `Option` is +// `None` (surrogates disabled), no such pointer exists. // `file_mappings` contains raw pointers that are also kernel resource handles, // safe to use from any thread. unsafe impl Send for WhpVm {} @@ -114,6 +154,13 @@ impl WhpVm { pub(crate) fn new() -> Result<Self, CreateVmError> { const NUM_CPU: u32 = 1; + let no_surrogate = surrogates_disabled(); + let no_surrogate_guard = if no_surrogate { + Some(NoSurrogateGuard::acquire()?) + } else { + None + }; + let partition = unsafe { #[cfg(feature = "hw-interrupts")] Self::check_lapic_emulation_support()?; @@ -134,25 +181,28 @@ impl WhpVm { WHvCreateVirtualProcessor(p, 0, 0) .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - // Initialize the LAPIC via the bulk interrupt-controller - // state API (individual APIC register writes via - // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED).
#[cfg(feature = "hw-interrupts")] Self::init_lapic_bulk(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; p }; - let mgr = get_surrogate_process_manager() - .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; - let surrogate_process = mgr - .get_surrogate_process() - .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + let surrogate_process = if no_surrogate { + None + } else { + let mgr = get_surrogate_process_manager() + .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + Some( + mgr.get_surrogate_process() + .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?, + ) + }; Ok(WhpVm { partition, surrogate_process, file_mappings: Vec::new(), + _no_surrogate_guard: no_surrogate_guard, #[cfg(feature = "hw-interrupts")] timer: None, }) @@ -183,18 +233,6 @@ impl VirtualMachine for WhpVm { &mut self, (_slot, region): (u32, &MemoryRegion), ) -> Result<(), MapMemoryError> { - // Calculate the surrogate process address for this region - let surrogate_base = self - .surrogate_process - .map( - region.host_region.start.from_handle, - region.host_region.start.handle_base, - region.host_region.start.handle_size, - &region.region_type.surrogate_mapping(), - ) - .map_err(|e| MapMemoryError::SurrogateProcess(e.to_string()))?; - let surrogate_addr = surrogate_base.wrapping_add(region.host_region.start.offset); - let flags = region .flags .iter() @@ -212,32 +250,71 @@ impl VirtualMachine for WhpVm { .iter() .fold(WHvMapGpaRangeFlagNone, |acc, flag| acc | *flag); - let whvmapgparange2_func = unsafe { - match try_load_whv_map_gpa_range2() { - Ok(func) => func, - Err(e) => { - return Err(MapMemoryError::LoadApi { - api_name: "WHvMapGpaRange2", - source: e, - }); + match &mut self.surrogate_process { + None => { + let host_addr = (region.host_region.start.handle_base + + region.host_region.start.offset) + as *const c_void; + let res = unsafe { + WHvMapGpaRange( + self.partition, + host_addr, + region.guest_region.start as u64, +
region.guest_region.len() as u64, + flags, + ) + }; + if let Err(e) = res { + return Err(MapMemoryError::Hypervisor(HypervisorError::WindowsError(e))); + } + } + Some(surrogate) => { + // Calculate the surrogate process address for this region + let surrogate_base = surrogate + .map( + region.host_region.start.from_handle, + region.host_region.start.handle_base, + region.host_region.start.handle_size, + &region.region_type.surrogate_mapping(), + ) + .map_err(|e| MapMemoryError::SurrogateProcess(e.to_string()))?; + let surrogate_addr = surrogate_base.wrapping_add(region.host_region.start.offset); + + // This function dynamically loads the WHvMapGpaRange2 function from the winhvplatform.dll + // WHvMapGpaRange2 only available on Windows 11 or Windows Server 2022 and later + // we do things this way to allow a user trying to load hyperlight on an older version of windows to + // get an error message saying that hyperlight requires a newer version of windows, rather than just failing + // with an error about a missing entrypoint + // This function should always succeed since before we get here we have already checked that the hypervisor is present and + // that we are on a supported version of windows.
+ let whvmapgparange2_func = unsafe { + match try_load_whv_map_gpa_range2() { + Ok(func) => func, + Err(e) => { + return Err(MapMemoryError::LoadApi { + api_name: "WHvMapGpaRange2", + source: e, + }); + } + } + }; + + let res = unsafe { + whvmapgparange2_func( + self.partition, + surrogate.process_handle.into(), + surrogate_addr, + region.guest_region.start as u64, + region.guest_region.len() as u64, + flags, + ) + }; + if res.is_err() { + return Err(MapMemoryError::Hypervisor(HypervisorError::WindowsError( + windows_result::Error::from_hresult(res), + ))); } } - }; - - let res = unsafe { - whvmapgparange2_func( - self.partition, - self.surrogate_process.process_handle.into(), - surrogate_addr, - region.guest_region.start as u64, - region.guest_region.len() as u64, - flags, - ) - }; - if res.is_err() { - return Err(MapMemoryError::Hypervisor(HypervisorError::WindowsError( - windows_result::Error::from_hresult(res), - ))); } // Track host-side file mappings for cleanup on unmap or drop. @@ -263,8 +340,9 @@ impl VirtualMachine for WhpVm { ) .map_err(|e| UnmapMemoryError::Hypervisor(HypervisorError::WindowsError(e)))?; } - self.surrogate_process - .unmap(region.host_region.start.handle_base); + if let Some(surrogate) = &mut self.surrogate_process { + surrogate.unmap(region.host_region.start.handle_base); + } // Clean up host-side file mapping resources for MappedFile regions. if region.region_type == MemoryRegionType::MappedFile { @@ -1164,6 +1242,51 @@ unsafe fn try_load_whv_map_gpa_range2() -> windows_result::Result