diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 8ae2d22..69b971e 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -287,9 +287,9 @@ jobs: - name: "Snapshot size" if: steps.kvm_check.outputs.available == 'true' run: | - snap=".pyhl/snapshot.hls" - apparent=$(($(stat -c '%s' "$snap") / 1024 / 1024)) - disk=$(($(stat -c '%b' "$snap") * 512 / 1024 / 1024)) + snap=".pyhl/snapshot" + apparent=$(($(find "$snap" -type f -exec stat -c '%s' {} + | paste -sd+ | bc) / 1024 / 1024)) + disk=$(($(find "$snap" -type f -exec stat -c '%b' {} + | paste -sd+ | bc) * 512 / 1024 / 1024)) echo "| Metric | Value |" echo "|--------|-------|" echo "| Apparent size | ${apparent} MiB |" @@ -483,27 +483,10 @@ jobs: - name: "Snapshot size" shell: pwsh run: | - $snap = ".pyhl\snapshot.hls" - $f = Get-Item $snap - $apparentMiB = [math]::Round($f.Length / 1MB) - # GetCompressedFileSize returns actual on-disk allocation for sparse files - $wide = $f.FullName - Add-Type -TypeDefinition @" - using System; - using System.Runtime.InteropServices; - public class SparseHelper { - [DllImport("kernel32.dll", SetLastError=true, CharSet=CharSet.Unicode)] - public static extern uint GetCompressedFileSizeW(string lpFileName, out uint lpFileSizeHigh); - } - "@ -ErrorAction SilentlyContinue - $high = [uint32]0 - $low = [SparseHelper]::GetCompressedFileSizeW($wide, [ref]$high) - if ($low -ne [uint32]::MaxValue) { - $diskBytes = ([uint64]$high -shl 32) -bor [uint64]$low - $diskMiB = [math]::Round($diskBytes / 1MB) - } else { - $diskMiB = $apparentMiB - } + $snap = ".pyhl\snapshot" + $files = Get-ChildItem -Path $snap -Recurse -File + $apparentMiB = [math]::Round(($files | Measure-Object -Property Length -Sum).Sum / 1MB) + $diskMiB = $apparentMiB Write-Host "| Metric | Value |" Write-Host "|--------|-------|" Write-Host "| Apparent size | ${apparentMiB} MiB |" diff --git a/host/Cargo.lock b/host/Cargo.lock index acfa292..c665eba 100644 --- a/host/Cargo.lock +++ b/host/Cargo.lock @@ -285,6 +285,27 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -349,6 +370,72 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "digest" version = "0.10.7" @@ -393,9 +480,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + [[package]] name = "filetime" version = "0.2.29" @@ -412,6 +505,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flatbuffers" version = "25.12.19" @@ -432,6 +531,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -509,6 +614,17 @@ dependencies = [ "wasip3", ] +[[package]] +name = "getset" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cf442baaabe4213ce7d1239afc26c039180b6456da2cededa316ae2c8a77a77" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "git2" version = "0.21.0" @@ -566,6 +682,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "http" version = "1.4.2" @@ -585,11 +707,16 @@ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "hyperlight-common" version = "0.15.0" -source = "git+https://github.com/danbugs/hyperlight?rev=5cf37d92#5cf37d92262c918e7d633577a6cf946fb1f069bd" +source = "git+https://github.com/danbugs/hyperlight?rev=70e8e722#70e8e722064be62821d342da71f9847b53299324" dependencies = [ "anyhow", + "bitflags 2.13.0", + "bytemuck", + "bytes", + "fixedbitset", "flatbuffers", "log", + "smallvec", "spin", "thiserror", "tracing", @@ -599,7 +726,7 @@ dependencies = [ [[package]] name = "hyperlight-host" version = "0.15.0" -source = "git+https://github.com/danbugs/hyperlight?rev=5cf37d92#5cf37d92262c918e7d633577a6cf946fb1f069bd" +source = "git+https://github.com/danbugs/hyperlight?rev=70e8e722#70e8e722064be62821d342da71f9847b53299324" dependencies = [ "anyhow", "bitflags 2.13.0", @@ -611,6 +738,7 @@ dependencies = [ "crossbeam-channel", "flatbuffers", "goblin", + "hex", "hyperlight-common", "kvm-bindings", "kvm-ioctls", @@ -620,15 +748,18 @@ dependencies = [ "metrics", "mshv-bindings", "mshv-ioctls", + "oci-spec", "page_size", "rand", "rust-embed", + "serde", "serde_json", + "sha2", + "tempfile", "termcolor", "thiserror", "tracing", "tracing-core", - "tracing-log", "uuid", "vmm-sys-util", "windows", @@ -647,7 +778,6 @@ dependencies = [ "flate2", "hyperlight-host", "libc", - "memmap2", "nix", "serde_json", "socket2", @@ -686,6 +816,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "indexmap" version = "2.14.0" @@ -731,6 +867,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] +name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "kvm-bindings" version = "0.14.1" @@ -742,9 +893,9 @@ dependencies = [ [[package]] name = "kvm-ioctls" -version = "0.24.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "333f77a20344a448f3f70664918135fddeb804e938f28a99d685bd92926e0b19" +checksum = "06ac372c120eb893b086d1a12027669cf2b478d1f71204021ffa7adf57948d63" dependencies = [ "bitflags 2.13.0", "kvm-bindings", @@ -830,15 +981,6 @@ version = "2.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" -[[package]] -name = "memmap2" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" -dependencies = [ - "libc", -] - [[package]] name = "metrics" version = "0.24.6" @@ -925,6 +1067,23 @@ dependencies = [ "syn", ] +[[package]] +name = "oci-spec" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3da52b83ce3258fbf29f66ac784b279453c2ac3c22c5805371b921ede0d308" +dependencies = [ + "const_format", + "derive_builder", + "getset", + "regex", + "serde", + "serde_json", + "strum", + "strum_macros", + "thiserror", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -1060,6 +1219,18 @@ dependencies = [ "thiserror", ] +[[package]] +name = "regex" +version = "1.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.14" @@ -1146,7 +1317,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -1238,6 +1409,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", + "serde_derive", ] [[package]] @@ -1311,6 +1483,12 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" +[[package]] +name = "smallvec" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90" + [[package]] name = "socket2" version = "0.5.10" @@ -1323,9 +1501,9 @@ dependencies = [ [[package]] name = "spin" -version = "0.10.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" +checksum = "bd5231412d905519dca6a5deb0327d407be68d6c941feec004533401d3a0a715" dependencies = [ "lock_api", ] @@ -1336,6 +1514,24 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "subtle" version = "2.6.1" @@ -1364,6 +1560,19 @@ dependencies = [ "xattr", ] +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1426,17 +1635,6 @@ dependencies = [ "valuable", ] -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - [[package]] name = "typenum" version = "1.20.1" @@ -1685,7 +1883,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] diff --git a/host/Cargo.toml b/host/Cargo.toml index c312f67..242b69c 100644 --- a/host/Cargo.toml +++ b/host/Cargo.toml @@ -27,11 +27,10 @@ name = "pyhl" path = "src/bin/pyhl.rs" [dependencies] -# danbugs/hyperlight perf/whp-warm-start — snapshot file support + WHP warm-start optimizations. -hyperlight-host = { git = "https://github.com/danbugs/hyperlight", rev = "5cf37d92", features = ["executable_heap", "hw-interrupts", "whp-no-surrogate"] } +# danbugs/hyperlight hyperlight-unikraft — upstream main + whp-no-surrogate. +hyperlight-host = { git = "https://github.com/danbugs/hyperlight", rev = "70e8e722", features = ["executable_heap", "hw-interrupts", "whp-no-surrogate"] } clap = { version = "4", features = ["derive", "env"] } anyhow = "1" -memmap2 = "0.9" serde_json = "1" base64 = "0.22" socket2 = { version = "0.5", features = ["all"] } @@ -44,5 +43,5 @@ nix = { version = "0.29", features = ["fs"] } libc = "0.2" [target.'cfg(windows)'.dependencies] -windows-sys = { version = "0.61", features = ["Win32_System_IO", "Win32_System_Ioctl", "Win32_Storage_FileSystem", "Win32_Networking_WinSock"] } +windows-sys = { version = "0.61", features = ["Win32_Networking_WinSock"] } diff --git a/host/examples/pyhl_as_library.rs b/host/examples/pyhl_as_library.rs index 4e4f59f..0255a15 100644 --- a/host/examples/pyhl_as_library.rs +++ b/host/examples/pyhl_as_library.rs @@ -3,7 +3,7 @@ //! //! Usage: `cargo run --release --example pyhl_as_library -- ` //! -//! Assumes `pyhl setup` has already run and `.pyhl/snapshot.hls` exists +//! Assumes `pyhl setup` has already run and `.pyhl/snapshot/` exists //! in the current directory (or override with `PYHL_HOME`). use hyperlight_unikraft::{pyhl, Preopen}; diff --git a/host/examples/test_cpiovfs.rs b/host/examples/test_cpiovfs.rs index 3d93cdd..1a286b3 100644 --- a/host/examples/test_cpiovfs.rs +++ b/host/examples/test_cpiovfs.rs @@ -21,14 +21,9 @@ fn main() -> anyhow::Result<()> { sbox.snapshot_now()?; eprintln!(" snapshot OK"); - let snap_path = "/tmp/cpiovfs_snapshot.hls"; + let snap_path = "/tmp/cpiovfs_snapshot"; sbox.save_snapshot(snap_path)?; - let snap_size = std::fs::metadata(snap_path)?.len(); - eprintln!( - " snapshot size: {} MiB ({} bytes)", - snap_size / 1024 / 1024, - snap_size - ); + eprintln!(" snapshot saved to {snap_path}"); sbox.restore()?; eprintln!(" restore OK"); diff --git a/host/src/bin/pydriver_run.rs b/host/src/bin/pydriver_run.rs index 4956f50..448f83c 100644 --- a/host/src/bin/pydriver_run.rs +++ b/host/src/bin/pydriver_run.rs @@ -44,7 +44,7 @@ fn main() -> Result<()> { let t_evolve = Instant::now(); let mut sandbox = Sandbox::builder(&kernel) .initrd_file(&initrd) - .heap_size(5 * 512 * 1024 * 1024) + .heap_size(1280 * 1024 * 1024) .build()?; eprintln!( "[timing] evolve={:.1}ms", diff --git a/host/src/bin/pyhl.rs b/host/src/bin/pyhl.rs index da4f898..502172a 100644 --- a/host/src/bin/pyhl.rs +++ b/host/src/bin/pyhl.rs @@ -302,7 +302,7 @@ struct RunArgs { const CWD_HOME: &str = ".pyhl"; const KERNEL_FILE: &str = "kernel"; const INITRD_FILE: &str = "initrd.cpio"; -const SNAPSHOT_FILE: &str = "snapshot.hls"; +const SNAPSHOT_DIR: &str = "snapshot"; const VERSION_FILE: &str = "VERSION"; /// Resolve the image home to use. Tries (in order): explicit, PYHL_HOME, @@ -368,10 +368,10 @@ fn cmd_setup(args: SetupArgs) -> Result<()> { let dst_kernel = home.join(KERNEL_FILE); let dst_initrd = home.join(INITRD_FILE); - let dst_snapshot = home.join(SNAPSHOT_FILE); + let dst_snapshot = home.join(SNAPSHOT_DIR); let dst_version = home.join(VERSION_FILE); - if image_installed(&home) && dst_snapshot.is_file() && !args.force { + if image_installed(&home) && dst_snapshot.is_dir() && !args.force { eprintln!( "pyhl: image already installed at {} (use --force to overwrite)", home.display() @@ -451,7 +451,7 @@ fn cmd_setup(args: SetupArgs) -> Result<()> { { let mut builder = Sandbox::builder(&dst_kernel) .initrd_file(&dst_initrd) - .heap_size(5 * 512 * 1024 * 1024); + .heap_size(1280 * 1024 * 1024); for p in &setup_preopens { builder = builder.preopen(p.clone()); } @@ -494,12 +494,7 @@ fn cmd_setup(args: SetupArgs) -> Result<()> { dst_initrd.display(), mib(&dst_initrd) ); - eprintln!( - " snapshot: {} ({} MiB on disk, {} MiB apparent)", - dst_snapshot.display(), - disk_mib(&dst_snapshot), - mib(&dst_snapshot) - ); + eprintln!(" snapshot: {}", dst_snapshot.display()); Ok(()) } @@ -507,38 +502,6 @@ fn mib(p: &Path) -> u64 { fs::metadata(p).map(|m| m.len() / 1024 / 1024).unwrap_or(0) } -#[cfg(unix)] -fn disk_mib(p: &Path) -> u64 { - use std::os::unix::fs::MetadataExt; - fs::metadata(p) - .map(|m| m.blocks() * 512 / 1024 / 1024) - .unwrap_or_else(|_| mib(p)) -} - -#[cfg(windows)] -fn disk_mib(p: &Path) -> u64 { - use std::os::windows::ffi::OsStrExt; - let wide: Vec = p - .as_os_str() - .encode_wide() - .chain(std::iter::once(0)) - .collect(); - let mut high: u32 = 0; - let low = unsafe { - windows_sys::Win32::Storage::FileSystem::GetCompressedFileSizeW(wide.as_ptr(), &mut high) - }; - if low == u32::MAX { - return mib(p); - } - let bytes = ((high as u64) << 32) | (low as u64); - bytes / 1024 / 1024 -} - -#[cfg(not(any(unix, windows)))] -fn disk_mib(p: &Path) -> u64 { - mib(p) -} - /// Lightweight timestamp (seconds since epoch in ISO-8601-ish) so we don't /// need to pull chrono just for the VERSION stamp. fn now_iso8601() -> String { @@ -563,17 +526,13 @@ fn cmd_run(args: RunArgs) -> Result<()> { }; let home = resolve_home(args.dest.as_deref(), ResolveMode::ForRun)?; - let snapshot = home.join(SNAPSHOT_FILE); + let snapshot = home.join(SNAPSHOT_DIR); - // Fast path: `pyhl setup` already warmed up a sandbox, ran - // Py_Initialize + preloaded modules, captured the state, and - // persisted it to snapshot.hls. Here we mmap that file back and - // instantiate a sandbox directly — no kernel boot, no Python init. - if !snapshot.is_file() { + if !snapshot.is_dir() { return Err(anyhow!( "no warmed-up snapshot at {}.\n\ run `pyhl setup` first (or `pyhl setup --force` if you have\n\ - an older install without the snapshot file).", + an older install without the snapshot).", snapshot.display() )); } diff --git a/host/src/lib.rs b/host/src/lib.rs index bd1229b..7fd457b 100644 --- a/host/src/lib.rs +++ b/host/src/lib.rs @@ -60,7 +60,7 @@ pub mod stderr_capture; use anyhow::{anyhow, Result}; use hyperlight_host::func::Registerable; -use hyperlight_host::sandbox::snapshot::Snapshot; +use hyperlight_host::sandbox::snapshot::{OciTag, Snapshot}; use hyperlight_host::sandbox::uninitialized::GuestEnvironment; use hyperlight_host::sandbox::SandboxConfiguration; use hyperlight_host::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox}; @@ -2299,9 +2299,9 @@ pub struct Sandbox { inner: MultiUseSandbox, /// Post-init snapshot for fast restore between calls. snapshot: Option>, - /// When set, restore uses the preserving variant to keep the - /// read-only file mapping alive across restores. - file_mapping_path: Option, + /// Initrd path — re-mapped after every restore() since restore + /// overwrites the region with the snapshot's original memory. + initrd_path: Option, exit_code: Arc, /// Shared socket table — cleared on [`Sandbox::restore`] so that /// host-side fds don't leak across guest restore cycles. @@ -2578,9 +2578,12 @@ impl Sandbox { // from KVM_SET_USER_MEMORY_REGION on kernels where in-kernel // IRQCHIP reserves that range. const INITRD_MAP_BASE: u64 = 0xFEF0_0000; - if let Some(path) = initrd_path { - usbox.map_file_cow(path, INITRD_MAP_BASE, Some("initrd"))?; - } + let initrd_owned = if let Some(path) = initrd_path { + usbox.map_file_cow(path, INITRD_MAP_BASE)?; + Some(path.to_path_buf()) + } else { + None + }; let exit_code = Arc::new(AtomicI32::new(0)); let sleep_cancel = SleepCancel::new(); @@ -2593,18 +2596,12 @@ impl Sandbox { tools_ref.dispatch(&payload) })?; - Self::finish_evolve( - usbox, - initrd_path.map(|p| p.to_path_buf()), - exit_code, - sleep_cancel, - socket_table, - ) + Self::finish_evolve(usbox, initrd_owned, exit_code, sleep_cancel, socket_table) } fn finish_evolve( usbox: UninitializedSandbox, - file_mapping_path: Option, + initrd_path: Option, exit_code: Arc, sleep_cancel: SleepCancel, socket_table: Option>>, @@ -2614,7 +2611,7 @@ impl Sandbox { Ok(Self { inner, snapshot, - file_mapping_path, + initrd_path, exit_code, socket_table, sleep_cancel, @@ -2627,11 +2624,11 @@ impl Sandbox { /// guest memory to the state captured after init. pub fn restore(&mut self) -> Result<()> { if let Some(ref snap) = self.snapshot { - if self.file_mapping_path.is_some() { - self.inner.restore_preserving_file_mappings(snap.clone())?; - } else { - self.inner.restore(snap.clone())?; - } + self.inner.restore(snap.clone())?; + } + const INITRD_MAP_BASE: u64 = 0xFEF0_0000; + if let Some(ref path) = self.initrd_path { + self.inner.map_file_cow(path, INITRD_MAP_BASE)?; } if let Some(ref table) = self.socket_table { table.lock().unwrap().clear(); @@ -2711,17 +2708,17 @@ impl Sandbox { /// Persist the current snapshot to disk as a sparse file. /// - /// After writing the raw HLS snapshot, zero-filled 4 KiB pages are - /// punched out with `fallocate(PUNCH_HOLE)` so they consume no disk - /// space. The file is still mmap-loadable — holes read back as - /// zeros with no decompression overhead. + /// Persist the current snapshot to disk in OCI image-layout format. + /// + /// `path` is the directory for the OCI layout (created if absent). + /// The snapshot is tagged `latest`. pub fn save_snapshot>(&self, path: P) -> Result<()> { let snap = self .snapshot .as_ref() .ok_or_else(|| anyhow!("no snapshot present; build() or snapshot_now() first"))?; - snap.to_file(path.as_ref())?; - sparsify_snapshot(path.as_ref())?; + let tag = OciTag::new("latest").map_err(|e| anyhow!("{e}"))?; + snap.save(path.as_ref(), &tag).map_err(|e| anyhow!("{e}"))?; Ok(()) } @@ -2734,12 +2731,10 @@ impl Sandbox { /// warm-Python snapshot once, and every `pyhl run` instantiates /// straight from it — no kernel boot, no Py_Initialize. /// - /// Uses `Snapshot::from_file_unchecked`, which skips the SHA-256 - /// verification over the file. We trust snapshots written by our - /// own `save_snapshot()` earlier in the same process family (the - /// pyhl install dir), and the hash verify alone costs ~500ms on - /// a 2.5 GB snapshot — enough to double the whole `pyhl run` wall - /// time on simple scripts. + /// Uses `Snapshot::load`, which skips SHA-256 digest verification. + /// We trust snapshots written by our own `save_snapshot()` in the + /// same install dir; checked_load costs ~500ms on a 2.5 GB + /// snapshot — enough to double the whole `pyhl run` wall time. pub fn from_snapshot_file>(path: P) -> Result { Self::from_snapshot_file_full(path, &[], None, None, None) } @@ -2802,7 +2797,8 @@ impl Sandbox { network: Option<&NetworkPolicy>, listen_ports: Option<&ListenPorts>, ) -> Result { - let loaded = Snapshot::from_file_unchecked(path.as_ref())?; + let tag = OciTag::new("latest").map_err(|e| anyhow!("{e}"))?; + let loaded = Snapshot::load(path.as_ref(), tag).map_err(|e| anyhow!("{e}"))?; let arc = Arc::new(loaded); let exit_code = Arc::new(AtomicI32::new(0)); @@ -2822,13 +2818,13 @@ impl Sandbox { const INITRD_MAP_BASE: u64 = 0xFEF0_0000; if let Some(ref initrd_path) = initrd { - inner.map_file_cow(initrd_path, INITRD_MAP_BASE, Some("initrd"))?; + inner.map_file_cow(initrd_path, INITRD_MAP_BASE)?; } Ok(Self { inner, snapshot: Some(arc), - file_mapping_path: initrd, + initrd_path: initrd, exit_code, socket_table, sleep_cancel, @@ -2904,152 +2900,6 @@ pub fn run_vm_capture_output( }) } -// --------------------------------------------------------------------------- -// Snapshot sparsification -// --------------------------------------------------------------------------- - -/// Punch holes in zero-filled 4 KiB pages of a snapshot file. -/// -/// The HLS snapshot format is a 4 KiB header followed by a dense memory -/// blob where ~80 % of pages are all-zeros (unused heap). Punching them -/// with `fallocate(PUNCH_HOLE)` turns the file sparse — the zeros still -/// read back via mmap but consume no disk blocks. -#[cfg(target_os = "linux")] -fn sparsify_snapshot(path: &Path) -> Result<()> { - use std::os::unix::io::AsRawFd; - - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .open(path)?; - let len = file.metadata()?.len(); - let mmap = unsafe { memmap2::Mmap::map(&file)? }; - - const PAGE: usize = 4096; - const HEADER: usize = PAGE; - let zero_page = [0u8; PAGE]; - - let mut punched = 0u64; - let mut offset = HEADER; - while offset + PAGE <= len as usize { - if mmap[offset..offset + PAGE] == zero_page { - let ret = unsafe { - libc::fallocate( - file.as_raw_fd(), - libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE, - offset as i64, - PAGE as i64, - ) - }; - if ret == 0 { - punched += 1; - } - } - offset += PAGE; - } - drop(mmap); - - if punched > 0 { - let disk_mib = (len - punched * PAGE as u64) / 1024 / 1024; - eprintln!(" sparsified: {disk_mib} MiB on disk (punched {punched} zero pages)",); - } - - Ok(()) -} - -/// Windows equivalent: mark the file sparse with FSCTL_SET_SPARSE, then -/// punch zero ranges with FSCTL_SET_ZERO_DATA. -#[cfg(target_os = "windows")] -fn sparsify_snapshot(path: &Path) -> Result<()> { - use std::os::windows::io::AsRawHandle; - use windows_sys::Win32::System::Ioctl::{FSCTL_SET_SPARSE, FSCTL_SET_ZERO_DATA}; - use windows_sys::Win32::System::IO::DeviceIoControl; - - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .open(path)?; - let len = file.metadata()?.len(); - let handle = file.as_raw_handle(); - - // Mark file as sparse. - let ok = unsafe { - DeviceIoControl( - handle, - FSCTL_SET_SPARSE, - std::ptr::null(), - 0, - std::ptr::null_mut(), - 0, - std::ptr::null_mut(), - std::ptr::null_mut(), - ) - }; - if ok == 0 { - return Ok(()); - } - - let mmap = unsafe { memmap2::Mmap::map(&file)? }; - - const PAGE: usize = 4096; - const HEADER: usize = PAGE; - let zero_page = [0u8; PAGE]; - - // Coalesce contiguous zero pages into ranges for fewer syscalls. - let mut punched = 0u64; - let mut offset = HEADER; - while offset + PAGE <= len as usize { - if mmap[offset..offset + PAGE] != zero_page { - offset += PAGE; - continue; - } - let range_start = offset; - while offset + PAGE <= len as usize && mmap[offset..offset + PAGE] == zero_page { - offset += PAGE; - } - let range_end = offset; - - #[repr(C)] - struct FileZeroDataInformation { - file_offset: i64, - beyond_final_zero: i64, - } - - let info = FileZeroDataInformation { - file_offset: range_start as i64, - beyond_final_zero: range_end as i64, - }; - let ok = unsafe { - DeviceIoControl( - handle, - FSCTL_SET_ZERO_DATA, - &info as *const _ as *const _, - std::mem::size_of::() as u32, - std::ptr::null_mut(), - 0, - std::ptr::null_mut(), - std::ptr::null_mut(), - ) - }; - if ok != 0 { - punched += (range_end - range_start) as u64 / PAGE as u64; - } - } - drop(mmap); - - if punched > 0 { - let disk_mib = (len - punched * PAGE as u64) / 1024 / 1024; - eprintln!(" sparsified: {disk_mib} MiB on disk (punched {punched} zero pages)",); - } - - Ok(()) -} - -#[cfg(not(any(target_os = "linux", target_os = "windows")))] -fn sparsify_snapshot(_path: &Path) -> Result<()> { - Ok(()) -} - // --------------------------------------------------------------------------- // FsSandbox tests — prove that host-side path resolution rejects escapes. // diff --git a/host/src/pyhl.rs b/host/src/pyhl.rs index 2bfd15c..54db214 100644 --- a/host/src/pyhl.rs +++ b/host/src/pyhl.rs @@ -4,7 +4,7 @@ //! Two pieces: //! //! - [`install`] — one-time: take a source image (or a GHCR pull) and -//! materialize `kernel`, `initrd.cpio`, and a warmed-up `snapshot.hls` +//! materialize `kernel`, `initrd.cpio`, and a warmed-up `snapshot/` //! in the image home. //! //! - [`Runtime`] — the steady-state object: holds an open @@ -80,14 +80,14 @@ pub const KERNEL_FILE: &str = "kernel"; /// Standard file names inside an image home. pub const INITRD_FILE: &str = "initrd.cpio"; /// Standard file names inside an image home. -pub const SNAPSHOT_FILE: &str = "snapshot.hls"; +pub const SNAPSHOT_DIR: &str = "snapshot"; /// Standard file names inside an image home. pub const VERSION_FILE: &str = "VERSION"; /// Configuration for [`install`]. pub struct InstallOptions<'a> { /// Target directory. Files are written at - /// `{home}/{kernel,initrd.cpio,snapshot.hls,VERSION}`. + /// `{home}/{kernel,initrd.cpio,snapshot/,VERSION}`. pub home: &'a Path, /// Where to get the image from. @@ -146,10 +146,10 @@ pub fn install(opts: &InstallOptions<'_>) -> Result { let home = opts.home.to_path_buf(); let dst_kernel = home.join(KERNEL_FILE); let dst_initrd = home.join(INITRD_FILE); - let dst_snapshot = home.join(SNAPSHOT_FILE); + let dst_snapshot = home.join(SNAPSHOT_DIR); let dst_version = home.join(VERSION_FILE); - let already = dst_kernel.is_file() && dst_initrd.is_file() && dst_snapshot.is_file(); + let already = dst_kernel.is_file() && dst_initrd.is_file() && dst_snapshot.is_dir(); if already && !opts.force { return Ok(InstallReport { home, @@ -197,7 +197,7 @@ pub fn install(opts: &InstallOptions<'_>) -> Result { { let mut builder = Sandbox::builder(&dst_kernel) .initrd_file(&dst_initrd) - .heap_size(5 * 512 * 1024 * 1024); + .heap_size(1280 * 1024 * 1024); for p in opts.mounts { builder = builder.preopen(p.clone()); } @@ -255,7 +255,7 @@ pub struct Runtime { impl Runtime { /// Open a runtime against an existing install. Looks for - /// `{home}/snapshot.hls` and mmap-loads it. `mounts` specify host + /// `{home}/snapshot/` and mmap-loads it. `mounts` specify host /// directories to expose under the guest paths that were baked in /// at `install` time. `network` enables guest networking with the /// given policy (`None` = disabled). `listen_ports` controls which @@ -267,8 +267,8 @@ impl Runtime { listen_ports: Option<&crate::ListenPorts>, ) -> Result { default_surrogate_count(); - let snap = home.join(SNAPSHOT_FILE); - if !snap.is_file() { + let snap = home.join(SNAPSHOT_DIR); + if !snap.is_dir() { bail!( "no snapshot at {} — run pyhl::install first", snap.display() diff --git a/host/tests/pyhl_runtime.rs b/host/tests/pyhl_runtime.rs index 15e9fb6..719facb 100644 --- a/host/tests/pyhl_runtime.rs +++ b/host/tests/pyhl_runtime.rs @@ -5,7 +5,7 @@ //! (filesystem mounts, network policies, exit codes, hermetic rewinds). //! //! All tests self-skip if the pyhl image is not installed (no snapshot at -//! `.pyhl/snapshot.hls`) or if no hypervisor is available. Run `pyhl setup` +//! `.pyhl/snapshot/`) or if no hypervisor is available. Run `pyhl setup` //! to populate the image before running these tests. use hyperlight_unikraft::pyhl::Runtime; @@ -37,12 +37,12 @@ fn pyhl_home() -> Option { .parent() .unwrap() .join(".pyhl"); - if workspace.join("snapshot.hls").is_file() { + if workspace.join("snapshot").is_dir() { return Some(workspace); } // Check user-level install if let Some(home) = dirs_or_default() { - if home.join("snapshot.hls").is_file() { + if home.join("snapshot").is_dir() { return Some(home); } }