diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml
index 6619d2a6b..0676be051 100644
--- a/.github/workflows/release-vm-dev.yml
+++ b/.github/workflows/release-vm-dev.yml
@@ -421,7 +421,7 @@ jobs:
# ---------------------------------------------------------------------------
build-driver-vm-linux:
name: Build Driver VM (Linux ${{ matrix.arch }})
- needs: [compute-versions, download-kernel-runtime, build-rootfs]
+ needs: [compute-versions, download-kernel-runtime]
strategy:
matrix:
include:
@@ -477,12 +477,6 @@ jobs:
name: kernel-runtime-tarballs
path: runtime-download/
- - name: Download rootfs tarball
- uses: actions/download-artifact@v4
- with:
- name: rootfs-${{ matrix.arch }}
- path: rootfs-download/
-
- name: Stage compressed runtime for embedding
run: |
set -euo pipefail
@@ -504,12 +498,15 @@ jobs:
zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file"
done
- # Copy rootfs tarball (already zstd-compressed)
- cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst"
-
echo "Staged compressed artifacts:"
ls -lah "$COMPRESSED_DIR"
+ - name: Build bundled supervisor
+ run: |
+ set -euo pipefail
+ OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" \
+ tasks/scripts/vm/build-supervisor-bundle.sh --arch "${{ matrix.guest_arch }}"
+
- name: Scope workspace to driver-vm crates
run: |
set -euo pipefail
@@ -551,7 +548,7 @@ jobs:
# ---------------------------------------------------------------------------
build-driver-vm-macos:
name: Build Driver VM (macOS)
- needs: [compute-versions, download-kernel-runtime, build-rootfs]
+ needs: [compute-versions, download-kernel-runtime]
runs-on: build-amd64
timeout-minutes: 60
container:
@@ -591,12 +588,6 @@ jobs:
name: kernel-runtime-tarballs
path: runtime-download/
- - name: Download rootfs tarball (arm64)
- uses: actions/download-artifact@v4
- with:
- name: rootfs-arm64
- path: rootfs-download/
-
- name: Prepare compressed runtime directory
run: |
set -euo pipefail
@@ -619,12 +610,24 @@ jobs:
zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file"
done
- # The macOS VM guest is always Linux ARM64, so use the arm64 rootfs
- cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst"
-
echo "Staged macOS compressed artifacts:"
ls -lah "$COMPRESSED_DIR"
+ - name: Build bundled supervisor
+ run: |
+ set -euo pipefail
+ docker buildx build \
+ --file deploy/docker/Dockerfile.images \
+ --platform linux/arm64 \
+ --build-arg OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" \
+ --build-arg OPENSHELL_IMAGE_TAG=dev \
+ --target supervisor-output \
+ --output type=local,dest=supervisor-out/ \
+ .
+
+ zstd -19 -T0 -f supervisor-out/openshell-sandbox \
+ -o "${PWD}/target/vm-runtime-compressed-macos/openshell-sandbox.zst"
+
- name: Build macOS binary via Docker (osxcross)
run: |
set -euo pipefail
@@ -776,7 +779,7 @@ jobs:
### VM Compute Driver Binaries
- `openshell-driver-vm` binaries with embedded kernel runtime and sandbox rootfs.
+ `openshell-driver-vm` binaries with embedded kernel runtime and bundled sandbox supervisor.
Launched by the gateway when `--drivers=vm` is configured. Rebuilt on every
push to main alongside the openshell-vm binaries.
diff --git a/Cargo.lock b/Cargo.lock
index bfaa55d93..4e74e89a6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -621,6 +621,12 @@ dependencies = [
"shlex",
]
+[[package]]
+name = "cesu8"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
+
[[package]]
name = "cexpr"
version = "0.6.0"
@@ -761,6 +767,16 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
+[[package]]
+name = "combine"
+version = "4.6.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
+dependencies = [
+ "bytes",
+ "memchr",
+]
+
[[package]]
name = "compact_str"
version = "0.7.1"
@@ -808,6 +824,27 @@ version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c"
+[[package]]
+name = "const_format"
+version = "0.2.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e"
+dependencies = [
+ "const_format_proc_macros",
+ "konst",
+]
+
+[[package]]
+name = "const_format_proc_macros"
+version = "0.2.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
[[package]]
name = "constant_time_eq"
version = "0.4.2"
@@ -1175,6 +1212,37 @@ dependencies = [
"syn 1.0.109",
]
+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn 2.0.117",
+]
+
[[package]]
name = "dialoguer"
version = "0.11.0"
@@ -1648,6 +1716,18 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "getset"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912"
+dependencies = [
+ "proc-macro-error2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
[[package]]
name = "ghash"
version = "0.5.1"
@@ -1861,6 +1941,15 @@ dependencies = [
"itoa",
]
+[[package]]
+name = "http-auth"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "150fa4a9462ef926824cf4519c84ed652ca8f4fbae34cb8af045b5cbcaf98822"
+dependencies = [
+ "memchr",
+]
+
[[package]]
name = "http-body"
version = "1.0.1"
@@ -2324,6 +2413,50 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+[[package]]
+name = "jni"
+version = "0.21.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97"
+dependencies = [
+ "cesu8",
+ "cfg-if",
+ "combine",
+ "jni-sys 0.3.1",
+ "log",
+ "thiserror 1.0.69",
+ "walkdir",
+ "windows-sys 0.45.0",
+]
+
+[[package]]
+name = "jni-sys"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258"
+dependencies = [
+ "jni-sys 0.4.1",
+]
+
+[[package]]
+name = "jni-sys"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2"
+dependencies = [
+ "jni-sys-macros",
+]
+
+[[package]]
+name = "jni-sys-macros"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264"
+dependencies = [
+ "quote",
+ "syn 2.0.117",
+]
+
[[package]]
name = "jobserver"
version = "0.1.34"
@@ -2387,6 +2520,20 @@ dependencies = [
"simple_asn1",
]
+[[package]]
+name = "jsonwebtoken"
+version = "10.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1"
+dependencies = [
+ "base64 0.22.1",
+ "getrandom 0.2.17",
+ "js-sys",
+ "serde",
+ "serde_json",
+ "signature 2.2.0",
+]
+
[[package]]
name = "k8s-openapi"
version = "0.21.1"
@@ -2400,6 +2547,21 @@ dependencies = [
"serde_json",
]
+[[package]]
+name = "konst"
+version = "0.2.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb"
+dependencies = [
+ "konst_macro_rules",
+]
+
+[[package]]
+name = "konst_macro_rules"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37"
+
[[package]]
name = "kube"
version = "0.90.0"
@@ -3052,7 +3214,7 @@ dependencies = [
"getrandom 0.2.17",
"http",
"rand 0.8.6",
- "reqwest",
+ "reqwest 0.12.28",
"serde",
"serde_json",
"serde_path_to_error",
@@ -3070,6 +3232,60 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "oci-client"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b7f8deaffcd3b0e3baf93dddcab3d18b91d46dc37d38a8b170089b234de5bb3"
+dependencies = [
+ "bytes",
+ "chrono",
+ "futures-util",
+ "http",
+ "http-auth",
+ "jsonwebtoken 10.3.0",
+ "lazy_static",
+ "oci-spec",
+ "olpc-cjson",
+ "regex",
+ "reqwest 0.13.2",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "unicase",
+]
+
+[[package]]
+name = "oci-spec"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8445a2631507cec628a15fdd6154b54a3ab3f20ed4fe9d73a3b8b7a4e1ba03a"
+dependencies = [
+ "const_format",
+ "derive_builder",
+ "getset",
+ "regex",
+ "serde",
+ "serde_json",
+ "strum 0.27.2",
+ "strum_macros 0.27.2",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "olpc-cjson"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "696183c9b5fe81a7715d074fd632e8bd46f4ccc0231a3ed7fc580a80de5f7083"
+dependencies = [
+ "serde",
+ "serde_json",
+ "unicode-normalization",
+]
+
[[package]]
name = "once_cell"
version = "1.21.4"
@@ -3137,7 +3353,7 @@ dependencies = [
"owo-colors",
"prost-types",
"rcgen",
- "reqwest",
+ "reqwest 0.12.28",
"rustls",
"rustls-pemfile",
"serde",
@@ -3240,18 +3456,22 @@ dependencies = [
name = "openshell-driver-vm"
version = "0.0.0"
dependencies = [
+ "bollard",
"clap",
+ "flate2",
"futures",
"libc",
"libloading",
"miette",
"nix",
+ "oci-client",
"openshell-core",
"openshell-vfio",
"polling",
"prost-types",
"serde",
"serde_json",
+ "sha2 0.10.9",
"tar",
"tokio",
"tokio-stream",
@@ -3311,7 +3531,7 @@ version = "0.0.0"
dependencies = [
"bytes",
"openshell-core",
- "reqwest",
+ "reqwest 0.12.28",
"serde",
"serde_json",
"serde_yml",
@@ -3387,7 +3607,7 @@ dependencies = [
"hyper-rustls",
"hyper-util",
"ipnet",
- "jsonwebtoken",
+ "jsonwebtoken 9.3.1",
"metrics",
"metrics-exporter-prometheus",
"miette",
@@ -3404,7 +3624,7 @@ dependencies = [
"prost-types",
"rand 0.9.4",
"rcgen",
- "reqwest",
+ "reqwest 0.12.28",
"russh",
"rustls",
"rustls-pemfile",
@@ -4062,6 +4282,7 @@ version = "0.11.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
dependencies = [
+ "aws-lc-rs",
"bytes",
"getrandom 0.3.4",
"lru-slab",
@@ -4200,7 +4421,7 @@ dependencies = [
"lru",
"paste",
"stability",
- "strum",
+ "strum 0.26.3",
"unicode-segmentation",
"unicode-truncate",
"unicode-width 0.1.14",
@@ -4335,6 +4556,47 @@ dependencies = [
"webpki-roots 1.0.7",
]
+[[package]]
+name = "reqwest"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801"
+dependencies = [
+ "base64 0.22.1",
+ "bytes",
+ "futures-core",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-rustls",
+ "hyper-util",
+ "js-sys",
+ "log",
+ "percent-encoding",
+ "pin-project-lite",
+ "quinn",
+ "rustls",
+ "rustls-pki-types",
+ "rustls-platform-verifier",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tokio-rustls",
+ "tokio-util",
+ "tower 0.5.3",
+ "tower-http 0.6.8",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wasm-streams",
+ "web-sys",
+]
+
[[package]]
name = "rfc6979"
version = "0.4.0"
@@ -4538,6 +4800,7 @@ version = "0.23.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21"
dependencies = [
+ "aws-lc-rs",
"log",
"once_cell",
"ring",
@@ -4578,12 +4841,40 @@ dependencies = [
"zeroize",
]
+[[package]]
+name = "rustls-platform-verifier"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784"
+dependencies = [
+ "core-foundation",
+ "core-foundation-sys",
+ "jni",
+ "log",
+ "once_cell",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-platform-verifier-android",
+ "rustls-webpki",
+ "security-framework",
+ "security-framework-sys",
+ "webpki-root-certs",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "rustls-platform-verifier-android"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
+
[[package]]
name = "rustls-webpki"
version = "0.103.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06"
dependencies = [
+ "aws-lc-rs",
"ring",
"rustls-pki-types",
"untrusted 0.9.0",
@@ -4610,6 +4901,15 @@ dependencies = [
"cipher",
]
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
[[package]]
name = "schannel"
version = "0.1.29"
@@ -5333,9 +5633,15 @@ version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
dependencies = [
- "strum_macros",
+ "strum_macros 0.26.4",
]
+[[package]]
+name = "strum"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+
[[package]]
name = "strum_macros"
version = "0.26.4"
@@ -5349,6 +5655,18 @@ dependencies = [
"syn 2.0.117",
]
+[[package]]
+name = "strum_macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
[[package]]
name = "subtle"
version = "2.6.1"
@@ -6001,6 +6319,12 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
+[[package]]
+name = "unicase"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+
[[package]]
name = "unicode-bidi"
version = "0.3.18"
@@ -6157,6 +6481,16 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
[[package]]
name = "want"
version = "0.3.1"
@@ -6273,6 +6607,19 @@ dependencies = [
"wasmparser",
]
+[[package]]
+name = "wasm-streams"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
[[package]]
name = "wasmparser"
version = "0.244.0"
@@ -6305,6 +6652,15 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "webpki-root-certs"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c"
+dependencies = [
+ "rustls-pki-types",
+]
+
[[package]]
name = "webpki-roots"
version = "0.26.11"
@@ -6349,6 +6705,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
@@ -6456,6 +6821,15 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows-sys"
+version = "0.45.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
+dependencies = [
+ "windows-targets 0.42.2",
+]
+
[[package]]
name = "windows-sys"
version = "0.48.0"
@@ -6501,6 +6875,21 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows-targets"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
+dependencies = [
+ "windows_aarch64_gnullvm 0.42.2",
+ "windows_aarch64_msvc 0.42.2",
+ "windows_i686_gnu 0.42.2",
+ "windows_i686_msvc 0.42.2",
+ "windows_x86_64_gnu 0.42.2",
+ "windows_x86_64_gnullvm 0.42.2",
+ "windows_x86_64_msvc 0.42.2",
+]
+
[[package]]
name = "windows-targets"
version = "0.48.5"
@@ -6558,6 +6947,12 @@ dependencies = [
"windows-link",
]
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
+
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
@@ -6576,6 +6971,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
+
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
@@ -6594,6 +6995,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+[[package]]
+name = "windows_i686_gnu"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
+
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
@@ -6624,6 +7031,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+[[package]]
+name = "windows_i686_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
+
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
@@ -6642,6 +7055,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
+
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
@@ -6660,6 +7079,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
+
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
@@ -6678,6 +7103,12 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.42.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
+
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
@@ -6887,7 +7318,7 @@ dependencies = [
"bindgen",
"cmake",
"pkg-config",
- "reqwest",
+ "reqwest 0.12.28",
"serde_json",
"zip",
]
diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md
index 045ee2e9a..ad677d3ae 100644
--- a/architecture/custom-vm-runtime.md
+++ b/architecture/custom-vm-runtime.md
@@ -20,8 +20,9 @@ kernel.
The driver is spawned by `openshell-gateway` as a subprocess, talks to it over a
Unix domain socket (`compute-driver.sock`) with the
`openshell.compute.v1.ComputeDriver` gRPC surface, and manages per-sandbox
-microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox rootfs are
-embedded directly in the driver binary — no sibling files required at runtime.
+microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox
+supervisor are embedded directly in the driver binary; each sandbox guest
+rootfs is derived from a container image at create time.
## Architecture
@@ -30,7 +31,7 @@ graph TD
subgraph Host["Host (macOS / Linux)"]
GATEWAY["openshell-gateway<br/>(compute::vm::spawn)"]
DRIVER["openshell-driver-vm<br/>(compute-driver.sock)"]
- EMB["Embedded runtime (zstd)<br/>libkrun · libkrunfw · gvproxy<br/>+ sandbox rootfs.tar.zst"]
+ EMB["Embedded runtime (zstd)<br/>libkrun · libkrunfw · gvproxy<br/>+ openshell-sandbox.zst"]
GVP["gvproxy (per sandbox)<br/>virtio-net · DHCP · DNS"]
GATEWAY <-->|gRPC over UDS| DRIVER
@@ -58,8 +59,8 @@ never binds a host-side TCP listener.
## Embedded Runtime
-`openshell-driver-vm` embeds the VM runtime libraries and the sandbox rootfs as
-zstd-compressed byte arrays, extracting on demand:
+`openshell-driver-vm` embeds the VM runtime libraries and the sandbox
+supervisor as zstd-compressed byte arrays, extracting on demand:
```text
~/.local/share/openshell/vm-runtime/<version>/ # libkrun / libkrunfw / gvproxy
@@ -74,14 +75,20 @@ Old runtime cache versions are cleaned up when a new version is extracted.
### Sandbox rootfs preparation
-The rootfs tarball the driver embeds starts from the same minimal Ubuntu base
-used across the project, and is **rewritten into a supervisor-only sandbox
-guest** during extraction:
+Each VM sandbox starts from either a registry image fetched directly over OCI or
+a local Docker image reference produced by Dockerfile-based `--from` sources.
+For local Dockerfile sources, the CLI builds the image on the local Docker
+daemon and passes the ordinary image tag through `template.image`. The VM driver
+first checks the local Docker daemon for that tag; when present, it exports the
+image filesystem and **rewrites that filesystem into a supervisor-only sandbox
+guest** before caching it:
-- k3s state and Kubernetes manifests are stripped out
- `/srv/openshell-vm-sandbox-init.sh` is installed as the guest entrypoint
-- the guest boots directly into `openshell-sandbox` — no k3s, no kube-proxy,
- no CNI plugins
+- the bundled `openshell-sandbox` binary is copied into
+ `/opt/openshell/bin/openshell-sandbox`
+- k3s state and Kubernetes manifests are stripped out if the image contains them
+- the guest boots directly into `openshell-sandbox` — no k3s, no kube-proxy, no
+ CNI plugins
See `crates/openshell-driver-vm/src/rootfs.rs` for the rewrite logic and
`crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh` for the init
@@ -95,6 +102,48 @@ spawns one launcher per sandbox as a subprocess, which in turn starts `gvproxy`
and calls `krun_start_enter` to boot the guest. Keeping the launcher in the
same binary means the driver ships a single artifact for both roles.
+When a sandbox sets `template.image` through `openshell sandbox create --from ...`,
+the VM driver treats that image as the base guest rootfs source for that
+sandbox. When `template.image` is omitted, the gateway fills it from the VM
+driver's advertised `default_image`, which matches the gateway's configured
+sandbox image. The driver:
+
+- resolves the image on the gateway host without Docker for registry and
+ community image refs
+- for local Dockerfile sources, looks up in the local Docker daemon the
+ ordinary Docker tag that the CLI built and passed through `template.image`
+- unpacks the image filesystem, injects the VM sandbox init/supervisor files,
+ and validates required guest tools such as `bash`, `mount`, `ip`, and `sed`
+- caches the prepared guest rootfs under
+ `<state-dir>/images/<image-identity>/rootfs.tar`
+- extracts a private runtime copy under
+ `<state-dir>/sandboxes/<sandbox-id>/rootfs`
+
+The cache key uses an immutable image identity: repo digest for registry images
+and the local Docker image ID for images resolved from the local daemon.
+Different VM sandboxes can use different base images concurrently because the
+shared cache is per image, not global for the driver. Cached prepared rootfs
+entries remain on disk until the operator removes them from the VM driver state
+directory.
+
+Docker is therefore no longer required for VM sandboxes created from registry or
+community image refs. It is only required on the local CLI/gateway host when the
+source is a local Dockerfile or build context.
+
+Local Dockerfile sources are treated as trusted local-development inputs for VM
+gateways. Remote VM gateways still reject local Dockerfile sources until a
+gateway-side artifact validation and transfer boundary is designed.
+
+There is no embedded guest rootfs fallback anymore. VM sandboxes therefore
+require either `template.image` or a configured default sandbox image. This is
+still replace-the-rootfs semantics, so VM images must remain base-compatible
+with the sandbox guest init path. Distroless or `scratch` images are not
+expected to work.
+
+The separate `openshell-vm` binary still uses `vm:rootfs` to build a standalone
+embedded guest filesystem, but `openshell-driver-vm` no longer consumes that
+artifact.
+
## Network Plane
The driver launches a **dedicated `gvproxy` instance per sandbox** to provide the
@@ -178,8 +227,8 @@ graph LR
The `vm-runtime-<platform>.tar.zst` artifact is consumed by
`openshell-driver-vm`'s `build.rs`, which embeds the library set into the
binary via `include_bytes!()`. Setting `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR`
-at build time (wired up by `crates/openshell-driver-vm/start.sh`) points the
-build at the staged artifacts.
+at build time (wired up by `tasks/scripts/gateway-vm.sh`, registered as
+`mise run gateway:vm`) points the build at the staged artifacts.
## Kernel Config Fragment
@@ -262,8 +311,8 @@ host platform.
### Driver Binary (`release-vm-dev.yml`)
Builds the self-contained `openshell-driver-vm` binary for every platform,
-with the kernel runtime + sandbox rootfs embedded. Runs on every push to
-`main` that touches VM-related crates.
+with the kernel runtime + bundled sandbox supervisor embedded. Runs on every
+push to `main` that touches VM-related crates.
The `download-kernel-runtime` job pulls the current `vm-runtime-<platform>.tar.zst`
from the `vm-dev` release; the `build-openshell-driver-vm` jobs set
@@ -273,14 +322,15 @@ cross-compiled via osxcross (no macOS runner needed for the binary build —
only for the kernel build).
macOS driver binaries produced via osxcross are not codesigned. Development
-builds are signed automatically by `crates/openshell-driver-vm/start.sh`; a
-packaged release needs signing in CI.
+builds are signed automatically by `tasks/scripts/gateway-vm.sh`
+(registered as `mise run gateway:vm`); a packaged release needs signing in
+CI.
## Rollout Strategy
1. Custom runtime is embedded by default when building `openshell-driver-vm`
with `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` set (wired up by
- `crates/openshell-driver-vm/start.sh`).
+ `tasks/scripts/gateway-vm.sh`).
2. The sandbox init script validates kernel capabilities at boot and fails
fast if missing.
3. For development, override with `OPENSHELL_VM_RUNTIME_DIR` to use a local
diff --git a/architecture/gateway.md b/architecture/gateway.md
index 8e2724bc6..62381637e 100644
--- a/architecture/gateway.md
+++ b/architecture/gateway.md
@@ -136,8 +136,8 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d
| `--sandbox-image` | `OPENSHELL_SANDBOX_IMAGE` | None | Default container image for sandbox pods |
| `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for supervisor callbacks) |
| `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes`, `docker`, and `vm`. |
-| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, and runtime state |
| `--driver-dir` | `OPENSHELL_DRIVER_DIR` | unset | Override directory for `openshell-driver-vm`. When unset, the gateway searches `~/.local/libexec/openshell`, `/usr/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`, then a sibling binary. |
+| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, runtime state, and shared image-rootfs cache |
| `--vm-krun-log-level` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | `1` | libkrun log level for VM helper processes |
| `--vm-driver-vcpus` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | Default vCPU count for VM sandboxes |
| `--vm-driver-mem-mib` | `OPENSHELL_VM_DRIVER_MEM_MIB` | `2048` | Default memory allocation for VM sandboxes in MiB |
@@ -619,7 +619,7 @@ The Docker driver (`crates/openshell-driver-docker/src/lib.rs`) is an in-process
`VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. The gateway spawns that binary on demand and talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket.
-- **Create**: The VM driver process allocates a sandbox-specific rootfs from its own embedded `rootfs.tar.zst`, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor.
+- **Create**: The VM driver process resolves the selected sandbox image — exporting it from the local Docker daemon when the tag is present there, or fetching registry images directly over OCI — rewrites it into a sandbox-specific guest rootfs, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor.
- **Networking**: The helper starts an embedded `gvproxy`, wires it into libkrun as virtio-net, and gives the guest outbound connectivity. No inbound TCP listener is needed — the supervisor reaches the gateway over its outbound `ConnectSupervisor` stream.
- **Gateway callback**: The guest init script configures `eth0` for gvproxy networking, seeds `/etc/hosts` so `host.openshell.internal` resolves to the gvproxy gateway IP (`192.168.127.1`), preserves gvproxy's legacy `host.containers.internal` / `host.docker.internal` DNS answers, prefers the configured `OPENSHELL_GRPC_ENDPOINT`, and falls back to those aliases or the raw gateway IP when local hostname resolution is unavailable on macOS.
- **Guest boot**: The sandbox guest runs a minimal init script that starts `openshell-sandbox` directly as PID 1 inside the VM.
diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md
index 5d482ffe0..3dad52f0e 100644
--- a/architecture/sandbox-custom-containers.md
+++ b/architecture/sandbox-custom-containers.md
@@ -9,7 +9,7 @@ The `--from` flag accepts four kinds of input:
| Input | Example | Behavior |
|-------|---------|----------|
| **Community sandbox name** | `--from openclaw` | Resolves to `ghcr.io/nvidia/openshell-community/sandboxes/openclaw:latest` |
-| **Dockerfile path** | `--from ./Dockerfile` | Builds the image, pushes it into the cluster, then creates the sandbox |
+| **Dockerfile path** | `--from ./Dockerfile` | Builds the image locally, makes it available to the local gateway, then creates the sandbox |
| **Directory with Dockerfile** | `--from ./my-sandbox/` | Uses the directory as the build context |
| **Full image reference** | `--from myregistry.com/img:tag` | Uses the image directly |
@@ -19,8 +19,9 @@ The CLI classifies the value in this order:
1. **Existing file** whose name contains "Dockerfile" (case-insensitive) — treated as a Dockerfile to build.
2. **Existing directory** containing a `Dockerfile` — treated as a build context directory.
-3. **Contains `/`, `:`, or `.`** — treated as a full container image reference.
-4. **Otherwise** — treated as a community sandbox name, expanded to `{OPENSHELL_COMMUNITY_REGISTRY}/{name}:latest`.
+3. **Missing explicit local path** (for example `./Dockerfile`, `../ctx`, an absolute path, or a bare Dockerfile-like name such as `Dockerfile`) — rejected locally instead of being sent to the gateway as an image pull.
+4. **Contains `/`, `:`, or `.`** — treated as a full container image reference.
+5. **Otherwise** — treated as a community sandbox name, expanded to `{OPENSHELL_COMMUNITY_REGISTRY}/{name}:latest`.
The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sandboxes` and can be overridden with the `OPENSHELL_COMMUNITY_REGISTRY` environment variable.
@@ -33,8 +34,14 @@ The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sa
When `--from` points to a Dockerfile or directory, the CLI:
1. Builds the image locally via the Docker daemon (respecting `.dockerignore`).
-2. Pushes it into the cluster's containerd runtime using `docker save` / `ctr import`.
-3. Creates the sandbox with the resulting image tag.
+2. For a local Kubernetes gateway, pushes it into the cluster's containerd runtime using `docker save` / `ctr import`.
+3. For standalone local Docker and VM gateways, passes the ordinary image tag through. The Docker driver runs that tag directly; the VM driver resolves it from the local Docker daemon, exports the image filesystem, and prepares the VM rootfs in its own cache.
+4. Creates the sandbox with the resulting image tag.
+
+Local Dockerfile sources for VM gateways are trusted local-development inputs.
+Remote gateways continue to reject local Dockerfile sources because the gateway
+API does not yet validate or transfer local build artifacts across that
+boundary.
## How It Works
diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs
index fb9b4a63d..a313d4394 100644
--- a/crates/openshell-bootstrap/src/build.rs
+++ b/crates/openshell-bootstrap/src/build.rs
@@ -1,11 +1,13 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
-//! Build and push container images into a k3s gateway.
+//! Build container images for gateway runtimes.
//!
//! This module wraps bollard's `build_image()` API to build a container image
-//! from a Dockerfile and build context, then reuses the existing push pipeline
-//! to import the image into the gateway's containerd runtime.
+//! from a Dockerfile and build context. Kubernetes deployments reuse the
+//! existing push pipeline to import the image into the gateway's containerd
+//! runtime. VM deployments keep the built image in the local Docker daemon and
+//! pass the resulting image tag to the VM driver.
use std::collections::HashMap;
use std::path::Path;
@@ -18,37 +20,39 @@ use miette::{IntoDiagnostic, Result, WrapErr};
use crate::constants::container_name;
use crate::push::push_local_images;
-/// Build a container image from a Dockerfile and push it into the gateway.
+/// Build a container image from a Dockerfile using the local Docker daemon.
///
-/// This is used by `openshell sandbox create --from `. It:
-/// 1. Creates a tar archive of the build context directory.
-/// 2. Sends it to the local Docker daemon via `build_image()`.
-/// 3. Pushes the resulting image into the gateway's containerd via the
-/// existing `push_local_images()` pipeline.
+/// This is used by `openshell sandbox create --from <Dockerfile>` for both the
+/// Kubernetes and VM backends. The image remains available in the local Docker
+/// daemon so the caller can either hand the resulting tag directly to the VM
+/// backend or import it into a local gateway containerd runtime.
#[allow(clippy::implicit_hasher)]
-pub async fn build_and_push_image(
+pub async fn build_local_image(
dockerfile_path: &Path,
tag: &str,
context_dir: &Path,
- gateway_name: &str,
build_args: &HashMap,
on_log: &mut impl FnMut(String),
) -> Result<()> {
- // 1. Build the image locally.
on_log(format!(
"Building image {tag} from {}",
dockerfile_path.display()
));
build_image(dockerfile_path, tag, context_dir, build_args, on_log).await?;
on_log(format!("Built image {tag}"));
+ Ok(())
+}
- // 2. Push into the gateway.
+/// Push a locally-built image into the gateway's containerd runtime.
+#[allow(clippy::implicit_hasher)]
+pub async fn push_image_into_gateway(
+ tag: &str,
+ gateway_name: &str,
+ on_log: &mut impl FnMut(String),
+) -> Result<()> {
on_log(format!(
"Pushing image {tag} into gateway \"{gateway_name}\""
));
- // Use the long-timeout Docker client so `docker save` of multi-GB images
- // doesn't trip the 120s bollard default mid-stream. Override with
- // OPENSHELL_DOCKER_TIMEOUT_SECS=.
let local_docker = crate::docker::connect_local_for_large_transfers()
.into_diagnostic()
.wrap_err("failed to connect to local Docker daemon")?;
@@ -60,6 +64,28 @@ pub async fn build_and_push_image(
Ok(())
}
+/// Build a container image from a Dockerfile and push it into the gateway.
+///
+/// This is used by `openshell sandbox create --from <Dockerfile>` when the
+/// active gateway is the local Kubernetes deployment. It:
+/// 1. Creates a tar archive of the build context directory.
+/// 2. Sends it to the local Docker daemon via `build_image()`.
+/// 3. Pushes the resulting image into the gateway's containerd via the
+/// existing `push_local_images()` pipeline.
+#[allow(clippy::implicit_hasher)]
+pub async fn build_and_push_image(
+ dockerfile_path: &Path,
+ tag: &str,
+ context_dir: &Path,
+ gateway_name: &str,
+ build_args: &HashMap,
+ on_log: &mut impl FnMut(String),
+) -> Result<()> {
+ build_local_image(dockerfile_path, tag, context_dir, build_args, on_log).await?;
+ push_image_into_gateway(tag, gateway_name, on_log).await?;
+ Ok(())
+}
+
/// Build a container image using the local Docker daemon.
///
/// Creates a tar archive of `context_dir`, sends it to Docker with the
diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs
index beadcbeac..663476167 100644
--- a/crates/openshell-bootstrap/src/metadata.rs
+++ b/crates/openshell-bootstrap/src/metadata.rs
@@ -65,6 +65,10 @@ pub struct GatewayMetadata {
/// When set, tokens will include these scopes for fine-grained access control.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub oidc_scopes: Option,
+
+ /// Local VM driver state directory for standalone VM gateways.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub vm_driver_state_dir: Option,
}
impl GatewayMetadata {
diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs
index eaadf7908..2ad634cf2 100644
--- a/crates/openshell-cli/src/run.rs
+++ b/crates/openshell-cli/src/run.rs
@@ -2758,6 +2758,7 @@ pub async fn sandbox_create(
}
/// Resolved source for the `--from` flag on `sandbox create`.
+#[derive(Debug)]
enum ResolvedSource {
/// A ready-to-use container image reference.
Image(String),
@@ -2774,19 +2775,15 @@ enum ResolvedSource {
/// Resolution order:
/// 1. Existing file whose name contains "Dockerfile" → build from file.
/// 2. Existing directory that contains a `Dockerfile` → build from directory.
-/// 3. Value contains `/`, `:`, or `.` → treat as a full image reference.
-/// 4. Otherwise → community sandbox name, expanded via the registry prefix.
+/// 3. Missing explicit local paths → local error, not image pull.
+/// 4. Value contains `/`, `:`, or `.` → treat as a full image reference.
+/// 5. Otherwise → community sandbox name, expanded via the registry prefix.
fn resolve_from(value: &str) -> Result {
let path = Path::new(value);
// 1. Existing file that looks like a Dockerfile.
if path.is_file() {
- let name = path
- .file_name()
- .map(|n| n.to_string_lossy())
- .unwrap_or_default();
- let lower = name.to_lowercase();
- if lower.contains("dockerfile") || lower.ends_with(".dockerfile") {
+ if filename_looks_like_dockerfile(path) {
let dockerfile = path
.canonicalize()
.into_diagnostic()
@@ -2800,6 +2797,13 @@ fn resolve_from(value: &str) -> Result {
context,
});
}
+
+ if value_looks_like_local_source(value) {
+ return Err(miette::miette!(
+ "local --from file is not a Dockerfile: {}",
+ path.display()
+ ));
+ }
}
// 2. Existing directory containing a Dockerfile.
@@ -2822,13 +2826,57 @@ fn resolve_from(value: &str) -> Result {
));
}
- // 3. Full image reference or community sandbox name — delegate to shared
+ if path.exists() {
+ return Err(miette::miette!(
+ "local --from path is not a regular file or directory: {}",
+ path.display()
+ ));
+ }
+
+ // 3. Missing explicit local paths should fail locally. Otherwise values
+ // like `./Dockerfile` reach the gateway as image references and fail as
+ // Docker pull errors.
+ if value_looks_like_local_source(value) {
+ return Err(miette::miette!(
+ "local --from path does not exist: {}\n\
+ Use an existing Dockerfile, a directory containing Dockerfile, or a container image reference.",
+ path.display()
+ ));
+ }
+
+ // 4. Full image reference or community sandbox name — delegate to shared
// resolution in openshell-core.
Ok(ResolvedSource::Image(
openshell_core::image::resolve_community_image(value),
))
}
+fn filename_looks_like_dockerfile(path: &Path) -> bool {
+ let name = path
+ .file_name()
+ .map(|n| n.to_string_lossy())
+ .unwrap_or_default();
+ let lower = name.to_lowercase();
+ lower.contains("dockerfile") || lower.ends_with(".dockerfile")
+}
+
+fn value_looks_like_local_source(value: &str) -> bool {
+ value_is_explicit_local_path(value) || value_looks_like_bare_dockerfile_name(value)
+}
+
+fn value_is_explicit_local_path(value: &str) -> bool {
+ let path = Path::new(value);
+ path.is_absolute()
+ || matches!(value, "." | "..")
+ || value.starts_with("./")
+ || value.starts_with("../")
+ || value.starts_with("~/")
+}
+
+fn value_looks_like_bare_dockerfile_name(value: &str) -> bool {
+ !value.contains('/') && !value.contains(':') && filename_looks_like_dockerfile(Path::new(value))
+}
+
fn source_requests_gpu(source: &str) -> bool {
resolve_from(source).is_ok_and(|resolved| match resolved {
ResolvedSource::Image(image) => image_requests_gpu(&image),
@@ -2849,15 +2897,29 @@ fn image_requests_gpu(image: &str) -> bool {
image_name.contains("gpu")
}
-/// Build a Dockerfile and push the resulting image into the gateway.
+fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) -> bool {
+ !metadata.is_some_and(|metadata| metadata.is_remote)
+}
+
+/// Build a Dockerfile and make the resulting image available to the gateway.
///
-/// Returns the image tag that was built so the caller can use it for sandbox
-/// creation.
+/// For local Kubernetes gateways running in Docker, this imports the built image
+/// into the gateway runtime and returns the Docker tag. Standalone local
+/// gateways use the same Docker daemon that the CLI built into, so the tag is
+/// passed through directly and the active compute driver resolves it.
async fn build_from_dockerfile(
dockerfile: &Path,
context: &Path,
gateway_name: &str,
) -> Result {
+ let metadata = get_gateway_metadata(gateway_name);
+ if !dockerfile_sources_supported_for_gateway(metadata.as_ref()) {
+ return Err(miette!(
+ "local Dockerfile sources are only supported for local gateways; gateway '{}' is remote",
+ gateway_name
+ ));
+ }
+
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
@@ -2877,21 +2939,39 @@ async fn build_from_dockerfile(
eprintln!(" {msg}");
};
- openshell_bootstrap::build::build_and_push_image(
+ openshell_bootstrap::build::build_local_image(
dockerfile,
&tag,
context,
- gateway_name,
&HashMap::new(),
&mut on_log,
)
.await?;
+ let existing_gateway = openshell_bootstrap::check_existing_deployment(gateway_name, None)
+ .await
+ .wrap_err("failed to inspect local gateway deployment state")?;
+ let pushed_into_gateway = existing_gateway
+ .is_some_and(|gateway| gateway.container_exists && gateway.container_running);
+ if pushed_into_gateway {
+ openshell_bootstrap::build::push_image_into_gateway(&tag, gateway_name, &mut on_log)
+ .await?;
+ eprintln!();
+ eprintln!(
+ "{} Image {} is available in the gateway.",
+ "✓".green().bold(),
+ tag.cyan(),
+ );
+ eprintln!();
+ return Ok(tag);
+ }
+
eprintln!();
eprintln!(
- "{} Image {} is available in the gateway.",
+ "{} Image {} is available in the local Docker daemon for gateway '{}'.",
"✓".green().bold(),
tag.cyan(),
+ gateway_name,
);
eprintln!();
@@ -5712,13 +5792,14 @@ fn format_timestamp_ms(ms: i64) -> String {
#[cfg(test)]
mod tests {
use super::{
- GatewayControlTarget, TlsOptions, format_gateway_select_header,
- format_gateway_select_items, gateway_add, gateway_auth_label, gateway_select_with,
- gateway_type_label, git_sync_files, http_health_check, image_requests_gpu,
- inferred_provider_type, parse_cli_setting_value, parse_credential_pairs,
- plaintext_gateway_is_remote, provisioning_timeout_message, ready_false_condition_message,
- resolve_gateway_control_target_from, sandbox_should_persist, shell_escape,
- source_requests_gpu, validate_gateway_name, validate_ssh_host,
+ GatewayControlTarget, TlsOptions, dockerfile_sources_supported_for_gateway,
+ format_gateway_select_header, format_gateway_select_items, gateway_add, gateway_auth_label,
+ gateway_select_with, gateway_type_label, git_sync_files, http_health_check,
+ image_requests_gpu, inferred_provider_type, parse_cli_setting_value,
+ parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message,
+ ready_false_condition_message, resolve_from, resolve_gateway_control_target_from,
+ sandbox_should_persist, shell_escape, source_requests_gpu, validate_gateway_name,
+ validate_ssh_host,
};
use crate::TEST_ENV_LOCK;
use hyper::StatusCode;
@@ -5964,6 +6045,103 @@ mod tests {
assert!(!source_requests_gpu("base"));
}
+ #[test]
+ fn resolve_from_classifies_existing_dockerfile_path() {
+ let temp = tempfile::tempdir().expect("failed to create tempdir");
+ let dockerfile = temp.path().join("Dockerfile");
+ fs::write(&dockerfile, "FROM scratch\n").expect("failed to write Dockerfile");
+
+ match resolve_from(dockerfile.to_str().expect("temp path is not UTF-8"))
+ .expect("expected Dockerfile source")
+ {
+ super::ResolvedSource::Dockerfile {
+ dockerfile: resolved,
+ context,
+ } => {
+ assert_eq!(
+ resolved,
+ dockerfile
+ .canonicalize()
+ .expect("failed to canonicalize Dockerfile")
+ );
+ assert_eq!(
+ context,
+ temp.path()
+ .canonicalize()
+ .expect("failed to canonicalize context")
+ );
+ }
+ super::ResolvedSource::Image(image) => {
+ panic!("expected Dockerfile source, got image {image}");
+ }
+ }
+ }
+
+ #[test]
+ fn resolve_from_rejects_missing_explicit_dockerfile_path() {
+ let temp = tempfile::tempdir().expect("failed to create tempdir");
+ let missing = temp.path().join("Dockerfile");
+
+ let err = resolve_from(missing.to_str().expect("temp path is not UTF-8"))
+ .expect_err("expected missing Dockerfile path to be rejected");
+
+ assert!(
+ err.to_string().contains("local --from path does not exist"),
+ "unexpected error: {err}"
+ );
+ }
+
+ #[test]
+ fn resolve_from_keeps_dockerfile_named_image_refs_as_images() {
+ let image_ref = "ghcr.io/acme/dockerfile-runner:latest";
+
+ match resolve_from(image_ref).expect("expected image source") {
+ super::ResolvedSource::Image(image) => assert_eq!(image, image_ref),
+ super::ResolvedSource::Dockerfile { .. } => {
+ panic!("expected image ref, got Dockerfile source");
+ }
+ }
+ }
+
+ #[test]
+ fn dockerfile_sources_are_rejected_for_remote_gateways() {
+ let metadata = GatewayMetadata {
+ name: "remote".to_string(),
+ gateway_endpoint: "https://gateway.example.com".to_string(),
+ is_remote: true,
+ gateway_port: 443,
+ remote_host: Some("user@gateway.example.com".to_string()),
+ resolved_host: Some("gateway.example.com".to_string()),
+ auth_mode: None,
+ edge_team_domain: None,
+ edge_auth_url: None,
+ vm_driver_state_dir: None,
+ ..Default::default()
+ };
+
+ assert!(!dockerfile_sources_supported_for_gateway(Some(&metadata)));
+ }
+
+ #[test]
+ fn dockerfile_sources_are_allowed_for_local_gateways() {
+ let metadata = GatewayMetadata {
+ name: "local".to_string(),
+ gateway_endpoint: "http://127.0.0.1:8080".to_string(),
+ is_remote: false,
+ gateway_port: 8080,
+ remote_host: None,
+ resolved_host: None,
+ auth_mode: None,
+ edge_team_domain: None,
+ edge_auth_url: None,
+ vm_driver_state_dir: None,
+ ..Default::default()
+ };
+
+ assert!(dockerfile_sources_supported_for_gateway(Some(&metadata)));
+ assert!(dockerfile_sources_supported_for_gateway(None));
+ }
+
#[test]
fn ready_false_condition_message_prefers_reason_and_message() {
let status = SandboxStatus {
@@ -6302,6 +6480,7 @@ mod tests {
#[tokio::test]
async fn http_health_check_supports_plain_http_endpoints() {
+ let _ = rustls::crypto::ring::default_provider().install_default();
let listener = TcpListener::bind("127.0.0.1:0").expect("bind listener");
let addr = listener.local_addr().expect("listener addr");
let server = thread::spawn(move || {
diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
index e69d06f4f..687ee87b2 100644
--- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
+++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
@@ -735,6 +735,10 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() {
let _env = test_env(&fake_ssh_dir, &xdg_dir);
let tls = test_tls(&server);
install_fake_ssh(&fake_ssh_dir);
+ let forward_port = {
+ let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+ listener.local_addr().unwrap().port()
+ };
run::sandbox_create(
&server.endpoint,
@@ -750,7 +754,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() {
None,
&[],
None,
- Some(openshell_core::forward::ForwardSpec::new(8080)),
+ Some(openshell_core::forward::ForwardSpec::new(forward_port)),
&["echo".to_string(), "OK".to_string()],
Some(false),
Some(false),
diff --git a/crates/openshell-driver-vm/Cargo.toml b/crates/openshell-driver-vm/Cargo.toml
index 04f4e9fc5..c13d904a6 100644
--- a/crates/openshell-driver-vm/Cargo.toml
+++ b/crates/openshell-driver-vm/Cargo.toml
@@ -22,6 +22,7 @@ path = "src/main.rs"
openshell-core = { path = "../openshell-core" }
openshell-vfio = { path = "../openshell-vfio" }
+bollard = { version = "0.20", features = ["ssh"] }
tokio = { workspace = true }
tonic = { workspace = true, features = ["transport"] }
prost-types = { workspace = true }
@@ -35,9 +36,12 @@ miette = { workspace = true }
url = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
+oci-client = "0.16"
libc = "0.2"
libloading = "0.8"
tar = "0.4"
+flate2 = "1"
+sha2 = "0.10"
zstd = "0.13"
# smol-rs/polling drives the BSD/macOS parent-death detection in
diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md
index a36f3ea44..dbb90bb67 100644
--- a/crates/openshell-driver-vm/README.md
+++ b/crates/openshell-driver-vm/README.md
@@ -2,7 +2,7 @@
> Status: Experimental. The VM compute driver is under active development and the interface still has VM-specific plumbing that will be generalized.
-Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) for OpenShell. The gateway spawns this binary as a subprocess, talks to it over a Unix domain socket with the `openshell.compute.v1.ComputeDriver` gRPC surface, and lets it manage per-sandbox microVMs. The runtime (libkrun + libkrunfw + gvproxy) and sandbox rootfs are embedded directly in the binary — no sibling files required at runtime.
+Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) for OpenShell. The gateway spawns this binary as a subprocess, talks to it over a Unix domain socket with the `openshell.compute.v1.ComputeDriver` gRPC surface, and lets it manage per-sandbox microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox supervisor are embedded directly in the binary; each sandbox guest rootfs is derived from a configured container image at create time.
## How it fits together
@@ -10,7 +10,7 @@ Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) fo
flowchart LR
subgraph host["Host process"]
gateway["openshell-server
(compute::vm::spawn)"]
- driver["openshell-driver-vm
├── libkrun (VM)
├── gvproxy (net)
└── rootfs.tar.zst"]
+ driver["openshell-driver-vm
├── libkrun (VM)
├── gvproxy (net)
└── openshell-sandbox.zst"]
gateway <-->|"gRPC over UDS
compute-driver.sock"| driver
end
@@ -35,9 +35,15 @@ Sandbox guests execute `/opt/openshell/bin/openshell-sandbox` as PID 1 inside th
mise run gateway:vm
```
-First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:rootfs -- --base` builds the embedded rootfs. Subsequent runs are cached. To keep the Unix socket path under macOS `SUN_LEN`, `mise run gateway:vm` and `start.sh` default the state dir to `/tmp/openshell-vm-driver-dev-$USER-port-$PORT/` (SQLite DB + per-sandbox rootfs + `compute-driver.sock`) unless `OPENSHELL_VM_DRIVER_STATE_DIR` is set.
-The wrapper auto-registers the gateway with the CLI (`gateway destroy` + `gateway add`) so no manual registration step is needed. When running under `sudo`, it uses `sudo -u $SUDO_USER` for the registration so the config is written under the invoking user's home directory. Re-runs are idempotent.
-It also exports `OPENSHELL_DRIVER_DIR=$PWD/target/debug` before starting the gateway so local dev runs use the freshly built `openshell-driver-vm` instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`.
+First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:supervisor` builds the bundled guest supervisor. Subsequent runs are cached.
+
+By default `mise run gateway:vm`:
+
+- Listens on plaintext HTTP at `127.0.0.1:18081`.
+- Registers the CLI gateway `vm-dev` by writing `~/.config/openshell/gateways/vm-dev/metadata.json`. It does not modify the workspace `.env`.
+- Persists the gateway SQLite DB under `.cache/gateway-vm/gateway.db`.
+- Places the VM driver state (per-sandbox rootfs + `compute-driver.sock`) under `/tmp/openshell-vm-driver-$USER-vm-dev/` so the AF_UNIX socket path stays under macOS `SUN_LEN`.
+- Passes `--driver-dir $PWD/target/debug` so the freshly built `openshell-driver-vm` is used instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`.
For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges:
@@ -47,50 +53,43 @@ sudo -E env "PATH=$PATH" mise run gateway:vm -- --gpu
See [`architecture/vm-gpu-sandbox-guide.md`](../../architecture/vm-gpu-sandbox-guide.md) for full GPU prerequisites and usage.
-Override via environment:
+Point the CLI at the gateway with one of:
```shell
-OPENSHELL_SERVER_PORT=9090 \
-crates/openshell-driver-vm/start.sh
+openshell --gateway vm-dev status
+openshell gateway select vm-dev # then plain `openshell <command>`
```
-Run multiple dev gateways side by side by giving each one a unique port. The wrapper derives a distinct default state dir from that port automatically:
+Override defaults via environment:
```shell
-OPENSHELL_SERVER_PORT=8080 mise run gateway:vm
-OPENSHELL_SERVER_PORT=8081 mise run gateway:vm
-```
+# custom port (fails fast if in use)
+OPENSHELL_SERVER_PORT=18091 mise run gateway:vm
-If you want a custom suffix instead of `port-$PORT`, set `OPENSHELL_VM_INSTANCE`:
-
-```shell
-OPENSHELL_SERVER_PORT=8082 \
-OPENSHELL_VM_INSTANCE=feature-a \
-mise run gateway:vm
-```
-
-If you want a custom CLI gateway name, set `OPENSHELL_VM_GATEWAY_NAME`:
-
-```shell
-OPENSHELL_SERVER_PORT=8082 \
+# custom CLI gateway name + namespace
OPENSHELL_VM_GATEWAY_NAME=vm-feature-a \
+OPENSHELL_SANDBOX_NAMESPACE=vm-feature-a \
mise run gateway:vm
+
+# custom sandbox image
+OPENSHELL_SANDBOX_IMAGE=ghcr.io/example/sandbox:latest mise run gateway:vm
```
Teardown:
```shell
-rm -rf /tmp/openshell-vm-driver-dev-$USER-port-8080
+rm -rf /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm
+rm -rf "${XDG_CONFIG_HOME:-$HOME/.config}/openshell/gateways/vm-dev"
```
## Manual equivalent
-If you want to drive the launch yourself instead of using `start.sh`:
+If you want to drive the launch yourself instead of using `mise run gateway:vm` (i.e. `tasks/scripts/gateway-vm.sh`):
```shell
-# 1. Stage runtime artifacts + base rootfs into target/vm-runtime-compressed/
+# 1. Stage runtime artifacts + supervisor bundle into target/vm-runtime-compressed/
mise run vm:setup
-mise run vm:rootfs -- --base # if rootfs.tar.zst is not already present
+mise run vm:supervisor # if openshell-sandbox.zst is not already present
# 2. Build both binaries with the staged artifacts embedded
OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=$PWD/target/vm-runtime-compressed \
@@ -102,16 +101,17 @@ codesign \
--force -s - target/debug/openshell-driver-vm
# 4. Start the gateway with the VM driver
-mkdir -p /tmp/openshell-vm-driver-dev-$USER-port-8080
+mkdir -p /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm
target/debug/openshell-gateway \
--drivers vm \
--disable-tls \
- --database-url sqlite:/tmp/openshell-vm-driver-dev-$USER-port-8080/openshell.db \
+ --db-url "sqlite:.cache/gateway-vm/gateway.db?mode=rwc" \
--driver-dir $PWD/target/debug \
- --grpc-endpoint http://host.containers.internal:8080 \
- --ssh-gateway-host 127.0.0.1 \
- --ssh-gateway-port 8080 \
- --vm-driver-state-dir /tmp/openshell-vm-driver-dev-$USER-port-8080
+ --sandbox-namespace vm-dev \
+ --sandbox-image <image> \
+ --grpc-endpoint http://host.containers.internal:18081 \
+ --port 18081 \
+ --vm-driver-state-dir /tmp/openshell-vm-driver-$USER-vm-dev
```
The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conventional install locations (`~/.local/libexec/openshell`, `/usr/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`), then a sibling of the gateway binary.
@@ -121,7 +121,7 @@ The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conven
| Flag | Env var | Default | Purpose |
|---|---|---|---|
| `--drivers vm` | `OPENSHELL_DRIVERS` | `kubernetes` | Select the VM compute driver. |
-| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required. URL the sandbox guest calls back to. Use a host alias that resolves to the gateway's host from inside the VM (`host.containers.internal` comes from gvproxy DNS; the guest init script also seeds `host.openshell.internal` to `192.168.127.1`). |
+| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required. URL the sandbox guest dials to reach the gateway. Use `http://host.containers.internal:<port>` (or `host.docker.internal` / `host.openshell.internal`) so traffic flows through gvproxy's host-loopback NAT (HostIP `192.168.127.254` → host `127.0.0.1`). Loopback URLs like `http://127.0.0.1:<port>` are rewritten automatically by the driver. The bare gateway IP (`192.168.127.1`) only carries gvproxy's own services and will not reach host-bound ports. |
| `--vm-driver-state-dir DIR` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Per-sandbox rootfs, console logs, and the `compute-driver.sock` UDS. |
| `--driver-dir DIR` | `OPENSHELL_DRIVER_DIR` | unset | Override the directory searched for `openshell-driver-vm`. |
| `--vm-driver-vcpus N` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | vCPUs per sandbox. |
@@ -135,14 +135,15 @@ See [`openshell-gateway --help`](../openshell-server/src/cli.rs) for the full fl
## Verifying the gateway
-The gateway is auto-registered by `start.sh`. In another terminal:
+The gateway is auto-registered by `mise run gateway:vm`. In another terminal:
```shell
-scripts/bin/openshell sandbox create --name demo
-scripts/bin/openshell sandbox connect demo
+./scripts/bin/openshell status
+./scripts/bin/openshell sandbox create --name demo --from <image>
+./scripts/bin/openshell sandbox connect demo
```
-First sandbox takes 10–30 seconds to boot (rootfs extraction + libkrun + guest init). Subsequent creates reuse the prepared sandbox rootfs.
+First sandbox takes 10–30 seconds to boot (image fetch/prepare/cache + libkrun + guest init). If `--from` is omitted, the VM driver uses the gateway's configured default sandbox image. Without either `--from` or `--sandbox-image`, VM sandbox creation fails. Subsequent creates reuse the prepared sandbox rootfs.
## Logs and debugging
@@ -150,7 +151,7 @@ Raise log verbosity for both processes:
```shell
RUST_LOG=openshell_server=debug,openshell_driver_vm=debug \
- crates/openshell-driver-vm/start.sh
+ mise run gateway:vm
```
The VM guest's serial console is appended to `<state-dir>/<sandbox-id>/console.log`. The `compute-driver.sock` lives at `<state-dir>/compute-driver.sock`; the gateway removes it on clean shutdown via `ManagedDriverProcess::drop`.
@@ -161,9 +162,11 @@ The VM guest's serial console is appended to `//console.l
- Rust toolchain
- Guest-supervisor cross-compile toolchain (needed on macOS, and on Linux when host arch ≠ guest arch):
- Matching rustup target: `rustup target add aarch64-unknown-linux-gnu` (or `x86_64-unknown-linux-gnu` for an amd64 guest)
- - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `build-rootfs.sh` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary.
+ - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `vm:supervisor` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary.
- [mise](https://mise.jdx.dev/) task runner
-- Docker (needed by `mise run vm:rootfs` to build the base rootfs)
+- Docker-compatible socket on the local CLI/gateway host when using
+ `openshell sandbox create --from ./Dockerfile` or `--from ./dir`; the CLI
+ builds the image and the VM driver exports it via the local Docker daemon
- `gh` CLI (used by `mise run vm:setup` to download pre-built runtime artifacts)
## Relationship to `openshell-vm`
@@ -173,4 +176,4 @@ The VM guest's serial console is appended to `//console.l
## TODOs
- The gateway still configures the driver via CLI args; this will move to a gRPC bootstrap call so the driver interface is uniform across backends. See the `TODO(driver-abstraction)` notes in `crates/openshell-server/src/lib.rs` and `crates/openshell-server/src/compute/vm.rs`.
-- macOS codesigning is handled by `start.sh`; a packaged release would need signing in CI.
+- macOS codesigning is handled by `tasks/scripts/gateway-vm.sh`; a packaged release would need signing in CI.
diff --git a/crates/openshell-driver-vm/build.rs b/crates/openshell-driver-vm/build.rs
index e10a1dde0..ea4c4d2e0 100644
--- a/crates/openshell-driver-vm/build.rs
+++ b/crates/openshell-driver-vm/build.rs
@@ -3,9 +3,9 @@
//! Build script for openshell-driver-vm.
//!
-//! This crate embeds the sandbox rootfs plus the minimal libkrun runtime
-//! artifacts it needs to boot base VMs without depending on the openshell-vm
-//! binary or crate.
+//! This crate embeds the sandbox supervisor plus the minimal libkrun runtime
+//! artifacts it needs to boot VMs without depending on the openshell-vm binary
+//! or crate.
use std::path::{Path, PathBuf};
use std::{env, fs};
@@ -21,8 +21,7 @@ fn main() {
"libkrun.dylib.zst",
"libkrunfw.5.dylib.zst",
"gvproxy.zst",
- "rootfs.tar.zst",
- "rootfs-gpu.tar.zst",
+ "openshell-sandbox.zst",
] {
println!("cargo:rerun-if-changed={dir}/{name}");
}
@@ -37,15 +36,7 @@ fn main() {
"linux" => ("libkrun.so", "libkrunfw.so.5"),
_ => {
println!("cargo:warning=VM runtime not available for {target_os}-{target_arch}");
- generate_stub_resources(
- &out_dir,
- &[
- "libkrun",
- "libkrunfw",
- "rootfs.tar.zst",
- "rootfs-gpu.tar.zst",
- ],
- );
+ generate_stub_resources(&out_dir, &["libkrun", "libkrunfw", "openshell-sandbox.zst"]);
return;
}
};
@@ -54,15 +45,14 @@ fn main() {
PathBuf::from(dir)
} else {
println!("cargo:warning=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR not set");
- println!("cargo:warning=Run: mise run vm:setup");
+ println!("cargo:warning=Run: mise run vm:setup && mise run vm:supervisor");
generate_stub_resources(
&out_dir,
&[
&format!("{libkrun_name}.zst"),
&format!("{libkrunfw_name}.zst"),
"gvproxy.zst",
- "rootfs.tar.zst",
- "rootfs-gpu.tar.zst",
+ "openshell-sandbox.zst",
],
);
return;
@@ -73,15 +63,14 @@ fn main() {
"cargo:warning=Compressed runtime dir not found: {}",
compressed_dir.display()
);
- println!("cargo:warning=Run: mise run vm:setup");
+ println!("cargo:warning=Run: mise run vm:setup && mise run vm:supervisor");
generate_stub_resources(
&out_dir,
&[
&format!("{libkrun_name}.zst"),
&format!("{libkrunfw_name}.zst"),
"gvproxy.zst",
- "rootfs.tar.zst",
- "rootfs-gpu.tar.zst",
+ "openshell-sandbox.zst",
],
);
return;
@@ -94,10 +83,9 @@ fn main() {
format!("{libkrunfw_name}.zst"),
),
("gvproxy.zst".to_string(), "gvproxy.zst".to_string()),
- ("rootfs.tar.zst".to_string(), "rootfs.tar.zst".to_string()),
(
- "rootfs-gpu.tar.zst".to_string(),
- "rootfs-gpu.tar.zst".to_string(),
+ "openshell-sandbox.zst".to_string(),
+ "openshell-sandbox.zst".to_string(),
),
];
@@ -131,15 +119,16 @@ fn main() {
}
if !all_found {
- println!("cargo:warning=Some artifacts missing. Run: mise run vm:setup");
+ println!(
+ "cargo:warning=Some artifacts missing. Run: mise run vm:setup && mise run vm:supervisor"
+ );
generate_stub_resources(
&out_dir,
&[
&format!("{libkrun_name}.zst"),
&format!("{libkrunfw_name}.zst"),
"gvproxy.zst",
- "rootfs.tar.zst",
- "rootfs-gpu.tar.zst",
+ "openshell-sandbox.zst",
],
);
}
diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
index 1c009a7f1..063a75032 100644
--- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
+++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh
@@ -15,7 +15,20 @@ if [ -f /srv/openshell-env.sh ]; then
fi
BOOT_START=$(date +%s%3N 2>/dev/null || date +%s)
+# gvisor-tap-vsock subnet layout:
+# 192.168.127.1 — gateway: gvproxy's DNS / DHCP / HTTP API. Does NOT
+# proxy arbitrary host ports.
+# 192.168.127.254 — host-loopback: NAT-rewritten to host's 127.0.0.1 by
+# gvproxy's TCP/UDP/ICMP forwarder. Use this address
+# (or any of the host.* hostnames below) to reach a
+# service the host is listening on.
+# The host.containers.internal / host.docker.internal DNS records served
+# by gvproxy's embedded resolver point at 192.168.127.254. We mirror that
+# in /etc/hosts so the supervisor can reach the gateway even when
+# gvproxy's DNS is not in resolv.conf (e.g. DHCP failed and we fell
+# back to 8.8.8.8).
GVPROXY_GATEWAY_IP="192.168.127.1"
+GVPROXY_HOST_LOOPBACK_IP="192.168.127.254"
GATEWAY_IP="$GVPROXY_GATEWAY_IP"
# Parse kernel cmdline for GPU and TAP networking parameters
@@ -96,15 +109,39 @@ tcp_probe() {
}
ensure_host_gateway_aliases() {
+ # Seed /etc/hosts with the well-known gvproxy hostnames so the supervisor
+ # can reach the OpenShell server even when gvproxy's built-in DNS is not
+ # in resolv.conf (e.g. when DHCP fails and we fall back to 8.8.8.8).
+ #
+ # Critical distinction: host.* aliases point at the gvproxy *host-loopback*
+ # IP (192.168.127.254), not the gateway IP (192.168.127.1). Only the
+ # host-loopback IP carries NAT rewriting to the host's 127.0.0.1 — the
+ # gateway IP only listens on gvproxy's own service ports (DNS:53, DHCP,
+ # HTTP API:80). Pinning host.containers.internal to the gateway IP
+ # silently breaks guest→host port reachability for arbitrary ports.
local hosts_tmp="/tmp/openshell-hosts.$$"
+ local host_aliases="host.openshell.internal host.containers.internal host.docker.internal"
+ local gateway_aliases="gateway.containers.internal"
+ local filter='(^|[[:space:]])(host\.openshell\.internal|host\.containers\.internal|host\.docker\.internal|gateway\.containers\.internal)([[:space:]]|$)'
if [ -f /etc/hosts ]; then
- grep -vE '(^|[[:space:]])host\.openshell\.internal([[:space:]]|$)' /etc/hosts > "$hosts_tmp" || true
+ grep -vE "$filter" /etc/hosts > "$hosts_tmp" || true
else
: > "$hosts_tmp"
fi
- printf '%s host.openshell.internal\n' "$GATEWAY_IP" >> "$hosts_tmp"
+ # In TAP/GPU mode, GATEWAY_IP is overridden to VM_NET_GW (the host-side
+ # of the TAP), and the gateway is reachable directly there. In gvproxy
+ # mode, host.openshell.internal etc. need GVPROXY_HOST_LOOPBACK_IP
+ # (192.168.127.254) which is gvproxy's host-NAT entry, while
+ # gateway.containers.internal points at the gvproxy gateway itself.
+ if [ "${GATEWAY_IP}" = "${GVPROXY_GATEWAY_IP}" ]; then
+ printf '%s %s\n' "$GVPROXY_HOST_LOOPBACK_IP" "$host_aliases" >> "$hosts_tmp"
+ printf '%s %s\n' "$GVPROXY_GATEWAY_IP" "$gateway_aliases" >> "$hosts_tmp"
+ else
+ # TAP networking: gateway and host are both reachable at GATEWAY_IP.
+ printf '%s %s %s\n' "$GATEWAY_IP" "$host_aliases" "$gateway_aliases" >> "$hosts_tmp"
+ fi
cat "$hosts_tmp" > /etc/hosts
rm -f "$hosts_tmp"
}
@@ -129,7 +166,15 @@ rewrite_openshell_endpoint_if_needed() {
return 0
fi
- for candidate in host.openshell.internal host.containers.internal host.docker.internal "$GATEWAY_IP"; do
+ # Probe candidates in preference order. Hostnames first for informative
+ # log output, then a bare IP as a final safety net. In gvproxy mode the
+ # bare IP is the host-loopback (192.168.127.254). In TAP/GPU mode it's
+ # the TAP host gateway.
+ local fallback_ip="$GVPROXY_HOST_LOOPBACK_IP"
+ if [ "${GATEWAY_IP}" != "${GVPROXY_GATEWAY_IP}" ]; then
+ fallback_ip="$GATEWAY_IP"
+ fi
+ for candidate in host.openshell.internal host.containers.internal host.docker.internal "$fallback_ip"; do
if [ "$candidate" = "$host" ]; then
continue
fi
diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs
index 704f91610..d79e5d922 100644
--- a/crates/openshell-driver-vm/src/driver.rs
+++ b/crates/openshell-driver-vm/src/driver.rs
@@ -5,12 +5,22 @@ use crate::gpu::{
GpuInventory, SubnetAllocator, allocate_vsock_cid, mac_from_sandbox_id, tap_device_name,
};
use crate::rootfs::{
- extract_gpu_sandbox_rootfs_to, extract_sandbox_rootfs_to, sandbox_guest_init_path,
+ create_rootfs_archive_from_dir, extract_rootfs_archive_to,
+ prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path,
};
-use futures::Stream;
+use bollard::Docker;
+use bollard::errors::Error as BollardError;
+use bollard::models::ContainerCreateBody;
+use bollard::query_parameters::{CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder};
+use flate2::read::GzDecoder;
+use futures::{Stream, StreamExt};
use nix::errno::Errno;
use nix::sys::signal::{Signal, kill};
use nix::unistd::Pid;
+use oci_client::client::{Client as OciClient, ClientConfig};
+use oci_client::manifest::{ImageIndexEntry, OciDescriptor};
+use oci_client::secrets::RegistryAuth;
+use oci_client::{Reference, RegistryOperation};
use openshell_core::proto::compute::v1::{
CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse,
DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent,
@@ -22,31 +32,68 @@ use openshell_core::proto::compute::v1::{
compute_driver_server::ComputeDriver, watch_sandboxes_event,
};
use openshell_vfio::SysfsRoot;
+use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::io::Read;
use std::net::Ipv4Addr;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::process::Stdio;
use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
+use tokio::io::AsyncWriteExt;
use tokio::process::{Child, Command};
use tokio::sync::{Mutex, broadcast, mpsc};
use tokio_stream::wrappers::ReceiverStream;
use tonic::{Request, Response, Status};
+use tracing::{info, warn};
use url::{Host, Url};
const DRIVER_NAME: &str = "openshell-driver-vm";
const WATCH_BUFFER: usize = 256;
const DEFAULT_VCPUS: u8 = 2;
const DEFAULT_MEM_MIB: u32 = 2048;
-const GVPROXY_GATEWAY_IP: &str = "192.168.127.1";
+/// gvproxy host-loopback IP — gvproxy's TCP/UDP/ICMP forwarder NAT-rewrites
+/// this destination to the host's `127.0.0.1` and dials out from the host
+/// process. This is the only address that transparently reaches host-bound
+/// services without explicit `expose` rules.
+///
+/// See gvisor-tap-vsock `cmd/gvproxy/config.go` (default NAT entry
+/// `HostIP -> 127.0.0.1`) and `pkg/services/forwarder/tcp.go` (NAT lookup
+/// before `net.Dial`).
+///
+/// Code paths route via `GVPROXY_HOST_LOOPBACK_ALIAS` (resolved through DNS
+/// or /etc/hosts) rather than this bare IP so logs stay readable; this constant
+/// is kept for documentation and parity with the guest init script.
+#[allow(dead_code)]
+const GVPROXY_HOST_LOOPBACK_IP: &str = "192.168.127.254";
const OPENSHELL_HOST_GATEWAY_ALIAS: &str = "host.openshell.internal";
+/// Hostname gvproxy resolves (via its embedded DNS) to the host-loopback IP.
+///
+/// We rewrite loopback URLs to this hostname rather than the bare IP because:
+/// * the guest init script seeds /etc/hosts with the same mapping, so it
+/// resolves even when gvproxy's DNS is not in resolv.conf;
+/// * keeping a recognisable hostname makes log messages clearer than a bare
+/// 192.168.127.254 reference;
+/// * `host.docker.internal` works the same way for Docker-flavoured tooling.
+///
+/// Both names ultimately route through the gvproxy NAT path on
+/// `GVPROXY_HOST_LOOPBACK_IP` — they do **not** go through the gateway IP.
+const GVPROXY_HOST_LOOPBACK_ALIAS: &str = "host.containers.internal";
const GUEST_SSH_SOCKET_PATH: &str = "/run/openshell/ssh.sock";
const GUEST_TLS_DIR: &str = "/opt/openshell/tls";
const GUEST_TLS_CA_PATH: &str = "/opt/openshell/tls/ca.crt";
const GUEST_TLS_CERT_PATH: &str = "/opt/openshell/tls/tls.crt";
const GUEST_TLS_KEY_PATH: &str = "/opt/openshell/tls/tls.key";
+const IMAGE_CACHE_ROOT_DIR: &str = "images";
+const IMAGE_CACHE_ROOTFS_ARCHIVE: &str = "rootfs.tar";
+const IMAGE_EXPORT_ROOTFS_ARCHIVE: &str = "source-rootfs.tar";
+const IMAGE_IDENTITY_FILE: &str = "image-identity";
+const IMAGE_REFERENCE_FILE: &str = "image-reference";
+static IMAGE_CACHE_BUILD_COUNTER: AtomicU64 = AtomicU64::new(0);
#[derive(Debug, Clone)]
struct VmDriverTlsPaths {
@@ -60,6 +107,7 @@ pub struct VmDriverConfig {
pub openshell_endpoint: String,
pub state_dir: PathBuf,
pub launcher_bin: Option,
+ pub default_image: String,
pub ssh_handshake_secret: String,
pub ssh_handshake_skew_secs: u64,
pub log_level: String,
@@ -80,6 +128,7 @@ impl Default for VmDriverConfig {
openshell_endpoint: String::new(),
state_dir: PathBuf::from("target/openshell-vm-driver"),
launcher_bin: None,
+ default_image: String::new(),
ssh_handshake_secret: String::new(),
ssh_handshake_skew_secs: 300,
log_level: "info".to_string(),
@@ -188,6 +237,7 @@ pub struct VmDriver {
config: VmDriverConfig,
launcher_bin: PathBuf,
registry: Arc>>,
+ image_cache_lock: Arc>,
events: broadcast::Sender,
gpu_inventory: Option>>,
subnet_allocator: Arc>,
@@ -209,7 +259,7 @@ impl VmDriver {
.map_err(|e| format!("cleanup stale TAP interfaces panicked: {e}"))?;
}
- let state_root = config.state_dir.join("sandboxes");
+ let state_root = sandboxes_root_dir(&config.state_dir);
tokio::fs::create_dir_all(&state_root)
.await
.map_err(|err| {
@@ -218,6 +268,15 @@ impl VmDriver {
state_root.display()
)
})?;
+ let image_cache_root = image_cache_root_dir(&config.state_dir);
+ tokio::fs::create_dir_all(&image_cache_root)
+ .await
+ .map_err(|err| {
+ format!(
+ "failed to create state dir '{}': {err}",
+ image_cache_root.display()
+ )
+ })?;
let launcher_bin = if let Some(path) = config.launcher_bin.clone() {
path
@@ -248,6 +307,7 @@ impl VmDriver {
config,
launcher_bin,
registry: Arc::new(Mutex::new(HashMap::new())),
+ image_cache_lock: Arc::new(Mutex::new(())),
events,
gpu_inventory,
subnet_allocator,
@@ -264,7 +324,7 @@ impl VmDriver {
GetCapabilitiesResponse {
driver_name: DRIVER_NAME.to_string(),
driver_version: openshell_core::VERSION.to_string(),
- default_image: String::new(),
+ default_image: self.config.default_image.clone(),
supports_gpu: self.gpu_inventory.is_some(),
gpu_count,
}
@@ -274,13 +334,24 @@ impl VmDriver {
// gRPC API surface; boxing here would diverge from every other handler.
#[allow(clippy::result_large_err)]
pub fn validate_sandbox(&self, sandbox: &Sandbox) -> Result<(), Status> {
- validate_vm_sandbox(sandbox, self.config.gpu_enabled)
+ validate_vm_sandbox(sandbox, self.config.gpu_enabled)?;
+ if self.resolved_sandbox_image(sandbox).is_none() {
+ return Err(Status::failed_precondition(
+ "vm sandboxes require template.image or a configured default sandbox image",
+ ));
+ }
+ Ok(())
}
// `tonic::Status` is large but is the standard error type across the
// gRPC API surface; boxing here would diverge from every other handler.
#[allow(clippy::result_large_err)]
pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result {
+ info!(
+ sandbox_id = %sandbox.id,
+ sandbox_name = %sandbox.name,
+ "vm driver: create_sandbox received"
+ );
validate_vm_sandbox(sandbox, self.config.gpu_enabled)?;
if self.registry.lock().await.contains_key(&sandbox.id) {
@@ -293,6 +364,17 @@ impl VmDriver {
let state_dir = sandbox_state_dir(&self.config.state_dir, &sandbox.id);
let rootfs = state_dir.join("rootfs");
+ let image_ref = self.resolved_sandbox_image(sandbox).ok_or_else(|| {
+ Status::failed_precondition(
+ "vm sandboxes require template.image or a configured default sandbox image",
+ )
+ })?;
+ info!(
+ sandbox_id = %sandbox.id,
+ image_ref = %image_ref,
+ state_dir = %state_dir.display(),
+ "vm driver: resolved image ref, preparing rootfs"
+ );
tokio::fs::create_dir_all(&state_dir)
.await
@@ -302,22 +384,57 @@ impl VmDriver {
.config
.tls_paths()
.map_err(Status::failed_precondition)?;
- let rootfs_for_extract = rootfs.clone();
- let extract_fn = if is_gpu {
- extract_gpu_sandbox_rootfs_to
- } else {
- extract_sandbox_rootfs_to
- };
- tokio::task::spawn_blocking(move || extract_fn(&rootfs_for_extract))
+ // Mirror the K8s `Scheduled` event so the CLI can complete the
+ // "Requesting sandbox" step and switch the spinner over to the
+ // image-pull phase before we block on the registry.
+ self.publish_platform_event(
+ sandbox.id.clone(),
+ platform_event(
+ "vm",
+ "Normal",
+ "Scheduled",
+ format!("Sandbox accepted by vm driver to image \"{image_ref}\""),
+ ),
+ );
+
+ let image_identity = match self
+ .prepare_runtime_rootfs(&sandbox.id, &image_ref, &rootfs)
.await
- .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))?
- .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?;
- if let Some(tls_paths) = tls_paths.as_ref() {
- prepare_guest_tls_materials(&rootfs, tls_paths)
- .await
- .map_err(|err| {
- Status::internal(format!("prepare guest TLS materials failed: {err}"))
- })?;
+ {
+ Ok(image_identity) => {
+ info!(
+ sandbox_id = %sandbox.id,
+ image_identity = %image_identity,
+ "vm driver: rootfs prepared"
+ );
+ image_identity
+ }
+ Err(err) => {
+ warn!(
+ sandbox_id = %sandbox.id,
+ error = %err.message(),
+ "vm driver: rootfs preparation failed"
+ );
+ let _ = tokio::fs::remove_dir_all(&state_dir).await;
+ return Err(err);
+ }
+ };
+ if let Some(tls_paths) = tls_paths.as_ref()
+ && let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await
+ {
+ let _ = tokio::fs::remove_dir_all(&state_dir).await;
+ return Err(Status::internal(format!(
+ "prepare guest TLS materials failed: {err}"
+ )));
+ }
+
+ if let Err(err) =
+ write_sandbox_image_metadata(&state_dir, &image_ref, &image_identity).await
+ {
+ let _ = tokio::fs::remove_dir_all(&state_dir).await;
+ return Err(Status::internal(format!(
+ "write sandbox image metadata failed: {err}"
+ )));
}
let gpu_bdf = if is_gpu {
@@ -431,9 +548,20 @@ impl VmDriver {
command.arg("--vm-env").arg(env);
}
+ info!(
+ sandbox_id = %sandbox.id,
+ launcher = %self.launcher_bin.display(),
+ console_output = %console_output.display(),
+ "vm driver: spawning VM launcher"
+ );
let child = match command.spawn() {
Ok(child) => child,
Err(err) => {
+ warn!(
+ sandbox_id = %sandbox.id,
+ error = %err,
+ "vm driver: launcher spawn failed"
+ );
if gpu_bdf.is_some() {
self.release_gpu_and_subnet(&sandbox.id);
}
@@ -444,6 +572,18 @@ impl VmDriver {
)));
}
};
+ info!(
+ sandbox_id = %sandbox.id,
+ launcher_pid = child.id().unwrap_or(0),
+ "vm driver: launcher spawned"
+ );
+ // Mirror the K8s `Started` event so the CLI can complete the
+ // "Starting sandbox" step. The supervisor-ready transition still
+ // promotes the sandbox to `Ready` separately.
+ self.publish_platform_event(
+ sandbox.id.clone(),
+ platform_event("vm", "Normal", "Started", "Started VM launcher".to_string()),
+ );
let snapshot = sandbox_snapshot(sandbox, provisioning_condition(), false);
let process = Arc::new(Mutex::new(VmProcess {
child,
@@ -587,6 +727,443 @@ impl VmDriver {
}
}
+ async fn prepare_runtime_rootfs(
+ &self,
+ sandbox_id: &str,
+ image_ref: &str,
+ rootfs: &Path,
+ ) -> Result {
+ let image_identity = self
+ .ensure_cached_image_rootfs_archive(sandbox_id, image_ref)
+ .await?;
+ let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity);
+ let rootfs_dest = rootfs.to_path_buf();
+ tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest))
+ .await
+ .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))?
+ .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?;
+
+ Ok(image_identity)
+ }
+
+ fn resolved_sandbox_image(&self, sandbox: &Sandbox) -> Option {
+ requested_sandbox_image(sandbox)
+ .map(ToOwned::to_owned)
+ .or_else(|| {
+ let image = self.config.default_image.trim();
+ (!image.is_empty()).then(|| image.to_string())
+ })
+ }
+
+ async fn ensure_cached_image_rootfs_archive(
+ &self,
+ sandbox_id: &str,
+ image_ref: &str,
+ ) -> Result {
+ if let Some((docker, image_identity)) = self.resolve_local_docker_image(image_ref).await? {
+ return self
+ .ensure_cached_local_image_rootfs_archive(
+ sandbox_id,
+ image_ref,
+ &docker,
+ &image_identity,
+ )
+ .await;
+ }
+
+ info!(image_ref = %image_ref, "vm driver: ensuring cached image rootfs archive (registry)");
+ let reference = parse_registry_reference(image_ref)?;
+ let client = registry_client();
+ let auth = registry_auth(image_ref)?;
+ info!(image_ref = %image_ref, "vm driver: authenticating with registry");
+ client
+ .auth(&reference, &auth, RegistryOperation::Pull)
+ .await
+ .map_err(|err| {
+ Status::failed_precondition(format!(
+ "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}"
+ ))
+ })?;
+ info!(image_ref = %image_ref, "vm driver: fetching manifest digest");
+ let image_identity = client
+ .fetch_manifest_digest(&reference, &auth)
+ .await
+ .map_err(|err| {
+ Status::failed_precondition(format!(
+ "failed to resolve vm sandbox image '{image_ref}': {err}"
+ ))
+ })?;
+ info!(
+ image_ref = %image_ref,
+ image_identity = %image_identity,
+ "vm driver: manifest digest resolved"
+ );
+ let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity);
+
+ // Mirror the K8s `Pulling` event so the CLI flips to the
+ // image-pull spinner with the image name as detail. We emit it
+ // for cache hits too and immediately follow with `Pulled` so the
+ // spinner step still advances cleanly.
+ self.publish_platform_event(
+ sandbox_id.to_string(),
+ platform_event(
+ "vm",
+ "Normal",
+ "Pulling",
+ format!("Pulling image \"{image_ref}\""),
+ ),
+ );
+
+ if tokio::fs::metadata(&archive_path).await.is_ok() {
+ info!(
+ image_identity = %image_identity,
+ archive_path = %archive_path.display(),
+ "vm driver: image rootfs archive cache hit (no build needed)"
+ );
+ self.publish_pulled_event(sandbox_id, image_ref, &archive_path)
+ .await;
+ return Ok(image_identity);
+ }
+
+ info!(
+ image_identity = %image_identity,
+ "vm driver: image rootfs archive cache miss, acquiring build lock"
+ );
+ let _cache_guard = self.image_cache_lock.lock().await;
+ info!(
+ image_identity = %image_identity,
+ "vm driver: build lock acquired"
+ );
+ if tokio::fs::metadata(&archive_path).await.is_ok() {
+ info!(
+ image_identity = %image_identity,
+ "vm driver: image rootfs archive cache hit after lock (built by another task)"
+ );
+ self.publish_pulled_event(sandbox_id, image_ref, &archive_path)
+ .await;
+ return Ok(image_identity);
+ }
+
+ self.build_cached_registry_image_rootfs_archive(
+ sandbox_id,
+ &client,
+ &reference,
+ &auth,
+ image_ref,
+ &image_identity,
+ )
+ .await?;
+ self.publish_pulled_event(sandbox_id, image_ref, &archive_path)
+ .await;
+ Ok(image_identity)
+ }
+
+ async fn resolve_local_docker_image(
+ &self,
+ image_ref: &str,
+ ) -> Result