diff --git a/.github/workflows/release-vm-dev.yml b/.github/workflows/release-vm-dev.yml index 6619d2a6b..0676be051 100644 --- a/.github/workflows/release-vm-dev.yml +++ b/.github/workflows/release-vm-dev.yml @@ -421,7 +421,7 @@ jobs: # --------------------------------------------------------------------------- build-driver-vm-linux: name: Build Driver VM (Linux ${{ matrix.arch }}) - needs: [compute-versions, download-kernel-runtime, build-rootfs] + needs: [compute-versions, download-kernel-runtime] strategy: matrix: include: @@ -477,12 +477,6 @@ jobs: name: kernel-runtime-tarballs path: runtime-download/ - - name: Download rootfs tarball - uses: actions/download-artifact@v4 - with: - name: rootfs-${{ matrix.arch }} - path: rootfs-download/ - - name: Stage compressed runtime for embedding run: | set -euo pipefail @@ -504,12 +498,15 @@ jobs: zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" done - # Copy rootfs tarball (already zstd-compressed) - cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" - echo "Staged compressed artifacts:" ls -lah "$COMPRESSED_DIR" + - name: Build bundled supervisor + run: | + set -euo pipefail + OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${PWD}/target/vm-runtime-compressed" \ + tasks/scripts/vm/build-supervisor-bundle.sh --arch "${{ matrix.guest_arch }}" + - name: Scope workspace to driver-vm crates run: | set -euo pipefail @@ -551,7 +548,7 @@ jobs: # --------------------------------------------------------------------------- build-driver-vm-macos: name: Build Driver VM (macOS) - needs: [compute-versions, download-kernel-runtime, build-rootfs] + needs: [compute-versions, download-kernel-runtime] runs-on: build-amd64 timeout-minutes: 60 container: @@ -591,12 +588,6 @@ jobs: name: kernel-runtime-tarballs path: runtime-download/ - - name: Download rootfs tarball (arm64) - uses: actions/download-artifact@v4 - with: - name: rootfs-arm64 - path: rootfs-download/ - - name: Prepare compressed runtime directory run: | set 
-euo pipefail @@ -619,12 +610,24 @@ jobs: zstd -19 -f -q -T0 -o "${COMPRESSED_DIR}/${name}.zst" "$file" done - # The macOS VM guest is always Linux ARM64, so use the arm64 rootfs - cp rootfs-download/rootfs.tar.zst "${COMPRESSED_DIR}/rootfs.tar.zst" - echo "Staged macOS compressed artifacts:" ls -lah "$COMPRESSED_DIR" + - name: Build bundled supervisor + run: | + set -euo pipefail + docker buildx build \ + --file deploy/docker/Dockerfile.images \ + --platform linux/arm64 \ + --build-arg OPENSHELL_CARGO_VERSION="${{ needs.compute-versions.outputs.cargo_version }}" \ + --build-arg OPENSHELL_IMAGE_TAG=dev \ + --target supervisor-output \ + --output type=local,dest=supervisor-out/ \ + . + + zstd -19 -T0 -f supervisor-out/openshell-sandbox \ + -o "${PWD}/target/vm-runtime-compressed-macos/openshell-sandbox.zst" + - name: Build macOS binary via Docker (osxcross) run: | set -euo pipefail @@ -776,7 +779,7 @@ jobs: ### VM Compute Driver Binaries - `openshell-driver-vm` binaries with embedded kernel runtime and sandbox rootfs. + `openshell-driver-vm` binaries with embedded kernel runtime and bundled sandbox supervisor. Launched by the gateway when `--drivers=vm` is configured. Rebuilt on every push to main alongside the openshell-vm binaries. 
diff --git a/Cargo.lock b/Cargo.lock index bfaa55d93..4e74e89a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -621,6 +621,12 @@ dependencies = [ "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cexpr" version = "0.6.0" @@ -761,6 +767,16 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "compact_str" version = "0.7.1" @@ -808,6 +824,27 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" +[[package]] +name = "const_format" +version = "0.2.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4481a617ad9a412be3b97c5d403fef8ed023103368908b9c50af598ff467cc1e" +dependencies = [ + "const_format_proc_macros", + "konst", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.4.2" @@ -1175,6 +1212,37 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = 
"derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.117", +] + [[package]] name = "dialoguer" version = "0.11.0" @@ -1648,6 +1716,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getset" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912" +dependencies = [ + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "ghash" version = "0.5.1" @@ -1861,6 +1941,15 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-auth" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "150fa4a9462ef926824cf4519c84ed652ca8f4fbae34cb8af045b5cbcaf98822" +dependencies = [ + "memchr", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2324,6 +2413,50 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys 0.3.1", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "jobserver" version = "0.1.34" @@ -2387,6 +2520,20 @@ dependencies = [ "simple_asn1", ] +[[package]] +name = "jsonwebtoken" +version = "10.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" +dependencies = [ + "base64 0.22.1", + "getrandom 0.2.17", + "js-sys", + "serde", + "serde_json", + "signature 2.2.0", +] + [[package]] name = "k8s-openapi" version = "0.21.1" @@ -2400,6 +2547,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "konst" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128133ed7824fcd73d6e7b17957c5eb7bacb885649bd8c69708b2331a10bcefb" +dependencies = [ + "konst_macro_rules", +] + +[[package]] +name = "konst_macro_rules" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" + [[package]] name = "kube" version = "0.90.0" @@ -3052,7 +3214,7 @@ dependencies = [ "getrandom 0.2.17", "http", "rand 0.8.6", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_path_to_error", @@ -3070,6 +3232,60 @@ dependencies = [ "memchr", ] +[[package]] +name = "oci-client" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1b7f8deaffcd3b0e3baf93dddcab3d18b91d46dc37d38a8b170089b234de5bb3" +dependencies = [ + "bytes", + "chrono", + "futures-util", + "http", + "http-auth", + "jsonwebtoken 10.3.0", + "lazy_static", + "oci-spec", + "olpc-cjson", + "regex", + "reqwest 0.13.2", + "serde", + "serde_json", + "sha2 0.10.9", + "thiserror 2.0.18", + "tokio", + "tracing", + "unicase", +] + +[[package]] +name = "oci-spec" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8445a2631507cec628a15fdd6154b54a3ab3f20ed4fe9d73a3b8b7a4e1ba03a" +dependencies = [ + "const_format", + "derive_builder", + "getset", + "regex", + "serde", + "serde_json", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.18", +] + +[[package]] +name = "olpc-cjson" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "696183c9b5fe81a7715d074fd632e8bd46f4ccc0231a3ed7fc580a80de5f7083" +dependencies = [ + "serde", + "serde_json", + "unicode-normalization", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -3137,7 +3353,7 @@ dependencies = [ "owo-colors", "prost-types", "rcgen", - "reqwest", + "reqwest 0.12.28", "rustls", "rustls-pemfile", "serde", @@ -3240,18 +3456,22 @@ dependencies = [ name = "openshell-driver-vm" version = "0.0.0" dependencies = [ + "bollard", "clap", + "flate2", "futures", "libc", "libloading", "miette", "nix", + "oci-client", "openshell-core", "openshell-vfio", "polling", "prost-types", "serde", "serde_json", + "sha2 0.10.9", "tar", "tokio", "tokio-stream", @@ -3311,7 +3531,7 @@ version = "0.0.0" dependencies = [ "bytes", "openshell-core", - "reqwest", + "reqwest 0.12.28", "serde", "serde_json", "serde_yml", @@ -3387,7 +3607,7 @@ dependencies = [ "hyper-rustls", "hyper-util", "ipnet", - "jsonwebtoken", + "jsonwebtoken 9.3.1", "metrics", "metrics-exporter-prometheus", "miette", @@ -3404,7 +3624,7 @@ dependencies = [ "prost-types", "rand 0.9.4", "rcgen", - "reqwest", + "reqwest 0.12.28", "russh", 
"rustls", "rustls-pemfile", @@ -4062,6 +4282,7 @@ version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -4200,7 +4421,7 @@ dependencies = [ "lru", "paste", "stability", - "strum", + "strum 0.26.3", "unicode-segmentation", "unicode-truncate", "unicode-width 0.1.14", @@ -4335,6 +4556,47 @@ dependencies = [ "webpki-roots 1.0.7", ] +[[package]] +name = "reqwest" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "rustls-platform-verifier", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower 0.5.3", + "tower-http 0.6.8", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", +] + [[package]] name = "rfc6979" version = "0.4.0" @@ -4538,6 +4800,7 @@ version = "0.23.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", @@ -4578,12 +4841,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + 
"rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.103.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted 0.9.0", @@ -4610,6 +4901,15 @@ dependencies = [ "cipher", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.29" @@ -5333,9 +5633,15 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros", + "strum_macros 0.26.4", ] +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + [[package]] name = "strum_macros" version = "0.26.4" @@ -5349,6 +5655,18 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "subtle" version = "2.6.1" @@ -6001,6 +6319,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + [[package]] name = "unicode-bidi" version = "0.3.18" @@ -6157,6 +6481,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -6273,6 +6607,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -6305,6 +6652,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -6349,6 +6705,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 
0.61.2", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -6456,6 +6821,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -6501,6 +6875,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -6558,6 +6947,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -6576,6 +6971,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -6594,6 +6995,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" 
+[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -6624,6 +7031,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -6642,6 +7055,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -6660,6 +7079,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -6678,6 +7103,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -6887,7 +7318,7 @@ dependencies = [ "bindgen", "cmake", "pkg-config", - "reqwest", + "reqwest 0.12.28", "serde_json", "zip", ] diff --git a/architecture/custom-vm-runtime.md b/architecture/custom-vm-runtime.md index 045ee2e9a..ad677d3ae 100644 --- a/architecture/custom-vm-runtime.md +++ b/architecture/custom-vm-runtime.md @@ -20,8 +20,9 @@ kernel. The driver is spawned by `openshell-gateway` as a subprocess, talks to it over a Unix domain socket (`compute-driver.sock`) with the `openshell.compute.v1.ComputeDriver` gRPC surface, and manages per-sandbox -microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox rootfs are -embedded directly in the driver binary — no sibling files required at runtime. +microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox +supervisor are embedded directly in the driver binary; each sandbox guest +rootfs is derived from a container image at create time. ## Architecture @@ -30,7 +31,7 @@ graph TD subgraph Host["Host (macOS / Linux)"] GATEWAY["openshell-gateway
(compute::vm::spawn)"] DRIVER["openshell-driver-vm
(compute-driver.sock)"] - EMB["Embedded runtime (zstd)
libkrun · libkrunfw · gvproxy
+ sandbox rootfs.tar.zst"] + EMB["Embedded runtime (zstd)
libkrun · libkrunfw · gvproxy
+ openshell-sandbox.zst"] GVP["gvproxy (per sandbox)
virtio-net · DHCP · DNS"] GATEWAY <-->|gRPC over UDS| DRIVER @@ -58,8 +59,8 @@ never binds a host-side TCP listener. ## Embedded Runtime -`openshell-driver-vm` embeds the VM runtime libraries and the sandbox rootfs as -zstd-compressed byte arrays, extracting on demand: +`openshell-driver-vm` embeds the VM runtime libraries and the sandbox +supervisor as zstd-compressed byte arrays, extracting on demand: ```text ~/.local/share/openshell/vm-runtime// # libkrun / libkrunfw / gvproxy @@ -74,14 +75,20 @@ Old runtime cache versions are cleaned up when a new version is extracted. ### Sandbox rootfs preparation -The rootfs tarball the driver embeds starts from the same minimal Ubuntu base -used across the project, and is **rewritten into a supervisor-only sandbox -guest** during extraction: +Each VM sandbox starts from either a registry image fetched directly over OCI or +a local Docker image reference produced by Dockerfile-based `--from` sources. +For local Dockerfile sources, the CLI builds the image on the local Docker +daemon and passes the ordinary image tag through `template.image`. 
The VM driver +first checks the local Docker daemon for that tag; when present, it exports the +image filesystem and **rewrites that filesystem into a supervisor-only sandbox +guest** before caching it: -- k3s state and Kubernetes manifests are stripped out - `/srv/openshell-vm-sandbox-init.sh` is installed as the guest entrypoint -- the guest boots directly into `openshell-sandbox` — no k3s, no kube-proxy, - no CNI plugins +- the bundled `openshell-sandbox` binary is copied into + `/opt/openshell/bin/openshell-sandbox` +- k3s state and Kubernetes manifests are stripped out if the image contains them +- the guest boots directly into `openshell-sandbox` — no k3s, no kube-proxy, no + CNI plugins See `crates/openshell-driver-vm/src/rootfs.rs` for the rewrite logic and `crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh` for the init @@ -95,6 +102,48 @@ spawns one launcher per sandbox as a subprocess, which in turn starts `gvproxy` and calls `krun_start_enter` to boot the guest. Keeping the launcher in the same binary means the driver ships a single artifact for both roles. +When a sandbox sets `template.image` through `openshell sandbox create --from ...`, +the VM driver treats that image as the base guest rootfs source for that +sandbox. When `template.image` is omitted, the gateway fills it from the VM +driver's advertised `default_image`, which matches the gateway's configured +sandbox image. 
The driver: + +- resolves the image on the gateway host without Docker for registry and + community image refs +- for local Dockerfile sources, the CLI builds through the host Docker socket + and passes the resulting ordinary Docker tag through `template.image` +- unpacks the image filesystem, injects the VM sandbox init/supervisor files, + and validates required guest tools such as `bash`, `mount`, `ip`, and `sed` +- caches the prepared guest rootfs under + `<state-dir>/images/<image-identity>/rootfs.tar` +- extracts a private runtime copy under + `<state-dir>/sandboxes/<sandbox-id>/rootfs` + +The cache key uses an immutable image identity: repo digest for registry images +and the local Docker image ID for images resolved from the local daemon. +Different VM sandboxes can use different base images concurrently because the +shared cache is per image, not global for the driver. Cached prepared rootfs +entries remain on disk until the operator removes them from the VM driver state +directory. + +Docker is therefore no longer required for VM sandboxes created from registry or +community image refs. It is only required on the local CLI/gateway host when the +source is a local Dockerfile or build context. + +Local Dockerfile sources are treated as trusted local-development inputs for VM +gateways. Remote VM gateways still reject local Dockerfile sources until a +gateway-side artifact validation and transfer boundary is designed. + +There is no embedded guest rootfs fallback anymore. VM sandboxes therefore +require either `template.image` or a configured default sandbox image. This is +still replace-the-rootfs semantics, so VM images must remain base-compatible +with the sandbox guest init path. Distroless or `scratch` images are not +expected to work. + +The separate `openshell-vm` binary still uses `vm:rootfs` to build a standalone +embedded guest filesystem, but `openshell-driver-vm` no longer consumes that +artifact. 
+ ## Network Plane The driver launches a **dedicated `gvproxy` instance per sandbox** to provide the @@ -178,8 +227,8 @@ graph LR The `vm-runtime-.tar.zst` artifact is consumed by `openshell-driver-vm`'s `build.rs`, which embeds the library set into the binary via `include_bytes!()`. Setting `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` -at build time (wired up by `crates/openshell-driver-vm/start.sh`) points the -build at the staged artifacts. +at build time (wired up by `tasks/scripts/gateway-vm.sh`, registered as +`mise run gateway:vm`) points the build at the staged artifacts. ## Kernel Config Fragment @@ -262,8 +311,8 @@ host platform. ### Driver Binary (`release-vm-dev.yml`) Builds the self-contained `openshell-driver-vm` binary for every platform, -with the kernel runtime + sandbox rootfs embedded. Runs on every push to -`main` that touches VM-related crates. +with the kernel runtime + bundled sandbox supervisor embedded. Runs on every +push to `main` that touches VM-related crates. The `download-kernel-runtime` job pulls the current `vm-runtime-.tar.zst` from the `vm-dev` release; the `build-openshell-driver-vm` jobs set @@ -273,14 +322,15 @@ cross-compiled via osxcross (no macOS runner needed for the binary build — only for the kernel build). macOS driver binaries produced via osxcross are not codesigned. Development -builds are signed automatically by `crates/openshell-driver-vm/start.sh`; a -packaged release needs signing in CI. +builds are signed automatically by `tasks/scripts/gateway-vm.sh` +(registered as `mise run gateway:vm`); a packaged release needs signing in +CI. ## Rollout Strategy 1. Custom runtime is embedded by default when building `openshell-driver-vm` with `OPENSHELL_VM_RUNTIME_COMPRESSED_DIR` set (wired up by - `crates/openshell-driver-vm/start.sh`). + `tasks/scripts/gateway-vm.sh`). 2. The sandbox init script validates kernel capabilities at boot and fails fast if missing. 3. 
For development, override with `OPENSHELL_VM_RUNTIME_DIR` to use a local diff --git a/architecture/gateway.md b/architecture/gateway.md index 8e2724bc6..62381637e 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -136,8 +136,8 @@ All configuration is via CLI flags with environment variable fallbacks. The `--d | `--sandbox-image` | `OPENSHELL_SANDBOX_IMAGE` | None | Default container image for sandbox pods | | `--grpc-endpoint` | `OPENSHELL_GRPC_ENDPOINT` | None | gRPC endpoint reachable from within the cluster (for supervisor callbacks) | | `--drivers` | `OPENSHELL_DRIVERS` | `kubernetes` | Compute backend to use. Current options are `kubernetes`, `docker`, and `vm`. | -| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, and runtime state | | `--driver-dir` | `OPENSHELL_DRIVER_DIR` | unset | Override directory for `openshell-driver-vm`. When unset, the gateway searches `~/.local/libexec/openshell`, `/usr/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`, then a sibling binary. | +| `--vm-driver-state-dir` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Host directory for VM sandbox rootfs, console logs, runtime state, and shared image-rootfs cache | | `--vm-krun-log-level` | `OPENSHELL_VM_KRUN_LOG_LEVEL` | `1` | libkrun log level for VM helper processes | | `--vm-driver-vcpus` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | Default vCPU count for VM sandboxes | | `--vm-driver-mem-mib` | `OPENSHELL_VM_DRIVER_MEM_MIB` | `2048` | Default memory allocation for VM sandboxes in MiB | @@ -619,7 +619,7 @@ The Docker driver (`crates/openshell-driver-docker/src/lib.rs`) is an in-process `VmDriver` (`crates/openshell-driver-vm/src/driver.rs`) is served by the standalone `openshell-driver-vm` process. 
The gateway spawns that binary on demand and talks to it over the internal `openshell.compute.v1.ComputeDriver` gRPC contract via a Unix domain socket. -- **Create**: The VM driver process allocates a sandbox-specific rootfs from its own embedded `rootfs.tar.zst`, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor. +- **Create**: The VM driver process resolves the selected sandbox image (fetching registry and community images directly over OCI, or exporting the image filesystem from the local Docker daemon when the tag is present there), rewrites it into a sandbox-specific guest rootfs, injects an explicitly configured guest mTLS bundle when the gateway callback endpoint is `https://`, then re-execs itself in a hidden helper mode that loads libkrun directly and boots the supervisor. - **Networking**: The helper starts an embedded `gvproxy`, wires it into libkrun as virtio-net, and gives the guest outbound connectivity. No inbound TCP listener is needed — the supervisor reaches the gateway over its outbound `ConnectSupervisor` stream. - **Gateway callback**: The guest init script configures `eth0` for gvproxy networking, seeds `/etc/hosts` so `host.openshell.internal` resolves to the gvproxy gateway IP (`192.168.127.1`), preserves gvproxy's legacy `host.containers.internal` / `host.docker.internal` DNS answers, prefers the configured `OPENSHELL_GRPC_ENDPOINT`, and falls back to those aliases or the raw gateway IP when local hostname resolution is unavailable on macOS. - **Guest boot**: The sandbox guest runs a minimal init script that starts `openshell-sandbox` directly as PID 1 inside the VM. 
diff --git a/architecture/sandbox-custom-containers.md b/architecture/sandbox-custom-containers.md index 5d482ffe0..3dad52f0e 100644 --- a/architecture/sandbox-custom-containers.md +++ b/architecture/sandbox-custom-containers.md @@ -9,7 +9,7 @@ The `--from` flag accepts four kinds of input: | Input | Example | Behavior | |-------|---------|----------| | **Community sandbox name** | `--from openclaw` | Resolves to `ghcr.io/nvidia/openshell-community/sandboxes/openclaw:latest` | -| **Dockerfile path** | `--from ./Dockerfile` | Builds the image, pushes it into the cluster, then creates the sandbox | +| **Dockerfile path** | `--from ./Dockerfile` | Builds the image locally, makes it available to the local gateway, then creates the sandbox | | **Directory with Dockerfile** | `--from ./my-sandbox/` | Uses the directory as the build context | | **Full image reference** | `--from myregistry.com/img:tag` | Uses the image directly | @@ -19,8 +19,9 @@ The CLI classifies the value in this order: 1. **Existing file** whose name contains "Dockerfile" (case-insensitive) — treated as a Dockerfile to build. 2. **Existing directory** containing a `Dockerfile` — treated as a build context directory. -3. **Contains `/`, `:`, or `.`** — treated as a full container image reference. -4. **Otherwise** — treated as a community sandbox name, expanded to `{OPENSHELL_COMMUNITY_REGISTRY}/{name}:latest`. +3. **Missing explicit local path** (for example `./Dockerfile`, `../ctx`, or an absolute path) — rejected locally instead of sent to the gateway as an image pull. +4. **Contains `/`, `:`, or `.`** — treated as a full container image reference. +5. **Otherwise** — treated as a community sandbox name, expanded to `{OPENSHELL_COMMUNITY_REGISTRY}/{name}:latest`. The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sandboxes` and can be overridden with the `OPENSHELL_COMMUNITY_REGISTRY` environment variable. 
@@ -33,8 +34,14 @@ The community registry prefix defaults to `ghcr.io/nvidia/openshell-community/sa When `--from` points to a Dockerfile or directory, the CLI: 1. Builds the image locally via the Docker daemon (respecting `.dockerignore`). -2. Pushes it into the cluster's containerd runtime using `docker save` / `ctr import`. -3. Creates the sandbox with the resulting image tag. +2. For a local Kubernetes gateway, pushes it into the cluster's containerd runtime using `docker save` / `ctr import`. +3. For standalone local Docker and VM gateways, passes the ordinary image tag through. The Docker driver runs that tag directly; the VM driver resolves it from the local Docker daemon, exports the image filesystem, and prepares the VM rootfs in its own cache. +4. Creates the sandbox with the resulting image tag. + +Local Dockerfile sources for VM gateways are trusted local-development inputs. +Remote gateways continue to reject local Dockerfile sources because the gateway +API does not yet validate or transfer local build artifacts across that +boundary. ## How It Works diff --git a/crates/openshell-bootstrap/src/build.rs b/crates/openshell-bootstrap/src/build.rs index fb9b4a63d..a313d4394 100644 --- a/crates/openshell-bootstrap/src/build.rs +++ b/crates/openshell-bootstrap/src/build.rs @@ -1,11 +1,13 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! Build and push container images into a k3s gateway. +//! Build container images for gateway runtimes. //! //! This module wraps bollard's `build_image()` API to build a container image -//! from a Dockerfile and build context, then reuses the existing push pipeline -//! to import the image into the gateway's containerd runtime. +//! from a Dockerfile and build context. Kubernetes deployments reuse the +//! existing push pipeline to import the image into the gateway's containerd +//! runtime. 
VM deployments keep the built image in the local Docker daemon and +//! pass an internal local-image reference to the VM driver. use std::collections::HashMap; use std::path::Path; @@ -18,37 +20,39 @@ use miette::{IntoDiagnostic, Result, WrapErr}; use crate::constants::container_name; use crate::push::push_local_images; -/// Build a container image from a Dockerfile and push it into the gateway. +/// Build a container image from a Dockerfile using the local Docker daemon. /// -/// This is used by `openshell sandbox create --from `. It: -/// 1. Creates a tar archive of the build context directory. -/// 2. Sends it to the local Docker daemon via `build_image()`. -/// 3. Pushes the resulting image into the gateway's containerd via the -/// existing `push_local_images()` pipeline. +/// This is used by `openshell sandbox create --from ` for both the +/// Kubernetes and VM backends. The image remains available in the local Docker +/// daemon so the caller can either hand the resulting tag directly to the VM +/// backend or import it into a local gateway containerd runtime. #[allow(clippy::implicit_hasher)] -pub async fn build_and_push_image( +pub async fn build_local_image( dockerfile_path: &Path, tag: &str, context_dir: &Path, - gateway_name: &str, build_args: &HashMap, on_log: &mut impl FnMut(String), ) -> Result<()> { - // 1. Build the image locally. on_log(format!( "Building image {tag} from {}", dockerfile_path.display() )); build_image(dockerfile_path, tag, context_dir, build_args, on_log).await?; on_log(format!("Built image {tag}")); + Ok(()) +} - // 2. Push into the gateway. +/// Push a locally-built image into the gateway's containerd runtime. 
+#[allow(clippy::implicit_hasher)] +pub async fn push_image_into_gateway( + tag: &str, + gateway_name: &str, + on_log: &mut impl FnMut(String), +) -> Result<()> { on_log(format!( "Pushing image {tag} into gateway \"{gateway_name}\"" )); - // Use the long-timeout Docker client so `docker save` of multi-GB images - // doesn't trip the 120s bollard default mid-stream. Override with - // OPENSHELL_DOCKER_TIMEOUT_SECS=. let local_docker = crate::docker::connect_local_for_large_transfers() .into_diagnostic() .wrap_err("failed to connect to local Docker daemon")?; @@ -60,6 +64,28 @@ pub async fn build_and_push_image( Ok(()) } +/// Build a container image from a Dockerfile and push it into the gateway. +/// +/// This is used by `openshell sandbox create --from ` when the +/// active gateway is the local Kubernetes deployment. It: +/// 1. Creates a tar archive of the build context directory. +/// 2. Sends it to the local Docker daemon via `build_image()`. +/// 3. Pushes the resulting image into the gateway's containerd via the +/// existing `push_local_images()` pipeline. +#[allow(clippy::implicit_hasher)] +pub async fn build_and_push_image( + dockerfile_path: &Path, + tag: &str, + context_dir: &Path, + gateway_name: &str, + build_args: &HashMap, + on_log: &mut impl FnMut(String), +) -> Result<()> { + build_local_image(dockerfile_path, tag, context_dir, build_args, on_log).await?; + push_image_into_gateway(tag, gateway_name, on_log).await?; + Ok(()) +} + /// Build a container image using the local Docker daemon. /// /// Creates a tar archive of `context_dir`, sends it to Docker with the diff --git a/crates/openshell-bootstrap/src/metadata.rs b/crates/openshell-bootstrap/src/metadata.rs index beadcbeac..663476167 100644 --- a/crates/openshell-bootstrap/src/metadata.rs +++ b/crates/openshell-bootstrap/src/metadata.rs @@ -65,6 +65,10 @@ pub struct GatewayMetadata { /// When set, tokens will include these scopes for fine-grained access control. 
#[serde(default, skip_serializing_if = "Option::is_none")] pub oidc_scopes: Option, + + /// Local VM driver state directory for standalone VM gateways. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub vm_driver_state_dir: Option, } impl GatewayMetadata { diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index eaadf7908..2ad634cf2 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -2758,6 +2758,7 @@ pub async fn sandbox_create( } /// Resolved source for the `--from` flag on `sandbox create`. +#[derive(Debug)] enum ResolvedSource { /// A ready-to-use container image reference. Image(String), @@ -2774,19 +2775,15 @@ enum ResolvedSource { /// Resolution order: /// 1. Existing file whose name contains "Dockerfile" → build from file. /// 2. Existing directory that contains a `Dockerfile` → build from directory. -/// 3. Value contains `/`, `:`, or `.` → treat as a full image reference. -/// 4. Otherwise → community sandbox name, expanded via the registry prefix. +/// 3. Missing explicit local paths → local error, not image pull. +/// 4. Value contains `/`, `:`, or `.` → treat as a full image reference. +/// 5. Otherwise → community sandbox name, expanded via the registry prefix. fn resolve_from(value: &str) -> Result { let path = Path::new(value); // 1. Existing file that looks like a Dockerfile. if path.is_file() { - let name = path - .file_name() - .map(|n| n.to_string_lossy()) - .unwrap_or_default(); - let lower = name.to_lowercase(); - if lower.contains("dockerfile") || lower.ends_with(".dockerfile") { + if filename_looks_like_dockerfile(path) { let dockerfile = path .canonicalize() .into_diagnostic() @@ -2800,6 +2797,13 @@ fn resolve_from(value: &str) -> Result { context, }); } + + if value_looks_like_local_source(value) { + return Err(miette::miette!( + "local --from file is not a Dockerfile: {}", + path.display() + )); + } } // 2. Existing directory containing a Dockerfile. 
@@ -2822,13 +2826,57 @@ fn resolve_from(value: &str) -> Result { )); } - // 3. Full image reference or community sandbox name — delegate to shared + if path.exists() { + return Err(miette::miette!( + "local --from path is not a regular file or directory: {}", + path.display() + )); + } + + // 3. Missing explicit local paths should fail locally. Otherwise values + // like `./Dockerfile` reach the gateway as image references and fail as + // Docker pull errors. + if value_looks_like_local_source(value) { + return Err(miette::miette!( + "local --from path does not exist: {}\n\ + Use an existing Dockerfile, a directory containing Dockerfile, or a container image reference.", + path.display() + )); + } + + // 4. Full image reference or community sandbox name — delegate to shared // resolution in openshell-core. Ok(ResolvedSource::Image( openshell_core::image::resolve_community_image(value), )) } +fn filename_looks_like_dockerfile(path: &Path) -> bool { + let name = path + .file_name() + .map(|n| n.to_string_lossy()) + .unwrap_or_default(); + let lower = name.to_lowercase(); + lower.contains("dockerfile") || lower.ends_with(".dockerfile") +} + +fn value_looks_like_local_source(value: &str) -> bool { + value_is_explicit_local_path(value) || value_looks_like_bare_dockerfile_name(value) +} + +fn value_is_explicit_local_path(value: &str) -> bool { + let path = Path::new(value); + path.is_absolute() + || matches!(value, "." 
| "..") + || value.starts_with("./") + || value.starts_with("../") + || value.starts_with("~/") +} + +fn value_looks_like_bare_dockerfile_name(value: &str) -> bool { + !value.contains('/') && !value.contains(':') && filename_looks_like_dockerfile(Path::new(value)) +} + fn source_requests_gpu(source: &str) -> bool { resolve_from(source).is_ok_and(|resolved| match resolved { ResolvedSource::Image(image) => image_requests_gpu(&image), @@ -2849,15 +2897,29 @@ fn image_requests_gpu(image: &str) -> bool { image_name.contains("gpu") } -/// Build a Dockerfile and push the resulting image into the gateway. +fn dockerfile_sources_supported_for_gateway(metadata: Option<&GatewayMetadata>) -> bool { + !metadata.is_some_and(|metadata| metadata.is_remote) +} + +/// Build a Dockerfile and make the resulting image available to the gateway. /// -/// Returns the image tag that was built so the caller can use it for sandbox -/// creation. +/// For local Kubernetes gateways running in Docker, this imports the built image +/// into the gateway runtime and returns the Docker tag. Standalone local +/// gateways use the same Docker daemon that the CLI built into, so the tag is +/// passed through directly and the active compute driver resolves it. 
async fn build_from_dockerfile( dockerfile: &Path, context: &Path, gateway_name: &str, ) -> Result { + let metadata = get_gateway_metadata(gateway_name); + if !dockerfile_sources_supported_for_gateway(metadata.as_ref()) { + return Err(miette!( + "local Dockerfile sources are only supported for local gateways; gateway '{}' is remote", + gateway_name + )); + } + let timestamp = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() @@ -2877,21 +2939,39 @@ async fn build_from_dockerfile( eprintln!(" {msg}"); }; - openshell_bootstrap::build::build_and_push_image( + openshell_bootstrap::build::build_local_image( dockerfile, &tag, context, - gateway_name, &HashMap::new(), &mut on_log, ) .await?; + let existing_gateway = openshell_bootstrap::check_existing_deployment(gateway_name, None) + .await + .wrap_err("failed to inspect local gateway deployment state")?; + let pushed_into_gateway = existing_gateway + .is_some_and(|gateway| gateway.container_exists && gateway.container_running); + if pushed_into_gateway { + openshell_bootstrap::build::push_image_into_gateway(&tag, gateway_name, &mut on_log) + .await?; + eprintln!(); + eprintln!( + "{} Image {} is available in the gateway.", + "✓".green().bold(), + tag.cyan(), + ); + eprintln!(); + return Ok(tag); + } + eprintln!(); eprintln!( - "{} Image {} is available in the gateway.", + "{} Image {} is available in the local Docker daemon for gateway '{}'.", "✓".green().bold(), tag.cyan(), + gateway_name, ); eprintln!(); @@ -5712,13 +5792,14 @@ fn format_timestamp_ms(ms: i64) -> String { #[cfg(test)] mod tests { use super::{ - GatewayControlTarget, TlsOptions, format_gateway_select_header, - format_gateway_select_items, gateway_add, gateway_auth_label, gateway_select_with, - gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, - inferred_provider_type, parse_cli_setting_value, parse_credential_pairs, - plaintext_gateway_is_remote, provisioning_timeout_message, 
ready_false_condition_message, - resolve_gateway_control_target_from, sandbox_should_persist, shell_escape, - source_requests_gpu, validate_gateway_name, validate_ssh_host, + GatewayControlTarget, TlsOptions, dockerfile_sources_supported_for_gateway, + format_gateway_select_header, format_gateway_select_items, gateway_add, gateway_auth_label, + gateway_select_with, gateway_type_label, git_sync_files, http_health_check, + image_requests_gpu, inferred_provider_type, parse_cli_setting_value, + parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, + ready_false_condition_message, resolve_from, resolve_gateway_control_target_from, + sandbox_should_persist, shell_escape, source_requests_gpu, validate_gateway_name, + validate_ssh_host, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -5964,6 +6045,103 @@ mod tests { assert!(!source_requests_gpu("base")); } + #[test] + fn resolve_from_classifies_existing_dockerfile_path() { + let temp = tempfile::tempdir().expect("failed to create tempdir"); + let dockerfile = temp.path().join("Dockerfile"); + fs::write(&dockerfile, "FROM scratch\n").expect("failed to write Dockerfile"); + + match resolve_from(dockerfile.to_str().expect("temp path is not UTF-8")) + .expect("expected Dockerfile source") + { + super::ResolvedSource::Dockerfile { + dockerfile: resolved, + context, + } => { + assert_eq!( + resolved, + dockerfile + .canonicalize() + .expect("failed to canonicalize Dockerfile") + ); + assert_eq!( + context, + temp.path() + .canonicalize() + .expect("failed to canonicalize context") + ); + } + super::ResolvedSource::Image(image) => { + panic!("expected Dockerfile source, got image {image}"); + } + } + } + + #[test] + fn resolve_from_rejects_missing_explicit_dockerfile_path() { + let temp = tempfile::tempdir().expect("failed to create tempdir"); + let missing = temp.path().join("Dockerfile"); + + let err = resolve_from(missing.to_str().expect("temp path is not UTF-8")) + .expect_err("expected 
missing Dockerfile path to be rejected"); + + assert!( + err.to_string().contains("local --from path does not exist"), + "unexpected error: {err}" + ); + } + + #[test] + fn resolve_from_keeps_dockerfile_named_image_refs_as_images() { + let image_ref = "ghcr.io/acme/dockerfile-runner:latest"; + + match resolve_from(image_ref).expect("expected image source") { + super::ResolvedSource::Image(image) => assert_eq!(image, image_ref), + super::ResolvedSource::Dockerfile { .. } => { + panic!("expected image ref, got Dockerfile source"); + } + } + } + + #[test] + fn dockerfile_sources_are_rejected_for_remote_gateways() { + let metadata = GatewayMetadata { + name: "remote".to_string(), + gateway_endpoint: "https://gateway.example.com".to_string(), + is_remote: true, + gateway_port: 443, + remote_host: Some("user@gateway.example.com".to_string()), + resolved_host: Some("gateway.example.com".to_string()), + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + vm_driver_state_dir: None, + ..Default::default() + }; + + assert!(!dockerfile_sources_supported_for_gateway(Some(&metadata))); + } + + #[test] + fn dockerfile_sources_are_allowed_for_local_gateways() { + let metadata = GatewayMetadata { + name: "local".to_string(), + gateway_endpoint: "http://127.0.0.1:8080".to_string(), + is_remote: false, + gateway_port: 8080, + remote_host: None, + resolved_host: None, + auth_mode: None, + edge_team_domain: None, + edge_auth_url: None, + vm_driver_state_dir: None, + ..Default::default() + }; + + assert!(dockerfile_sources_supported_for_gateway(Some(&metadata))); + assert!(dockerfile_sources_supported_for_gateway(None)); + } + #[test] fn ready_false_condition_message_prefers_reason_and_message() { let status = SandboxStatus { @@ -6302,6 +6480,7 @@ mod tests { #[tokio::test] async fn http_health_check_supports_plain_http_endpoints() { + let _ = rustls::crypto::ring::default_provider().install_default(); let listener = TcpListener::bind("127.0.0.1:0").expect("bind 
listener"); let addr = listener.local_addr().expect("listener addr"); let server = thread::spawn(move || { diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index e69d06f4f..687ee87b2 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -735,6 +735,10 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { let _env = test_env(&fake_ssh_dir, &xdg_dir); let tls = test_tls(&server); install_fake_ssh(&fake_ssh_dir); + let forward_port = { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + listener.local_addr().unwrap().port() + }; run::sandbox_create( &server.endpoint, @@ -750,7 +754,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { None, &[], None, - Some(openshell_core::forward::ForwardSpec::new(8080)), + Some(openshell_core::forward::ForwardSpec::new(forward_port)), &["echo".to_string(), "OK".to_string()], Some(false), Some(false), diff --git a/crates/openshell-driver-vm/Cargo.toml b/crates/openshell-driver-vm/Cargo.toml index 04f4e9fc5..c13d904a6 100644 --- a/crates/openshell-driver-vm/Cargo.toml +++ b/crates/openshell-driver-vm/Cargo.toml @@ -22,6 +22,7 @@ path = "src/main.rs" openshell-core = { path = "../openshell-core" } openshell-vfio = { path = "../openshell-vfio" } +bollard = { version = "0.20", features = ["ssh"] } tokio = { workspace = true } tonic = { workspace = true, features = ["transport"] } prost-types = { workspace = true } @@ -35,9 +36,12 @@ miette = { workspace = true } url = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +oci-client = "0.16" libc = "0.2" libloading = "0.8" tar = "0.4" +flate2 = "1" +sha2 = "0.10" zstd = "0.13" # smol-rs/polling drives the BSD/macOS parent-death detection in diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md 
index a36f3ea44..dbb90bb67 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -2,7 +2,7 @@ > Status: Experimental. The VM compute driver is under active development and the interface still has VM-specific plumbing that will be generalized. -Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) for OpenShell. The gateway spawns this binary as a subprocess, talks to it over a Unix domain socket with the `openshell.compute.v1.ComputeDriver` gRPC surface, and lets it manage per-sandbox microVMs. The runtime (libkrun + libkrunfw + gvproxy) and sandbox rootfs are embedded directly in the binary — no sibling files required at runtime. +Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) for OpenShell. The gateway spawns this binary as a subprocess, talks to it over a Unix domain socket with the `openshell.compute.v1.ComputeDriver` gRPC surface, and lets it manage per-sandbox microVMs. The runtime (libkrun + libkrunfw + gvproxy) and the sandbox supervisor are embedded directly in the binary; each sandbox guest rootfs is derived from a configured container image at create time. ## How it fits together @@ -10,7 +10,7 @@ Standalone libkrun-backed [`ComputeDriver`](../../proto/compute_driver.proto) fo flowchart LR subgraph host["Host process"] gateway["openshell-server
(compute::vm::spawn)"] - driver["openshell-driver-vm
├── libkrun (VM)
├── gvproxy (net)
└── rootfs.tar.zst"] + driver["openshell-driver-vm
├── libkrun (VM)
├── gvproxy (net)
└── openshell-sandbox.zst"] gateway <-->|"gRPC over UDS
compute-driver.sock"| driver end @@ -35,9 +35,15 @@ Sandbox guests execute `/opt/openshell/bin/openshell-sandbox` as PID 1 inside th mise run gateway:vm ``` -First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:rootfs -- --base` builds the embedded rootfs. Subsequent runs are cached. To keep the Unix socket path under macOS `SUN_LEN`, `mise run gateway:vm` and `start.sh` default the state dir to `/tmp/openshell-vm-driver-dev-$USER-port-$PORT/` (SQLite DB + per-sandbox rootfs + `compute-driver.sock`) unless `OPENSHELL_VM_DRIVER_STATE_DIR` is set. -The wrapper auto-registers the gateway with the CLI (`gateway destroy` + `gateway add`) so no manual registration step is needed. When running under `sudo`, it uses `sudo -u $SUDO_USER` for the registration so the config is written under the invoking user's home directory. Re-runs are idempotent. -It also exports `OPENSHELL_DRIVER_DIR=$PWD/target/debug` before starting the gateway so local dev runs use the freshly built `openshell-driver-vm` instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`. +First run takes a few minutes while `mise run vm:setup` stages libkrun/libkrunfw/gvproxy and `mise run vm:supervisor` builds the bundled guest supervisor. Subsequent runs are cached. + +By default `mise run gateway:vm`: + +- Listens on plaintext HTTP at `127.0.0.1:18081`. +- Registers the CLI gateway `vm-dev` by writing `~/.config/openshell/gateways/vm-dev/metadata.json`. It does not modify the workspace `.env`. +- Persists the gateway SQLite DB under `.cache/gateway-vm/gateway.db`. +- Places the VM driver state (per-sandbox rootfs + `compute-driver.sock`) under `/tmp/openshell-vm-driver-$USER-vm-dev/` so the AF_UNIX socket path stays under macOS `SUN_LEN`. 
+- Passes `--driver-dir $PWD/target/debug` so the freshly built `openshell-driver-vm` is used instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`. For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges: @@ -47,50 +53,43 @@ sudo -E env "PATH=$PATH" mise run gateway:vm -- --gpu See [`architecture/vm-gpu-sandbox-guide.md`](../../architecture/vm-gpu-sandbox-guide.md) for full GPU prerequisites and usage. -Override via environment: +Point the CLI at the gateway with one of: ```shell -OPENSHELL_SERVER_PORT=9090 \ -crates/openshell-driver-vm/start.sh +openshell --gateway vm-dev status +openshell gateway select vm-dev # then plain `openshell ` ``` -Run multiple dev gateways side by side by giving each one a unique port. The wrapper derives a distinct default state dir from that port automatically: +Override defaults via environment: ```shell -OPENSHELL_SERVER_PORT=8080 mise run gateway:vm -OPENSHELL_SERVER_PORT=8081 mise run gateway:vm -``` +# custom port (fails fast if in use) +OPENSHELL_SERVER_PORT=18091 mise run gateway:vm -If you want a custom suffix instead of `port-$PORT`, set `OPENSHELL_VM_INSTANCE`: - -```shell -OPENSHELL_SERVER_PORT=8082 \ -OPENSHELL_VM_INSTANCE=feature-a \ -mise run gateway:vm -``` - -If you want a custom CLI gateway name, set `OPENSHELL_VM_GATEWAY_NAME`: - -```shell -OPENSHELL_SERVER_PORT=8082 \ +# custom CLI gateway name + namespace OPENSHELL_VM_GATEWAY_NAME=vm-feature-a \ +OPENSHELL_SANDBOX_NAMESPACE=vm-feature-a \ mise run gateway:vm + +# custom sandbox image +OPENSHELL_SANDBOX_IMAGE=ghcr.io/example/sandbox:latest mise run gateway:vm ``` Teardown: ```shell -rm -rf /tmp/openshell-vm-driver-dev-$USER-port-8080 +rm -rf /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm +rm -rf "${XDG_CONFIG_HOME:-$HOME/.config}/openshell/gateways/vm-dev" ``` ## Manual equivalent -If you want to drive the launch yourself instead of using `start.sh`: +If you want to drive 
the launch yourself instead of using `mise run gateway:vm` (i.e. `tasks/scripts/gateway-vm.sh`): ```shell -# 1. Stage runtime artifacts + base rootfs into target/vm-runtime-compressed/ +# 1. Stage runtime artifacts + supervisor bundle into target/vm-runtime-compressed/ mise run vm:setup -mise run vm:rootfs -- --base # if rootfs.tar.zst is not already present +mise run vm:supervisor # if openshell-sandbox.zst is not already present # 2. Build both binaries with the staged artifacts embedded OPENSHELL_VM_RUNTIME_COMPRESSED_DIR=$PWD/target/vm-runtime-compressed \ @@ -102,16 +101,17 @@ codesign \ --force -s - target/debug/openshell-driver-vm # 4. Start the gateway with the VM driver -mkdir -p /tmp/openshell-vm-driver-dev-$USER-port-8080 +mkdir -p /tmp/openshell-vm-driver-$USER-vm-dev .cache/gateway-vm target/debug/openshell-gateway \ --drivers vm \ --disable-tls \ - --database-url sqlite:/tmp/openshell-vm-driver-dev-$USER-port-8080/openshell.db \ + --db-url "sqlite:.cache/gateway-vm/gateway.db?mode=rwc" \ --driver-dir $PWD/target/debug \ - --grpc-endpoint http://host.containers.internal:8080 \ - --ssh-gateway-host 127.0.0.1 \ - --ssh-gateway-port 8080 \ - --vm-driver-state-dir /tmp/openshell-vm-driver-dev-$USER-port-8080 + --sandbox-namespace vm-dev \ + --sandbox-image <image> \ + --grpc-endpoint http://host.containers.internal:18081 \ + --port 18081 \ + --vm-driver-state-dir /tmp/openshell-vm-driver-$USER-vm-dev ``` The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conventional install locations (`~/.local/libexec/openshell`, `/usr/libexec/openshell`, `/usr/local/libexec/openshell`, `/usr/local/libexec`), then a sibling of the gateway binary. @@ -121,7 +121,7 @@ The gateway resolves `openshell-driver-vm` in this order: `--driver-dir`, conven | Flag | Env var | Default | Purpose | |---|---|---|---| | `--drivers vm` | `OPENSHELL_DRIVERS` | `kubernetes` | Select the VM compute driver. | -| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required.
URL the sandbox guest calls back to. Use a host alias that resolves to the gateway's host from inside the VM (`host.containers.internal` comes from gvproxy DNS; the guest init script also seeds `host.openshell.internal` to `192.168.127.1`). | +| `--grpc-endpoint URL` | `OPENSHELL_GRPC_ENDPOINT` | — | Required. URL the sandbox guest dials to reach the gateway. Use `http://host.containers.internal:<port>` (or `host.docker.internal` / `host.openshell.internal`) so traffic flows through gvproxy's host-loopback NAT (HostIP `192.168.127.254` → host `127.0.0.1`). Loopback URLs like `http://127.0.0.1:<port>` are rewritten automatically by the driver. The bare gateway IP (`192.168.127.1`) only carries gvproxy's own services and will not reach host-bound ports. | | `--vm-driver-state-dir DIR` | `OPENSHELL_VM_DRIVER_STATE_DIR` | `target/openshell-vm-driver` | Per-sandbox rootfs, console logs, and the `compute-driver.sock` UDS. | | `--driver-dir DIR` | `OPENSHELL_DRIVER_DIR` | unset | Override the directory searched for `openshell-driver-vm`. | | `--vm-driver-vcpus N` | `OPENSHELL_VM_DRIVER_VCPUS` | `2` | vCPUs per sandbox. | @@ -135,14 +135,15 @@ See [`openshell-gateway --help`](../openshell-server/src/cli.rs) for the full fl ## Verifying the gateway -The gateway is auto-registered by `start.sh`. In another terminal: +The gateway is auto-registered by `mise run gateway:vm`. In another terminal: ```shell -scripts/bin/openshell sandbox create --name demo -scripts/bin/openshell sandbox connect demo +./scripts/bin/openshell status +./scripts/bin/openshell sandbox create --name demo --from <image> +./scripts/bin/openshell sandbox connect demo ``` -First sandbox takes 10–30 seconds to boot (rootfs extraction + libkrun + guest init). Subsequent creates reuse the prepared sandbox rootfs. +First sandbox takes 10–30 seconds to boot (image fetch/prepare/cache + libkrun + guest init). If `--from` is omitted, the VM driver uses the gateway's configured default sandbox image.
Without either `--from` or `--sandbox-image`, VM sandbox creation fails. Subsequent creates reuse the prepared sandbox rootfs. ## Logs and debugging @@ -150,7 +151,7 @@ Raise log verbosity for both processes: ```shell RUST_LOG=openshell_server=debug,openshell_driver_vm=debug \ - crates/openshell-driver-vm/start.sh + mise run gateway:vm ``` The VM guest's serial console is appended to `//console.log`. The `compute-driver.sock` lives at `/compute-driver.sock`; the gateway removes it on clean shutdown via `ManagedDriverProcess::drop`. @@ -161,9 +162,11 @@ The VM guest's serial console is appended to `//console.l - Rust toolchain - Guest-supervisor cross-compile toolchain (needed on macOS, and on Linux when host arch ≠ guest arch): - Matching rustup target: `rustup target add aarch64-unknown-linux-gnu` (or `x86_64-unknown-linux-gnu` for an amd64 guest) - - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `build-rootfs.sh` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary. + - `cargo install --locked cargo-zigbuild` and `brew install zig` (or distro equivalent). `vm:supervisor` uses `cargo zigbuild` to cross-compile the in-VM `openshell-sandbox` supervisor binary. - [mise](https://mise.jdx.dev/) task runner -- Docker (needed by `mise run vm:rootfs` to build the base rootfs) +- Docker-compatible socket on the local CLI/gateway host when using + `openshell sandbox create --from ./Dockerfile` or `--from ./dir`; the CLI + builds the image and the VM driver exports it via the local Docker daemon - `gh` CLI (used by `mise run vm:setup` to download pre-built runtime artifacts) ## Relationship to `openshell-vm` @@ -173,4 +176,4 @@ The VM guest's serial console is appended to `//console.l ## TODOs - The gateway still configures the driver via CLI args; this will move to a gRPC bootstrap call so the driver interface is uniform across backends. 
See the `TODO(driver-abstraction)` notes in `crates/openshell-server/src/lib.rs` and `crates/openshell-server/src/compute/vm.rs`. -- macOS codesigning is handled by `start.sh`; a packaged release would need signing in CI. +- macOS codesigning is handled by `tasks/scripts/gateway-vm.sh`; a packaged release would need signing in CI. diff --git a/crates/openshell-driver-vm/build.rs b/crates/openshell-driver-vm/build.rs index e10a1dde0..ea4c4d2e0 100644 --- a/crates/openshell-driver-vm/build.rs +++ b/crates/openshell-driver-vm/build.rs @@ -3,9 +3,9 @@ //! Build script for openshell-driver-vm. //! -//! This crate embeds the sandbox rootfs plus the minimal libkrun runtime -//! artifacts it needs to boot base VMs without depending on the openshell-vm -//! binary or crate. +//! This crate embeds the sandbox supervisor plus the minimal libkrun runtime +//! artifacts it needs to boot VMs without depending on the openshell-vm binary +//! or crate. use std::path::{Path, PathBuf}; use std::{env, fs}; @@ -21,8 +21,7 @@ fn main() { "libkrun.dylib.zst", "libkrunfw.5.dylib.zst", "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ] { println!("cargo:rerun-if-changed={dir}/{name}"); } @@ -37,15 +36,7 @@ fn main() { "linux" => ("libkrun.so", "libkrunfw.so.5"), _ => { println!("cargo:warning=VM runtime not available for {target_os}-{target_arch}"); - generate_stub_resources( - &out_dir, - &[ - "libkrun", - "libkrunfw", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", - ], - ); + generate_stub_resources(&out_dir, &["libkrun", "libkrunfw", "openshell-sandbox.zst"]); return; } }; @@ -54,15 +45,14 @@ fn main() { PathBuf::from(dir) } else { println!("cargo:warning=OPENSHELL_VM_RUNTIME_COMPRESSED_DIR not set"); - println!("cargo:warning=Run: mise run vm:setup"); + println!("cargo:warning=Run: mise run vm:setup && mise run vm:supervisor"); generate_stub_resources( &out_dir, &[ &format!("{libkrun_name}.zst"), &format!("{libkrunfw_name}.zst"), "gvproxy.zst", - 
"rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ], ); return; @@ -73,15 +63,14 @@ fn main() { "cargo:warning=Compressed runtime dir not found: {}", compressed_dir.display() ); - println!("cargo:warning=Run: mise run vm:setup"); + println!("cargo:warning=Run: mise run vm:setup && mise run vm:supervisor"); generate_stub_resources( &out_dir, &[ &format!("{libkrun_name}.zst"), &format!("{libkrunfw_name}.zst"), "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ], ); return; @@ -94,10 +83,9 @@ fn main() { format!("{libkrunfw_name}.zst"), ), ("gvproxy.zst".to_string(), "gvproxy.zst".to_string()), - ("rootfs.tar.zst".to_string(), "rootfs.tar.zst".to_string()), ( - "rootfs-gpu.tar.zst".to_string(), - "rootfs-gpu.tar.zst".to_string(), + "openshell-sandbox.zst".to_string(), + "openshell-sandbox.zst".to_string(), ), ]; @@ -131,15 +119,16 @@ fn main() { } if !all_found { - println!("cargo:warning=Some artifacts missing. Run: mise run vm:setup"); + println!( + "cargo:warning=Some artifacts missing. Run: mise run vm:setup && mise run vm:supervisor" + ); generate_stub_resources( &out_dir, &[ &format!("{libkrun_name}.zst"), &format!("{libkrunfw_name}.zst"), "gvproxy.zst", - "rootfs.tar.zst", - "rootfs-gpu.tar.zst", + "openshell-sandbox.zst", ], ); } diff --git a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh index 1c009a7f1..063a75032 100644 --- a/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh +++ b/crates/openshell-driver-vm/scripts/openshell-vm-sandbox-init.sh @@ -15,7 +15,20 @@ if [ -f /srv/openshell-env.sh ]; then fi BOOT_START=$(date +%s%3N 2>/dev/null || date +%s) +# gvisor-tap-vsock subnet layout: +# 192.168.127.1 — gateway: gvproxy's DNS / DHCP / HTTP API. Does NOT +# proxy arbitrary host ports. +# 192.168.127.254 — host-loopback: NAT-rewritten to host's 127.0.0.1 by +# gvproxy's TCP/UDP/ICMP forwarder. 
Use this address +# (or any of the host.* hostnames below) to reach a +# service the host is listening on. +# The host.containers.internal / host.docker.internal DNS records served +# by gvproxy's embedded resolver point at 192.168.127.254. We mirror that +# in /etc/hosts so the supervisor can reach the gateway even when +# gvproxy's DNS is not in resolv.conf (e.g. DHCP failed and we fell +# back to 8.8.8.8). GVPROXY_GATEWAY_IP="192.168.127.1" +GVPROXY_HOST_LOOPBACK_IP="192.168.127.254" GATEWAY_IP="$GVPROXY_GATEWAY_IP" # Parse kernel cmdline for GPU and TAP networking parameters @@ -96,15 +109,39 @@ tcp_probe() { } ensure_host_gateway_aliases() { + # Seed /etc/hosts with the well-known gvproxy hostnames so the supervisor + # can reach the OpenShell server even when gvproxy's built-in DNS is not + # in resolv.conf (e.g. when DHCP fails and we fall back to 8.8.8.8). + # + # Critical distinction: host.* aliases point at the gvproxy *host-loopback* + # IP (192.168.127.254), not the gateway IP (192.168.127.1). Only the + # host-loopback IP carries NAT rewriting to the host's 127.0.0.1 — the + # gateway IP only listens on gvproxy's own service ports (DNS:53, DHCP, + # HTTP API:80). Pinning host.containers.internal to the gateway IP + # silently breaks guest→host port reachability for arbitrary ports. 
local hosts_tmp="/tmp/openshell-hosts.$$" + local host_aliases="host.openshell.internal host.containers.internal host.docker.internal" + local gateway_aliases="gateway.containers.internal" + local filter='(^|[[:space:]])(host\.openshell\.internal|host\.containers\.internal|host\.docker\.internal|gateway\.containers\.internal)([[:space:]]|$)' if [ -f /etc/hosts ]; then - grep -vE '(^|[[:space:]])host\.openshell\.internal([[:space:]]|$)' /etc/hosts > "$hosts_tmp" || true + grep -vE "$filter" /etc/hosts > "$hosts_tmp" || true else : > "$hosts_tmp" fi - printf '%s host.openshell.internal\n' "$GATEWAY_IP" >> "$hosts_tmp" + # In TAP/GPU mode, GATEWAY_IP is overridden to VM_NET_GW (the host-side + # of the TAP), and the gateway is reachable directly there. In gvproxy + # mode, host.openshell.internal etc. need GVPROXY_HOST_LOOPBACK_IP + # (192.168.127.254) which is gvproxy's host-NAT entry, while + # gateway.containers.internal points at the gvproxy gateway itself. + if [ "${GATEWAY_IP}" = "${GVPROXY_GATEWAY_IP}" ]; then + printf '%s %s\n' "$GVPROXY_HOST_LOOPBACK_IP" "$host_aliases" >> "$hosts_tmp" + printf '%s %s\n' "$GVPROXY_GATEWAY_IP" "$gateway_aliases" >> "$hosts_tmp" + else + # TAP networking: gateway and host are both reachable at GATEWAY_IP. + printf '%s %s %s\n' "$GATEWAY_IP" "$host_aliases" "$gateway_aliases" >> "$hosts_tmp" + fi cat "$hosts_tmp" > /etc/hosts rm -f "$hosts_tmp" } @@ -129,7 +166,15 @@ rewrite_openshell_endpoint_if_needed() { return 0 fi - for candidate in host.openshell.internal host.containers.internal host.docker.internal "$GATEWAY_IP"; do + # Probe candidates in preference order. Hostnames first for informative + # log output, then a bare IP as a final safety net. In gvproxy mode the + # bare IP is the host-loopback (192.168.127.254). In TAP/GPU mode it's + # the TAP host gateway. 
+ local fallback_ip="$GVPROXY_HOST_LOOPBACK_IP" + if [ "${GATEWAY_IP}" != "${GVPROXY_GATEWAY_IP}" ]; then + fallback_ip="$GATEWAY_IP" + fi + for candidate in host.openshell.internal host.containers.internal host.docker.internal "$fallback_ip"; do if [ "$candidate" = "$host" ]; then continue fi diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 704f91610..d79e5d922 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -5,12 +5,22 @@ use crate::gpu::{ GpuInventory, SubnetAllocator, allocate_vsock_cid, mac_from_sandbox_id, tap_device_name, }; use crate::rootfs::{ - extract_gpu_sandbox_rootfs_to, extract_sandbox_rootfs_to, sandbox_guest_init_path, + create_rootfs_archive_from_dir, extract_rootfs_archive_to, + prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path, }; -use futures::Stream; +use bollard::Docker; +use bollard::errors::Error as BollardError; +use bollard::models::ContainerCreateBody; +use bollard::query_parameters::{CreateContainerOptionsBuilder, RemoveContainerOptionsBuilder}; +use flate2::read::GzDecoder; +use futures::{Stream, StreamExt}; use nix::errno::Errno; use nix::sys::signal::{Signal, kill}; use nix::unistd::Pid; +use oci_client::client::{Client as OciClient, ClientConfig}; +use oci_client::manifest::{ImageIndexEntry, OciDescriptor}; +use oci_client::secrets::RegistryAuth; +use oci_client::{Reference, RegistryOperation}; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, @@ -22,31 +32,68 @@ use openshell_core::proto::compute::v1::{ compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use openshell_vfio::SysfsRoot; +use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; +use std::fs; +use std::io::Read; use std::net::Ipv4Addr; use 
std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::process::Stdio; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; +use tokio::io::AsyncWriteExt; use tokio::process::{Child, Command}; use tokio::sync::{Mutex, broadcast, mpsc}; use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Status}; +use tracing::{info, warn}; use url::{Host, Url}; const DRIVER_NAME: &str = "openshell-driver-vm"; const WATCH_BUFFER: usize = 256; const DEFAULT_VCPUS: u8 = 2; const DEFAULT_MEM_MIB: u32 = 2048; -const GVPROXY_GATEWAY_IP: &str = "192.168.127.1"; +/// gvproxy host-loopback IP — gvproxy's TCP/UDP/ICMP forwarder NAT-rewrites +/// this destination to the host's `127.0.0.1` and dials out from the host +/// process. This is the only address that transparently reaches host-bound +/// services without explicit `expose` rules. +/// +/// See gvisor-tap-vsock `cmd/gvproxy/config.go` (default NAT entry +/// `HostIP -> 127.0.0.1`) and `pkg/services/forwarder/tcp.go` (NAT lookup +/// before `net.Dial`). +/// +/// Code paths route via `GVPROXY_HOST_LOOPBACK_ALIAS` (DNS / /etc/hosts) +/// instead so logs stay readable; this constant is kept for documentation +/// and parity with the guest init script. +#[allow(dead_code)] +const GVPROXY_HOST_LOOPBACK_IP: &str = "192.168.127.254"; const OPENSHELL_HOST_GATEWAY_ALIAS: &str = "host.openshell.internal"; +/// Hostname gvproxy resolves (via its embedded DNS) to the host-loopback IP. +/// +/// We rewrite loopback URLs to this hostname rather than the bare IP because: +/// * the guest init script seeds /etc/hosts with the same mapping, so it +/// resolves even when gvproxy's DNS is not in resolv.conf; +/// * keeping a recognisable hostname makes log messages clearer than a bare +/// 192.168.127.254 reference; +/// * `host.docker.internal` works the same way for Docker-flavoured tooling. 
+/// +/// Both names ultimately route through the gvproxy NAT path on +/// `GVPROXY_HOST_LOOPBACK_IP` — they do **not** go through the gateway IP. +const GVPROXY_HOST_LOOPBACK_ALIAS: &str = "host.containers.internal"; const GUEST_SSH_SOCKET_PATH: &str = "/run/openshell/ssh.sock"; const GUEST_TLS_DIR: &str = "/opt/openshell/tls"; const GUEST_TLS_CA_PATH: &str = "/opt/openshell/tls/ca.crt"; const GUEST_TLS_CERT_PATH: &str = "/opt/openshell/tls/tls.crt"; const GUEST_TLS_KEY_PATH: &str = "/opt/openshell/tls/tls.key"; +const IMAGE_CACHE_ROOT_DIR: &str = "images"; +const IMAGE_CACHE_ROOTFS_ARCHIVE: &str = "rootfs.tar"; +const IMAGE_EXPORT_ROOTFS_ARCHIVE: &str = "source-rootfs.tar"; +const IMAGE_IDENTITY_FILE: &str = "image-identity"; +const IMAGE_REFERENCE_FILE: &str = "image-reference"; +static IMAGE_CACHE_BUILD_COUNTER: AtomicU64 = AtomicU64::new(0); #[derive(Debug, Clone)] struct VmDriverTlsPaths { @@ -60,6 +107,7 @@ pub struct VmDriverConfig { pub openshell_endpoint: String, pub state_dir: PathBuf, pub launcher_bin: Option, + pub default_image: String, pub ssh_handshake_secret: String, pub ssh_handshake_skew_secs: u64, pub log_level: String, @@ -80,6 +128,7 @@ impl Default for VmDriverConfig { openshell_endpoint: String::new(), state_dir: PathBuf::from("target/openshell-vm-driver"), launcher_bin: None, + default_image: String::new(), ssh_handshake_secret: String::new(), ssh_handshake_skew_secs: 300, log_level: "info".to_string(), @@ -188,6 +237,7 @@ pub struct VmDriver { config: VmDriverConfig, launcher_bin: PathBuf, registry: Arc>>, + image_cache_lock: Arc>, events: broadcast::Sender, gpu_inventory: Option>>, subnet_allocator: Arc>, @@ -209,7 +259,7 @@ impl VmDriver { .map_err(|e| format!("cleanup stale TAP interfaces panicked: {e}"))?; } - let state_root = config.state_dir.join("sandboxes"); + let state_root = sandboxes_root_dir(&config.state_dir); tokio::fs::create_dir_all(&state_root) .await .map_err(|err| { @@ -218,6 +268,15 @@ impl VmDriver { 
state_root.display() ) })?; + let image_cache_root = image_cache_root_dir(&config.state_dir); + tokio::fs::create_dir_all(&image_cache_root) + .await + .map_err(|err| { + format!( + "failed to create state dir '{}': {err}", + image_cache_root.display() + ) + })?; let launcher_bin = if let Some(path) = config.launcher_bin.clone() { path @@ -248,6 +307,7 @@ impl VmDriver { config, launcher_bin, registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), events, gpu_inventory, subnet_allocator, @@ -264,7 +324,7 @@ impl VmDriver { GetCapabilitiesResponse { driver_name: DRIVER_NAME.to_string(), driver_version: openshell_core::VERSION.to_string(), - default_image: String::new(), + default_image: self.config.default_image.clone(), supports_gpu: self.gpu_inventory.is_some(), gpu_count, } @@ -274,13 +334,24 @@ impl VmDriver { // gRPC API surface; boxing here would diverge from every other handler. #[allow(clippy::result_large_err)] pub fn validate_sandbox(&self, sandbox: &Sandbox) -> Result<(), Status> { - validate_vm_sandbox(sandbox, self.config.gpu_enabled) + validate_vm_sandbox(sandbox, self.config.gpu_enabled)?; + if self.resolved_sandbox_image(sandbox).is_none() { + return Err(Status::failed_precondition( + "vm sandboxes require template.image or a configured default sandbox image", + )); + } + Ok(()) } // `tonic::Status` is large but is the standard error type across the // gRPC API surface; boxing here would diverge from every other handler. 
#[allow(clippy::result_large_err)] pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result { + info!( + sandbox_id = %sandbox.id, + sandbox_name = %sandbox.name, + "vm driver: create_sandbox received" + ); validate_vm_sandbox(sandbox, self.config.gpu_enabled)?; if self.registry.lock().await.contains_key(&sandbox.id) { @@ -293,6 +364,17 @@ impl VmDriver { let state_dir = sandbox_state_dir(&self.config.state_dir, &sandbox.id); let rootfs = state_dir.join("rootfs"); + let image_ref = self.resolved_sandbox_image(sandbox).ok_or_else(|| { + Status::failed_precondition( + "vm sandboxes require template.image or a configured default sandbox image", + ) + })?; + info!( + sandbox_id = %sandbox.id, + image_ref = %image_ref, + state_dir = %state_dir.display(), + "vm driver: resolved image ref, preparing rootfs" + ); tokio::fs::create_dir_all(&state_dir) .await @@ -302,22 +384,57 @@ impl VmDriver { .config .tls_paths() .map_err(Status::failed_precondition)?; - let rootfs_for_extract = rootfs.clone(); - let extract_fn = if is_gpu { - extract_gpu_sandbox_rootfs_to - } else { - extract_sandbox_rootfs_to - }; - tokio::task::spawn_blocking(move || extract_fn(&rootfs_for_extract)) + // Mirror the K8s `Scheduled` event so the CLI can complete the + // "Requesting sandbox" step and switch the spinner over to the + // image-pull phase before we block on the registry. + self.publish_platform_event( + sandbox.id.clone(), + platform_event( + "vm", + "Normal", + "Scheduled", + format!("Sandbox accepted by vm driver to image \"{image_ref}\""), + ), + ); + + let image_identity = match self + .prepare_runtime_rootfs(&sandbox.id, &image_ref, &rootfs) .await - .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))? 
- .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?; - if let Some(tls_paths) = tls_paths.as_ref() { - prepare_guest_tls_materials(&rootfs, tls_paths) - .await - .map_err(|err| { - Status::internal(format!("prepare guest TLS materials failed: {err}")) - })?; + { + Ok(image_identity) => { + info!( + sandbox_id = %sandbox.id, + image_identity = %image_identity, + "vm driver: rootfs prepared" + ); + image_identity + } + Err(err) => { + warn!( + sandbox_id = %sandbox.id, + error = %err.message(), + "vm driver: rootfs preparation failed" + ); + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(err); + } + }; + if let Some(tls_paths) = tls_paths.as_ref() + && let Err(err) = prepare_guest_tls_materials(&rootfs, tls_paths).await + { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::internal(format!( + "prepare guest TLS materials failed: {err}" + ))); + } + + if let Err(err) = + write_sandbox_image_metadata(&state_dir, &image_ref, &image_identity).await + { + let _ = tokio::fs::remove_dir_all(&state_dir).await; + return Err(Status::internal(format!( + "write sandbox image metadata failed: {err}" + ))); } let gpu_bdf = if is_gpu { @@ -431,9 +548,20 @@ impl VmDriver { command.arg("--vm-env").arg(env); } + info!( + sandbox_id = %sandbox.id, + launcher = %self.launcher_bin.display(), + console_output = %console_output.display(), + "vm driver: spawning VM launcher" + ); let child = match command.spawn() { Ok(child) => child, Err(err) => { + warn!( + sandbox_id = %sandbox.id, + error = %err, + "vm driver: launcher spawn failed" + ); if gpu_bdf.is_some() { self.release_gpu_and_subnet(&sandbox.id); } @@ -444,6 +572,18 @@ impl VmDriver { ))); } }; + info!( + sandbox_id = %sandbox.id, + launcher_pid = child.id().unwrap_or(0), + "vm driver: launcher spawned" + ); + // Mirror the K8s `Started` event so the CLI can complete the + // "Starting sandbox" step. 
The supervisor-ready transition still + // promotes the sandbox to `Ready` separately. + self.publish_platform_event( + sandbox.id.clone(), + platform_event("vm", "Normal", "Started", "Started VM launcher".to_string()), + ); let snapshot = sandbox_snapshot(sandbox, provisioning_condition(), false); let process = Arc::new(Mutex::new(VmProcess { child, @@ -587,6 +727,443 @@ impl VmDriver { } } + async fn prepare_runtime_rootfs( + &self, + sandbox_id: &str, + image_ref: &str, + rootfs: &Path, + ) -> Result { + let image_identity = self + .ensure_cached_image_rootfs_archive(sandbox_id, image_ref) + .await?; + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + let rootfs_dest = rootfs.to_path_buf(); + tokio::task::spawn_blocking(move || extract_rootfs_archive_to(&archive_path, &rootfs_dest)) + .await + .map_err(|err| Status::internal(format!("sandbox rootfs extraction panicked: {err}")))? + .map_err(|err| Status::internal(format!("extract sandbox rootfs failed: {err}")))?; + + Ok(image_identity) + } + + fn resolved_sandbox_image(&self, sandbox: &Sandbox) -> Option { + requested_sandbox_image(sandbox) + .map(ToOwned::to_owned) + .or_else(|| { + let image = self.config.default_image.trim(); + (!image.is_empty()).then(|| image.to_string()) + }) + } + + async fn ensure_cached_image_rootfs_archive( + &self, + sandbox_id: &str, + image_ref: &str, + ) -> Result { + if let Some((docker, image_identity)) = self.resolve_local_docker_image(image_ref).await? 
{ + return self + .ensure_cached_local_image_rootfs_archive( + sandbox_id, + image_ref, + &docker, + &image_identity, + ) + .await; + } + + info!(image_ref = %image_ref, "vm driver: ensuring cached image rootfs archive (registry)"); + let reference = parse_registry_reference(image_ref)?; + let client = registry_client(); + let auth = registry_auth(image_ref)?; + info!(image_ref = %image_ref, "vm driver: authenticating with registry"); + client + .auth(&reference, &auth, RegistryOperation::Pull) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" + )) + })?; + info!(image_ref = %image_ref, "vm driver: fetching manifest digest"); + let image_identity = client + .fetch_manifest_digest(&reference, &auth) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to resolve vm sandbox image '{image_ref}': {err}" + )) + })?; + info!( + image_ref = %image_ref, + image_identity = %image_identity, + "vm driver: manifest digest resolved" + ); + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, &image_identity); + + // Mirror the K8s `Pulling` event so the CLI flips to the + // image-pull spinner with the image name as detail. We emit it + // for cache hits too and immediately follow with `Pulled` so the + // spinner step still advances cleanly. 
+ self.publish_platform_event( + sandbox_id.to_string(), + platform_event( + "vm", + "Normal", + "Pulling", + format!("Pulling image \"{image_ref}\""), + ), + ); + + if tokio::fs::metadata(&archive_path).await.is_ok() { + info!( + image_identity = %image_identity, + archive_path = %archive_path.display(), + "vm driver: image rootfs archive cache hit (no build needed)" + ); + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; + return Ok(image_identity); + } + + info!( + image_identity = %image_identity, + "vm driver: image rootfs archive cache miss, acquiring build lock" + ); + let _cache_guard = self.image_cache_lock.lock().await; + info!( + image_identity = %image_identity, + "vm driver: build lock acquired" + ); + if tokio::fs::metadata(&archive_path).await.is_ok() { + info!( + image_identity = %image_identity, + "vm driver: image rootfs archive cache hit after lock (built by another task)" + ); + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; + return Ok(image_identity); + } + + self.build_cached_registry_image_rootfs_archive( + sandbox_id, + &client, + &reference, + &auth, + image_ref, + &image_identity, + ) + .await?; + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; + Ok(image_identity) + } + + async fn resolve_local_docker_image( + &self, + image_ref: &str, + ) -> Result, Status> { + let required_local_image = is_openshell_local_build_image_ref(image_ref); + let docker = match Docker::connect_with_local_defaults() { + Ok(docker) => docker, + Err(err) if required_local_image => { + return Err(Status::failed_precondition(format!( + "failed to connect to local Docker daemon for locally built sandbox image '{image_ref}': {err}" + ))); + } + Err(err) => { + warn!( + image_ref = %image_ref, + error = %err, + "vm driver: local Docker daemon unavailable, falling back to registry" + ); + return Ok(None); + } + }; + + match docker.inspect_image(image_ref).await { + Ok(inspect) => { + if let 
Some(message) = local_docker_image_platform_mismatch( + image_ref, + inspect.os.as_deref(), + inspect.architecture.as_deref(), + ) { + if required_local_image { + return Err(Status::failed_precondition(message)); + } + warn!( + image_ref = %image_ref, + %message, + "vm driver: local Docker image platform mismatch, falling back to registry" + ); + return Ok(None); + } + + let image_identity = + inspect + .id + .filter(|id| !id.trim().is_empty()) + .ok_or_else(|| { + Status::failed_precondition(format!( + "local Docker image '{image_ref}' inspect response has no image ID" + )) + })?; + info!( + image_ref = %image_ref, + image_identity = %image_identity, + "vm driver: resolved image from local Docker daemon" + ); + Ok(Some((docker, image_identity))) + } + Err(err) if is_docker_not_found_error(&err) && required_local_image => { + Err(Status::failed_precondition(format!( + "locally built sandbox image '{image_ref}' is not present in the local Docker daemon" + ))) + } + Err(err) if is_docker_not_found_error(&err) => Ok(None), + Err(err) if required_local_image => Err(Status::failed_precondition(format!( + "failed to inspect locally built sandbox image '{image_ref}': {err}" + ))), + Err(err) => { + warn!( + image_ref = %image_ref, + error = %err, + "vm driver: local Docker image inspection failed, falling back to registry" + ); + Ok(None) + } + } + } + + async fn ensure_cached_local_image_rootfs_archive( + &self, + sandbox_id: &str, + image_ref: &str, + docker: &Docker, + image_identity: &str, + ) -> Result { + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); + + self.publish_platform_event( + sandbox_id.to_string(), + platform_event( + "vm", + "Normal", + "Pulling", + format!("Pulling image \"{image_ref}\""), + ), + ); + + if tokio::fs::metadata(&archive_path).await.is_ok() { + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; + return Ok(image_identity.to_string()); + } + + let _cache_guard = 
self.image_cache_lock.lock().await; + if tokio::fs::metadata(&archive_path).await.is_ok() { + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; + return Ok(image_identity.to_string()); + } + + self.build_cached_local_image_rootfs_archive(docker, image_ref, image_identity) + .await?; + self.publish_pulled_event(sandbox_id, image_ref, &archive_path) + .await; + Ok(image_identity.to_string()) + } + + async fn build_cached_local_image_rootfs_archive( + &self, + docker: &Docker, + image_ref: &str, + image_identity: &str, + ) -> Result<(), Status> { + let cache_dir = image_cache_dir(&self.config.state_dir, image_identity); + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); + let staging_dir = image_cache_staging_dir(&self.config.state_dir, image_identity); + let exported_rootfs = staging_dir.join(IMAGE_EXPORT_ROOTFS_ARCHIVE); + let prepared_rootfs = staging_dir.join("rootfs"); + let prepared_archive = staging_dir.join(IMAGE_CACHE_ROOTFS_ARCHIVE); + + tokio::fs::create_dir_all(image_cache_root_dir(&self.config.state_dir)) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + tokio::fs::create_dir_all(&cache_dir) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + + if tokio::fs::metadata(&staging_dir).await.is_ok() { + tokio::fs::remove_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!( + "remove stale image cache staging dir failed: {err}" + )) + })?; + } + tokio::fs::create_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!("create image cache staging dir failed: {err}")) + })?; + + if let Err(err) = + export_local_image_rootfs_to_path(docker, image_ref, &exported_rootfs).await + { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(err); + } + + let image_ref_owned = image_ref.to_string(); + let image_identity_owned = image_identity.to_string(); + let 
exported_rootfs_for_build = exported_rootfs.clone(); + let prepared_rootfs_for_build = prepared_rootfs.clone(); + let prepared_archive_for_build = prepared_archive.clone(); + let build_result = tokio::task::spawn_blocking(move || { + prepare_exported_rootfs_archive( + &image_ref_owned, + &image_identity_owned, + &exported_rootfs_for_build, + &prepared_rootfs_for_build, + &prepared_archive_for_build, + ) + }) + .await + .map_err(|err| Status::internal(format!("local image preparation panicked: {err}")))?; + + if let Err(err) = build_result { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(Status::failed_precondition(err)); + } + + if tokio::fs::metadata(&archive_path).await.is_ok() { + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Ok(()); + } + + tokio::fs::rename(&prepared_archive, &archive_path) + .await + .map_err(|err| Status::internal(format!("store cached image rootfs failed: {err}")))?; + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + Ok(()) + } + + async fn build_cached_registry_image_rootfs_archive( + &self, + sandbox_id: &str, + client: &OciClient, + reference: &Reference, + auth: &RegistryAuth, + image_ref: &str, + image_identity: &str, + ) -> Result<(), Status> { + let cache_dir = image_cache_dir(&self.config.state_dir, image_identity); + let archive_path = image_cache_rootfs_archive(&self.config.state_dir, image_identity); + let staging_dir = image_cache_staging_dir(&self.config.state_dir, image_identity); + let prepared_rootfs = staging_dir.join("rootfs"); + let prepared_archive = staging_dir.join(IMAGE_CACHE_ROOTFS_ARCHIVE); + + tokio::fs::create_dir_all(image_cache_root_dir(&self.config.state_dir)) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + tokio::fs::create_dir_all(&cache_dir) + .await + .map_err(|err| Status::internal(format!("create image cache dir failed: {err}")))?; + + if tokio::fs::metadata(&staging_dir).await.is_ok() { + 
tokio::fs::remove_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!( + "remove stale image cache staging dir failed: {err}" + )) + })?; + } + tokio::fs::create_dir_all(&staging_dir) + .await + .map_err(|err| { + Status::internal(format!("create image cache staging dir failed: {err}")) + })?; + + info!( + image_ref = %image_ref, + staging_dir = %staging_dir.display(), + "vm driver: pulling registry image layers" + ); + if let Err(err) = self + .pull_registry_image_rootfs( + sandbox_id, + client, + reference, + auth, + image_ref, + &staging_dir, + &prepared_rootfs, + ) + .await + { + warn!( + image_ref = %image_ref, + error = %err.message(), + "vm driver: pull_registry_image_rootfs failed" + ); + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(err); + } + info!( + image_ref = %image_ref, + "vm driver: image layers pulled, preparing rootfs archive" + ); + + let image_ref_owned = image_ref.to_string(); + let image_identity_owned = image_identity.to_string(); + let prepared_rootfs_for_build = prepared_rootfs.clone(); + let prepared_archive_for_build = prepared_archive.clone(); + let build_result = tokio::task::spawn_blocking(move || { + prepare_sandbox_rootfs_from_image_root( + &prepared_rootfs_for_build, + &image_identity_owned, + ) + .map_err(|err| { + format!("vm sandbox image '{image_ref_owned}' is not base-compatible: {err}") + })?; + create_rootfs_archive_from_dir(&prepared_rootfs_for_build, &prepared_archive_for_build) + }) + .await + .map_err(|err| Status::internal(format!("image rootfs preparation panicked: {err}")))?; + + if let Err(err) = build_result { + warn!( + image_ref = %image_ref, + error = %err, + "vm driver: rootfs archive build failed" + ); + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Err(Status::failed_precondition(err)); + } + + if tokio::fs::metadata(&archive_path).await.is_ok() { + info!( + image_identity = %image_identity, + "vm driver: another task wrote archive while we were 
building, discarding ours" + ); + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + return Ok(()); + } + + tokio::fs::rename(&prepared_archive, &archive_path) + .await + .map_err(|err| Status::internal(format!("store cached image rootfs failed: {err}")))?; + info!( + image_identity = %image_identity, + archive_path = %archive_path.display(), + "vm driver: image rootfs archive committed to cache" + ); + let _ = tokio::fs::remove_dir_all(&staging_dir).await; + Ok(()) + } + /// Watch the launcher child process and surface errors as driver /// conditions. /// @@ -891,11 +1468,6 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu } if let Some(template) = spec.template.as_ref() { - if !template.image.is_empty() { - return Err(Status::failed_precondition( - "vm sandboxes do not support template.image", - )); - } if !template.agent_socket_path.is_empty() { return Err(Status::failed_precondition( "vm sandboxes do not support template.agent_socket_path", @@ -915,58 +1487,685 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu Ok(()) } -fn merged_environment(sandbox: &Sandbox) -> HashMap { - let mut environment = sandbox - .spec - .as_ref() - .and_then(|spec| spec.template.as_ref()) - .map_or_else(HashMap::new, |template| template.environment.clone()); - if let Some(spec) = sandbox.spec.as_ref() { - environment.extend(spec.environment.clone()); - } - environment +#[allow(clippy::result_large_err)] +fn parse_registry_reference(image_ref: &str) -> Result { + Reference::try_from(image_ref).map_err(|err| { + Status::failed_precondition(format!( + "invalid vm sandbox image reference '{image_ref}': {err}" + )) + }) } -fn guest_visible_openshell_endpoint(endpoint: &str) -> String { - let Ok(mut url) = Url::parse(endpoint) else { - return endpoint.to_string(); - }; +fn is_openshell_local_build_image_ref(image_ref: &str) -> bool { + image_ref.starts_with("openshell/sandbox-from:") +} - let should_rewrite = match 
url.host() { - Some(Host::Ipv4(ip)) => ip.is_loopback(), - Some(Host::Ipv6(ip)) => ip.is_loopback(), - Some(Host::Domain(host)) => host.eq_ignore_ascii_case("localhost"), - None => false, - }; +fn local_docker_image_platform_mismatch( + image_ref: &str, + actual_os: Option<&str>, + actual_arch: Option<&str>, +) -> Option { + let actual_os = actual_os.unwrap_or("unknown"); + let actual_arch = actual_arch.unwrap_or("unknown"); + let expected_os = "linux"; + let expected_arch = linux_oci_arch(); + + (actual_os != expected_os || actual_arch != expected_arch).then(|| { + format!( + "local Docker image '{image_ref}' is {actual_os}/{actual_arch}, but VM sandboxes require {expected_os}/{expected_arch}" + ) + }) +} - if should_rewrite && url.set_host(Some(GVPROXY_GATEWAY_IP)).is_ok() { - return url.to_string(); +fn is_docker_not_found_error(err: &BollardError) -> bool { + matches!( + err, + BollardError::DockerResponseServerError { + status_code: 404, + .. + } + ) +} + +async fn export_local_image_rootfs_to_path( + docker: &Docker, + image_ref: &str, + tar_path: &Path, +) -> Result<(), Status> { + let container_name = format!( + "openshell-vm-rootfs-export-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() + ); + let create_options = CreateContainerOptionsBuilder::default() + .name(container_name.as_str()) + .build(); + let container = docker + .create_container( + Some(create_options), + ContainerCreateBody { + image: Some(image_ref.to_string()), + ..Default::default() + }, + ) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to create temporary export container for local Docker image '{image_ref}': {err}" + )) + })?; + let container_id = container.id; + + let export_result = async { + if let Some(parent) = tar_path.parent() { + tokio::fs::create_dir_all(parent).await.map_err(|err| { + Status::internal(format!( + "create export dir {} failed: {err}", + parent.display() + )) + })?; + 
} + let mut file = tokio::fs::File::create(tar_path).await.map_err(|err| { + Status::internal(format!("create {} failed: {err}", tar_path.display())) + })?; + let mut stream = docker.export_container(&container_id); + while let Some(chunk) = stream.next().await { + let chunk = chunk.map_err(|err| { + Status::failed_precondition(format!( + "failed to export local Docker image '{image_ref}': {err}" + )) + })?; + file.write_all(&chunk).await.map_err(|err| { + Status::internal(format!("write {} failed: {err}", tar_path.display())) + })?; + } + file.flush() + .await + .map_err(|err| Status::internal(format!("flush {} failed: {err}", tar_path.display()))) } + .await; - endpoint.to_string() + let cleanup_result = docker + .remove_container( + &container_id, + Some(RemoveContainerOptionsBuilder::default().force(true).build()), + ) + .await; + + match (export_result, cleanup_result) { + (Ok(()), Ok(())) => Ok(()), + (Err(err), _) => Err(err), + (Ok(()), Err(err)) => Err(Status::internal(format!( + "failed to remove temporary export container for local Docker image '{image_ref}': {err}" + ))), + } } -fn gateway_port_from_endpoint(endpoint: &str) -> Option { - Url::parse(endpoint).ok().and_then(|url| url.port()) +fn prepare_exported_rootfs_archive( + image_ref: &str, + image_identity: &str, + exported_rootfs: &Path, + prepared_rootfs: &Path, + prepared_archive: &Path, +) -> Result<(), String> { + extract_rootfs_archive_to(exported_rootfs, prepared_rootfs)?; + prepare_sandbox_rootfs_from_image_root(prepared_rootfs, image_identity) + .map_err(|err| format!("vm sandbox image '{image_ref}' is not base-compatible: {err}"))?; + create_rootfs_archive_from_dir(prepared_rootfs, prepared_archive) } -fn guest_visible_openshell_endpoint_for_tap(endpoint: &str, host_ip: &str) -> String { - let Ok(mut url) = Url::parse(endpoint) else { - return endpoint.to_string(); - }; - if url.set_host(Some(host_ip)).is_ok() { - url.to_string() - } else { - endpoint.to_string() +fn registry_client() -> 
OciClient { + OciClient::new(ClientConfig { + platform_resolver: Some(Box::new(linux_platform_resolver)), + ..Default::default() + }) +} + +fn linux_platform_resolver(manifests: &[ImageIndexEntry]) -> Option { + let expected_arch = linux_oci_arch(); + manifests + .iter() + .find_map(|entry| { + let platform = entry.platform.as_ref()?; + (platform.os.to_string() == "linux" + && platform.architecture.to_string() == expected_arch) + .then(|| entry.digest.clone()) + }) + .or_else(|| { + manifests.iter().find_map(|entry| { + let platform = entry.platform.as_ref()?; + (platform.os.to_string() == "linux").then(|| entry.digest.clone()) + }) + }) +} + +fn linux_oci_arch() -> &'static str { + match std::env::consts::ARCH { + "x86_64" => "amd64", + "aarch64" => "arm64", + "arm" => "arm", + other => other, } } -fn build_guest_environment( - sandbox: &Sandbox, - config: &VmDriverConfig, - endpoint_override: Option<&str>, -) -> Vec { - let openshell_endpoint = endpoint_override.map_or_else( +#[allow(clippy::result_large_err)] +fn registry_auth(image_ref: &str) -> Result { + let username = env_non_empty("OPENSHELL_REGISTRY_USERNAME"); + let token = env_non_empty("OPENSHELL_REGISTRY_TOKEN"); + + match token { + Some(token) => { + let username = match username { + Some(username) => username, + None if image_reference_registry_host(image_ref) + .eq_ignore_ascii_case("ghcr.io") => + { + "__token__".to_string() + } + None => { + return Err(Status::failed_precondition( + "OPENSHELL_REGISTRY_USERNAME is required when OPENSHELL_REGISTRY_TOKEN is set for non-GHCR registries", + )); + } + }; + Ok(RegistryAuth::Basic(username, token)) + } + None => Ok(RegistryAuth::Anonymous), + } +} + +fn env_non_empty(key: &str) -> Option { + std::env::var(key) + .ok() + .filter(|value| !value.trim().is_empty()) +} + +fn image_reference_registry_host(image_ref: &str) -> &str { + let mut parts = image_ref.splitn(2, '/'); + let first = parts.next().unwrap_or(image_ref); + let has_path = 
parts.next().is_some(); + if has_path + && (first.contains('.') || first.contains(':') || first.eq_ignore_ascii_case("localhost")) + { + first + } else { + "docker.io" + } +} + +impl VmDriver { + #[allow(clippy::too_many_arguments)] + async fn pull_registry_image_rootfs( + &self, + sandbox_id: &str, + client: &OciClient, + reference: &Reference, + auth: &RegistryAuth, + image_ref: &str, + staging_dir: &Path, + rootfs: &Path, + ) -> Result<(), Status> { + client + .auth(reference, auth, RegistryOperation::Pull) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to authenticate registry access for vm sandbox image '{image_ref}': {err}" + )) + })?; + let (manifest, _) = client + .pull_image_manifest(reference, auth) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to pull vm sandbox image manifest '{image_ref}': {err}" + )) + })?; + + tokio::fs::create_dir_all(rootfs) + .await + .map_err(|err| Status::internal(format!("create rootfs dir failed: {err}")))?; + tokio::fs::create_dir_all(staging_dir.join("layers")) + .await + .map_err(|err| Status::internal(format!("create layer staging dir failed: {err}")))?; + + let total_layers = manifest.layers.len(); + let total_bytes: i64 = manifest.layers.iter().map(|layer| layer.size.max(0)).sum(); + for (index, layer) in manifest.layers.iter().enumerate() { + // Emit a per-layer progress event so the CLI can show + // "Layer 3/8 (12.4 MB)" as detail under the spinner. 
+ let mut metadata = HashMap::new(); + metadata.insert("layer_index".to_string(), (index + 1).to_string()); + metadata.insert("layer_total".to_string(), total_layers.to_string()); + metadata.insert("layer_digest".to_string(), layer.digest.clone()); + metadata.insert("layer_size_bytes".to_string(), layer.size.to_string()); + metadata.insert("image_ref".to_string(), image_ref.to_string()); + if total_bytes > 0 { + metadata.insert("image_size_bytes".to_string(), total_bytes.to_string()); + } + let mut event = platform_event( + "vm", + "Normal", + "PullingLayer", + format!( + "Pulling layer {}/{} ({} bytes) for image \"{image_ref}\"", + index + 1, + total_layers, + layer.size + ), + ); + event.metadata = metadata; + self.publish_platform_event(sandbox_id.to_string(), event); + + pull_registry_layer( + client, + reference, + image_ref, + staging_dir, + rootfs, + layer, + index, + ) + .await?; + } + + Ok(()) + } + + /// Emit a `Pulled` platform event with a message that mirrors the + /// kubelet's `Successfully pulled image ... Image size: N bytes.` + /// format so the CLI's `extract_image_size` parser works unchanged. 
+ async fn publish_pulled_event(&self, sandbox_id: &str, image_ref: &str, archive_path: &Path) { + let size_suffix = tokio::fs::metadata(archive_path).await.map_or_else( + |_| String::new(), + |meta| format!(" Image size: {} bytes.", meta.len()), + ); + self.publish_platform_event( + sandbox_id.to_string(), + platform_event( + "vm", + "Normal", + "Pulled", + format!("Successfully pulled image \"{image_ref}\".{size_suffix}"), + ), + ); + } +} + +async fn pull_registry_layer( + client: &OciClient, + reference: &Reference, + image_ref: &str, + staging_dir: &Path, + rootfs: &Path, + layer: &OciDescriptor, + index: usize, +) -> Result<(), Status> { + let digest_component = sanitize_image_identity(&layer.digest); + let blob_path = staging_dir + .join("layers") + .join(format!("{index:02}-{digest_component}.blob")); + let layer_root = staging_dir + .join("layers") + .join(format!("{index:02}-{digest_component}.root")); + + let mut file = tokio::fs::File::create(&blob_path) + .await + .map_err(|err| Status::internal(format!("create layer blob failed: {err}")))?; + client + .pull_blob(reference, layer, &mut file) + .await + .map_err(|err| { + Status::failed_precondition(format!( + "failed to download layer '{}' for vm sandbox image '{image_ref}': {err}", + layer.digest + )) + })?; + file.flush() + .await + .map_err(|err| Status::internal(format!("flush layer blob failed: {err}")))?; + + let blob_path_for_digest = blob_path.clone(); + let expected_digest = layer.digest.clone(); + tokio::task::spawn_blocking(move || { + verify_descriptor_digest(&blob_path_for_digest, &expected_digest) + }) + .await + .map_err(|err| Status::internal(format!("layer digest verification panicked: {err}")))? 
+ .map_err(|err| { + Status::failed_precondition(format!( + "vm sandbox image layer verification failed for '{}': {err}", + layer.digest + )) + })?; + + let blob_path_for_unpack = blob_path.clone(); + let layer_root_for_unpack = layer_root.clone(); + let rootfs_for_unpack = rootfs.to_path_buf(); + let media_type = layer.media_type.clone(); + tokio::task::spawn_blocking(move || { + extract_layer_blob_to_dir(&blob_path_for_unpack, &media_type, &layer_root_for_unpack)?; + apply_layer_dir_to_rootfs(&layer_root_for_unpack, &rootfs_for_unpack) + }) + .await + .map_err(|err| Status::internal(format!("layer extraction panicked: {err}")))? + .map_err(|err| { + Status::failed_precondition(format!( + "failed to apply layer '{}' for vm sandbox image '{image_ref}': {err}", + layer.digest + )) + }) +} + +fn verify_descriptor_digest(path: &Path, expected_digest: &str) -> Result<(), String> { + let expected = expected_digest + .strip_prefix("sha256:") + .ok_or_else(|| format!("unsupported layer digest '{expected_digest}'"))?; + let actual = compute_file_sha256_hex(path)?; + if actual == expected { + Ok(()) + } else { + Err(format!( + "digest mismatch for {}: expected sha256:{expected}, got sha256:{actual}", + path.display() + )) + } +} + +fn compute_file_sha256_hex(path: &Path) -> Result { + let mut file = fs::File::open(path).map_err(|err| format!("open {}: {err}", path.display()))?; + let mut hasher = Sha256::new(); + let mut buffer = vec![0_u8; 64 * 1024].into_boxed_slice(); + loop { + let read = file + .read(&mut buffer) + .map_err(|err| format!("read {}: {err}", path.display()))?; + if read == 0 { + break; + } + hasher.update(&buffer[..read]); + } + Ok(format!("{:x}", hasher.finalize())) +} + +fn extract_layer_blob_to_dir( + blob_path: &Path, + media_type: &str, + dest: &Path, +) -> Result<(), String> { + if dest.exists() { + fs::remove_dir_all(dest).map_err(|err| format!("remove {}: {err}", dest.display()))?; + } + fs::create_dir_all(dest).map_err(|err| format!("create {}: 
{err}", dest.display()))?; + + let file = + fs::File::open(blob_path).map_err(|err| format!("open {}: {err}", blob_path.display()))?; + match layer_compression_from_media_type(media_type)? { + LayerCompression::None => extract_tar_reader_to_dir(file, dest), + LayerCompression::Gzip => extract_tar_reader_to_dir(GzDecoder::new(file), dest), + LayerCompression::Zstd => { + let decoder = zstd::stream::read::Decoder::new(file) + .map_err(|err| format!("decompress {}: {err}", blob_path.display()))?; + extract_tar_reader_to_dir(decoder, dest) + } + } +} + +fn extract_tar_reader_to_dir(reader: impl Read, dest: &Path) -> Result<(), String> { + let mut archive = tar::Archive::new(reader); + archive + .unpack(dest) + .map_err(|err| format!("extract layer into {}: {err}", dest.display())) +} + +// `media_type` is an OCI media type string (e.g. `application/vnd.oci.image.layer.v1.tar+gzip`), +// not a filesystem path, so case-sensitive comparison is correct. +#[allow(clippy::case_sensitive_file_extension_comparisons)] +fn layer_compression_from_media_type(media_type: &str) -> Result { + if media_type.is_empty() { + return Err("layer media type is missing".to_string()); + } + if media_type.ends_with("+zstd") { + return Ok(LayerCompression::Zstd); + } + if media_type.ends_with("+gzip") || media_type.ends_with(".gzip") { + return Ok(LayerCompression::Gzip); + } + if media_type.ends_with(".tar") + || media_type.ends_with("tar") + || media_type == "application/vnd.oci.image.layer.v1.tar" + || media_type == "application/vnd.oci.image.layer.nondistributable.v1.tar" + { + return Ok(LayerCompression::None); + } + Err(format!("unsupported layer media type '{media_type}'")) +} + +fn apply_layer_dir_to_rootfs(layer_root: &Path, rootfs: &Path) -> Result<(), String> { + merge_layer_directory(layer_root, rootfs) +} + +fn merge_layer_directory(source_dir: &Path, target_dir: &Path) -> Result<(), String> { + fs::create_dir_all(target_dir) + .map_err(|err| format!("create {}: {err}", 
target_dir.display()))?; + + let mut entries = fs::read_dir(source_dir) + .map_err(|err| format!("read {}: {err}", source_dir.display()))? + .collect::, _>>() + .map_err(|err| format!("read {}: {err}", source_dir.display()))?; + entries.sort_by_key(fs::DirEntry::file_name); + + if entries + .iter() + .any(|entry| entry.file_name().to_string_lossy() == ".wh..wh..opq") + { + clear_directory_contents(target_dir)?; + } + + for entry in entries { + let file_name = entry.file_name(); + let name = file_name.to_string_lossy(); + if name == ".wh..wh..opq" { + continue; + } + if let Some(hidden_name) = name.strip_prefix(".wh.") { + remove_path_if_exists(&target_dir.join(hidden_name))?; + continue; + } + + let source_path = entry.path(); + let dest_path = target_dir.join(&file_name); + let metadata = fs::symlink_metadata(&source_path) + .map_err(|err| format!("stat {}: {err}", source_path.display()))?; + let file_type = metadata.file_type(); + + if file_type.is_dir() { + if let Ok(dest_metadata) = fs::symlink_metadata(&dest_path) + && !dest_metadata.file_type().is_dir() + && !path_is_dir_or_symlink_to_dir(&dest_path)? + { + remove_path_if_exists(&dest_path)?; + } + fs::create_dir_all(&dest_path) + .map_err(|err| format!("create {}: {err}", dest_path.display()))?; + merge_layer_directory(&source_path, &dest_path)?; + if fs::symlink_metadata(&dest_path) + .map_err(|err| format!("stat {}: {err}", dest_path.display()))? 
+ .file_type() + .is_dir() + { + fs::set_permissions(&dest_path, metadata.permissions()) + .map_err(|err| format!("chmod {}: {err}", dest_path.display()))?; + } + } else if file_type.is_file() { + remove_path_if_exists(&dest_path)?; + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent) + .map_err(|err| format!("create {}: {err}", parent.display()))?; + } + fs::copy(&source_path, &dest_path).map_err(|err| { + format!( + "copy {} to {}: {err}", + source_path.display(), + dest_path.display() + ) + })?; + fs::set_permissions(&dest_path, metadata.permissions()) + .map_err(|err| format!("chmod {}: {err}", dest_path.display()))?; + } else if file_type.is_symlink() { + copy_symlink(&source_path, &dest_path)?; + } else { + return Err(format!( + "unsupported layer entry type at {}", + source_path.display() + )); + } + } + + Ok(()) +} + +fn path_is_dir_or_symlink_to_dir(path: &Path) -> Result { + match fs::metadata(path) { + Ok(metadata) => Ok(metadata.file_type().is_dir()), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), + Err(err) => Err(format!("stat {}: {err}", path.display())), + } +} + +fn clear_directory_contents(dir: &Path) -> Result<(), String> { + if !dir.exists() { + return Ok(()); + } + for entry in fs::read_dir(dir).map_err(|err| format!("read {}: {err}", dir.display()))? 
{ + let entry = entry.map_err(|err| format!("read {}: {err}", dir.display()))?; + remove_path_if_exists(&entry.path())?; + } + Ok(()) +} + +fn remove_path_if_exists(path: &Path) -> Result<(), String> { + let Ok(metadata) = fs::symlink_metadata(path) else { + return Ok(()); + }; + if metadata.file_type().is_dir() { + fs::remove_dir_all(path).map_err(|err| format!("remove {}: {err}", path.display())) + } else { + fs::remove_file(path).map_err(|err| format!("remove {}: {err}", path.display())) + } +} + +#[cfg(unix)] +fn copy_symlink(source_path: &Path, dest_path: &Path) -> Result<(), String> { + let target = fs::read_link(source_path) + .map_err(|err| format!("readlink {}: {err}", source_path.display()))?; + remove_path_if_exists(dest_path)?; + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?; + } + std::os::unix::fs::symlink(&target, dest_path).map_err(|err| { + format!( + "symlink {} to {}: {err}", + target.display(), + dest_path.display() + ) + }) +} + +#[cfg(not(unix))] +fn copy_symlink(_source_path: &Path, _dest_path: &Path) -> Result<(), String> { + Err("symlink layers are only supported on Unix hosts".to_string()) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum LayerCompression { + None, + Gzip, + Zstd, +} + +fn requested_sandbox_image(sandbox: &Sandbox) -> Option<&str> { + sandbox + .spec + .as_ref() + .and_then(|spec| spec.template.as_ref()) + .map(|template| template.image.trim()) + .filter(|image| !image.is_empty()) +} + +fn merged_environment(sandbox: &Sandbox) -> HashMap { + let mut environment = sandbox + .spec + .as_ref() + .and_then(|spec| spec.template.as_ref()) + .map_or_else(HashMap::new, |template| template.environment.clone()); + if let Some(spec) = sandbox.spec.as_ref() { + environment.extend(spec.environment.clone()); + } + environment +} + +/// Rewrites loopback host references in a gateway URL to a hostname the guest +/// can reach via gvproxy. 
+/// +/// The driver receives the gateway endpoint from `--openshell-endpoint`, which +/// in local/dev/e2e setups is typically `http://127.0.0.1:`. That URL is +/// useless inside the guest because the guest's loopback interface is its own, +/// not the host's. Inside the guest we need a name that gvproxy will translate +/// into the host's loopback address. +/// +/// We rewrite to `host.containers.internal`, which gvproxy's embedded DNS resolves +/// to the host-loopback IP `192.168.127.254`. gvproxy installs a default NAT entry +/// rewriting that destination to the host's `127.0.0.1` and dialing out from the +/// host process, so any port the host is listening on becomes reachable. The +/// gateway IP `192.168.127.1` does **not** do this — it only listens on gvproxy's +/// own service ports (DNS, DHCP, HTTP API). The guest init script also seeds the +/// hostname in `/etc/hosts` so resolution works even if gvproxy's DNS isn't in +/// resolv.conf (e.g. when DHCP fails). +/// +/// Non-loopback URLs are returned unchanged. 
+fn guest_visible_openshell_endpoint(endpoint: &str) -> String { + let Ok(mut url) = Url::parse(endpoint) else { + return endpoint.to_string(); + }; + + let should_rewrite = match url.host() { + Some(Host::Ipv4(ip)) => ip.is_loopback(), + Some(Host::Ipv6(ip)) => ip.is_loopback(), + Some(Host::Domain(host)) => host.eq_ignore_ascii_case("localhost"), + None => false, + }; + + if should_rewrite && url.set_host(Some(GVPROXY_HOST_LOOPBACK_ALIAS)).is_ok() { + return url.to_string(); + } + + endpoint.to_string() +} + +fn gateway_port_from_endpoint(endpoint: &str) -> Option { + Url::parse(endpoint).ok().and_then(|url| url.port()) +} + +fn guest_visible_openshell_endpoint_for_tap(endpoint: &str, host_ip: &str) -> String { + let Ok(mut url) = Url::parse(endpoint) else { + return endpoint.to_string(); + }; + if url.set_host(Some(host_ip)).is_ok() { + url.to_string() + } else { + endpoint.to_string() + } +} + +fn build_guest_environment( + sandbox: &Sandbox, + config: &VmDriverConfig, + endpoint_override: Option<&str>, +) -> Vec { + let openshell_endpoint = endpoint_override.map_or_else( || guest_visible_openshell_endpoint(&config.openshell_endpoint), String::from, ); @@ -1033,8 +2232,69 @@ fn sandbox_log_level(sandbox: &Sandbox, default_level: &str) -> String { .to_string() } +fn sandboxes_root_dir(root: &Path) -> PathBuf { + root.join("sandboxes") +} + fn sandbox_state_dir(root: &Path, sandbox_id: &str) -> PathBuf { - root.join("sandboxes").join(sandbox_id) + sandboxes_root_dir(root).join(sandbox_id) +} + +fn image_cache_root_dir(root: &Path) -> PathBuf { + root.join(IMAGE_CACHE_ROOT_DIR) +} + +fn image_cache_dir(root: &Path, image_identity: &str) -> PathBuf { + image_cache_root_dir(root).join(sanitize_image_identity(image_identity)) +} + +fn image_cache_rootfs_archive(root: &Path, image_identity: &str) -> PathBuf { + image_cache_dir(root, image_identity).join(IMAGE_CACHE_ROOTFS_ARCHIVE) +} + +fn image_cache_staging_dir(root: &Path, image_identity: &str) -> PathBuf { + 
image_cache_root_dir(root).join(format!( + "{}.staging-{}", + sanitize_image_identity(image_identity), + unique_image_cache_suffix() + )) +} + +fn sanitize_image_identity(image_identity: &str) -> String { + image_identity + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' || ch == '.' { + ch + } else { + '-' + } + }) + .collect() +} + +fn unique_image_cache_suffix() -> String { + let counter = IMAGE_CACHE_BUILD_COUNTER.fetch_add(1, Ordering::Relaxed); + format!("{}-{counter}", current_time_ms()) +} + +async fn write_sandbox_image_metadata( + state_dir: &Path, + image_ref: &str, + image_identity: &str, +) -> Result<(), std::io::Error> { + tokio::fs::write( + state_dir.join(IMAGE_IDENTITY_FILE), + format!("{image_identity}\n"), + ) + .await?; + tokio::fs::write( + state_dir.join(IMAGE_REFERENCE_FILE), + format!("{image_ref}\n"), + ) + .await?; + + Ok(()) } async fn prepare_guest_tls_materials( @@ -1056,7 +2316,7 @@ async fn copy_guest_tls_material( mode: u32, ) -> Result<(), std::io::Error> { tokio::fs::copy(source, dest).await?; - tokio::fs::set_permissions(dest, std::fs::Permissions::from_mode(mode)).await?; + tokio::fs::set_permissions(dest, fs::Permissions::from_mode(mode)).await?; Ok(()) } @@ -1169,6 +2429,7 @@ mod tests { DriverSandboxSpec as SandboxSpec, DriverSandboxTemplate as SandboxTemplate, }; use prost_types::{Struct, Value, value::Kind}; + use std::fs; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; use tonic::Code; @@ -1242,6 +2503,132 @@ mod tests { assert!(err.message().contains("platform_config")); } + #[test] + fn validate_vm_sandbox_accepts_template_image() { + let sandbox = Sandbox { + spec: Some(SandboxSpec { + template: Some(SandboxTemplate { + image: "ghcr.io/example/sandbox:latest".to_string(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + validate_vm_sandbox(&sandbox, false).expect("template.image should be accepted"); + } + + 
#[test] + fn capabilities_report_configured_default_image() { + let driver = VmDriver { + config: VmDriverConfig { + default_image: "openshell/sandbox:dev".to_string(), + ..Default::default() + }, + launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), + }; + + assert_eq!(driver.capabilities().default_image, "openshell/sandbox:dev"); + } + + #[test] + fn resolved_sandbox_image_prefers_template_image() { + let driver = VmDriver { + config: VmDriverConfig { + default_image: "openshell/sandbox:default".to_string(), + ..Default::default() + }, + launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), + }; + let sandbox = Sandbox { + spec: Some(SandboxSpec { + template: Some(SandboxTemplate { + image: "ghcr.io/example/custom:latest".to_string(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }; + + assert_eq!( + driver.resolved_sandbox_image(&sandbox).as_deref(), + Some("ghcr.io/example/custom:latest") + ); + } + + #[test] + fn resolved_sandbox_image_falls_back_to_driver_default() { + let driver = VmDriver { + config: VmDriverConfig { + default_image: "openshell/sandbox:default".to_string(), + ..Default::default() + }, + launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: 
Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), + }; + let sandbox = Sandbox { + spec: Some(SandboxSpec { + template: Some(SandboxTemplate::default()), + ..Default::default() + }), + ..Default::default() + }; + + assert_eq!( + driver.resolved_sandbox_image(&sandbox).as_deref(), + Some("openshell/sandbox:default") + ); + } + + #[test] + fn resolved_sandbox_image_returns_none_without_template_or_default() { + let driver = VmDriver { + config: VmDriverConfig::default(), + launcher_bin: PathBuf::from("/tmp/openshell-driver-vm"), + registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), + events: broadcast::channel(WATCH_BUFFER).0, + gpu_inventory: None, + subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( + Ipv4Addr::new(10, 0, 128, 0), + 17, + ))), + }; + let sandbox = Sandbox { + spec: Some(SandboxSpec { + template: Some(SandboxTemplate::default()), + ..Default::default() + }), + ..Default::default() + }; + + assert!(driver.resolved_sandbox_image(&sandbox).is_none()); + } + #[test] fn merged_environment_prefers_spec_values() { let sandbox = Sandbox { @@ -1280,7 +2667,7 @@ mod tests { let env = build_guest_environment(&sandbox, &config, None); assert!(env.contains(&"HOME=/root".to_string())); assert!(env.contains(&format!( - "OPENSHELL_ENDPOINT=http://{GVPROXY_GATEWAY_IP}:8080/" + "OPENSHELL_ENDPOINT=http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080/" ))); assert!(env.contains(&"OPENSHELL_SANDBOX_ID=sandbox-123".to_string())); assert!(env.contains(&format!( @@ -1322,18 +2709,18 @@ mod tests { } #[test] - fn guest_visible_openshell_endpoint_rewrites_loopback_hosts_to_gvproxy_gateway() { + fn guest_visible_openshell_endpoint_rewrites_loopback_hosts_to_gvproxy_host_alias() { assert_eq!( guest_visible_openshell_endpoint("http://127.0.0.1:8080"), - format!("http://{GVPROXY_GATEWAY_IP}:8080/") + format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080/") ); assert_eq!( 
guest_visible_openshell_endpoint("http://localhost:8080"), - format!("http://{GVPROXY_GATEWAY_IP}:8080/") + format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080/") ); assert_eq!( guest_visible_openshell_endpoint("https://[::1]:8443"), - format!("https://{GVPROXY_GATEWAY_IP}:8443/") + format!("https://{GVPROXY_HOST_LOOPBACK_ALIAS}:8443/") ); } @@ -1346,12 +2733,12 @@ mod tests { format!("http://{OPENSHELL_HOST_GATEWAY_ALIAS}:8080") ); assert_eq!( - guest_visible_openshell_endpoint("http://host.containers.internal:8080"), - "http://host.containers.internal:8080" + guest_visible_openshell_endpoint(&format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080")), + format!("http://{GVPROXY_HOST_LOOPBACK_ALIAS}:8080") ); assert_eq!( - guest_visible_openshell_endpoint(&format!("http://{GVPROXY_GATEWAY_IP}:8080")), - format!("http://{GVPROXY_GATEWAY_IP}:8080") + guest_visible_openshell_endpoint("http://192.168.127.1:8080"), + "http://192.168.127.1:8080" ); assert_eq!( guest_visible_openshell_endpoint("https://gateway.internal:8443"), @@ -1359,6 +2746,142 @@ mod tests { ); } + #[test] + fn image_reference_registry_host_defaults_to_docker_hub() { + assert_eq!(image_reference_registry_host("ubuntu:24.04"), "docker.io"); + assert_eq!( + image_reference_registry_host("library/ubuntu:24.04"), + "docker.io" + ); + assert_eq!( + image_reference_registry_host("ghcr.io/nvidia/openshell/base:latest"), + "ghcr.io" + ); + assert_eq!( + image_reference_registry_host("localhost/example:dev"), + "localhost" + ); + assert_eq!( + image_reference_registry_host("localhost:5000/example/sandbox:dev"), + "localhost:5000" + ); + } + + #[test] + fn openshell_local_build_image_ref_matches_cli_tags() { + assert!(is_openshell_local_build_image_ref( + "openshell/sandbox-from:123" + )); + assert!(!is_openshell_local_build_image_ref("ubuntu:24.04")); + assert!(!is_openshell_local_build_image_ref( + "ghcr.io/nvidia/openshell/base:latest" + )); + } + + #[test] + fn 
local_docker_image_platform_mismatch_checks_guest_platform() { + assert!( + local_docker_image_platform_mismatch( + "openshell/sandbox-from:123", + Some("linux"), + Some(linux_oci_arch()), + ) + .is_none() + ); + + let err = local_docker_image_platform_mismatch( + "openshell/sandbox-from:123", + Some("linux"), + Some("wrong-arch"), + ) + .expect("architecture mismatch should be reported"); + assert!(err.contains("wrong-arch")); + assert!(err.contains(linux_oci_arch())); + + let err = local_docker_image_platform_mismatch("openshell/sandbox-from:123", None, None) + .expect("unknown platform should be reported"); + assert!(err.contains("unknown/unknown")); + } + + #[test] + fn apply_layer_dir_to_rootfs_honors_whiteouts() { + let base = unique_temp_dir(); + let rootfs = base.join("rootfs"); + let layer = base.join("layer"); + + fs::create_dir_all(rootfs.join("dir")).unwrap(); + fs::write(rootfs.join("removed.txt"), "old").unwrap(); + fs::write(rootfs.join("dir/old.txt"), "old").unwrap(); + + fs::create_dir_all(layer.join("dir")).unwrap(); + fs::write(layer.join(".wh.removed.txt"), "").unwrap(); + fs::write(layer.join("dir/.wh..wh..opq"), "").unwrap(); + fs::write(layer.join("dir/new.txt"), "new").unwrap(); + + apply_layer_dir_to_rootfs(&layer, &rootfs).unwrap(); + + assert!(!rootfs.join("removed.txt").exists()); + assert!(!rootfs.join("dir/old.txt").exists()); + assert_eq!( + fs::read_to_string(rootfs.join("dir/new.txt")).unwrap(), + "new" + ); + + let _ = fs::remove_dir_all(base); + } + + #[test] + fn apply_layer_dir_to_rootfs_preserves_lower_symlink_dirs() { + let base = unique_temp_dir(); + let rootfs = base.join("rootfs"); + let layer = base.join("layer"); + + fs::create_dir_all(rootfs.join("usr/bin")).unwrap(); + fs::write(rootfs.join("usr/bin/bash"), "bash").unwrap(); + std::os::unix::fs::symlink("usr/bin", rootfs.join("bin")).unwrap(); + + fs::create_dir_all(layer.join("bin")).unwrap(); + fs::write(layer.join("bin/foo"), "foo").unwrap(); + + 
apply_layer_dir_to_rootfs(&layer, &rootfs).unwrap(); + + assert!( + fs::symlink_metadata(rootfs.join("bin")) + .unwrap() + .file_type() + .is_symlink(), + "lower /bin symlink should be preserved" + ); + assert_eq!( + fs::read_to_string(rootfs.join("usr/bin/bash")).unwrap(), + "bash" + ); + assert_eq!( + fs::read_to_string(rootfs.join("usr/bin/foo")).unwrap(), + "foo" + ); + + let _ = fs::remove_dir_all(base); + } + + #[test] + fn layer_compression_from_media_type_supports_common_formats() { + assert_eq!( + layer_compression_from_media_type("application/vnd.oci.image.layer.v1.tar").unwrap(), + LayerCompression::None + ); + assert_eq!( + layer_compression_from_media_type("application/vnd.oci.image.layer.v1.tar+gzip") + .unwrap(), + LayerCompression::Gzip + ); + assert_eq!( + layer_compression_from_media_type("application/vnd.oci.image.layer.v1.tar+zstd") + .unwrap(), + LayerCompression::Zstd + ); + } + #[test] fn build_guest_environment_includes_tls_paths_for_https_endpoint() { let config = VmDriverConfig { @@ -1401,6 +2924,7 @@ mod tests { config: VmDriverConfig::default(), launcher_bin: PathBuf::from("openshell-driver-vm"), registry: Arc::new(Mutex::new(HashMap::new())), + image_cache_lock: Arc::new(Mutex::new(())), events, gpu_inventory: None, subnet_allocator: Arc::new(std::sync::Mutex::new(SubnetAllocator::new( @@ -1472,7 +2996,7 @@ mod tests { fn validate_openshell_endpoint_accepts_host_gateway() { validate_openshell_endpoint("http://host.containers.internal:8080") .expect("guest-reachable host alias should be accepted"); - validate_openshell_endpoint(&format!("http://{GVPROXY_GATEWAY_IP}:8080")) + validate_openshell_endpoint("http://192.168.127.1:8080") .expect("gateway IP should be accepted"); validate_openshell_endpoint(&format!("http://{OPENSHELL_HOST_GATEWAY_ALIAS}:8080")) .expect("openshell host alias should be accepted"); @@ -1480,6 +3004,70 @@ mod tests { .expect("dns endpoint should be accepted"); } + #[test] + fn 
prepare_exported_rootfs_archive_rewrites_docker_exported_rootfs() { + let base = unique_temp_dir(); + let source_rootfs = base.join("source-rootfs"); + let exported_rootfs = base.join("exported-rootfs.tar"); + let prepared_rootfs = base.join("prepared-rootfs"); + let prepared_archive = base.join("prepared-rootfs.tar"); + let extracted = base.join("extracted"); + + for path in [ + "bin/bash", + "bin/mount", + "bin/sed", + "sbin/ip", + "opt/openshell/bin/openshell-sandbox", + "usr/local/bin/k3s", + ] { + let path = source_rootfs.join(path); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(path, "").unwrap(); + } + fs::create_dir_all(source_rootfs.join("opt/openshell/manifests")).unwrap(); + fs::write(source_rootfs.join("opt/openshell/manifests/old.yaml"), "").unwrap(); + + create_rootfs_archive_from_dir(&source_rootfs, &exported_rootfs).unwrap(); + prepare_exported_rootfs_archive( + "openshell/sandbox-from:123", + "sha256:local-image", + &exported_rootfs, + &prepared_rootfs, + &prepared_archive, + ) + .unwrap(); + extract_rootfs_archive_to(&prepared_archive, &extracted).unwrap(); + + assert!(extracted.join("srv/openshell-vm-sandbox-init.sh").is_file()); + assert!( + extracted + .join("opt/openshell/bin/openshell-sandbox") + .is_file() + ); + assert!(!extracted.join("usr/local/bin/k3s").exists()); + assert!(!extracted.join("opt/openshell/manifests").exists()); + assert_eq!( + fs::read_to_string(extracted.join("opt/openshell/.rootfs-type")).unwrap(), + "sandbox\n" + ); + assert!( + fs::read_to_string(extracted.join(".openshell-rootfs-variant")) + .unwrap() + .contains("sha256:local-image") + ); + + let _ = fs::remove_dir_all(base); + } + + #[test] + fn sanitize_image_identity_rewrites_path_separators() { + assert_eq!( + sanitize_image_identity("sha256:abc/def@ghi"), + "sha256-abc-def-ghi" + ); + } + #[tokio::test] async fn prepare_guest_tls_materials_copies_bundle_into_rootfs() { let base = unique_temp_dir(); diff --git 
a/crates/openshell-driver-vm/src/main.rs b/crates/openshell-driver-vm/src/main.rs index ca1842596..596e6c88d 100644 --- a/crates/openshell-driver-vm/src/main.rs +++ b/crates/openshell-driver-vm/src/main.rs @@ -62,6 +62,9 @@ struct Args { #[arg(long, env = "OPENSHELL_GRPC_ENDPOINT")] openshell_endpoint: Option, + #[arg(long, env = "OPENSHELL_SANDBOX_IMAGE", default_value = "")] + default_image: String, + #[arg( long, env = "OPENSHELL_VM_DRIVER_STATE_DIR", @@ -169,6 +172,7 @@ async fn main() -> Result<()> { .ok_or_else(|| miette::miette!("OPENSHELL_GRPC_ENDPOINT is required"))?, state_dir: args.state_dir, launcher_bin: None, + default_image: args.default_image, ssh_handshake_secret: args.ssh_handshake_secret.unwrap_or_default(), ssh_handshake_skew_secs: args.ssh_handshake_skew_secs, log_level: args.log_level, diff --git a/crates/openshell-driver-vm/src/rootfs.rs b/crates/openshell-driver-vm/src/rootfs.rs index 7c7fa19f4..4eeb28917 100644 --- a/crates/openshell-driver-vm/src/rootfs.rs +++ b/crates/openshell-driver-vm/src/rootfs.rs @@ -2,74 +2,138 @@ // SPDX-License-Identifier: Apache-2.0 use std::fs; -use std::io::Cursor; +use std::fs::File; +use std::io::{BufWriter, Cursor}; use std::path::Path; -const ROOTFS: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs.tar.zst")); -const ROOTFS_GPU: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/rootfs-gpu.tar.zst")); +const SUPERVISOR: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/openshell-sandbox.zst")); const ROOTFS_VARIANT_MARKER: &str = ".openshell-rootfs-variant"; const SANDBOX_GUEST_INIT_PATH: &str = "/srv/openshell-vm-sandbox-init.sh"; +const SANDBOX_SUPERVISOR_PATH: &str = "/opt/openshell/bin/openshell-sandbox"; pub const fn sandbox_guest_init_path() -> &'static str { SANDBOX_GUEST_INIT_PATH } -pub fn extract_sandbox_rootfs_to(dest: &Path) -> Result<(), String> { - extract_variant( - ROOTFS, - "sandbox", - "sandbox rootfs not embedded. 
Build openshell-driver-vm with OPENSHELL_VM_RUNTIME_COMPRESSED_DIR set or run `mise run vm:setup` first", - dest, - ) -} - -pub fn extract_gpu_sandbox_rootfs_to(dest: &Path) -> Result<(), String> { - extract_variant( - ROOTFS_GPU, - "sandbox-gpu", - "GPU sandbox rootfs not embedded. Build with `mise run vm:rootfs -- --gpu` first", - dest, +pub fn prepare_sandbox_rootfs_from_image_root( + rootfs: &Path, + image_identity: &str, +) -> Result<(), String> { + prepare_sandbox_rootfs(rootfs)?; + validate_sandbox_rootfs(rootfs)?; + fs::write( + rootfs.join(ROOTFS_VARIANT_MARKER), + format!("{}:image:{image_identity}\n", env!("CARGO_PKG_VERSION")), ) + .map_err(|e| format!("write rootfs variant marker: {e}"))?; + Ok(()) } -fn extract_variant(blob: &[u8], variant: &str, empty_msg: &str, dest: &Path) -> Result<(), String> { - if blob.is_empty() { - return Err(empty_msg.to_string()); - } - - let expected_marker = format!("{}:{variant}", env!("CARGO_PKG_VERSION")); - let marker_path = dest.join(ROOTFS_VARIANT_MARKER); - - if dest.is_dir() - && fs::read_to_string(&marker_path).is_ok_and(|value| value.trim() == expected_marker) - { - return Ok(()); - } - +pub fn extract_rootfs_archive_to(archive_path: &Path, dest: &Path) -> Result<(), String> { if dest.exists() { fs::remove_dir_all(dest) .map_err(|e| format!("remove old rootfs {}: {e}", dest.display()))?; } - unpack_zstd_tar(blob, variant, dest)?; - prepare_sandbox_rootfs(dest)?; - fs::write(marker_path, format!("{expected_marker}\n")) - .map_err(|e| format!("write rootfs variant marker: {e}"))?; - Ok(()) + fs::create_dir_all(dest).map_err(|e| format!("create rootfs dir {}: {e}", dest.display()))?; + let file = + File::open(archive_path).map_err(|e| format!("open {}: {e}", archive_path.display()))?; + let mut archive = tar::Archive::new(file); + archive + .unpack(dest) + .map_err(|e| format!("extract rootfs tarball into {}: {e}", dest.display())) } -fn unpack_zstd_tar(blob: &[u8], label: &str, dest: &Path) -> Result<(), String> { 
- fs::create_dir_all(dest).map_err(|e| format!("create rootfs dir {}: {e}", dest.display()))?; +pub fn create_rootfs_archive_from_dir(source: &Path, archive_path: &Path) -> Result<(), String> { + if let Some(parent) = archive_path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?; + } - let decoder = zstd::Decoder::new(Cursor::new(blob)) - .map_err(|e| format!("decompress {label} rootfs: {e}"))?; - let mut archive = tar::Archive::new(decoder); - archive.unpack(dest).map_err(|e| { + let file = File::create(archive_path) + .map_err(|e| format!("create {}: {e}", archive_path.display()))?; + let writer = BufWriter::new(file); + let mut builder = tar::Builder::new(writer); + append_rootfs_tree_to_archive(&mut builder, source, Path::new("")).map_err(|e| { format!( - "extract {label} rootfs tarball into {}: {e}", - dest.display() + "archive {} into {}: {e}", + source.display(), + archive_path.display() ) - }) + })?; + builder + .finish() + .map_err(|e| format!("finalize {}: {e}", archive_path.display())) +} + +fn append_rootfs_tree_to_archive( + builder: &mut tar::Builder>, + source: &Path, + archive_prefix: &Path, +) -> Result<(), String> { + let mut entries = fs::read_dir(source) + .map_err(|e| format!("read {}: {e}", source.display()))? 
+ .collect::, _>>() + .map_err(|e| format!("read {}: {e}", source.display()))?; + entries.sort_by_key(fs::DirEntry::file_name); + + for entry in entries { + let entry_name = entry.file_name(); + let source_path = entry.path(); + let archive_path = if archive_prefix.as_os_str().is_empty() { + entry_name.into() + } else { + archive_prefix.join(entry_name) + }; + let metadata = fs::symlink_metadata(&source_path) + .map_err(|e| format!("stat {}: {e}", source_path.display()))?; + let file_type = metadata.file_type(); + + if file_type.is_dir() { + builder + .append_dir(&archive_path, &source_path) + .map_err(|e| format!("append dir {}: {e}", source_path.display()))?; + append_rootfs_tree_to_archive(builder, &source_path, &archive_path)?; + continue; + } + + if file_type.is_file() { + let mut file = File::open(&source_path) + .map_err(|e| format!("open {}: {e}", source_path.display()))?; + builder + .append_file(&archive_path, &mut file) + .map_err(|e| format!("append file {}: {e}", source_path.display()))?; + continue; + } + + if file_type.is_symlink() { + append_symlink_to_archive(builder, &source_path, &archive_path, &metadata)?; + continue; + } + + return Err(format!( + "unsupported rootfs entry type at {}", + source_path.display() + )); + } + + Ok(()) +} + +fn append_symlink_to_archive( + builder: &mut tar::Builder>, + source_path: &Path, + archive_path: &Path, + metadata: &fs::Metadata, +) -> Result<(), String> { + let target = fs::read_link(source_path) + .map_err(|e| format!("readlink {}: {e}", source_path.display()))?; + let mut header = tar::Header::new_gnu(); + header.set_metadata(metadata); + header.set_size(0); + header.set_cksum(); + builder + .append_link(&mut header, archive_path, target) + .map_err(|e| format!("append symlink {}: {e}", source_path.display())) } fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { @@ -103,6 +167,8 @@ fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { .map_err(|e| format!("chmod {}: {e}", 
init_path.display()))?; } + ensure_supervisor_binary(rootfs)?; + let opt_dir = rootfs.join("opt/openshell"); fs::create_dir_all(&opt_dir).map_err(|e| format!("create {}: {e}", opt_dir.display()))?; fs::write(opt_dir.join(".rootfs-type"), "sandbox\n") @@ -114,6 +180,19 @@ fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { Ok(()) } +pub fn validate_sandbox_rootfs(rootfs: &Path) -> Result<(), String> { + require_rootfs_path(rootfs, SANDBOX_GUEST_INIT_PATH)?; + require_rootfs_path(rootfs, "/opt/openshell/bin/openshell-sandbox")?; + require_any_rootfs_path(rootfs, &["/bin/bash"])?; + require_any_rootfs_path(rootfs, &["/bin/mount", "/usr/bin/mount"])?; + require_any_rootfs_path( + rootfs, + &["/sbin/ip", "/usr/sbin/ip", "/bin/ip", "/usr/bin/ip"], + )?; + require_any_rootfs_path(rootfs, &["/bin/sed", "/usr/bin/sed"])?; + Ok(()) +} + fn ensure_sandbox_guest_user(rootfs: &Path) -> Result<(), String> { const SANDBOX_UID: u32 = 10001; const SANDBOX_GID: u32 = 10001; @@ -167,6 +246,62 @@ fn ensure_line_in_file( fs::write(path, contents).map_err(|e| format!("write {}: {e}", path.display())) } +fn ensure_supervisor_binary(rootfs: &Path) -> Result<(), String> { + let path = rootfs.join(SANDBOX_SUPERVISOR_PATH.trim_start_matches('/')); + if SUPERVISOR.is_empty() { + if !path.exists() { + return Err( + "sandbox supervisor not embedded. 
Build openshell-driver-vm with OPENSHELL_VM_RUNTIME_COMPRESSED_DIR set and run `mise run vm:setup && mise run vm:supervisor` first" + .to_string(), + ); + } + } else { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?; + } + + let supervisor = zstd::decode_all(Cursor::new(SUPERVISOR)) + .map_err(|e| format!("decompress supervisor: {e}"))?; + fs::write(&path, supervisor).map_err(|e| format!("write {}: {e}", path.display()))?; + } + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + fs::set_permissions(&path, fs::Permissions::from_mode(0o755)) + .map_err(|e| format!("chmod {}: {e}", path.display()))?; + } + + Ok(()) +} + +fn require_rootfs_path(rootfs: &Path, relative: &str) -> Result<(), String> { + let candidate = rootfs.join(relative.trim_start_matches('/')); + if candidate.exists() { + Ok(()) + } else { + Err(format!( + "prepared rootfs is missing {}", + candidate.display() + )) + } +} + +fn require_any_rootfs_path(rootfs: &Path, candidates: &[&str]) -> Result<(), String> { + if candidates + .iter() + .any(|candidate| rootfs.join(candidate.trim_start_matches('/')).exists()) + { + Ok(()) + } else { + Err(format!( + "prepared rootfs is missing one of: {}", + candidates.join(", ") + )) + } +} + fn remove_rootfs_path(rootfs: &Path, relative: &str) -> Result<(), String> { let path = rootfs.join(relative); if !path.exists() { @@ -198,9 +333,15 @@ mod tests { fs::create_dir_all(rootfs.join("var/lib/rancher")).expect("create var/lib/rancher"); fs::create_dir_all(rootfs.join("opt/openshell/charts")).expect("create charts"); fs::create_dir_all(rootfs.join("opt/openshell/manifests")).expect("create manifests"); + fs::create_dir_all(rootfs.join("opt/openshell/bin")).expect("create openshell bin"); fs::write(rootfs.join("usr/local/bin/k3s"), b"k3s").expect("write k3s"); fs::write(rootfs.join("usr/local/bin/kubectl"), b"kubectl").expect("write kubectl"); 
fs::write(rootfs.join("opt/openshell/.initialized"), b"yes").expect("write initialized"); + fs::write( + rootfs.join("opt/openshell/bin/openshell-sandbox"), + b"sandbox", + ) + .expect("write openshell-sandbox"); fs::write( rootfs.join("etc/passwd"), "root:x:0:0:root:/root:/bin/bash\n", @@ -208,8 +349,15 @@ mod tests { .expect("write passwd"); fs::write(rootfs.join("etc/group"), "root:x:0:\n").expect("write group"); fs::write(rootfs.join("etc/hosts"), "127.0.0.1 localhost\n").expect("write hosts"); + fs::create_dir_all(rootfs.join("bin")).expect("create bin"); + fs::create_dir_all(rootfs.join("sbin")).expect("create sbin"); + fs::write(rootfs.join("bin/bash"), b"bash").expect("write bash"); + fs::write(rootfs.join("bin/mount"), b"mount").expect("write mount"); + fs::write(rootfs.join("bin/sed"), b"sed").expect("write sed"); + fs::write(rootfs.join("sbin/ip"), b"ip").expect("write ip"); prepare_sandbox_rootfs(&rootfs).expect("prepare sandbox rootfs"); + validate_sandbox_rootfs(&rootfs).expect("validate sandbox rootfs"); assert!(!rootfs.join("usr/local/bin/k3s").exists()); assert!(!rootfs.join("usr/local/bin/kubectl").exists()); @@ -236,6 +384,37 @@ mod tests { let _ = fs::remove_dir_all(&dir); } + #[cfg(unix)] + #[test] + fn create_rootfs_archive_preserves_broken_symlinks() { + let dir = unique_temp_dir(); + let rootfs = dir.join("rootfs"); + let extracted = dir.join("extracted"); + let archive = dir.join("rootfs.tar"); + + fs::create_dir_all(rootfs.join("etc")).expect("create etc"); + fs::write(rootfs.join("etc/hosts"), "127.0.0.1 localhost\n").expect("write hosts"); + std::os::unix::fs::symlink("/proc/self/mounts", rootfs.join("etc/mtab")) + .expect("create symlink"); + + create_rootfs_archive_from_dir(&rootfs, &archive).expect("archive rootfs"); + extract_rootfs_archive_to(&archive, &extracted).expect("extract rootfs"); + + let extracted_link = extracted.join("etc/mtab"); + assert!( + fs::symlink_metadata(&extracted_link) + .unwrap() + .file_type() + 
.is_symlink() + ); + assert_eq!( + fs::read_link(&extracted_link).expect("read extracted symlink"), + PathBuf::from("/proc/self/mounts") + ); + + let _ = fs::remove_dir_all(&dir); + } + fn unique_temp_dir() -> PathBuf { static COUNTER: AtomicU64 = AtomicU64::new(0); let nanos = SystemTime::now() diff --git a/crates/openshell-driver-vm/src/runtime.rs b/crates/openshell-driver-vm/src/runtime.rs index c063da10b..a7c9afcea 100644 --- a/crates/openshell-driver-vm/src/runtime.rs +++ b/crates/openshell-driver-vm/src/runtime.rs @@ -705,13 +705,15 @@ fn run_libkrun_vm(config: &VmLaunchConfig) -> Result<(), String> { // talks to on boot (IPs 192.168.127.1 / .2, defaults for // gvisor-tap-vsock); // * the host-facing gateway identity the guest uses for callbacks: - // the init script seeds `/etc/hosts` with - // `host.openshell.internal` pointing at 192.168.127.1 while - // leaving gvproxy's legacy `host.containers.internal` / - // `host.docker.internal` DNS answers intact, which is how the guest's - // `rewrite_openshell_endpoint_if_needed` probe reaches the host - // gateway when the bare loopback address doesn't resolve from - // inside the VM. + // gvproxy installs a default NAT entry rewriting `192.168.127.254` + // (the subnet's HostIP) to the host's `127.0.0.1`, and serves + // `host.containers.internal` / `host.docker.internal` / + // `host.openshell.internal` in its embedded DNS pointing at that + // same HostIP. The guest init script seeds /etc/hosts with the + // same mapping so the supervisor reaches the host gateway even + // when gvproxy's DNS isn't in resolv.conf. The gateway IP + // (192.168.127.1) is NOT a host-loopback proxy — it only listens + // on its own service ports (DNS:53, DHCP, HTTP API:80). 
// // That network plane is also what the sandbox supervisor's // per-sandbox netns (veth pair + iptables, see diff --git a/crates/openshell-driver-vm/start.sh b/crates/openshell-driver-vm/start.sh deleted file mode 100755 index 0eb305c7d..000000000 --- a/crates/openshell-driver-vm/start.sh +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -# Under sudo, PATH is reset and user-local tools (mise, cargo) disappear. -# Restore the invoking user's tool directories so mise and its shims work. -if [ -n "${SUDO_USER:-}" ]; then - _sudo_home=$(getent passwd "${SUDO_USER}" | cut -d: -f6) - for _p in "${_sudo_home}/.local/bin" "${_sudo_home}/.local/share/mise/shims" "${_sudo_home}/.cargo/bin"; do - [ -d "${_p}" ] && PATH="${_p}:${PATH}" - done - export PATH -fi - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -CLI_BIN="${ROOT}/scripts/bin/openshell" -COMPRESSED_DIR="${ROOT}/target/vm-runtime-compressed" -SERVER_PORT="${OPENSHELL_SERVER_PORT:-8080}" -# Keep the driver socket path under AF_UNIX SUN_LEN on macOS. 
-STATE_DIR_ROOT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}" -STATE_LABEL_RAW="${OPENSHELL_VM_INSTANCE:-port-${SERVER_PORT}}" -STATE_LABEL="$(printf '%s' "${STATE_LABEL_RAW}" | tr -cs '[:alnum:]._-' '-')" -if [ -z "${STATE_LABEL}" ]; then - STATE_LABEL="port-${SERVER_PORT}" -fi -STATE_DIR_DEFAULT="${STATE_DIR_ROOT}/openshell-vm-driver-dev-${USER:-user}-${STATE_LABEL}" -STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${STATE_DIR_DEFAULT}}" -DB_PATH_DEFAULT="${STATE_DIR}/openshell.db" -VM_HOST_GATEWAY_DEFAULT="${OPENSHELL_VM_HOST_GATEWAY:-host.containers.internal}" -LOCAL_GATEWAY_ENDPOINT_DEFAULT="http://127.0.0.1:${SERVER_PORT}" -LOCAL_GATEWAY_ENDPOINT="${OPENSHELL_VM_LOCAL_GATEWAY_ENDPOINT:-${LOCAL_GATEWAY_ENDPOINT_DEFAULT}}" -GATEWAY_NAME_DEFAULT="vm-driver-${STATE_LABEL}" -GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-${GATEWAY_NAME_DEFAULT}}" -DRIVER_DIR_DEFAULT="${ROOT}/target/debug" -DRIVER_DIR="${OPENSHELL_DRIVER_DIR:-${DRIVER_DIR_DEFAULT}}" - -export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${COMPRESSED_DIR}}" - -for arg in "$@"; do - if [ "${arg}" = "--gpu" ]; then - export OPENSHELL_VM_GPU=true - break - fi -done - -mkdir -p "${STATE_DIR}" - -normalize_bool() { - case "${1,,}" in - 1|true|yes|on) echo "true" ;; - 0|false|no|off) echo "false" ;; - *) - echo "invalid boolean value '$1' (expected true/false, 1/0, yes/no, on/off)" >&2 - exit 1 - ;; - esac -} - -check_supervisor_cross_toolchain() { - # The sandbox supervisor inside the guest is always Linux. On non-Linux - # hosts (macOS) and on Linux hosts with a different arch than the guest, - # we cross-compile via cargo-zigbuild and need the matching rustup target. 
- local host_os host_arch guest_arch rust_target - host_os="$(uname -s)" - host_arch="$(uname -m)" - guest_arch="${GUEST_ARCH:-${host_arch}}" - case "${guest_arch}" in - arm64|aarch64) rust_target="aarch64-unknown-linux-gnu" ;; - x86_64|amd64) rust_target="x86_64-unknown-linux-gnu" ;; - *) return 0 ;; - esac - if [ "${host_os}" = "Linux" ] && [ "${host_arch}" = "${guest_arch}" ]; then - return 0 - fi - local missing=0 - if ! command -v cargo-zigbuild >/dev/null 2>&1; then - echo "ERROR: cargo-zigbuild not found (required to cross-compile the guest supervisor)." >&2 - echo " Install: cargo install --locked cargo-zigbuild && brew install zig" >&2 - missing=1 - fi - if ! rustup target list --installed 2>/dev/null | grep -qx "${rust_target}"; then - echo "ERROR: Rust target '${rust_target}' not installed." >&2 - echo " Install: rustup target add ${rust_target}" >&2 - missing=1 - fi - if [ "${missing}" -ne 0 ]; then - exit 1 - fi -} - -if [ ! -s "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}/rootfs.tar.zst" ]; then - check_supervisor_cross_toolchain - echo "==> Building base VM rootfs tarball" - mise run vm:rootfs -- --base -fi - -if [ "${OPENSHELL_VM_GPU:-}" = "true" ] && [ ! -s "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}/rootfs-gpu.tar.zst" ]; then - check_supervisor_cross_toolchain - echo "==> Building GPU VM rootfs tarball" - mise run vm:rootfs -- --gpu -fi - -if [ ! -s "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}/rootfs.tar.zst" ] || ! 
find "${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q .; then - echo "==> Preparing embedded VM runtime" - mise run vm:setup -fi - -echo "==> Building gateway and VM compute driver" -cargo build -p openshell-server -p openshell-driver-vm - -if [ "$(uname -s)" = "Darwin" ]; then - echo "==> Codesigning VM compute driver" - codesign \ - --entitlements "${ROOT}/crates/openshell-driver-vm/entitlements.plist" \ - --force \ - -s - \ - "${ROOT}/target/debug/openshell-driver-vm" -fi - -export OPENSHELL_DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}")" -export OPENSHELL_DB_URL="${OPENSHELL_DB_URL:-sqlite:${DB_PATH_DEFAULT}}" -export OPENSHELL_DRIVERS="${OPENSHELL_DRIVERS:-vm}" -export OPENSHELL_DRIVER_DIR="${DRIVER_DIR}" -export OPENSHELL_GRPC_ENDPOINT="${OPENSHELL_GRPC_ENDPOINT:-http://${VM_HOST_GATEWAY_DEFAULT}:${SERVER_PORT}}" -export OPENSHELL_SSH_GATEWAY_HOST="${OPENSHELL_SSH_GATEWAY_HOST:-127.0.0.1}" -export OPENSHELL_SSH_GATEWAY_PORT="${OPENSHELL_SSH_GATEWAY_PORT:-${SERVER_PORT}}" -export OPENSHELL_SSH_HANDSHAKE_SECRET="${OPENSHELL_SSH_HANDSHAKE_SECRET:-}" -export OPENSHELL_VM_DRIVER_STATE_DIR="${STATE_DIR}" - -# Resolve the VM runtime directory (contains vmlinux, virtiofsd, etc.) -# so the child --internal-run-vm process can find it under sudo. 
-if [ -z "${OPENSHELL_VM_RUNTIME_DIR:-}" ]; then - _candidate="${HOME}/.local/share/openshell/vm-runtime/0.0.0" - if [ -n "${SUDO_USER:-}" ]; then - _sudo_home=$(getent passwd "${SUDO_USER}" | cut -d: -f6) - _candidate="${_sudo_home}/.local/share/openshell/vm-runtime/0.0.0" - fi - if [ -f "${_candidate}/vmlinux" ]; then - export OPENSHELL_VM_RUNTIME_DIR="${_candidate}" - fi -fi - -echo "==> Registering gateway" -echo " Name: ${GATEWAY_NAME}" -echo " Endpoint: ${LOCAL_GATEWAY_ENDPOINT}" -echo " Driver: ${OPENSHELL_DRIVER_DIR}/openshell-driver-vm" - -# GPU passthrough requires root, but gateway config must be written to the -# real user's home directory — not /root/.config/openshell/. -# Unset XDG_CONFIG_HOME so the CLI falls back to $HOME/.config (sudo -u -# sets HOME correctly but may inherit XDG_CONFIG_HOME from the root env). -if [ -n "${SUDO_USER:-}" ]; then - sudo -u "${SUDO_USER}" env -u XDG_CONFIG_HOME "PATH=${PATH}" "${CLI_BIN}" gateway destroy --name "${GATEWAY_NAME}" 2>/dev/null || true - sudo -u "${SUDO_USER}" env -u XDG_CONFIG_HOME "PATH=${PATH}" "${CLI_BIN}" gateway add --name "${GATEWAY_NAME}" "${LOCAL_GATEWAY_ENDPOINT}" - sudo -u "${SUDO_USER}" env -u XDG_CONFIG_HOME "PATH=${PATH}" "${CLI_BIN}" gateway select "${GATEWAY_NAME}" -else - "${CLI_BIN}" gateway destroy --name "${GATEWAY_NAME}" 2>/dev/null || true - "${CLI_BIN}" gateway add --name "${GATEWAY_NAME}" "${LOCAL_GATEWAY_ENDPOINT}" - "${CLI_BIN}" gateway select "${GATEWAY_NAME}" -fi - -echo "==> Starting OpenShell server with VM compute driver" -exec "${ROOT}/target/debug/openshell-gateway" diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index ae90c8b34..a039b784e 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -402,6 +402,7 @@ async fn run_from_args(args: Args) -> Result<()> { let vm_config = VmComputeConfig { state_dir: args.vm_driver_state_dir, driver_dir: args.driver_dir, + default_image: 
config.sandbox_image.clone(), krun_log_level: args.vm_krun_log_level, vcpus: args.vm_vcpus, mem_mib: args.vm_mem_mib, diff --git a/crates/openshell-server/src/compute/vm.rs b/crates/openshell-server/src/compute/vm.rs index 73f0d1d22..e5b974f74 100644 --- a/crates/openshell-server/src/compute/vm.rs +++ b/crates/openshell-server/src/compute/vm.rs @@ -63,6 +63,9 @@ pub struct VmComputeConfig { /// falls back to its conventional install paths and sibling binary. pub driver_dir: Option, + /// Default sandbox image the driver should use when a request omits one. + pub default_image: String, + /// libkrun log level used by the VM driver helper. pub krun_log_level: u32, @@ -125,6 +128,7 @@ impl Default for VmComputeConfig { Self { state_dir: Self::default_state_dir(), driver_dir: None, + default_image: String::new(), krun_log_level: Self::default_krun_log_level(), vcpus: Self::default_vcpus(), mem_mib: Self::default_mem_mib(), @@ -304,9 +308,17 @@ pub async fn spawn( .arg("--openshell-endpoint") .arg(&config.grpc_endpoint); command.arg("--state-dir").arg(&vm_config.state_dir); - command - .arg("--ssh-handshake-secret") - .arg(&config.ssh_handshake_secret); + if !vm_config.default_image.trim().is_empty() { + command.arg("--default-image").arg(&vm_config.default_image); + } + // Only forward the handshake secret when one is configured. The VM + // driver does not consume it, but accepts it for parity with the + // Kubernetes/Podman drivers; passing an empty value is noise. 
+ if !config.ssh_handshake_secret.is_empty() { + command + .arg("--ssh-handshake-secret") + .arg(&config.ssh_handshake_secret); + } command .arg("--ssh-handshake-skew-secs") .arg(config.ssh_handshake_skew_secs.to_string()); diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 2a18e209a..0d4b7e605 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -154,6 +154,15 @@ pub async fn run_server( if database_url.is_empty() { return Err(Error::config("database_url is required")); } + let driver = configured_compute_driver(&config)?; + if config.ssh_handshake_secret.is_empty() + && !matches!(driver, ComputeDriverKind::Docker | ComputeDriverKind::Vm) + { + return Err(Error::config( + "ssh_handshake_secret is required. Set --ssh-handshake-secret or OPENSHELL_SSH_HANDSHAKE_SECRET", + )); + } + let store = Arc::new(Store::connect(database_url).await?); let oidc_cache = if let Some(ref oidc) = config.oidc { diff --git a/deploy/docker/Dockerfile.driver-vm-macos b/deploy/docker/Dockerfile.driver-vm-macos index 47fcbd3e1..f667653d0 100644 --- a/deploy/docker/Dockerfile.driver-vm-macos +++ b/deploy/docker/Dockerfile.driver-vm-macos @@ -8,7 +8,7 @@ # # openshell-driver-vm loads libkrun/libkrunfw at runtime via dlopen, so it # does NOT need Hypervisor.framework headers at build time. Pre-compressed -# runtime artifacts (libkrun, libkrunfw, gvproxy, rootfs) are injected via +# runtime artifacts (libkrun, libkrunfw, gvproxy, bundled supervisor) are injected via # the vm-runtime-compressed build context and embedded into the binary via # include_bytes!(). 
# diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index 4b83fb4bb..452ad761f 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -55,6 +55,10 @@ openshell sandbox create --from my-registry.example.com/my-image:latest The CLI resolves community names against the [OpenShell Community](https://github.com/NVIDIA/OpenShell-Community) catalog, pulls the bundled Dockerfile and policy, builds the image locally, and creates the sandbox. For the full catalog and how to contribute your own, refer to [Community Sandboxes](/sandboxes/community-sandboxes). +Local directories and Dockerfiles require a local gateway because the CLI builds +through the local Docker daemon. Use a registry image reference for remote +gateways. + ### Label a Sandbox Attach labels when you create a sandbox to track ownership, environment, or workflow grouping: diff --git a/e2e/rust/e2e-vm.sh b/e2e/rust/e2e-vm.sh index 5990d8db6..551f9b41e 100755 --- a/e2e/rust/e2e-vm.sh +++ b/e2e/rust/e2e-vm.sh @@ -167,13 +167,20 @@ echo "==> Starting openshell-gateway on 127.0.0.1:${HOST_PORT} (state: ${RUN_STA # `~/.local/libexec/openshell/openshell-driver-vm` when present # (install-vm.sh installs there), which silently shadows development # builds — a subtle source of stale-binary bugs in e2e runs. +# --grpc-endpoint is the URL the VM driver passes into each guest as +# OPENSHELL_ENDPOINT. The supervisor inside the VM dials this address. +# Use `host.containers.internal` rather than `127.0.0.1` so gvproxy's +# host-loopback proxy carries the connection — gvproxy's bare gateway IP +# (192.168.127.1) does NOT forward arbitrary host ports. The driver also +# rewrites loopback URLs to this hostname as a safety net, so this matches +# what the guest will actually see and aligns with `tasks/scripts/gateway-vm.sh`. 
"${GATEWAY_BIN}" \ --drivers vm \ --disable-tls \ --disable-gateway-auth \ --db-url 'sqlite::memory:' \ --port "${HOST_PORT}" \ - --grpc-endpoint "http://127.0.0.1:${HOST_PORT}" \ + --grpc-endpoint "http://host.containers.internal:${HOST_PORT}" \ --ssh-handshake-secret "${SSH_HANDSHAKE_SECRET}" \ --driver-dir "${ROOT}/target/debug" \ --vm-driver-state-dir "${RUN_STATE_DIR}" \ diff --git a/scripts/bin/openshell b/scripts/bin/openshell index 4a85332c6..0383d5b57 100755 --- a/scripts/bin/openshell +++ b/scripts/bin/openshell @@ -5,6 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" BINARY="$PROJECT_ROOT/target/debug/openshell" STATE_FILE="$PROJECT_ROOT/.cache/openshell-build.state" +CALLER_PWD="$PWD" # --------------------------------------------------------------------------- # Fingerprint-based rebuild check @@ -26,7 +27,10 @@ else current_head=$(git rev-parse HEAD 2>/dev/null || echo "unknown") # Collect dirty (modified, staged, untracked) files - mapfile -t changed_files < <( + changed_files=() + while IFS= read -r path; do + changed_files+=("$path") + done < <( { git diff --name-only 2>/dev/null git diff --name-only --cached 2>/dev/null @@ -118,7 +122,10 @@ if [[ "$needs_build" == "1" ]]; then cd "$PROJECT_ROOT" new_head=$(git rev-parse HEAD 2>/dev/null || echo "unknown") # Recompute fingerprint of remaining dirty files (build may not change them) - mapfile -t post_files < <( + post_files=() + while IFS= read -r path; do + post_files+=("$path") + done < <( { git diff --name-only 2>/dev/null git diff --name-only --cached 2>/dev/null @@ -165,4 +172,5 @@ fingerprint=${new_fingerprint} EOF fi +cd "$CALLER_PWD" exec "$BINARY" "$@" diff --git a/tasks/gateway.toml b/tasks/gateway.toml index 3f7a684d2..1df07cb95 100644 --- a/tasks/gateway.toml +++ b/tasks/gateway.toml @@ -6,3 +6,7 @@ ["gateway:docker"] description = "Run a standalone gateway with the bundled Docker compute driver" run = "bash 
tasks/scripts/gateway-docker.sh" + +["gateway:vm"] +description = "Run a standalone gateway with the bundled VM compute driver" +run = "bash tasks/scripts/gateway-vm.sh" diff --git a/tasks/scripts/gateway-vm.sh b/tasks/scripts/gateway-vm.sh new file mode 100755 index 000000000..ac047dba2 --- /dev/null +++ b/tasks/scripts/gateway-vm.sh @@ -0,0 +1,345 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Start a standalone openshell-gateway backed by the VM compute driver +# (openshell-driver-vm) for local manual testing. +# +# Invocation: +# mise run gateway:vm +# +# Defaults: +# - Plaintext HTTP on 127.0.0.1:18081 +# - Dedicated CLI gateway "vm-dev" +# - Persistent gateway state (SQLite DB) under .cache/gateway-vm +# - Per-sandbox VM driver state (rootfs + compute-driver.sock) under +# /tmp/openshell-vm-driver-- so the AF_UNIX socket +# path stays under macOS SUN_LEN +# +# Common overrides: +# OPENSHELL_SERVER_PORT=18091 mise run gateway:vm +# OPENSHELL_VM_GATEWAY_NAME=my-vm-gateway mise run gateway:vm +# OPENSHELL_SANDBOX_NAMESPACE=my-ns mise run gateway:vm +# OPENSHELL_SANDBOX_IMAGE=ghcr.io/... mise run gateway:vm +# mise run gateway:vm -- --gpu +# +# This script also writes ~/.config/openshell/active_gateway so the +# `openshell` CLI automatically targets this gateway in subsequent shells. +# No need to run `openshell gateway select`. Inside this repo you can +# override per-developer with OPENSHELL_GATEWAY in `.env` (mise loads it). +# An explicit `--gateway` / `--gateway-endpoint` flag still wins. + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +PORT="${OPENSHELL_SERVER_PORT:-18081}" +GATEWAY_NAME="${OPENSHELL_VM_GATEWAY_NAME:-vm-dev}" +STATE_DIR="${OPENSHELL_VM_GATEWAY_STATE_DIR:-${ROOT}/.cache/gateway-vm}" +SANDBOX_NAMESPACE="${OPENSHELL_SANDBOX_NAMESPACE:-vm-dev}" +SANDBOX_IMAGE="${OPENSHELL_SANDBOX_IMAGE:-${COMMUNITY_SANDBOX_IMAGE:-ghcr.io/nvidia/openshell-community/sandboxes/base:latest}}" +SANDBOX_IMAGE_PULL_POLICY="${OPENSHELL_SANDBOX_IMAGE_PULL_POLICY:-IfNotPresent}" +LOG_LEVEL="${OPENSHELL_LOG_LEVEL:-info}" +GATEWAY_BIN="${ROOT}/target/debug/openshell-gateway" +DRIVER_DIR_DEFAULT="${ROOT}/target/debug" +DRIVER_DIR="${OPENSHELL_DRIVER_DIR:-${DRIVER_DIR_DEFAULT}}" +COMPRESSED_DIR_DEFAULT="${ROOT}/target/vm-runtime-compressed" +COMPRESSED_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${COMPRESSED_DIR_DEFAULT}}" +VM_HOST_GATEWAY_DEFAULT="${OPENSHELL_VM_HOST_GATEWAY:-host.containers.internal}" +GRPC_ENDPOINT="${OPENSHELL_GRPC_ENDPOINT:-http://${VM_HOST_GATEWAY_DEFAULT}:${PORT}}" + +normalize_arch() { + case "$1" in + x86_64|amd64) echo "amd64" ;; + aarch64|arm64) echo "arm64" ;; + *) echo "$1" ;; + esac +} + +normalize_bool() { + case "${1,,}" in + 1|true|yes|on) echo "true" ;; + 0|false|no|off) echo "false" ;; + *) + echo "ERROR: invalid boolean value '$1' (expected true/false, 1/0, yes/no, on/off)" >&2 + exit 2 + ;; + esac +} + +port_is_in_use() { + local port=$1 + if command -v lsof >/dev/null 2>&1; then + lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1 + return $? + fi + if command -v nc >/dev/null 2>&1; then + nc -z 127.0.0.1 "${port}" >/dev/null 2>&1 + return $? 
+ fi + (echo >/dev/tcp/127.0.0.1/"${port}") >/dev/null 2>&1 +} + +invoking_user() { + if [ -n "${SUDO_USER:-}" ] && [ "${SUDO_USER}" != "root" ]; then + printf '%s\n' "${SUDO_USER}" + else + id -un + fi +} + +invoking_user_home() { + local user=$1 + local home + if [ "${user}" = "$(id -un)" ]; then + printf '%s\n' "${HOME}" + return + fi + if command -v getent >/dev/null 2>&1; then + home="$(getent passwd "${user}" | cut -d: -f6)" + if [ -n "${home}" ]; then + printf '%s\n' "${home}" + return + fi + fi + if command -v dscl >/dev/null 2>&1; then + home="$(dscl . -read "/Users/${user}" NFSHomeDirectory 2>/dev/null | awk '{print $2}')" + if [ -n "${home}" ]; then + printf '%s\n' "${home}" + return + fi + fi + if [ "$(uname -s)" = "Darwin" ]; then + printf '/Users/%s\n' "${user}" + else + printf '/home/%s\n' "${user}" + fi +} + +gateway_config_home() { + local user home + user="$(invoking_user)" + if [ -n "${SUDO_USER:-}" ] && [ "${user}" != "$(id -un)" ]; then + home="$(invoking_user_home "${user}")" + printf '%s\n' "${home}/.config" + else + printf '%s\n' "${XDG_CONFIG_HOME:-${HOME}/.config}" + fi +} + +chown_invoking_user() { + if [ -n "${SUDO_UID:-}" ] && [ -n "${SUDO_GID:-}" ]; then + chown -R "${SUDO_UID}:${SUDO_GID}" "$@" 2>/dev/null || true + fi +} + +register_gateway_metadata() { + local name=$1 + local endpoint=$2 + local port=$3 + local vm_driver_state_dir=$4 + local config_home gateway_dir + + config_home="$(gateway_config_home)" + gateway_dir="${config_home}/openshell/gateways/${name}" + + mkdir -p "${gateway_dir}" + chmod 700 "${gateway_dir}" 2>/dev/null || true + cat >"${gateway_dir}/metadata.json" </dev/null || true + chown_invoking_user "${config_home}/openshell" +} + +# Mirror what `openshell gateway select ` does: write the gateway name +# to $XDG_CONFIG_HOME/openshell/active_gateway. The CLI picks it up as the +# default target when neither --gateway nor OPENSHELL_GATEWAY is set. 
+save_active_gateway() {
+  # Writes gateway name $1 (no trailing newline) to
+  # <config home>/openshell/active_gateway, then returns ownership of the
+  # openshell config tree to the invoking user.
+  local name=$1
+  local config_home active_gateway_path
+  config_home="$(gateway_config_home)"
+  active_gateway_path="${config_home}/openshell/active_gateway"
+  mkdir -p "$(dirname "${active_gateway_path}")"
+  printf '%s' "${name}" >"${active_gateway_path}"
+  chown_invoking_user "${config_home}/openshell"
+}
+
+check_supervisor_cross_toolchain() {
+  # The sandbox supervisor inside the guest is always Linux. On non-Linux
+  # hosts (macOS) and on Linux hosts with a different arch than the guest,
+  # `mise run vm:supervisor` cross-compiles via cargo-zigbuild and needs
+  # the matching rustup target installed.
+  #
+  # Returns 0 when no cross toolchain is needed or the guest arch is not
+  # recognized. Otherwise reports every missing prerequisite and exits 1.
+  local host_os host_arch guest_arch rust_target
+  host_os="$(uname -s)"
+  # Normalize both sides so alias spellings (x86_64/amd64, aarch64/arm64)
+  # compare equal in the native-build check below.
+  host_arch="$(normalize_arch "$(uname -m)")"
+  guest_arch="$(normalize_arch "${GUEST_ARCH:-${host_arch}}")"
+  case "${guest_arch}" in
+    arm64|aarch64) rust_target="aarch64-unknown-linux-gnu" ;;
+    x86_64|amd64) rust_target="x86_64-unknown-linux-gnu" ;;
+    *) return 0 ;;
+  esac
+  if [ "${host_os}" = "Linux" ] && [ "${host_arch}" = "${guest_arch}" ]; then
+    return 0
+  fi
+  local missing=0
+  if ! command -v cargo-zigbuild >/dev/null 2>&1; then
+    echo "ERROR: cargo-zigbuild not found (required to cross-compile the guest supervisor)." >&2
+    echo " Install: cargo install --locked cargo-zigbuild && brew install zig" >&2
+    missing=1
+  fi
+  if ! rustup target list --installed 2>/dev/null | grep -qx "${rust_target}"; then
+    echo "ERROR: Rust target '${rust_target}' not installed." >&2
+    echo " Install: rustup target add ${rust_target}" >&2
+    missing=1
+  fi
+  if [ "${missing}" -ne 0 ]; then
+    exit 1
+  fi
+}
+
+# GPU mode defaults from OPENSHELL_VM_GPU; --gpu on the command line turns it on.
+VM_GPU="$(normalize_bool "${OPENSHELL_VM_GPU:-false}")"
+
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --gpu)
+      VM_GPU="true"
+      shift
+      ;;
+    --gpu-mem-mib)
+      if [ "$#" -lt 2 ]; then
+        echo "ERROR: --gpu-mem-mib requires a value" >&2
+        exit 2
+      fi
+      export OPENSHELL_VM_GPU_MEM_MIB="$2"
+      shift 2
+      ;;
+    --gpu-vcpus)
+      if [ "$#" -lt 2 ]; then
+        echo "ERROR: --gpu-vcpus requires a value" >&2
+        exit 2
+      fi
+      export OPENSHELL_VM_GPU_VCPUS="$2"
+      shift 2
+      ;;
+    -h|--help)
+      echo "Usage: mise run gateway:vm -- [--gpu] [--gpu-mem-mib MIB] [--gpu-vcpus N]"
+      exit 0
+      ;;
+    *)
+      echo "ERROR: unknown gateway-vm option '$1'" >&2
+      exit 2
+      ;;
+  esac
+done
+
+# Export OPENSHELL_VM_GPU only when GPU mode is on; otherwise remove it from
+# the environment.
+if [ "${VM_GPU}" = "true" ]; then
+  export OPENSHELL_VM_GPU="true"
+else
+  unset OPENSHELL_VM_GPU
+fi
+
+if [[ ! "${GATEWAY_NAME}" =~ ^[A-Za-z0-9._-]+$ ]]; then
+  echo "ERROR: OPENSHELL_VM_GATEWAY_NAME must contain only letters, numbers, dots, underscores, or dashes" >&2
+  exit 2
+fi
+
+# Fail before any build work if the chosen port is already taken.
+if port_is_in_use "${PORT}"; then
+  echo "ERROR: port ${PORT} is already in use; free it or set OPENSHELL_SERVER_PORT" >&2
+  exit 2
+fi
+
+# AF_UNIX SUN_LEN on macOS is 104 bytes; the VM driver places
+# `compute-driver.sock` directly under VM_DRIVER_STATE_DIR, so anchor it
+# under /tmp instead of `${ROOT}/.cache` (which is typically too long on
+# macOS dev boxes with worktree paths).
+STATE_LABEL="$(printf '%s' "${GATEWAY_NAME}" | tr -cs '[:alnum:]._-' '-')"
+if [ -z "${STATE_LABEL}" ]; then
+  STATE_LABEL="vm-dev"
+fi
+VM_DRIVER_STATE_DIR_DEFAULT="${OPENSHELL_VM_DRIVER_STATE_ROOT:-/tmp}/openshell-vm-driver-${USER:-user}-${STATE_LABEL}"
+VM_DRIVER_STATE_DIR="${OPENSHELL_VM_DRIVER_STATE_DIR:-${VM_DRIVER_STATE_DIR_DEFAULT}}"
+
+DISABLE_TLS="$(normalize_bool "${OPENSHELL_DISABLE_TLS:-true}")"
+
+# Build prerequisites: VM runtime artifacts + bundled supervisor.
+if [ ! -d "${COMPRESSED_DIR}" ] \
+  || ! find "${COMPRESSED_DIR}" -maxdepth 1 -name 'libkrun*.zst' | grep -q . \
+  || [ ! -f "${COMPRESSED_DIR}/gvproxy.zst" ]; then
+  echo "==> Preparing embedded VM runtime (mise run vm:setup)"
+  mise run vm:setup
+fi
+
+if [ ! -f "${COMPRESSED_DIR}/openshell-sandbox.zst" ]; then
+  check_supervisor_cross_toolchain
+  echo "==> Building bundled VM supervisor (mise run vm:supervisor)"
+  mise run vm:supervisor
+fi
+
+export OPENSHELL_VM_RUNTIME_COMPRESSED_DIR="${COMPRESSED_DIR}"
+
+CARGO_BUILD_JOBS_ARG=()
+if [[ -n "${CARGO_BUILD_JOBS:-}" ]]; then
+  CARGO_BUILD_JOBS_ARG=(-j "${CARGO_BUILD_JOBS}")
+fi
+
+echo "==> Building openshell-gateway and openshell-driver-vm"
+# `${arr[@]+...}` expands to nothing for an empty array, which stays safe on
+# older bash even when nounset is enabled.
+cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \
+  -p openshell-server -p openshell-driver-vm
+
+if [ "$(uname -s)" = "Darwin" ]; then
+  echo "==> Codesigning openshell-driver-vm (Hypervisor entitlement)"
+  codesign \
+    --entitlements "${ROOT}/crates/openshell-driver-vm/entitlements.plist" \
+    --force \
+    -s - \
+    "${DRIVER_DIR}/openshell-driver-vm"
+fi
+
+mkdir -p "${STATE_DIR}"
+mkdir -p "${VM_DRIVER_STATE_DIR}"
+
+# Register the gateway under the CLI config directory and mark it active.
+GATEWAY_ENDPOINT="http://127.0.0.1:${PORT}"
+register_gateway_metadata "${GATEWAY_NAME}" "${GATEWAY_ENDPOINT}" "${PORT}" "${VM_DRIVER_STATE_DIR}"
+save_active_gateway "${GATEWAY_NAME}"
+
+echo "Starting standalone VM gateway..."
+echo " gateway: ${GATEWAY_NAME}"
+echo " endpoint: ${GATEWAY_ENDPOINT}"
+echo " namespace: ${SANDBOX_NAMESPACE}"
+echo " state dir: ${STATE_DIR}"
+echo " driver: ${DRIVER_DIR}/openshell-driver-vm"
+echo " driver dir: ${VM_DRIVER_STATE_DIR}"
+echo " gpu: ${VM_GPU}"
+echo " image: ${SANDBOX_IMAGE}"
+echo
+echo "Active gateway set to '${GATEWAY_NAME}'. The CLI now targets this gateway"
+echo "by default — just run \`openshell \`. Override with --gateway"
+echo "or by setting OPENSHELL_GATEWAY (e.g. in .env)."
+echo
+
+GATEWAY_ARGS=(
+  --port "${PORT}"
+  --log-level "${LOG_LEVEL}"
+  --drivers vm
+  --db-url "sqlite:${STATE_DIR}/gateway.db?mode=rwc"
+  --sandbox-namespace "${SANDBOX_NAMESPACE}"
+  --sandbox-image "${SANDBOX_IMAGE}"
+  --sandbox-image-pull-policy "${SANDBOX_IMAGE_PULL_POLICY}"
+  --grpc-endpoint "${GRPC_ENDPOINT}"
+  --driver-dir "${DRIVER_DIR}"
+  --vm-driver-state-dir "${VM_DRIVER_STATE_DIR}"
+)
+
+if [ "${DISABLE_TLS}" = "true" ]; then
+  GATEWAY_ARGS+=(--disable-tls)
+fi
+
+exec "${GATEWAY_BIN}" "${GATEWAY_ARGS[@]}"
diff --git a/tasks/scripts/vm/build-supervisor-bundle.sh b/tasks/scripts/vm/build-supervisor-bundle.sh
new file mode 100755
index 000000000..90f5b517d
--- /dev/null
+++ b/tasks/scripts/vm/build-supervisor-bundle.sh
@@ -0,0 +1,217 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Build the openshell-sandbox supervisor for a Linux guest target and write a
+# zstd-compressed copy to the VM runtime bundle directory.
+#
+# Usage: build-supervisor-bundle.sh [--arch aarch64|x86_64]
+#   --arch  Guest architecture (default: derived from `uname -m`).
+# Environment:
+#   OPENSHELL_VM_RUNTIME_COMPRESSED_DIR  output directory
+#       (default: <repo root>/target/vm-runtime-compressed)
+#   OPENSHELL_VM_BUILD_NOFILE_LIMIT      desired open-file limit (default: 8192)
+#   RUSTC_WRAPPER                        dropped for one retry on wrapper/fd errors
+
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+OUTPUT_DIR="${OPENSHELL_VM_RUNTIME_COMPRESSED_DIR:-${ROOT}/target/vm-runtime-compressed}"
+
+GUEST_ARCH=""
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --arch)
+      # Fail with a clear message instead of tripping `set -u` on "$2".
+      if [[ $# -lt 2 ]]; then
+        echo "ERROR: --arch requires a value" >&2
+        exit 1
+      fi
+      GUEST_ARCH="$2"
+      shift 2
+      ;;
+    --arch=*)
+      GUEST_ARCH="${1#--arch=}"
+      shift
+      ;;
+    --help|-h)
+      echo "Usage: $0 [--arch aarch64|x86_64]"
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# No --arch given: default the guest architecture to the host's.
+if [ -z "${GUEST_ARCH}" ]; then
+  case "$(uname -m)" in
+    aarch64|arm64) GUEST_ARCH="aarch64" ;;
+    x86_64|amd64) GUEST_ARCH="x86_64" ;;
+    *)
+      echo "ERROR: Unsupported host architecture: $(uname -m)" >&2
+      echo " Use --arch aarch64 or --arch x86_64 to override." >&2
+      exit 1
+      ;;
+  esac
+fi
+
+# Map the guest architecture (either spelling) to its Rust target triple.
+case "${GUEST_ARCH}" in
+  aarch64|arm64)
+    RUST_TARGET="aarch64-unknown-linux-gnu"
+    ;;
+  x86_64|amd64)
+    RUST_TARGET="x86_64-unknown-linux-gnu"
+    ;;
+  *)
+    echo "ERROR: Unsupported guest architecture: ${GUEST_ARCH}" >&2
+    echo " Supported: aarch64, x86_64" >&2
+    exit 1
+    ;;
+esac
+
+SUPERVISOR_BIN="${ROOT}/target/${RUST_TARGET}/release/openshell-sandbox"
+SUPERVISOR_OUTPUT="${OUTPUT_DIR}/openshell-sandbox.zst"
+
+# On macOS with cargo-zigbuild installed, try to raise the soft open-file limit
+# to OPENSHELL_VM_BUILD_NOFILE_LIMIT (default 8192), capped at the hard limit.
+# Warns when the limit stays below the desired value and exits 1 when it stays
+# below 1024. No-op elsewhere or when the current limit is not numeric.
+ensure_build_nofile_limit() {
+  local desired="${OPENSHELL_VM_BUILD_NOFILE_LIMIT:-8192}"
+  local minimum=1024
+  local current=""
+  local hard=""
+  local target=""
+
+  [ "$(uname -s)" = "Darwin" ] || return 0
+  command -v cargo-zigbuild >/dev/null 2>&1 || return 0
+
+  current="$(ulimit -n 2>/dev/null || echo "")"
+  case "${current}" in
+    ''|*[!0-9]*)
+      return 0
+      ;;
+  esac
+
+  if [ "${current}" -ge "${desired}" ]; then
+    return 0
+  fi
+
+  # Cap the requested limit at the hard limit when that limit is numeric.
+  hard="$(ulimit -Hn 2>/dev/null || echo "")"
+  target="${desired}"
+  case "${hard}" in
+    ''|unlimited|infinity)
+      ;;
+    *[!0-9]*)
+      ;;
+    *)
+      if [ "${hard}" -lt "${target}" ]; then
+        target="${hard}"
+      fi
+      ;;
+  esac
+
+  if [ "${target}" -gt "${current}" ] && ulimit -n "${target}" 2>/dev/null; then
+    echo "==> Raised open file limit for cargo-zigbuild: ${current} -> $(ulimit -n)"
+  fi
+
+  # Re-read the limit: the raise above may have failed or been skipped.
+  current="$(ulimit -n 2>/dev/null || echo "${current}")"
+  case "${current}" in
+    ''|*[!0-9]*)
+      return 0
+      ;;
+  esac
+
+  if [ "${current}" -lt "${desired}" ]; then
+    echo "WARNING: Open file limit is ${current}; cargo-zigbuild is more reliable at ${desired}+ on macOS." >&2
+  fi
+
+  if [ "${current}" -lt "${minimum}" ]; then
+    echo "ERROR: Open file limit (${current}) is too low for cargo-zigbuild on macOS." >&2
+    echo " Run: ulimit -n ${desired}" >&2
+    echo " Then re-run this script." >&2
+    exit 1
+  fi
+}
+
+echo "==> Building openshell-sandbox supervisor bundle"
+echo " Guest arch: ${GUEST_ARCH}"
+echo " Rust target: ${RUST_TARGET}"
+echo " Output: ${SUPERVISOR_OUTPUT}"
+
+mkdir -p "${OUTPUT_DIR}"
+ensure_build_nofile_limit
+
+SUPERVISOR_BUILD_LOG="$(mktemp -t openshell-supervisor-build.XXXXXX.log)"
+
+# Build openshell-sandbox in release mode for RUST_TARGET, preferring
+# cargo-zigbuild and falling back to plain cargo build.
+#   $1  "without-rustc-wrapper" runs cargo with RUSTC_WRAPPER unset; any other
+#       value (default "default") leaves the environment untouched.
+run_supervisor_build() {
+  local rustc_wrapper_mode="${1:-default}"
+  local cargo_prefix=()
+
+  if [ "${rustc_wrapper_mode}" = "without-rustc-wrapper" ]; then
+    cargo_prefix=(env -u RUSTC_WRAPPER)
+  fi
+
+  # `${arr[@]+...}` keeps an empty array from tripping `set -u` on bash < 4.4.
+  if command -v cargo-zigbuild >/dev/null 2>&1; then
+    ${cargo_prefix[@]+"${cargo_prefix[@]}"} cargo zigbuild --release -p openshell-sandbox --target "${RUST_TARGET}" \
+      --manifest-path "${ROOT}/Cargo.toml"
+  else
+    echo " cargo-zigbuild not found, falling back to cargo build..."
+    ${cargo_prefix[@]+"${cargo_prefix[@]}"} cargo build --release -p openshell-sandbox --target "${RUST_TARGET}" \
+      --manifest-path "${ROOT}/Cargo.toml"
+  fi
+}
+
+# Dump the captured build log to stderr and report where it is kept.
+print_build_failure() {
+  echo "ERROR: supervisor build failed. Full output:" >&2
+  cat "${SUPERVISOR_BUILD_LOG}" >&2
+  echo " (log saved at ${SUPERVISOR_BUILD_LOG})" >&2
+}
+
+# Capture build output in the log; on success show only its tail and delete it.
+if run_supervisor_build >"${SUPERVISOR_BUILD_LOG}" 2>&1; then
+  tail -5 "${SUPERVISOR_BUILD_LOG}"
+  rm -f "${SUPERVISOR_BUILD_LOG}"
+else
+  status=$?
+  # Retry once without RUSTC_WRAPPER when the log shows an sccache or
+  # file-descriptor exhaustion failure.
+  if [ -n "${RUSTC_WRAPPER:-}" ] && grep -Eq 'sccache: encountered fatal error|Too many open files|os error 24' "${SUPERVISOR_BUILD_LOG}"; then
+    echo "WARNING: supervisor build failed through RUSTC_WRAPPER=${RUSTC_WRAPPER}; retrying without RUSTC_WRAPPER." >&2
+    : >"${SUPERVISOR_BUILD_LOG}"
+    if run_supervisor_build without-rustc-wrapper >"${SUPERVISOR_BUILD_LOG}" 2>&1; then
+      tail -5 "${SUPERVISOR_BUILD_LOG}"
+      rm -f "${SUPERVISOR_BUILD_LOG}"
+    else
+      status=$?
+      print_build_failure
+      exit "${status}"
+    fi
+  else
+    print_build_failure
+    exit "${status}"
+  fi
+fi
+
+if [ ! -f "${SUPERVISOR_BIN}" ]; then
+  echo "ERROR: supervisor binary not found at ${SUPERVISOR_BIN}" >&2
+  exit 1
+fi
+
+zstd -19 -T0 -f "${SUPERVISOR_BIN}" -o "${SUPERVISOR_OUTPUT}"
+
+echo "==> Bundled supervisor ready"
+echo " Binary: $(du -sh "${SUPERVISOR_BIN}" | cut -f1)"
+echo " Compressed: $(du -sh "${SUPERVISOR_OUTPUT}" | cut -f1)"
diff --git a/tasks/scripts/vm/vm-setup.sh b/tasks/scripts/vm/vm-setup.sh
index bccb7f754..7563819b9 100755
--- a/tasks/scripts/vm/vm-setup.sh
+++ b/tasks/scripts/vm/vm-setup.sh
@@ -21,6 +21,7 @@ set -euo pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/_lib.sh"
 ROOT="$(vm_lib_root)"
+CLI_BIN="${ROOT}/scripts/bin/openshell"
 
 FROM_SOURCE="${FROM_SOURCE:-0}"
 
@@ -126,6 +127,6 @@ echo ""
 echo "==> Setup complete!"
 echo " Compressed artifacts in: ${OUTPUT_DIR}"
 echo ""
-echo "Next steps:"
-echo " mise run vm:rootfs --base # build rootfs (requires Docker)"
-echo " mise run gateway:vm # start openshell-gateway with the VM driver"
+echo "After starting the gateway:"
+echo " ${CLI_BIN} status"
+echo " ${CLI_BIN} sandbox create --name vm-test --from ubuntu:24.04"
diff --git a/tasks/vm.toml b/tasks/vm.toml
index 2549f230f..e9eb22561 100644
--- a/tasks/vm.toml
+++ b/tasks/vm.toml
@@ -5,21 +5,20 @@
 #
 # Workflow:
 # mise run vm:setup # one-time: download pre-built runtime (~30s)
+# mise run vm:supervisor # build the bundled sandbox supervisor
 # mise run gateway:vm # start openshell-gateway with the VM driver
+# # (defined in tasks/gateway.toml)
 # mise run vm # build + run the standalone openshell-vm microVM
 # mise run vm:clean # wipe everything and start over
 #
-# See crates/openshell-driver-vm/README.md for the `gateway:vm` flow and
+# See tasks/gateway.toml for `gateway:vm`,
+# crates/openshell-driver-vm/README.md for the VM driver workflow, and
 # crates/openshell-vm/README.md for the standalone microVM path.
# ═══════════════════════════════════════════════════════════════════════════ # Main Commands # ═══════════════════════════════════════════════════════════════════════════ -["gateway:vm"] -description = "Build openshell-gateway + openshell-driver-vm and start the gateway with the VM driver (pass -- --gpu for GPU support)" -run = "crates/openshell-driver-vm/start.sh" - [vm] description = "Build and run the standalone openshell-vm microVM" depends = ["build:docker:gateway"] @@ -38,6 +37,10 @@ run = [ description = "One-time setup: download (or build) the VM runtime" run = "tasks/scripts/vm/vm-setup.sh" +["vm:supervisor"] +description = "Build the bundled openshell-sandbox supervisor for openshell-driver-vm" +run = "tasks/scripts/vm/build-supervisor-bundle.sh" + ["vm:rootfs"] description = "Build the VM rootfs tarball (use -- --base for lightweight)" run = "tasks/scripts/vm/build-rootfs-tarball.sh"