diff --git a/Cargo.lock b/Cargo.lock index 683d6aa..606a9af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2374,8 +2374,6 @@ dependencies = [ [[package]] name = "vhost-user-backend" version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "783587813a59c42c36519a6e12bb31eb2d7fa517377428252ba4cc2312584243" dependencies = [ "libc 0.2.182", "log", diff --git a/Cargo.toml b/Cargo.toml index 1a2afb5..5285114 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,3 +54,7 @@ missing_safety_doc = "deny" undocumented_unsafe_blocks = "deny" option_if_let_else = "allow" cloned_ref_to_slice_refs = "allow" + + +[patch.crates-io] +vhost-user-backend = { path = "vhost-user-backend-patched" } diff --git a/vhost-user-backend-patched/.cargo-ok b/vhost-user-backend-patched/.cargo-ok new file mode 100644 index 0000000..5f8b795 --- /dev/null +++ b/vhost-user-backend-patched/.cargo-ok @@ -0,0 +1 @@ +{"v":1} \ No newline at end of file diff --git a/vhost-user-backend-patched/.cargo_vcs_info.json b/vhost-user-backend-patched/.cargo_vcs_info.json new file mode 100644 index 0000000..c0c6393 --- /dev/null +++ b/vhost-user-backend-patched/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "52b8d40b310e4199e742d52c9c29a944e4eeb7ad" + }, + "path_in_vcs": "vhost-user-backend" +} \ No newline at end of file diff --git a/vhost-user-backend-patched/CHANGELOG.md b/vhost-user-backend-patched/CHANGELOG.md new file mode 100644 index 0000000..d1ede25 --- /dev/null +++ b/vhost-user-backend-patched/CHANGELOG.md @@ -0,0 +1,204 @@ +# Changelog + +## [Unreleased] + +### Added +### Changed +### Deprecated +### Fixed + +## v0.21.0 + +### Changed +- [[#308]](https://github.com/rust-vmm/vhost/pull/308) Replace Eventfd with EventNotifier/EventConsumer. +- [[#321]](https://github.com/rust-vmm/vhost/pull/321) Don't take ownership of listener in `VhostUserDaemon::start`. 
+- [[#333]](https://github.com/rust-vmm/vhost/pull/333) Updated virtio-queue to 0.17.0, vm-memory to 0.17.1, vmm-sys-util to 0.15.0 and vhost to 0.15.0 + +## v0.20.0 + +### Changed +- [[306]](https://github.com/rust-vmm/vhost/pull/306) Updated virtio-queue to v0.16.0 and virtio-bindings to v0.2.6 + +## v0.19.0 + +### Changed +- [[299]](https://github.com/rust-vmm/vhost/pull/299) Updated virtio-queue to v0.15.0 and virtio-bindings to v0.2.5 + +## v0.18.0 + +### Added +- [[#268]](https://github.com/rust-vmm/vhost/pull/268) Add support for `VHOST_USER_GET_SHARED_OBJECT` + +### Changed +- [[#295]](https://github.com/rust-vmm/vhost/pull/295) Updated vm-memory to 0.16.2 and vmm-sys-util to 0.14.0 + +## v0.17.0 + +### Added +- [[#266]](https://github.com/rust-vmm/vhost/pull/266) Add support for `VHOST_USER_RESET_DEVICE` + +### Changed +- [[#269]](https://github.com/rust-vmm/vhost/pull/269) Update vm-memory to 0.16.0 and virtio-queue to 0.13.0 + +## v0.16.1 + +### Fixed +- [[#267]](https://github.com/rust-vmm/vhost/pull/267) Fix feature unification issues with gpu-socket feature. + +## v0.16.0 - yanked + +This version got yanked because the `gpu_socket` feature introduced in this +release was causing problems +(see [#265](https://github.com/rust-vmm/vhost/issues/265)). +Starting with the next version (v0.16.1), the `gpu_socket` feature was removed. + +### Added +- [[#241]](https://github.com/rust-vmm/vhost/pull/241) Add shared objects support +- [[#239]](https://github.com/rust-vmm/vhost/pull/239) Add support for `VHOST_USER_GPU_SET_SOCKET` + +### Changed +- [[#257]](https://github.com/rust-vmm/vhost/pull/257) Update virtio-queue version from 0.12.0 to 0.13.0 and vm-memory from 0.14.0 to 0.15.0. 
+- [[#240]](https://github.com/rust-vmm/vhost/pull/240) Move the set of event_idx property from set_vring_base callback to set_features one + +## v0.15.0 + +### Changed +- [[#237]](https://github.com/rust-vmm/vhost/pull/237) Update virtio-queue dependency to 0.12.0 + +## v0.14.0 + +### Added +- [[#203]](https://github.com/rust-vmm/vhost/pull/203) Add back-end's internal state migration support +- [[#218]](https://github.com/rust-vmm/vhost/pull/218) Adding POSTCOPY support +- [[#206]](https://github.com/rust-vmm/vhost/pull/206) Add bitmap support for tracking dirty pages during migration + +## v0.13.1 + +### Fixed + +- [[#227]](https://github.com/rust-vmm/vhost/pull/227) vhost-user-backend: Fix SET_VRING_KICK should not disable the vring + +## v0.13.0 + +### Changed +- [[#224]](https://github.com/rust-vmm/vhost/pull/224) vhost-user-backend: bump up MAX_MEM_SLOTS to 509 + +## v0.12.0 + +### Fixed +- [[#210]](https://github.com/rust-vmm/vhost/pull/210) Enable all vrings upon receipt of `VHOST_USER_SET_FEATURES` + message. +- [[#212]](https://github.com/rust-vmm/vhost/pull/212) Validate queue index in `VhostUserHandler::set_vring_base` + to avoid potential out-of-bounds panic. + +### Changed +- [[#214]](https://github.com/rust-vmm/vhost/pull/214) Avoid indexing the same Vec multiple times by locally caching the + result of `Vec:get`. +- [[#219]](https://github.com/rust-vmm/vhost/pull/219) Update vmm-sys-util dependency to 0.12.1 and vm-memory dependency to 0.14.0. + +## v0.11.0 + +### Added +- [[#173]](https://github.com/rust-vmm/vhost/pull/173) vhost-user-backend: Added convenience function `serve` + +### Changed +- [[#187]](https://github.com/rust-vmm/vhost/pull/187) Clean master slave + - Replaced master/slave with frontend/backend in the codebase and public API. 
+- [[#192]](https://github.com/rust-vmm/vhost/pull/192) vhost-user-backend: remove return value from handle_event +- [[#155]](https://github.com/rust-vmm/vhost/pull/155) Converted generic type + parameters of VhostUserBackend into associated types. +- [[#116]](https://github.com/rust-vmm/vhost/pull/116) Upgrade to 2021 edition + +## v0.10.1 + +### Fixed +- [[#180]](https://github.com/rust-vmm/vhost/pull/180) vhost-user-backend: fetch 'used' index from guest + +## v0.10.0 + +### Added +- [[#169]](https://github.com/rust-vmm/vhost/pull/160) vhost-user-backend: Add support for Xen memory mappings + +### Fixed +- [[#161]](https://github.com/rust-vmm/vhost/pull/161) get_vring_base should not reset the queue + +## v0.9.0 + +### Added +- [[#138]](https://github.com/rust-vmm/vhost/pull/138): vhost-user-backend: add repository metadata + +### Changed +- Updated dependency virtio-bindings 0.1.0 -> 0.2.0 +- Updated dependency virtio-queue 0.7.0 -> 0.8.0 +- Updated dependency vm-memory 0.10.0 -> 0.11.0 + +### Fixed +- [[#154]](https://github.com/rust-vmm/vhost/pull/154): Fix return value of GET_VRING_BASE message +- [[#142]](https://github.com/rust-vmm/vhost/pull/142): vhost_user: Slave requests aren't only FS specific + +## v0.8.0 + +### Added +- [[#120]](https://github.com/rust-vmm/vhost/pull/120): vhost_kern: vdpa: Add missing ioctls + +### Changed +- Updated dependency vhost 0.5 -> 0.6 +- Updated dependency virtio-queue 0.6 -> 0.7.0 +- Updated depepdency vm-memory 0.9 to 0.10.0 +- Updated depepdency vmm-sys-util 0.10 to 0.11.0 + +## v0.7.0 + +### Changed + +- Started using caret dependencies +- Updated dependency nix 0.24 -> 0.25 +- Updated depepdency log 0.4.6 -> 0.4.17 +- Updated dependency vhost 0.4 -> 0.5 +- Updated dependency virtio-queue 0.5.0 -> 0.6 +- Updated dependency vm-memory 0.7 -> 0.9 + +## v0.6.0 + +### Changed + +- Moved to rust-vmm/virtio-queue v0.5.0 + +### Fixed + +- Fixed vring initialization logic + +## v0.5.1 + +### Changed +- Moved to 
rust-vmm/vmm-sys-util 0.10.0 + +## v0.5.0 + +### Changed + +- Moved to rust-vmm/virtio-queue v0.4.0 + +## v0.4.0 + +### Changed + +- Moved to rust-vmm/virtio-queue v0.3.0 +- Relaxed rust-vmm/vm-memory dependency to require ">=0.7" + +## v0.3.0 + +### Changed + +- Moved to rust-vmm/vhost v0.4.0 + +## v0.2.0 + +### Added + +- Ability to run the daemon as a client +- VringEpollHandler implements AsRawFd + +## v0.1.0 + +First release diff --git a/vhost-user-backend-patched/Cargo.lock b/vhost-user-backend-patched/Cargo.lock new file mode 100644 index 0000000..790bd97 --- /dev/null +++ b/vhost-user-backend-patched/Cargo.lock @@ -0,0 +1,673 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags 2.10.0", + "cexpr", + "clang-sys", + "itertools", + "lazy_static", + "lazycell", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = 
"bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "cc" +version = "1.2.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "js-sys" +version = "0.3.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "libc", +] + +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" 
+version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "userfaultfd" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3a8a0cb358f7d1b7ee9b6784be122b6f51248a6d9e214d555beb9b44c72aea" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "libc", + "nix 0.27.1", + "thiserror 1.0.69", + "userfaultfd-sys", +] + +[[package]] +name = "userfaultfd-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc91d95a797a81604af22946d0e86656f27feb0b9665c60665cf3554df12d1a8" +dependencies = [ + "bindgen", + "cc", + "cfg-if", +] + +[[package]] +name = "uuid" +version = "1.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +dependencies = [ + "getrandom", + "js-sys", + "rand", + "wasm-bindgen", +] + +[[package]] +name = "vhost" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c76d90ce3c6b37d610a5304c9a445cfff580cf8b4b9fd02fb256aaf68552c28a" +dependencies = [ + "bitflags 2.10.0", + "libc", + "uuid", + "vm-memory", + "vmm-sys-util", +] + 
+[[package]] +name = "vhost-user-backend" +version = "0.21.0" +dependencies = [ + "libc", + "log", + "nix 0.30.1", + "tempfile", + "userfaultfd", + "uuid", + "vhost", + "virtio-bindings", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "virtio-bindings" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804f498a26d5a63be7bbb8bdcd3869c3f286c4c4a17108905276454da0caf8cb" + +[[package]] +name = "virtio-queue" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e358084f32ed165fddb41d98ff1b7ff3c08b9611d8d6114a1b422e2e85688baf" +dependencies = [ + "libc", + "log", + "virtio-bindings", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vm-memory" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f39348a049689cabd3377cdd9182bf526ec76a6f823b79903896452e9d7a7380" +dependencies = [ + "arc-swap", + "bitflags 2.10.0", + "libc", + "thiserror 2.0.17", + "vmm-sys-util", + "winapi", +] + +[[package]] +name = "vmm-sys-util" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "506c62fdf617a5176827c2f9afbcf1be155b03a9b4bf9617a60dbc07e3a1642f" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.105" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/vhost-user-backend-patched/Cargo.toml b/vhost-user-backend-patched/Cargo.toml new file mode 100644 index 0000000..ed00549 --- /dev/null +++ b/vhost-user-backend-patched/Cargo.toml @@ -0,0 +1,120 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "vhost-user-backend" +version = "0.21.0" +authors = ["The Cloud Hypervisor Authors"] +build = false +autolib = false +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = "A framework to build vhost-user backend service daemon" +readme = "README.md" +keywords = [ + "vhost-user", + "virtio", +] +license = "Apache-2.0" +repository = "https://github.com/rust-vmm/vhost" + +[package.metadata.docs.rs] +all-features = true +rustc-args = ["--cfg RUSTDOC_disable_feature_compat_errors"] + +[package.metadata.cargo-all-features] +skip_feature_sets = [[ + "xen", + "postcopy", +]] + +[features] +postcopy = [ + "vhost/postcopy", + "userfaultfd", +] +xen = [ + "vm-memory/xen", + "vhost/xen", +] + +[lib] +name = "vhost_user_backend" +path = "src/lib.rs" + +[[test]] +name = "vhost-user-server" +path = "tests/vhost-user-server.rs" + +[dependencies.libc] +version = "0.2.39" + +[dependencies.log] +version = "0.4.17" + +[dependencies.userfaultfd] +version = "0.9.0" +optional = true + +[dependencies.vhost] +version = "0.15.0" +features = ["vhost-user-backend"] + +[dependencies.virtio-bindings] +version = "0.2.6" + +[dependencies.virtio-queue] +version = "0.17.0" + +[dependencies.vm-memory] +version = "0.17.1" +features = [ + "backend-mmap", + "backend-atomic", + "backend-bitmap", +] + +[dependencies.vmm-sys-util] +version = "0.15.0" + +[dev-dependencies.nix] +version = "0.30" +features = ["fs"] + +[dev-dependencies.tempfile] +version = "3.2.0" + +[dev-dependencies.uuid] +version = "1.8.0" +features = ["v4"] + +[dev-dependencies.vhost] +version = "0.15.0" +features = [ + "test-utils", + "vhost-user-frontend", + "vhost-user-backend", +] + +[dev-dependencies.vm-memory] +version = "0.17.1" +features = [ + "backend-mmap", + "backend-atomic", +] + +[lints.rust.unexpected_cfgs] +level = "warn" +priority = 0 +check-cfg = ["cfg(RUSTDOC_disable_feature_compat_errors)"] diff --git 
a/vhost-user-backend-patched/Cargo.toml.orig b/vhost-user-backend-patched/Cargo.toml.orig new file mode 100644 index 0000000..668fbb6 --- /dev/null +++ b/vhost-user-backend-patched/Cargo.toml.orig @@ -0,0 +1,42 @@ +[package] +name = "vhost-user-backend" +version = "0.21.0" +authors = ["The Cloud Hypervisor Authors"] +keywords = ["vhost-user", "virtio"] +description = "A framework to build vhost-user backend service daemon" +repository = "https://github.com/rust-vmm/vhost" +edition = "2021" +license = "Apache-2.0" + +[package.metadata.docs.rs] +all-features = true +rustc-args = ['--cfg RUSTDOC_disable_feature_compat_errors'] + +[features] +xen = ["vm-memory/xen", "vhost/xen"] +postcopy = ["vhost/postcopy", "userfaultfd"] + +[dependencies] +libc = "0.2.39" +log = "0.4.17" +userfaultfd = { version = "0.9.0", optional = true } +vhost = { path = "../vhost", version = "0.15.0", features = ["vhost-user-backend"] } +virtio-bindings = { workspace = true } +virtio-queue = { workspace = true } +vm-memory = { workspace = true, features = ["backend-mmap", "backend-atomic", "backend-bitmap"] } +vmm-sys-util = { workspace = true } + +[dev-dependencies] +nix = { version = "0.30", features = ["fs"] } +uuid = { version = "1.8.0", features=["v4"] } +vhost = { path = "../vhost", version = "0.15.0", features = ["test-utils", "vhost-user-frontend", "vhost-user-backend"] } +vm-memory = { workspace = true, features = ["backend-mmap", "backend-atomic"] } +tempfile = "3.2.0" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(RUSTDOC_disable_feature_compat_errors)'] } + +[package.metadata.cargo-all-features] +skip_feature_sets = [ + ["xen", "postcopy"] +] diff --git a/vhost-user-backend-patched/README.md b/vhost-user-backend-patched/README.md new file mode 100644 index 0000000..46b771c --- /dev/null +++ b/vhost-user-backend-patched/README.md @@ -0,0 +1,127 @@ +# vhost-user-backend + +## Design + +The `vhost-user-backend` crate provides a framework to implement 
`vhost-user` backend services, +which includes the following external public APIs: +- A daemon control object (`VhostUserDaemon`) to start and stop the service daemon. +- A vhost-user backend trait (`VhostUserBackendMut`) to handle vhost-user control messages and virtio + messages. +- A vring access trait (`VringT`) to access virtio queues, and three implementations of the trait: + `VringState`, `VringMutex` and `VringRwLock`. + +## Usage +The `vhost-user-backend` crate provides a framework to implement vhost-user backend services. The main interface provided by the `vhost-user-backend` library is the `struct VhostUserDaemon`: +```rust +pub struct VhostUserDaemon +where + S: VhostUserBackend, + V: VringT> + Clone + Send + Sync + 'static, + B: Bitmap + 'static, +{ + pub fn new(name: String, backend: S, atomic_mem: GuestMemoryAtomic>) -> Result; + pub fn start(&mut self, listener: Listener) -> Result<()>; + pub fn wait(&mut self) -> Result<()>; + pub fn get_epoll_handlers(&self) -> Vec>>; +} +``` + +### Create a `VhostUserDaemon` Instance +The `VhostUserDaemon::new()` creates an instance of the `VhostUserDaemon` object. The client needs to +pass in a `VhostUserBackend` object, which will be used to configure the `VhostUserDaemon` +instance, handle control messages from the vhost-user frontend and handle virtio requests from +virtio queues. A group of working threads will be created to handle virtio requests from configured +virtio queues. + +### Start the `VhostUserDaemon` +The `VhostUserDaemon::start()` method waits for an incoming connection from the vhost-user frontends +on the `listener`. Once a connection is ready, a main thread will be created to handle vhost-user +messages from the vhost-user frontend. + +### Stop the `VhostUserDaemon` +The `VhostUserDaemon::stop()` method waits for the main thread to exit. An exit event must be sent +to the main thread by writing to the `exit_event` EventFd before waiting for it to exit. 
+ +### Threading Model +The main thread and virtio queue working threads will concurrently access the underlying virtio +queues, so all virtio queues operate in a multi-threading model. But the main thread only accesses virtio +queues for configuration, so clients can adopt locking policies to optimize for the virtio queue +working threads. + +## Example +Example code to handle virtio messages from a virtio queue: +```rust +impl VhostUserBackendMut for VhostUserService { + fn process_queue(&mut self, vring: &VringMutex) -> Result { + let mut used_any = false; + let mem = match &self.mem { + Some(m) => m.memory(), + None => return Err(Error::NoMemoryConfigured), + }; + + let mut vring_state = vring.get_mut(); + + while let Some(avail_desc) = vring_state + .get_queue_mut() + .iter() + .map_err(|_| Error::IterateQueue)? + .next() + { + // Process the request... + + if self.event_idx { + if vring_state.add_used(head_index, 0).is_err() { + warn!("Couldn't return used descriptors to the ring"); + } + + match vring_state.needs_notification() { + Err(_) => { + warn!("Couldn't check if queue needs to be notified"); + vring_state.signal_used_queue().unwrap(); + } + Ok(needs_notification) => { + if needs_notification { + vring_state.signal_used_queue().unwrap(); + } + } + } + } else { + if vring_state.add_used(head_index, 0).is_err() { + warn!("Couldn't return used descriptors to the ring"); + } + vring_state.signal_used_queue().unwrap(); + } + } + + Ok(used_any) + } +} +``` + +## Postcopy support + +To enable support for POSTCOPY_* messages, there is a `postcopy` feature. +Due to how Xen handles memory mappings the `postcopy` feature is not compatible +with the `xen` feature. Enabling both at the same time will result in a compilation error. + +The `postcopy` feature enables the optional `userfaultfd` dependency in order to create and +interact with a `userfaultfd` object. This requires access permission to the `/dev/userfaultfd` +file from the backend. 
+ +## Xen support + +Supporting Xen requires special handling while mapping the guest memory. The +`vm-memory` crate implements Xen memory mapping support via a separate feature +`xen`, and this crate uses the same feature name to enable Xen support. + +Also, for Xen mappings, the memory regions passed by the frontend contain a few +extra fields as described in the vhost-user protocol documentation. + +It was decided by the `rust-vmm` maintainers to keep the interface simple and +build the crate for either standard Unix memory mapping or Xen, and not both. + +## License + +This project is licensed under + +- [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0 diff --git a/vhost-user-backend-patched/src/backend.rs b/vhost-user-backend-patched/src/backend.rs new file mode 100644 index 0000000..20e7daf --- /dev/null +++ b/vhost-user-backend-patched/src/backend.rs @@ -0,0 +1,809 @@ +// Copyright 2019 Intel Corporation. All Rights Reserved. +// Copyright 2019-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Traits for vhost user backend servers to implement virtio data plane services. +//! +//! Define two traits for vhost user backend servers to implement virtio data plane services. +//! The only difference between the two traits is mutability. The [VhostUserBackend] trait is +//! designed with interior mutability, so the implementor may choose the suitable way to protect +//! itself from concurrent accesses. The [VhostUserBackendMut] is designed without interior +//! mutability, and an implementation of: +//! ```ignore +//! impl VhostUserBackend for RwLock { } +//! ``` +//! is provided for convenience. +//! +//! [VhostUserBackend]: trait.VhostUserBackend.html +//! 
[VhostUserBackendMut]: trait.VhostUserBackendMut.html + +use std::fs::File; +use std::io::Result; +use std::ops::Deref; +use std::sync::{Arc, Mutex, RwLock}; + +use vhost::vhost_user::message::{ + VhostTransferStateDirection, VhostTransferStatePhase, VhostUserProtocolFeatures, + VhostUserSharedMsg, +}; +use vhost::vhost_user::Backend; +use vm_memory::bitmap::Bitmap; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::event::{EventConsumer, EventNotifier}; + +use vhost::vhost_user::GpuBackend; + +use super::vring::VringT; +use super::GM; + +/// Trait with interior mutability for vhost user backend servers to implement concrete services. +/// +/// To support multi-threading and asynchronous IO, we enforce `Send + Sync` bound. +pub trait VhostUserBackend: Send + Sync { + type Bitmap: Bitmap + 'static; + type Vring: VringT>; + + /// Get number of queues supported. + fn num_queues(&self) -> usize; + + /// Get maximum queue size supported. + fn max_queue_size(&self) -> usize; + + /// Get available virtio features. + fn features(&self) -> u64; + + /// Set acknowledged virtio features. + fn acked_features(&self, _features: u64) {} + + /// Get available vhost protocol features. + fn protocol_features(&self) -> VhostUserProtocolFeatures; + + /// Reset the emulated device state. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. + fn reset_device(&self) {} + + /// Enable or disable the virtio EVENT_IDX feature + fn set_event_idx(&self, enabled: bool); + + /// Get virtio device configuration. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. + fn get_config(&self, _offset: u32, _size: u32) -> Vec { + Vec::new() + } + + /// Set virtio device configuration. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. 
+ fn set_config(&self, _offset: u32, _buf: &[u8]) -> Result<()> { + Ok(()) + } + + /// Update guest memory regions. + fn update_memory(&self, mem: GM) -> Result<()>; + + /// Set handler for communicating with the frontend by the backend communication channel. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. + fn set_backend_req_fd(&self, _backend: Backend) {} + + /// This method retrieves a file descriptor for a shared object, identified by a unique UUID, + /// which can be used by the front-end for DMA. If the shared object is found, it must return + /// a File that the frontend can use. If the shared object does not exist the function returns + /// `None` (indicating no file descriptor is available). + /// + /// This function returns a `Result`, returning an error if the backend does not implement this + /// function. + fn get_shared_object(&self, _uuid: VhostUserSharedMsg) -> Result { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "back end does not support get shared object", + )) + } + + /// Set handler for communicating with the frontend by the gpu specific backend communication + /// channel. + /// + /// This function returns a `Result`, returning an error if the backend does not implement this + /// function. + fn set_gpu_socket(&self, _gpu_backend: GpuBackend) -> Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "backend does not support set_gpu_socket() / VHOST_USER_GPU_SET_SOCKET", + )) + } + + /// Get the map to map queue index to worker thread index. + /// + /// A return value of [2, 2, 4] means: the first two queues will be handled by worker thread 0, + /// the following two queues will be handled by worker thread 1, and the last four queues will + /// be handled by worker thread 2. + fn queues_per_thread(&self) -> Vec { + vec![0xffff_ffff] + } + + /// Provide an optional exit EventFd for the specified worker thread. 
+ /// + /// The returned `EventFd` will be monitored for IO events. When the + /// returned EventFd is written to, the worker thread will exit. + // TODO: Refine this API to return only EventNotifier. + fn exit_event(&self, _thread_index: usize) -> Option<(EventConsumer, EventNotifier)> { + None + } + + /// Handle IO events for backend registered file descriptors. + /// + /// This function gets called if the backend registered some additional listeners onto specific + /// file descriptors. The library can handle virtqueues on its own, but does not know what to + /// do with events happening on custom listeners. + fn handle_event( + &self, + device_event: u16, + evset: EventSet, + vrings: &[Self::Vring], + thread_id: usize, + ) -> Result<()>; + + /// Initiate transfer of internal state for the purpose of migration to/from the back-end. + /// + /// Depending on `direction`, the state should either be saved (i.e. serialized and written to + /// `file`) or loaded (i.e. read from `file` and deserialized). The back-end can choose to use + /// a different channel than file. If so, it must return a File that the front-end can use. + /// Note that this function must not block during transfer, i.e. I/O to/from `file` must be + /// done outside of this function. + fn set_device_state_fd( + &self, + _direction: VhostTransferStateDirection, + _phase: VhostTransferStatePhase, + _file: File, + ) -> Result> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "back end does not support state transfer", + )) + } + + /// After transferring internal state, check for any resulting errors, including potential + /// deserialization errors when loading state. + /// + /// Although this function return a `Result`, the front-end will not receive any details about + /// this error. 
+ fn check_device_state(&self) -> Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "back end does not support state transfer", + )) + } +} + +/// Trait without interior mutability for vhost user backend servers to implement concrete services. +pub trait VhostUserBackendMut: Send + Sync { + type Bitmap: Bitmap + 'static; + type Vring: VringT>; + + /// Get number of queues supported. + fn num_queues(&self) -> usize; + + /// Get maximum queue size supported. + fn max_queue_size(&self) -> usize; + + /// Get available virtio features. + fn features(&self) -> u64; + + /// Set acknowledged virtio features. + fn acked_features(&mut self, _features: u64) {} + + /// Get available vhost protocol features. + fn protocol_features(&self) -> VhostUserProtocolFeatures; + + /// Reset the emulated device state. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. + fn reset_device(&mut self) {} + + /// Enable or disable the virtio EVENT_IDX feature + fn set_event_idx(&mut self, enabled: bool); + + /// Get virtio device configuration. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. + fn get_config(&self, _offset: u32, _size: u32) -> Vec { + Vec::new() + } + + /// Set virtio device configuration. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. + fn set_config(&mut self, _offset: u32, _buf: &[u8]) -> Result<()> { + Ok(()) + } + + /// Update guest memory regions. + fn update_memory(&mut self, mem: GM) -> Result<()>; + + /// Set handler for communicating with the frontend by the backend communication channel. + /// + /// A default implementation is provided as we cannot expect all backends to implement this + /// function. 
+ fn set_backend_req_fd(&mut self, _backend: Backend) {} + + /// This method retrieves a file descriptor for a shared object, identified by a unique UUID, + /// which can be used by the front-end for DMA. If the shared object is found, it must return + /// a File that the frontend can use. If the shared object does not exist the function returns + /// `None` (indicating no file descriptor is available). + /// + /// This function returns a `Result`, returning an error if the backend does not implement this + /// function. + fn get_shared_object(&mut self, _uuid: VhostUserSharedMsg) -> Result { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "back end does not support get shared object", + )) + } + + /// Set handler for communicating with the frontend by the gpu specific backend communication + /// channel. + /// + /// This function returns a `Result`, returning an error if the backend does not implement this + /// function. + fn set_gpu_socket(&mut self, _gpu_backend: GpuBackend) -> Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "backend does not support set_gpu_socket() / VHOST_USER_GPU_SET_SOCKET", + )) + } + + /// Get the map to map queue index to worker thread index. + /// + /// A return value of [2, 2, 4] means: the first two queues will be handled by worker thread 0, + /// the following two queues will be handled by worker thread 1, and the last four queues will + /// be handled by worker thread 2. + fn queues_per_thread(&self) -> Vec { + vec![0xffff_ffff] + } + + /// Provide an optional exit EventFd for the specified worker thread. + /// + /// If an (`EventFd`, `token`) pair is returned, the returned `EventFd` will be monitored for IO + /// events by using epoll with the specified `token`. When the returned EventFd is written to, + /// the worker thread will exit. + // TODO: Refine this API to return only EventNotifier. 
+ fn exit_event(&self, _thread_index: usize) -> Option<(EventConsumer, EventNotifier)> { + None + } + + /// Handle IO events for backend registered file descriptors. + /// + /// This function gets called if the backend registered some additional listeners onto specific + /// file descriptors. The library can handle virtqueues on its own, but does not know what to + /// do with events happening on custom listeners. + fn handle_event( + &mut self, + device_event: u16, + evset: EventSet, + vrings: &[Self::Vring], + thread_id: usize, + ) -> Result<()>; + + /// Initiate transfer of internal state for the purpose of migration to/from the back-end. + /// + /// Depending on `direction`, the state should either be saved (i.e. serialized and written to + /// `file`) or loaded (i.e. read from `file` and deserialized). Note that this function must + /// not block during transfer, i.e. I/O to/from `file` must be done outside of this function. + fn set_device_state_fd( + &mut self, + _direction: VhostTransferStateDirection, + _phase: VhostTransferStatePhase, + _file: File, + ) -> Result> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "back end does not support state transfer", + )) + } + + /// After transferring internal state, check for any resulting errors, including potential + /// deserialization errors when loading state. + /// + /// Although this function return a `Result`, the front-end will not receive any details about + /// this error. 
+ fn check_device_state(&self) -> Result<()> { + Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "back end does not support state transfer", + )) + } +} + +impl VhostUserBackend for Arc { + type Bitmap = T::Bitmap; + type Vring = T::Vring; + + fn num_queues(&self) -> usize { + self.deref().num_queues() + } + + fn max_queue_size(&self) -> usize { + self.deref().max_queue_size() + } + + fn features(&self) -> u64 { + self.deref().features() + } + + fn acked_features(&self, features: u64) { + self.deref().acked_features(features) + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + self.deref().protocol_features() + } + + fn reset_device(&self) { + self.deref().reset_device() + } + + fn set_event_idx(&self, enabled: bool) { + self.deref().set_event_idx(enabled) + } + + fn get_config(&self, offset: u32, size: u32) -> Vec { + self.deref().get_config(offset, size) + } + + fn set_config(&self, offset: u32, buf: &[u8]) -> Result<()> { + self.deref().set_config(offset, buf) + } + + fn update_memory(&self, mem: GM) -> Result<()> { + self.deref().update_memory(mem) + } + + fn set_backend_req_fd(&self, backend: Backend) { + self.deref().set_backend_req_fd(backend) + } + + fn get_shared_object(&self, uuid: VhostUserSharedMsg) -> Result { + self.deref().get_shared_object(uuid) + } + + fn set_gpu_socket(&self, gpu_backend: GpuBackend) -> Result<()> { + self.deref().set_gpu_socket(gpu_backend) + } + + fn queues_per_thread(&self) -> Vec { + self.deref().queues_per_thread() + } + + fn exit_event(&self, thread_index: usize) -> Option<(EventConsumer, EventNotifier)> { + self.deref().exit_event(thread_index) + } + + fn handle_event( + &self, + device_event: u16, + evset: EventSet, + vrings: &[Self::Vring], + thread_id: usize, + ) -> Result<()> { + self.deref() + .handle_event(device_event, evset, vrings, thread_id) + } + + fn set_device_state_fd( + &self, + direction: VhostTransferStateDirection, + phase: VhostTransferStatePhase, + file: File, + ) -> 
Result> { + self.deref().set_device_state_fd(direction, phase, file) + } + + fn check_device_state(&self) -> Result<()> { + self.deref().check_device_state() + } +} + +impl VhostUserBackend for Mutex { + type Bitmap = T::Bitmap; + type Vring = T::Vring; + + fn num_queues(&self) -> usize { + self.lock().unwrap().num_queues() + } + + fn max_queue_size(&self) -> usize { + self.lock().unwrap().max_queue_size() + } + + fn features(&self) -> u64 { + self.lock().unwrap().features() + } + + fn acked_features(&self, features: u64) { + self.lock().unwrap().acked_features(features) + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + self.lock().unwrap().protocol_features() + } + + fn reset_device(&self) { + self.lock().unwrap().reset_device() + } + + fn set_event_idx(&self, enabled: bool) { + self.lock().unwrap().set_event_idx(enabled) + } + + fn get_config(&self, offset: u32, size: u32) -> Vec { + self.lock().unwrap().get_config(offset, size) + } + + fn set_config(&self, offset: u32, buf: &[u8]) -> Result<()> { + self.lock().unwrap().set_config(offset, buf) + } + + fn update_memory(&self, mem: GM) -> Result<()> { + self.lock().unwrap().update_memory(mem) + } + + fn set_backend_req_fd(&self, backend: Backend) { + self.lock().unwrap().set_backend_req_fd(backend) + } + + fn get_shared_object(&self, uuid: VhostUserSharedMsg) -> Result { + self.lock().unwrap().get_shared_object(uuid) + } + + fn set_gpu_socket(&self, gpu_backend: GpuBackend) -> Result<()> { + self.lock().unwrap().set_gpu_socket(gpu_backend) + } + + fn queues_per_thread(&self) -> Vec { + self.lock().unwrap().queues_per_thread() + } + + fn exit_event(&self, thread_index: usize) -> Option<(EventConsumer, EventNotifier)> { + self.lock().unwrap().exit_event(thread_index) + } + + fn handle_event( + &self, + device_event: u16, + evset: EventSet, + vrings: &[Self::Vring], + thread_id: usize, + ) -> Result<()> { + self.lock() + .unwrap() + .handle_event(device_event, evset, vrings, thread_id) + } + + fn 
set_device_state_fd( + &self, + direction: VhostTransferStateDirection, + phase: VhostTransferStatePhase, + file: File, + ) -> Result> { + self.lock() + .unwrap() + .set_device_state_fd(direction, phase, file) + } + + fn check_device_state(&self) -> Result<()> { + self.lock().unwrap().check_device_state() + } +} + +impl VhostUserBackend for RwLock { + type Bitmap = T::Bitmap; + type Vring = T::Vring; + + fn num_queues(&self) -> usize { + self.read().unwrap().num_queues() + } + + fn max_queue_size(&self) -> usize { + self.read().unwrap().max_queue_size() + } + + fn features(&self) -> u64 { + self.read().unwrap().features() + } + + fn acked_features(&self, features: u64) { + self.write().unwrap().acked_features(features) + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + self.read().unwrap().protocol_features() + } + + fn reset_device(&self) { + self.write().unwrap().reset_device() + } + + fn set_event_idx(&self, enabled: bool) { + self.write().unwrap().set_event_idx(enabled) + } + + fn get_config(&self, offset: u32, size: u32) -> Vec { + self.read().unwrap().get_config(offset, size) + } + + fn set_config(&self, offset: u32, buf: &[u8]) -> Result<()> { + self.write().unwrap().set_config(offset, buf) + } + + fn update_memory(&self, mem: GM) -> Result<()> { + self.write().unwrap().update_memory(mem) + } + + fn set_backend_req_fd(&self, backend: Backend) { + self.write().unwrap().set_backend_req_fd(backend) + } + + fn get_shared_object(&self, uuid: VhostUserSharedMsg) -> Result { + self.write().unwrap().get_shared_object(uuid) + } + + fn set_gpu_socket(&self, gpu_backend: GpuBackend) -> Result<()> { + self.write().unwrap().set_gpu_socket(gpu_backend) + } + + fn queues_per_thread(&self) -> Vec { + self.read().unwrap().queues_per_thread() + } + + fn exit_event(&self, thread_index: usize) -> Option<(EventConsumer, EventNotifier)> { + self.read().unwrap().exit_event(thread_index) + } + + fn handle_event( + &self, + device_event: u16, + evset: EventSet, + 
vrings: &[Self::Vring], + thread_id: usize, + ) -> Result<()> { + self.write() + .unwrap() + .handle_event(device_event, evset, vrings, thread_id) + } + + fn set_device_state_fd( + &self, + direction: VhostTransferStateDirection, + phase: VhostTransferStatePhase, + file: File, + ) -> Result> { + self.write() + .unwrap() + .set_device_state_fd(direction, phase, file) + } + + fn check_device_state(&self) -> Result<()> { + self.read().unwrap().check_device_state() + } +} + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::VringRwLock; + use std::sync::Mutex; + use uuid::Uuid; + use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestMemoryMmap}; + use vmm_sys_util::event::{new_event_consumer_and_notifier, EventFlag}; + + pub struct MockVhostBackend { + events: u64, + event_idx: bool, + acked_features: u64, + exit_event_fds: Vec<(EventConsumer, EventNotifier)>, + } + + impl MockVhostBackend { + pub fn new() -> Self { + let mut backend = MockVhostBackend { + events: 0, + event_idx: false, + acked_features: 0, + exit_event_fds: vec![], + }; + + // Create a event_fd for each thread. We make it NONBLOCKing in + // order to allow tests maximum flexibility in checking whether + // signals arrived or not. 
+ backend.exit_event_fds = (0..backend.queues_per_thread().len()) + .map(|_| { + new_event_consumer_and_notifier(EventFlag::NONBLOCK) + .expect("Failed to new EventNotifier and EventConsumer") + }) + .collect(); + + backend + } + } + + impl VhostUserBackendMut for MockVhostBackend { + type Bitmap = (); + type Vring = VringRwLock; + + fn num_queues(&self) -> usize { + 2 + } + + fn max_queue_size(&self) -> usize { + 256 + } + + fn features(&self) -> u64 { + 0xffff_ffff_ffff_ffff + } + + fn acked_features(&mut self, features: u64) { + self.acked_features = features; + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::all() + } + + fn reset_device(&mut self) { + self.event_idx = false; + self.events = 0; + self.acked_features = 0; + } + + fn set_event_idx(&mut self, enabled: bool) { + self.event_idx = enabled; + } + + fn get_config(&self, offset: u32, size: u32) -> Vec { + assert_eq!(offset, 0x200); + assert_eq!(size, 8); + + vec![0xa5u8; 8] + } + + fn set_config(&mut self, offset: u32, buf: &[u8]) -> Result<()> { + assert_eq!(offset, 0x200); + assert_eq!(buf.len(), 8); + assert_eq!(buf, &[0xa5u8; 8]); + + Ok(()) + } + + fn update_memory(&mut self, _atomic_mem: GuestMemoryAtomic) -> Result<()> { + Ok(()) + } + + fn set_backend_req_fd(&mut self, _backend: Backend) {} + + fn get_shared_object(&mut self, _uuid: VhostUserSharedMsg) -> Result { + let file = tempfile::tempfile().unwrap(); + Ok(file) + } + + fn queues_per_thread(&self) -> Vec { + vec![1, 1] + } + + fn exit_event(&self, thread_index: usize) -> Option<(EventConsumer, EventNotifier)> { + self.exit_event_fds.get(thread_index).map(|(s, r)| { + ( + s.try_clone().expect("Failed to clone EventConsumer"), + r.try_clone().expect("Failed to clone EventNotifier"), + ) + }) + } + + fn handle_event( + &mut self, + _device_event: u16, + _evset: EventSet, + _vrings: &[VringRwLock], + _thread_id: usize, + ) -> Result<()> { + self.events += 1; + + Ok(()) + } + } + + #[test] + fn 
test_new_mock_backend_mutex() { + let backend = Arc::new(Mutex::new(MockVhostBackend::new())); + + assert_eq!(backend.num_queues(), 2); + assert_eq!(backend.max_queue_size(), 256); + assert_eq!(backend.features(), 0xffff_ffff_ffff_ffff); + assert_eq!( + backend.protocol_features(), + VhostUserProtocolFeatures::all() + ); + assert_eq!(backend.queues_per_thread(), [1, 1]); + + assert_eq!(backend.get_config(0x200, 8), vec![0xa5; 8]); + backend.set_config(0x200, &[0xa5; 8]).unwrap(); + + backend.acked_features(0xffff); + assert_eq!(backend.lock().unwrap().acked_features, 0xffff); + + backend.set_event_idx(true); + assert!(backend.lock().unwrap().event_idx); + + let _ = backend.exit_event(0).unwrap(); + + let uuid = VhostUserSharedMsg { + uuid: Uuid::new_v4(), + }; + backend.get_shared_object(uuid).unwrap(); + + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ); + backend.update_memory(mem).unwrap(); + + backend.reset_device(); + assert!(backend.lock().unwrap().events == 0); + assert!(!backend.lock().unwrap().event_idx); + assert!(backend.lock().unwrap().acked_features == 0); + } + + #[test] + fn test_new_mock_backend_rwlock() { + let backend = Arc::new(RwLock::new(MockVhostBackend::new())); + + assert_eq!(backend.num_queues(), 2); + assert_eq!(backend.max_queue_size(), 256); + assert_eq!(backend.features(), 0xffff_ffff_ffff_ffff); + assert_eq!( + backend.protocol_features(), + VhostUserProtocolFeatures::all() + ); + assert_eq!(backend.queues_per_thread(), [1, 1]); + + assert_eq!(backend.get_config(0x200, 8), vec![0xa5; 8]); + backend.set_config(0x200, &[0xa5; 8]).unwrap(); + + backend.acked_features(0xffff); + assert_eq!(backend.read().unwrap().acked_features, 0xffff); + + backend.set_event_idx(true); + assert!(backend.read().unwrap().event_idx); + + let _ = backend.exit_event(0).unwrap(); + + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 
0x10000)]).unwrap(), + ); + backend.update_memory(mem.clone()).unwrap(); + + let uuid = VhostUserSharedMsg { + uuid: Uuid::new_v4(), + }; + backend.get_shared_object(uuid).unwrap(); + + let vring = VringRwLock::new(mem, 0x1000).unwrap(); + backend + .handle_event(0x1, EventSet::IN, &[vring], 0) + .unwrap(); + + backend.reset_device(); + assert!(backend.read().unwrap().events == 0); + assert!(!backend.read().unwrap().event_idx); + assert!(backend.read().unwrap().acked_features == 0); + } +} diff --git a/vhost-user-backend-patched/src/bitmap.rs b/vhost-user-backend-patched/src/bitmap.rs new file mode 100644 index 0000000..5e56110 --- /dev/null +++ b/vhost-user-backend-patched/src/bitmap.rs @@ -0,0 +1,632 @@ +// Copyright (C) 2024 Red Hat, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::ops::Index; +use std::os::fd::{AsRawFd, BorrowedFd}; +use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::{Arc, RwLock}; +use std::{io, ptr}; +use vm_memory::bitmap::{Bitmap, BitmapSlice, WithBitmapSlice}; +use vm_memory::mmap::NewBitmap; +use vm_memory::{Address, GuestMemoryRegion}; + +// Size in bytes of the `VHOST_LOG_PAGE` +const LOG_PAGE_SIZE: usize = 0x1000; +// Number of bits grouped together as a basic storage unit ("word") in the bitmap +// (i.e., in this case one byte tracks 8 pages, one bit per page). +const LOG_WORD_SIZE: usize = u8::BITS as usize; + +/// A `Bitmap` with an internal `Bitmap` that can be replaced at runtime +pub trait BitmapReplace: Bitmap { + type InnerBitmap: MemRegionBitmap; + + /// Replace the internal `Bitmap` + fn replace(&self, bitmap: Self::InnerBitmap); +} + +/// A bitmap relative to a memory region +pub trait MemRegionBitmap: Sized { + /// Creates a new bitmap relative to `region`, using the `logmem` as + /// backing memory for the bitmap + fn new(region: &R, logmem: Arc) -> io::Result; +} + +// TODO: This impl is a quick and dirty hack to allow the tests to continue using +// `GuestMemoryMmap<()>`. 
Sadly this is exposed in the public API, but it should +// be moved to an internal mock library. +impl BitmapReplace for () { + type InnerBitmap = (); + + // this implementation must not be used if the backend sets `VHOST_USER_PROTOCOL_F_LOG_SHMFD` + fn replace(&self, _bitmap: ()) { + panic!("The unit bitmap () must not be used if VHOST_USER_PROTOCOL_F_LOG_SHMFD is set"); + } +} + +impl MemRegionBitmap for () { + fn new(_region: &R, _logmem: Arc) -> io::Result { + Err(io::Error::from(io::ErrorKind::Unsupported)) + } +} + +/// `BitmapMmapRegion` implements a bitmap tha can be replaced at runtime. +/// The main use case is to support live migration on vhost-user backends +/// (see `VHOST_USER_PROTOCOL_F_LOG_SHMFD` and `VHOST_USER_SET_LOG_BASE` in the vhost-user protocol +/// specification). It uses a fixed memory page size of `VHOST_LOG_PAGE` bytes (i.e., `4096` bytes), +/// so it converts addresses to page numbers before setting or clearing the bits. +/// +/// To use this bitmap you need to define the memory as `GuestMemoryMmap`. +/// +/// Note: +/// This implementation uses `std::sync::RwLock`, the priority policy of the lock is dependent on +/// the underlying operating system's implementation and does not guarantee any particular policy, +/// in systems other than linux a thread trying to acquire the lock may starve. +#[derive(Default, Debug, Clone)] +pub struct BitmapMmapRegion { + // TODO: To avoid both reader and writer starvation we can replace the `std::sync::RwLock` with + // `parking_lot::RwLock`. 
+ inner: Arc>>, + base_address: usize, // The slice's base address +} + +impl Bitmap for BitmapMmapRegion { + fn mark_dirty(&self, offset: usize, len: usize) { + let inner = self.inner.read().unwrap(); + if let Some(bitmap) = inner.as_ref() { + if let Some(absolute_offset) = self.base_address.checked_add(offset) { + bitmap.mark_dirty(absolute_offset, len); + } + } + } + + fn dirty_at(&self, offset: usize) -> bool { + let inner = self.inner.read().unwrap(); + inner + .as_ref() + .is_some_and(|bitmap| bitmap.dirty_at(self.base_address.saturating_add(offset))) + } + + fn slice_at(&self, offset: usize) -> >::S { + Self { + inner: Arc::clone(&self.inner), + base_address: self.base_address.saturating_add(offset), + } + } +} + +impl BitmapReplace for BitmapMmapRegion { + type InnerBitmap = AtomicBitmapMmap; + + fn replace(&self, bitmap: AtomicBitmapMmap) { + let mut inner = self.inner.write().unwrap(); + inner.replace(bitmap); + } +} + +impl BitmapSlice for BitmapMmapRegion {} + +impl WithBitmapSlice<'_> for BitmapMmapRegion { + type S = Self; +} + +impl NewBitmap for BitmapMmapRegion { + fn with_len(_len: usize) -> Self { + Self::default() + } +} + +/// `AtomicBitmapMmap` implements a simple memory-mapped bitmap on the page level with test +/// and set operations. The main use case is to support live migration on vhost-user backends +/// (see `VHOST_USER_PROTOCOL_F_LOG_SHMFD` and `VHOST_USER_SET_LOG_BASE` in the vhost-user protocol +/// specification). It uses a fixed memory page size of `LOG_PAGE_SIZE` bytes, so it converts +/// addresses to page numbers before setting or clearing the bits. +#[derive(Debug)] +pub struct AtomicBitmapMmap { + logmem: Arc, + pages_before_region: usize, // Number of pages to ignore from the start of the bitmap + number_of_pages: usize, // Number of total pages indexed in the bitmap for this region +} + +// `AtomicBitmapMmap` implements a simple bitmap, it is page-size aware and relative +// to a memory region. 
It handling the `log` memory mapped area. Each page is indexed +// inside a word of `LOG_WORD_SIZE` bits, so even if the bitmap starts at the beginning of +// the mapped area, the memory region does not necessarily have to start at the beginning of +// that word. +// Note: we don't implement `Bitmap` because we cannot implement `slice_at()` +impl MemRegionBitmap for AtomicBitmapMmap { + // Creates a new memory-mapped bitmap for the memory region. This bitmap must fit within the + // log mapped memory. + fn new(region: &R, logmem: Arc) -> io::Result { + let region_start_addr: usize = region.start_addr().raw_value().io_try_into()?; + let region_len: usize = region.len().io_try_into()?; + if region_len == 0 { + return Err(io::Error::from(io::ErrorKind::InvalidData)); + } + + // The size of the log should be large enough to cover all known guest addresses. + let region_end_addr = region_start_addr + .checked_add(region_len - 1) + .ok_or(io::Error::from(io::ErrorKind::InvalidData))?; + let region_end_log_word = page_word(page_number(region_end_addr)); + if region_end_log_word >= logmem.len() { + return Err(io::Error::from(io::ErrorKind::InvalidData)); + } + + // The frontend sends a single bitmap (i.e., the log memory to be mapped using `fd`, + // `mmap_offset` and `mmap_size`) that covers the entire guest memory. + // However, since each memory region requires a bitmap relative to them, we have to + // adjust the offset and size, in number of pages, of this region. + let offset_pages = page_number(region_start_addr); + let size_page = page_number(region_len); + + Ok(Self { + logmem, + pages_before_region: offset_pages, + number_of_pages: size_page, + }) + } +} + +impl AtomicBitmapMmap { + // Sets the memory range as dirty. The `offset` is relative to the memory region, + // so an offset of `0` references the start of the memory region. Any attempt to + // access beyond the end of the bitmap are simply ignored. 
+ fn mark_dirty(&self, offset: usize, len: usize) { + if len == 0 { + return; + } + + let first_page = page_number(offset); + let last_page = page_number(offset.saturating_add(len - 1)); + for page in first_page..=last_page { + if page >= self.number_of_pages { + break; // ignore out of bound access + } + + // get the absolute page number + let page = self.pages_before_region + page; + self.logmem[page_word(page)].fetch_or(1 << page_bit(page), Ordering::Relaxed); + } + } + + // Check whether the specified offset is marked as dirty. The `offset` is relative + // to the memory region, so a `0` offset references the start of the memory region. + // Any attempt to access beyond the end of the bitmap are simply ignored. + fn dirty_at(&self, offset: usize) -> bool { + let page = page_number(offset); + if page >= self.number_of_pages { + return false; // ignore out of bound access + } + + // get the absolute page number + let page = self.pages_before_region + page; + let page_bit = self.logmem[page_word(page)].load(Ordering::Relaxed) & (1 << page_bit(page)); + page_bit != 0 + } +} + +/// `MmaplogReg` mmaps the frontend bitmap backing memory in the current process. +#[derive(Debug)] +pub struct MmapLogReg { + addr: *const AtomicU8, + len: usize, +} + +// SAFETY: Send is not automatically implemented because the raw pointer. +// No one besides `MmapLogReg` has the raw pointer, so we can safely transfer it to another thread. +unsafe impl Send for MmapLogReg {} + +// SAFETY: Sync is not automatically implemented because the raw pointer. +// `MmapLogReg` doesn't have any interior mutability and all access to `&AtomicU8` +// are done through atomic operations. +unsafe impl Sync for MmapLogReg {} + +impl MmapLogReg { + // Note: We could try to adjust the mapping area to only cover the memory region, but + // the region's starting address is not guarantee to be LOG_WORD_SIZE-page aligned + // which makes the implementation needlessly cumbersome. 
+ // Note: The specification does not define whether the offset must be page-aligned or not. + // But, since we are receiving the offset from the frontend to be used to call mmap, + // we assume it is properly aligned (currently, qemu always send a 0 offset). + pub(crate) fn from_file(fd: BorrowedFd, offset: u64, len: u64) -> io::Result { + let offset: isize = offset.io_try_into()?; + let len: usize = len.io_try_into()?; + + // Let's uphold the safety contract for `std::ptr::offset()`. + if len > isize::MAX as usize { + return Err(io::Error::from(io::ErrorKind::InvalidData)); + } + + // SAFETY: `fd` is a valid file descriptor and we are not using `libc::MAP_FIXED`. + let addr = unsafe { + libc::mmap( + ptr::null_mut(), + len as libc::size_t, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + offset as libc::off_t, + ) + }; + + if addr == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(Self { + addr: addr as *const AtomicU8, + len, + }) + } + + fn len(&self) -> usize { + self.len + } +} + +impl Index for MmapLogReg { + type Output = AtomicU8; + + // It's ok to get a reference to an atomic value. + fn index(&self, index: usize) -> &Self::Output { + assert!(index < self.len); + // Note: Instead of `&*` we can use `AtomicU8::from_ptr()` as soon it gets stabilized. + // SAFETY: `self.addr` is a valid and properly aligned pointer. Also, `self.addr` + `index` + // doesn't wrap around and is contained within the mapped memory region. + unsafe { &*self.addr.add(index) } + } +} + +impl Drop for MmapLogReg { + fn drop(&mut self) { + // SAFETY: `addr` is properly aligned, also we are sure that this is the + // last reference alive and/or we have an exclusive access to this object. 
+ unsafe { + libc::munmap(self.addr as *mut libc::c_void, self.len as libc::size_t); + } + } +} + +trait IoTryInto>: Sized { + fn io_try_into(self) -> io::Result; +} + +impl IoTryInto for TySrc +where + TyDst: TryFrom, + >::Error: Send + Sync + std::error::Error + 'static, +{ + fn io_try_into(self) -> io::Result { + self.try_into() + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + } +} + +#[inline] +// Get the page number corresponding to the address `addr` +fn page_number(addr: usize) -> usize { + addr / LOG_PAGE_SIZE +} + +#[inline] +// Get the word within the bitmap of the page. +// Each page is indexed inside a word of `LOG_WORD_SIZE` bits. +fn page_word(page: usize) -> usize { + page / LOG_WORD_SIZE +} + +#[inline] +// Get the bit index inside a word of `LOG_WORD_SIZE` bits +fn page_bit(page: usize) -> usize { + page % LOG_WORD_SIZE +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::File; + use std::io::Write; + use std::os::fd::AsFd; + use vm_memory::{GuestAddress, GuestRegionMmap}; + use vmm_sys_util::tempfile::TempFile; + + // Helper method to check whether a specified range is clean. + pub fn range_is_clean(b: &B, start: usize, len: usize) -> bool { + (start..start + len).all(|offset| !b.dirty_at(offset)) + } + + // Helper method to check whether a specified range is dirty. 
+ pub fn range_is_dirty(b: &B, start: usize, len: usize) -> bool { + (start..start + len).all(|offset| b.dirty_at(offset)) + } + + fn tmp_file(len: usize) -> File { + let mut f = TempFile::new().unwrap().into_file(); + let buf = vec![0; len]; + f.write_all(buf.as_ref()).unwrap(); + f + } + + fn test_all(b: &BitmapMmapRegion, len: usize) { + assert!(range_is_clean(b, 0, len), "The bitmap should be clean"); + + b.mark_dirty(0, len); + assert!(range_is_dirty(b, 0, len), "The bitmap should be dirty"); + } + + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_region_bigger_than_log() { + // Let's create a log memory area to track 8 pages, + // since 1 bit correspond to 1 page, we need a 1-byte log memory area. + let mmap_offset: u64 = 0; + let mmap_size = 1; // // 1 byte = 8 bits/pages + let f = tmp_file(mmap_size); + + // A guest memory region of 16 pages + let region_start_addr = GuestAddress(mmap_offset); + let region_len = LOG_PAGE_SIZE * 16; + let region: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap(); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + let log = AtomicBitmapMmap::new(®ion, logmem); + + assert!(log.is_err()); + } + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_log_and_region_same_size() { + // A log memory area able to track 32 pages + let mmap_offset: u64 = 0; + let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages + let f = tmp_file(mmap_size); + + // A 32-page guest memory region + let region_start_addr = GuestAddress::new(mmap_offset); + let region_len = LOG_PAGE_SIZE * 32; + let region: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap(); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + let log = AtomicBitmapMmap::new(®ion, logmem); + assert!(log.is_ok()); + let log = log.unwrap(); + + let 
bitmap = BitmapMmapRegion::default(); + bitmap.replace(log); + + test_all(&bitmap, region_len); + } + + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_region_smaller_than_log() { + // A log memory area able to track 32 pages + let mmap_offset: u64 = 0; + let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages + let f = tmp_file(mmap_size); + + // A 16-page guest memory region + let region_start_addr = GuestAddress::new(mmap_offset); + let region_len = LOG_PAGE_SIZE * 16; + let region: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap(); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + let log = AtomicBitmapMmap::new(®ion, logmem); + assert!(log.is_ok()); + let log = log.unwrap(); + + let bitmap = BitmapMmapRegion::default(); + + bitmap.replace(log); + + test_all(&bitmap, region_len); + } + + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_region_smaller_than_one_word() { + // A log memory area able to track 32 pages + let mmap_offset: u64 = 0; + let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages + let f = tmp_file(mmap_size); + + // A 6-page guest memory region + let region_start_addr = GuestAddress::new(mmap_offset); + let region_len = LOG_PAGE_SIZE * 6; + let region: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap(); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + let log = AtomicBitmapMmap::new(®ion, logmem); + assert!(log.is_ok()); + let log = log.unwrap(); + + let bitmap = BitmapMmapRegion::default(); + bitmap.replace(log); + + test_all(&bitmap, region_len); + } + + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_two_regions_overlapping_word_first_dirty() { + // A log memory area able to track 32 pages + let mmap_offset: u64 = 0; + let mmap_size = 4; // 4 bytes * 8 bits = 32 
bits/pages + let f = tmp_file(mmap_size); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + // A 11-page guest memory region + let region0_start_addr = GuestAddress::new(mmap_offset); + let region0_len = LOG_PAGE_SIZE * 11; + let region0: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region0_start_addr, region0_len, None).unwrap(); + + let log0 = AtomicBitmapMmap::new(®ion0, Arc::clone(&logmem)); + assert!(log0.is_ok()); + let log0 = log0.unwrap(); + let bitmap0 = BitmapMmapRegion::default(); + bitmap0.replace(log0); + + // A 1-page guest memory region + let region1_start_addr = GuestAddress::new(mmap_offset + LOG_PAGE_SIZE as u64 * 14); + let region1_len = LOG_PAGE_SIZE; + let region1: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region1_start_addr, region1_len, None).unwrap(); + + let log1 = AtomicBitmapMmap::new(®ion1, Arc::clone(&logmem)); + assert!(log1.is_ok()); + let log1 = log1.unwrap(); + + let bitmap1 = BitmapMmapRegion::default(); + bitmap1.replace(log1); + + // Both regions should be clean + assert!( + range_is_clean(&bitmap0, 0, region0_len), + "The bitmap0 should be clean" + ); + assert!( + range_is_clean(&bitmap1, 0, region1_len), + "The bitmap1 should be clean" + ); + + // Marking region 0, region 1 should continue be clean + bitmap0.mark_dirty(0, region0_len); + + assert!( + range_is_dirty(&bitmap0, 0, region0_len), + "The bitmap0 should be dirty" + ); + assert!( + range_is_clean(&bitmap1, 0, region1_len), + "The bitmap1 should be clean" + ); + } + + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_two_regions_overlapping_word_second_dirty() { + // A log memory area able to track 32 pages + let mmap_offset: u64 = 0; + let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages + let f = tmp_file(mmap_size); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + // A 11-page guest memory region + let 
region0_start_addr = GuestAddress::new(mmap_offset); + let region0_len = LOG_PAGE_SIZE * 11; + let region0: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region0_start_addr, region0_len, None).unwrap(); + + let log0 = AtomicBitmapMmap::new(®ion0, Arc::clone(&logmem)); + assert!(log0.is_ok()); + let log0 = log0.unwrap(); + + let bitmap0 = BitmapMmapRegion::default(); + bitmap0.replace(log0); + + // A 1-page guest memory region + let region1_start_addr = GuestAddress::new(mmap_offset + LOG_PAGE_SIZE as u64 * 14); + let region1_len = LOG_PAGE_SIZE; + let region1: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region1_start_addr, region1_len, None).unwrap(); + + let log1 = AtomicBitmapMmap::new(®ion1, Arc::clone(&logmem)); + assert!(log1.is_ok()); + let log1 = log1.unwrap(); + + let bitmap1 = BitmapMmapRegion::default(); + bitmap1.replace(log1); + + // Both regions should be clean + assert!( + range_is_clean(&bitmap0, 0, region0_len), + "The bitmap0 should be clean" + ); + assert!( + range_is_clean(&bitmap1, 0, region1_len), + "The bitmap1 should be clean" + ); + + // Marking region 1, region 0 should continue be clean + bitmap1.mark_dirty(0, region1_len); + + assert!( + range_is_dirty(&bitmap1, 0, region1_len), + "The bitmap0 should be dirty" + ); + assert!( + range_is_clean(&bitmap0, 0, region0_len), + "The bitmap1 should be clean" + ); + } + + #[test] + #[cfg(not(miri))] // Miri cannot mmap files + fn test_bitmap_region_slice() { + // A log memory area able to track 32 pages + let mmap_offset: u64 = 0; + let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages + let f = tmp_file(mmap_size); + + // A 32-page guest memory region + let region_start_addr = GuestAddress::new(mmap_offset); + let region_len = LOG_PAGE_SIZE * 32; + let region: GuestRegionMmap<()> = + GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap(); + + let logmem = + Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap()); + + let log = 
AtomicBitmapMmap::new(®ion, logmem); + assert!(log.is_ok()); + let log = log.unwrap(); + + let bitmap = BitmapMmapRegion::default(); + bitmap.replace(log); + + assert!( + range_is_clean(&bitmap, 0, region_len), + "The bitmap should be clean" + ); + + // Let's get a slice of half the bitmap + let slice_len = region_len / 2; + let slice = bitmap.slice_at(slice_len); + assert!( + range_is_clean(&slice, 0, slice_len), + "The slice should be clean" + ); + + slice.mark_dirty(0, slice_len); + assert!( + range_is_dirty(&slice, 0, slice_len), + "The slice should be dirty" + ); + assert!( + range_is_clean(&bitmap, 0, slice_len), + "The first half of the bitmap should be clean" + ); + assert!( + range_is_dirty(&bitmap, slice_len, region_len - slice_len), + "The last half of the bitmap should be dirty" + ); + } +} diff --git a/vhost-user-backend-patched/src/event_loop.rs b/vhost-user-backend-patched/src/event_loop.rs new file mode 100644 index 0000000..09e9104 --- /dev/null +++ b/vhost-user-backend-patched/src/event_loop.rs @@ -0,0 +1,276 @@ +// Copyright 2019 Intel Corporation. All Rights Reserved. +// Copyright 2019-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::fmt::{Display, Formatter}; +use std::io::{self, Result}; +use std::marker::PhantomData; +use std::os::fd::IntoRawFd; +use std::os::unix::io::{AsRawFd, RawFd}; + +use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; +use vmm_sys_util::event::EventNotifier; + +use super::backend::VhostUserBackend; +use super::vring::VringT; + +/// Errors related to vring epoll event handling. +#[derive(Debug)] +pub enum VringEpollError { + /// Failed to create epoll file descriptor. + EpollCreateFd(io::Error), + /// Failed while waiting for events. + EpollWait(io::Error), + /// Could not register exit event + RegisterExitEvent(io::Error), + /// Failed to read the event from kick EventFd. 
+ HandleEventReadKick(io::Error), + /// Failed to handle the event from the backend. + HandleEventBackendHandling(io::Error), +} + +impl Display for VringEpollError { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + match self { + VringEpollError::EpollCreateFd(e) => write!(f, "cannot create epoll fd: {e}"), + VringEpollError::EpollWait(e) => write!(f, "failed to wait for epoll event: {e}"), + VringEpollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"), + VringEpollError::HandleEventReadKick(e) => { + write!(f, "cannot read vring kick event: {e}") + } + VringEpollError::HandleEventBackendHandling(e) => { + write!(f, "failed to handle epoll event: {e}") + } + } + } +} + +impl std::error::Error for VringEpollError {} + +/// Result of vring epoll operations. +pub type VringEpollResult = std::result::Result; + +/// Epoll event handler to manage and process epoll events for registered file descriptor. +/// +/// The `VringEpollHandler` structure provides interfaces to: +/// - add file descriptors to be monitored by the epoll fd +/// - remove registered file descriptors from the epoll fd +/// - run the event loop to handle pending events on the epoll fd +pub struct VringEpollHandler { + epoll: Epoll, + backend: T, + vrings: Vec, + thread_id: usize, + exit_event_fd: Option, + phantom: PhantomData, +} + +impl VringEpollHandler { + /// Send `exit event` to break the event loop. + pub fn send_exit_event(&self) { + if let Some(eventfd) = self.exit_event_fd.as_ref() { + let _ = eventfd.notify(); + } + } +} + +impl VringEpollHandler +where + T: VhostUserBackend, +{ + /// Create a `VringEpollHandler` instance. 
+ pub(crate) fn new( + backend: T, + vrings: Vec, + thread_id: usize, + ) -> VringEpollResult { + let epoll = Epoll::new().map_err(VringEpollError::EpollCreateFd)?; + let exit_event_fd = backend.exit_event(thread_id); + + let exit_event_fd = if let Some((consumer, notifier)) = exit_event_fd { + let id = backend.num_queues(); + epoll + .ctl( + ControlOperation::Add, + consumer.into_raw_fd(), + EpollEvent::new(EventSet::IN, id as u64), + ) + .map_err(VringEpollError::RegisterExitEvent)?; + Some(notifier) + } else { + None + }; + + Ok(VringEpollHandler { + epoll, + backend, + vrings, + thread_id, + exit_event_fd, + phantom: PhantomData, + }) + } + + /// Register an event into the epoll fd. + /// + /// When this event is later triggered, the backend implementation of `handle_event` will be + /// called. + pub fn register_listener(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { + // `data` range [0...num_queues] is reserved for queues and exit event. + if data <= self.backend.num_queues() as u64 { + Err(io::Error::from_raw_os_error(libc::EINVAL)) + } else { + self.register_event(fd, ev_type, data) + } + } + + /// Unregister an event from the epoll fd. + /// + /// If the event is triggered after this function has been called, the event will be silently + /// dropped. + pub fn unregister_listener(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { + // `data` range [0...num_queues] is reserved for queues and exit event. 
+ if data <= self.backend.num_queues() as u64 { + Err(io::Error::from_raw_os_error(libc::EINVAL)) + } else { + self.unregister_event(fd, ev_type, data) + } + } + + pub(crate) fn register_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { + self.epoll + .ctl(ControlOperation::Add, fd, EpollEvent::new(ev_type, data)) + } + + pub(crate) fn unregister_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> { + self.epoll + .ctl(ControlOperation::Delete, fd, EpollEvent::new(ev_type, data)) + } + + /// Run the event poll loop to handle all pending events on registered fds. + /// + /// The event loop will be terminated once an event is received from the `exit event fd` + /// associated with the backend. + pub(crate) fn run(&self) -> VringEpollResult<()> { + const EPOLL_EVENTS_LEN: usize = 100; + let mut events = vec![EpollEvent::new(EventSet::empty(), 0); EPOLL_EVENTS_LEN]; + + 'epoll: loop { + let num_events = match self.epoll.wait(-1, &mut events[..]) { + Ok(res) => res, + Err(e) => { + if e.kind() == io::ErrorKind::Interrupted { + // It's well defined from the epoll_wait() syscall + // documentation that the epoll loop can be interrupted + // before any of the requested events occurred or the + // timeout expired. In both those cases, epoll_wait() + // returns an error of type EINTR, but this should not + // be considered as a regular error. Instead it is more + // appropriate to retry, by calling into epoll_wait(). + continue; + } + return Err(VringEpollError::EpollWait(e)); + } + }; + + for event in events.iter().take(num_events) { + let evset = match EventSet::from_bits(event.events) { + Some(evset) => evset, + None => { + let evbits = event.events; + println!("epoll: ignoring unknown event set: 0x{evbits:x}"); + continue; + } + }; + + let ev_type = event.data() as u16; + + // handle_event() returns true if an event is received from the exit event fd. + if self.handle_event(ev_type, evset)? 
{ + break 'epoll; + } + } + } + + Ok(()) + } + + fn handle_event(&self, device_event: u16, evset: EventSet) -> VringEpollResult { + if self.exit_event_fd.is_some() && device_event as usize == self.backend.num_queues() { + return Ok(true); + } + + if (device_event as usize) < self.vrings.len() { + let vring = &self.vrings[device_event as usize]; + let enabled = vring + .read_kick() + .map_err(VringEpollError::HandleEventReadKick)?; + + // If the vring is not enabled, it should not be processed. + if !enabled { + return Ok(false); + } + } + + self.backend + .handle_event(device_event, evset, &self.vrings, self.thread_id) + .map_err(VringEpollError::HandleEventBackendHandling)?; + + Ok(false) + } +} + +impl AsRawFd for VringEpollHandler { + fn as_raw_fd(&self) -> RawFd { + self.epoll.as_raw_fd() + } +} + +#[cfg(test)] +mod tests { + use super::super::backend::tests::MockVhostBackend; + use super::super::vring::VringRwLock; + use super::*; + use std::sync::{Arc, Mutex}; + use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestMemoryMmap}; + use vmm_sys_util::event::{new_event_consumer_and_notifier, EventFlag}; + + #[test] + fn test_vring_epoll_handler() { + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ); + let vring = VringRwLock::new(mem, 0x1000).unwrap(); + let backend = Arc::new(Mutex::new(MockVhostBackend::new())); + + let handler = VringEpollHandler::new(backend, vec![vring], 0x1).unwrap(); + + let (consumer, _notifier) = new_event_consumer_and_notifier(EventFlag::empty()).unwrap(); + handler + .register_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .unwrap(); + // Register an already registered fd. + handler + .register_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .unwrap_err(); + // Register an invalid data. 
+ handler + .register_listener(consumer.as_raw_fd(), EventSet::IN, 1) + .unwrap_err(); + + handler + .unregister_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .unwrap(); + // unregister an already unregistered fd. + handler + .unregister_listener(consumer.as_raw_fd(), EventSet::IN, 3) + .unwrap_err(); + // unregister an invalid data. + handler + .unregister_listener(consumer.as_raw_fd(), EventSet::IN, 1) + .unwrap_err(); + // Check we retrieve the correct file descriptor + assert_eq!(handler.as_raw_fd(), handler.epoll.as_raw_fd()); + } +} diff --git a/vhost-user-backend-patched/src/handler.rs b/vhost-user-backend-patched/src/handler.rs new file mode 100644 index 0000000..757304b --- /dev/null +++ b/vhost-user-backend-patched/src/handler.rs @@ -0,0 +1,794 @@ +// Copyright 2019 Intel Corporation. All Rights Reserved. +// Copyright 2019-2021 Alibaba Cloud. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::error; +use std::fs::File; +use std::io; +use std::os::fd::AsFd; +#[cfg(feature = "postcopy")] +use std::os::fd::FromRawFd; +use std::os::unix::io::AsRawFd; +use std::sync::Arc; +use std::thread; + +use crate::bitmap::{BitmapReplace, MemRegionBitmap, MmapLogReg}; +#[cfg(feature = "postcopy")] +use userfaultfd::{Uffd, UffdBuilder}; +use vhost::vhost_user::message::{ + VhostTransferStateDirection, VhostTransferStatePhase, VhostUserConfigFlags, VhostUserLog, + VhostUserMemoryRegion, VhostUserProtocolFeatures, VhostUserSharedMsg, + VhostUserSingleMemoryRegion, VhostUserVirtioFeatures, VhostUserVringAddrFlags, + VhostUserVringState, +}; +use vhost::vhost_user::GpuBackend; +use vhost::vhost_user::{ + Backend, Error as VhostUserError, Result as VhostUserResult, VhostUserBackendReqHandlerMut, +}; + +use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; +use virtio_queue::{Error as VirtQueError, QueueT}; +use vm_memory::mmap::NewBitmap; +use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryMmap, 
GuestRegionMmap}; +use vmm_sys_util::epoll::EventSet; + +use super::backend::VhostUserBackend; +use super::event_loop::VringEpollHandler; +use super::event_loop::{VringEpollError, VringEpollResult}; +use super::vring::VringT; +use super::GM; + +// vhost in the kernel usually supports 509 mem slots. +// The 509 used to be the KVM limit, it supported 512, but 3 were used +// for internal purposes (nowadays, it supports more than that). +const MAX_MEM_SLOTS: u64 = 509; + +#[derive(Debug)] +/// Errors related to vhost-user handler. +pub enum VhostUserHandlerError { + /// Failed to create a `Vring`. + CreateVring(VirtQueError), + /// Failed to create vring worker. + CreateEpollHandler(VringEpollError), + /// Failed to spawn vring worker. + SpawnVringWorker(io::Error), + /// Could not find the mapping from memory regions. + MissingMemoryMapping, +} + +impl std::fmt::Display for VhostUserHandlerError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + VhostUserHandlerError::CreateVring(e) => { + write!(f, "failed to create vring: {e}") + } + VhostUserHandlerError::CreateEpollHandler(e) => { + write!(f, "failed to create vring epoll handler: {e}") + } + VhostUserHandlerError::SpawnVringWorker(e) => { + write!(f, "failed spawning the vring worker: {e}") + } + VhostUserHandlerError::MissingMemoryMapping => write!(f, "Missing memory mapping"), + } + } +} + +impl error::Error for VhostUserHandlerError {} + +/// Result of vhost-user handler operations. 
+pub type VhostUserHandlerResult = std::result::Result; + +#[derive(Debug)] +struct AddrMapping { + #[cfg(feature = "postcopy")] + local_addr: u64, + vmm_addr: u64, + size: u64, + gpa_base: u64, +} + +pub struct VhostUserHandler { + backend: T, + handlers: Vec>>, + owned: bool, + features_acked: bool, + acked_features: u64, + acked_protocol_features: u64, + num_queues: usize, + max_queue_size: usize, + queues_per_thread: Vec, + mappings: Vec, + atomic_mem: GM, + vrings: Vec, + #[cfg(feature = "postcopy")] + uffd: Option, + worker_threads: Vec>>, +} + +// Ensure VhostUserHandler: Clone + Send + Sync + 'static. +impl VhostUserHandler +where + T: VhostUserBackend + Clone + 'static, + T::Vring: Clone + Send + Sync + 'static, + T::Bitmap: Clone + Send + Sync + 'static, +{ + pub(crate) fn new(backend: T, atomic_mem: GM) -> VhostUserHandlerResult { + let num_queues = backend.num_queues(); + let max_queue_size = backend.max_queue_size(); + let queues_per_thread = backend.queues_per_thread(); + + let mut vrings = Vec::new(); + for _ in 0..num_queues { + let vring = T::Vring::new(atomic_mem.clone(), max_queue_size as u16) + .map_err(VhostUserHandlerError::CreateVring)?; + vrings.push(vring); + } + + let mut handlers = Vec::new(); + let mut worker_threads = Vec::new(); + for (thread_id, queues_mask) in queues_per_thread.iter().enumerate() { + let mut thread_vrings = Vec::new(); + for (index, vring) in vrings.iter().enumerate() { + if (queues_mask >> index) & 1u64 == 1u64 { + thread_vrings.push(vring.clone()); + } + } + + let handler = Arc::new( + VringEpollHandler::new(backend.clone(), thread_vrings, thread_id) + .map_err(VhostUserHandlerError::CreateEpollHandler)?, + ); + let handler2 = handler.clone(); + let worker_thread = thread::Builder::new() + .name("vring_worker".to_string()) + .spawn(move || handler2.run()) + .map_err(VhostUserHandlerError::SpawnVringWorker)?; + + handlers.push(handler); + worker_threads.push(worker_thread); + } + + Ok(VhostUserHandler { + backend, + 
handlers, + owned: false, + features_acked: false, + acked_features: 0, + acked_protocol_features: 0, + num_queues, + max_queue_size, + queues_per_thread, + mappings: Vec::new(), + atomic_mem, + vrings, + #[cfg(feature = "postcopy")] + uffd: None, + worker_threads, + }) + } +} + +impl VhostUserHandler { + pub(crate) fn send_exit_event(&self) { + for handler in self.handlers.iter() { + handler.send_exit_event(); + } + } + + fn vmm_va_to_gpa(&self, vmm_va: u64) -> VhostUserHandlerResult { + for mapping in self.mappings.iter() { + if vmm_va >= mapping.vmm_addr && vmm_va < mapping.vmm_addr + mapping.size { + return Ok(vmm_va - mapping.vmm_addr + mapping.gpa_base); + } + } + + Err(VhostUserHandlerError::MissingMemoryMapping) + } +} + +impl VhostUserHandler +where + T: VhostUserBackend, +{ + pub(crate) fn get_epoll_handlers(&self) -> Vec>> { + self.handlers.clone() + } + + fn vring_needs_init(&self, vring: &T::Vring) -> bool { + let vring_state = vring.get_ref(); + + // If the vring wasn't initialized and we already have an EventFd for + // VRING_KICK, initialize it now. 
+ !vring_state.get_queue().ready() && vring_state.get_kick().is_some() + } + + fn initialize_vring(&self, vring: &T::Vring, index: u8) -> VhostUserResult<()> { + assert!(vring.get_ref().get_kick().is_some()); + + if let Some(fd) = vring.get_ref().get_kick() { + for (thread_index, queues_mask) in self.queues_per_thread.iter().enumerate() { + let shifted_queues_mask = queues_mask >> index; + if shifted_queues_mask & 1u64 == 1u64 { + let evt_idx = queues_mask.count_ones() - shifted_queues_mask.count_ones(); + self.handlers[thread_index] + .register_event(fd.as_raw_fd(), EventSet::IN, u64::from(evt_idx)) + .map_err(VhostUserError::ReqHandlerError)?; + break; + } + } + } + + vring.set_queue_ready(true); + + Ok(()) + } + + /// Helper to check if VirtioFeature enabled + fn check_feature(&self, feat: VhostUserVirtioFeatures) -> VhostUserResult<()> { + if self.acked_features & feat.bits() != 0 { + Ok(()) + } else { + Err(VhostUserError::InactiveFeature(feat)) + } + } +} + +impl VhostUserBackendReqHandlerMut for VhostUserHandler +where + T::Bitmap: BitmapReplace + NewBitmap + Clone, +{ + fn set_owner(&mut self) -> VhostUserResult<()> { + if self.owned { + return Err(VhostUserError::InvalidOperation("already claimed")); + } + self.owned = true; + Ok(()) + } + + fn reset_owner(&mut self) -> VhostUserResult<()> { + self.owned = false; + self.features_acked = false; + self.acked_features = 0; + self.acked_protocol_features = 0; + Ok(()) + } + + fn reset_device(&mut self) -> VhostUserResult<()> { + // Disable all vrings + for vring in self.vrings.iter_mut() { + vring.set_enabled(false); + } + + // Reset device state, retain protocol state + self.features_acked = false; + self.acked_features = 0; + self.backend.reset_device(); + Ok(()) + } + + fn get_features(&mut self) -> VhostUserResult { + Ok(self.backend.features()) + } + + fn set_features(&mut self, features: u64) -> VhostUserResult<()> { + log::debug!("set_features features={features:#018x} supported={:#018x}", 
self.backend.features()); + if (features & !self.backend.features()) != 0 { + log::error!("set_features: unsupported bits {:#018x}", features & !self.backend.features()); + return Err(VhostUserError::InvalidParam); + } + + self.acked_features = features; + self.features_acked = true; + + // Upon receiving a `VHOST_USER_SET_FEATURES` message from the front-end without + // `VHOST_USER_F_PROTOCOL_FEATURES` set, the back-end must enable all rings immediately. + // While processing the rings (whether they are enabled or not), the back-end must support + // changing some configuration aspects on the fly. + // (see https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#ring-states) + // + // Note: If `VHOST_USER_F_PROTOCOL_FEATURES` has been negotiated we must leave + // the vrings in their current state. + if self.acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() == 0 { + for vring in self.vrings.iter_mut() { + vring.set_enabled(true); + } + } + + let event_idx: bool = (self.acked_features & (1 << VIRTIO_RING_F_EVENT_IDX)) != 0; + for vring in self.vrings.iter_mut() { + vring.set_queue_event_idx(event_idx); + } + self.backend.set_event_idx(event_idx); + self.backend.acked_features(self.acked_features); + + Ok(()) + } + + fn set_mem_table( + &mut self, + ctx: &[VhostUserMemoryRegion], + files: Vec, + ) -> VhostUserResult<()> { + // We need to create tuple of ranges from the list of VhostUserMemoryRegion + // that we get from the caller. 
+ let mut regions = Vec::new(); + let mut mappings: Vec = Vec::new(); + + for (region, file) in ctx.iter().zip(files) { + let guest_region = GuestRegionMmap::new( + region.mmap_region(file)?, + GuestAddress(region.guest_phys_addr), + ) + .ok_or(VhostUserError::ReqHandlerError( + io::ErrorKind::InvalidInput.into(), + ))?; + mappings.push(AddrMapping { + #[cfg(feature = "postcopy")] + local_addr: guest_region.as_ptr() as u64, + vmm_addr: region.user_addr, + size: region.memory_size, + gpa_base: region.guest_phys_addr, + }); + regions.push(guest_region); + } + + let mem = GuestMemoryMmap::from_regions(regions) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + + // Updating the inner GuestMemory object here will cause all our vrings to + // see the new one the next time they call to `atomic_mem.memory()`. + self.atomic_mem.lock().unwrap().replace(mem); + + self.backend + .update_memory(self.atomic_mem.clone()) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + self.mappings = mappings; + + Ok(()) + } + + fn set_vring_num(&mut self, index: u32, num: u32) -> VhostUserResult<()> { + log::debug!("set_vring_num index={index} num={num} max={}", self.max_queue_size); + let vring = self + .vrings + .get(index as usize) + .ok_or_else(|| { log::error!("set_vring_num: index {index} out of range (have {})", self.vrings.len()); VhostUserError::InvalidParam })?; + + if num == 0 || num as usize > self.max_queue_size { + log::error!("set_vring_num: num {num} invalid (max={})", self.max_queue_size); + return Err(VhostUserError::InvalidParam); + } + vring.set_queue_size(num as u16); + Ok(()) + } + + fn set_vring_addr( + &mut self, + index: u32, + _flags: VhostUserVringAddrFlags, + descriptor: u64, + used: u64, + available: u64, + _log: u64, + ) -> VhostUserResult<()> { + log::debug!("set_vring_addr index={index} desc={descriptor:#x} used={used:#x} avail={available:#x}"); + let vring = self + .vrings + .get(index as usize) + .ok_or_else(|| { 
log::error!("set_vring_addr: index {index} out of range (have {})", self.vrings.len()); VhostUserError::InvalidParam })?; + + if !self.mappings.is_empty() { + let desc_table = self + .vmm_va_to_gpa(descriptor) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + let avail_ring = self + .vmm_va_to_gpa(available) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + let used_ring = self + .vmm_va_to_gpa(used) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + vring + .set_queue_info(desc_table, avail_ring, used_ring) + .map_err(|e| { log::error!("set_vring_addr: set_queue_info failed: {e:?}"); VhostUserError::InvalidParam })?; + + // SET_VRING_BASE will only restore the 'avail' index, however, after the guest driver + // changes, for instance, after reboot, the 'used' index should be reset to 0. + // + // So let's fetch the used index from the vring as set by the guest here to keep + // compatibility with the QEMU's vhost-user library just in case, any implementation + // expects the 'used' index to be set when receiving a SET_VRING_ADDR message. + // + // Note: I'm not sure why QEMU's vhost-user library sets the 'user' index here, + // _probably_ to make sure that the VQ is already configured. A better solution would + // be to receive the 'used' index in SET_VRING_BASE, as is done when using packed VQs. 
+ let idx = vring + .queue_used_idx() + .map_err(|_| VhostUserError::BackendInternalError)?; + vring.set_queue_next_used(idx); + + Ok(()) + } else { + Err(VhostUserError::InvalidParam) + } + } + + fn set_vring_base(&mut self, index: u32, base: u32) -> VhostUserResult<()> { + log::debug!("set_vring_base index={index} base={base}"); + let vring = self + .vrings + .get(index as usize) + .ok_or_else(|| { log::error!("set_vring_base: index {index} out of range"); VhostUserError::InvalidParam })?; + + vring.set_queue_next_avail(base as u16); + + Ok(()) + } + + fn get_vring_base(&mut self, index: u32) -> VhostUserResult { + let vring = self + .vrings + .get(index as usize) + .ok_or(VhostUserError::InvalidParam)?; + + // Quote from vhost-user specification: + // Client must start ring upon receiving a kick (that is, detecting + // that file descriptor is readable) on the descriptor specified by + // VHOST_USER_SET_VRING_KICK, and stop ring upon receiving + // VHOST_USER_GET_VRING_BASE. + vring.set_queue_ready(false); + + if let Some(fd) = vring.get_ref().get_kick() { + for (thread_index, queues_mask) in self.queues_per_thread.iter().enumerate() { + let shifted_queues_mask = queues_mask >> index; + if shifted_queues_mask & 1u64 == 1u64 { + let evt_idx = queues_mask.count_ones() - shifted_queues_mask.count_ones(); + self.handlers[thread_index] + .unregister_event(fd.as_raw_fd(), EventSet::IN, u64::from(evt_idx)) + .map_err(VhostUserError::ReqHandlerError)?; + break; + } + } + } + + let next_avail = vring.queue_next_avail(); + + vring.set_kick(None); + vring.set_call(None); + + Ok(VhostUserVringState::new(index, u32::from(next_avail))) + } + + fn set_vring_kick(&mut self, index: u8, file: Option) -> VhostUserResult<()> { + log::debug!("set_vring_kick index={index}"); + let vring = self + .vrings + .get(index as usize) + .ok_or_else(|| { log::error!("set_vring_kick: index {index} out of range"); VhostUserError::InvalidParam })?; + + // SAFETY: EventFd requires that it has sole 
ownership of its fd. So + // does File, so this is safe. + // Ideally, we'd have a generic way to refer to a uniquely-owned fd, + // such as that proposed by Rust RFC #3128. + vring.set_kick(file); + + if self.vring_needs_init(vring) { + self.initialize_vring(vring, index)?; + } + + Ok(()) + } + + fn set_vring_call(&mut self, index: u8, file: Option) -> VhostUserResult<()> { + let vring = self + .vrings + .get(index as usize) + .ok_or(VhostUserError::InvalidParam)?; + + vring.set_call(file); + + if self.vring_needs_init(vring) { + self.initialize_vring(vring, index)?; + } + + Ok(()) + } + + fn set_vring_err(&mut self, index: u8, file: Option) -> VhostUserResult<()> { + let vring = self + .vrings + .get(index as usize) + .ok_or(VhostUserError::InvalidParam)?; + + vring.set_err(file); + + Ok(()) + } + + fn get_protocol_features(&mut self) -> VhostUserResult { + Ok(self.backend.protocol_features()) + } + + fn set_protocol_features(&mut self, features: u64) -> VhostUserResult<()> { + // Note: backend that reported VHOST_USER_F_PROTOCOL_FEATURES must + // support this message even before VHOST_USER_SET_FEATURES was + // called. + self.acked_protocol_features = features; + Ok(()) + } + + fn get_queue_num(&mut self) -> VhostUserResult { + Ok(self.num_queues as u64) + } + + fn set_vring_enable(&mut self, index: u32, enable: bool) -> VhostUserResult<()> { + // This request should be handled only when VHOST_USER_F_PROTOCOL_FEATURES + // has been negotiated. + self.check_feature(VhostUserVirtioFeatures::PROTOCOL_FEATURES)?; + + let vring = self + .vrings + .get(index as usize) + .ok_or(VhostUserError::InvalidParam)?; + + // Backend must not pass data to/from the backend until ring is + // enabled by VHOST_USER_SET_VRING_ENABLE with parameter 1, + // or after it has been disabled by VHOST_USER_SET_VRING_ENABLE + // with parameter 0. 
+ vring.set_enabled(enable); + + Ok(()) + } + + fn get_config( + &mut self, + offset: u32, + size: u32, + _flags: VhostUserConfigFlags, + ) -> VhostUserResult> { + Ok(self.backend.get_config(offset, size)) + } + + fn set_config( + &mut self, + offset: u32, + buf: &[u8], + _flags: VhostUserConfigFlags, + ) -> VhostUserResult<()> { + self.backend + .set_config(offset, buf) + .map_err(VhostUserError::ReqHandlerError) + } + + fn set_backend_req_fd(&mut self, backend: Backend) { + if self.acked_protocol_features & VhostUserProtocolFeatures::REPLY_ACK.bits() != 0 { + backend.set_reply_ack_flag(true); + } + if self.acked_protocol_features & VhostUserProtocolFeatures::SHARED_OBJECT.bits() != 0 { + backend.set_shared_object_flag(true); + } + self.backend.set_backend_req_fd(backend); + } + + fn set_gpu_socket(&mut self, gpu_backend: GpuBackend) -> VhostUserResult<()> { + self.backend + .set_gpu_socket(gpu_backend) + .map_err(VhostUserError::ReqHandlerError) + } + + fn get_shared_object(&mut self, uuid: VhostUserSharedMsg) -> VhostUserResult { + match self.backend.get_shared_object(uuid) { + Ok(shared_file) => Ok(shared_file), + Err(e) => Err(VhostUserError::ReqHandlerError(io::Error::other(e))), + } + } + + fn get_inflight_fd( + &mut self, + _inflight: &vhost::vhost_user::message::VhostUserInflight, + ) -> VhostUserResult<(vhost::vhost_user::message::VhostUserInflight, File)> { + // Assume the backend hasn't negotiated the inflight feature; it + // wouldn't be correct for the backend to do so, as we don't (yet) + // provide a way for it to handle such requests. 
+ Err(VhostUserError::InvalidOperation("not supported")) + } + + fn set_inflight_fd( + &mut self, + _inflight: &vhost::vhost_user::message::VhostUserInflight, + _file: File, + ) -> VhostUserResult<()> { + Err(VhostUserError::InvalidOperation("not supported")) + } + + fn get_max_mem_slots(&mut self) -> VhostUserResult { + Ok(MAX_MEM_SLOTS) + } + + fn add_mem_region( + &mut self, + region: &VhostUserSingleMemoryRegion, + file: File, + ) -> VhostUserResult<()> { + let guest_region = Arc::new( + GuestRegionMmap::new( + region.mmap_region(file)?, + GuestAddress(region.guest_phys_addr), + ) + .ok_or(VhostUserError::ReqHandlerError( + io::ErrorKind::InvalidInput.into(), + ))?, + ); + + let addr_mapping = AddrMapping { + #[cfg(feature = "postcopy")] + local_addr: guest_region.as_ptr() as u64, + vmm_addr: region.user_addr, + size: region.memory_size, + gpa_base: region.guest_phys_addr, + }; + + let mem = self + .atomic_mem + .memory() + .insert_region(guest_region) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + + self.atomic_mem.lock().unwrap().replace(mem); + + self.backend + .update_memory(self.atomic_mem.clone()) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + + self.mappings.push(addr_mapping); + + Ok(()) + } + + fn remove_mem_region(&mut self, region: &VhostUserSingleMemoryRegion) -> VhostUserResult<()> { + let (mem, _) = self + .atomic_mem + .memory() + .remove_region(GuestAddress(region.guest_phys_addr), region.memory_size) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + + self.atomic_mem.lock().unwrap().replace(mem); + + self.backend + .update_memory(self.atomic_mem.clone()) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + + self.mappings + .retain(|mapping| mapping.gpa_base != region.guest_phys_addr); + + Ok(()) + } + + fn set_device_state_fd( + &mut self, + direction: VhostTransferStateDirection, + phase: VhostTransferStatePhase, + file: File, + ) -> 
VhostUserResult> { + self.backend + .set_device_state_fd(direction, phase, file) + .map_err(VhostUserError::ReqHandlerError) + } + + fn check_device_state(&mut self) -> VhostUserResult<()> { + self.backend + .check_device_state() + .map_err(VhostUserError::ReqHandlerError) + } + + #[cfg(feature = "postcopy")] + fn postcopy_advice(&mut self) -> VhostUserResult { + let mut uffd_builder = UffdBuilder::new(); + + let uffd = uffd_builder + .close_on_exec(true) + .non_blocking(true) + .user_mode_only(false) + .create() + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + + // We need to duplicate the uffd fd because we need both + // to return File with fd and store fd inside uffd. + // + // SAFETY: + // We know that uffd is correctly created. + // This means fd inside uffd is also a valid fd. + // Duplicating a valid fd is safe. + let uffd_dup = unsafe { libc::dup(uffd.as_raw_fd()) }; + if uffd_dup < 0 { + return Err(VhostUserError::ReqHandlerError(io::Error::last_os_error())); + } + + // SAFETY: + // We know that uffd_dup is a valid fd. + let uffd_file = unsafe { File::from_raw_fd(uffd_dup) }; + + self.uffd = Some(uffd); + + Ok(uffd_file) + } + + #[cfg(feature = "postcopy")] + fn postcopy_listen(&mut self) -> VhostUserResult<()> { + let Some(ref uffd) = self.uffd else { + return Err(VhostUserError::ReqHandlerError(io::Error::other( + "No registered UFFD handler", + ))); + }; + + for mapping in self.mappings.iter() { + uffd.register( + mapping.local_addr as *mut libc::c_void, + mapping.size as usize, + ) + .map_err(|e| VhostUserError::ReqHandlerError(io::Error::other(e)))?; + } + + Ok(()) + } + + #[cfg(feature = "postcopy")] + fn postcopy_end(&mut self) -> VhostUserResult<()> { + self.uffd = None; + Ok(()) + } + + // Sets logging (i.e., bitmap) shared memory space. + // + // During live migration, the front-end may need to track the modifications the back-end + // makes to the memory mapped regions. The front-end should mark the dirty pages in a log. 
+ // Once it complies to this logging, it may declare the `VHOST_F_LOG_ALL` vhost feature. + // + // If the backend has the `VHOST_USER_PROTOCOL_F_LOG_SHMFD` protocol feature it may receive + // the `VHOST_USER_SET_LOG_BASE` message. The log memory file descriptor is provided in `file`, + // the size and offset of shared memory area are provided in the `VhostUserLog` message. + // + // See https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#migration. + // TODO: We ignore the `LOG_ALL` flag on `SET_FEATURES`, so we will continue marking pages as + // dirty even if the migration fails. We need to disable the logging after receiving a + // `SET_FEATURE` without the `LOG_ALL` flag. + fn set_log_base(&mut self, log: &VhostUserLog, file: File) -> VhostUserResult<()> { + let mem = self.atomic_mem.memory(); + + let logmem = Arc::new( + MmapLogReg::from_file(file.as_fd(), log.mmap_offset, log.mmap_size) + .map_err(VhostUserError::ReqHandlerError)?, + ); + + // Let's create all bitmaps first before replacing them, in case any of them fails + let mut bitmaps = Vec::new(); + for region in mem.iter() { + let bitmap = <::Bitmap as BitmapReplace>::InnerBitmap::new( + region, + Arc::clone(&logmem), + ) + .map_err(VhostUserError::ReqHandlerError)?; + + bitmaps.push((region, bitmap)); + } + + for (region, bitmap) in bitmaps { + (*region).bitmap().replace(bitmap); + } + + Ok(()) + } +} + +impl Drop for VhostUserHandler { + fn drop(&mut self) { + // Signal all working threads to exit. + self.send_exit_event(); + + for thread in self.worker_threads.drain(..) { + if let Err(e) = thread.join() { + error!("Error in vring worker: {:?}", e); + } + } + } +} diff --git a/vhost-user-backend-patched/src/lib.rs b/vhost-user-backend-patched/src/lib.rs new file mode 100644 index 0000000..7ade20a --- /dev/null +++ b/vhost-user-backend-patched/src/lib.rs @@ -0,0 +1,374 @@ +// Copyright 2019 Intel Corporation. All Rights Reserved. +// Copyright 2019-2021 Alibaba Cloud Computing. 
All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! A simple framework to run a vhost-user backend service. + +#[macro_use] +extern crate log; + +use std::fmt::{Display, Formatter}; +use std::path::Path; +use std::sync::{Arc, Mutex}; +use std::thread; + +use vhost::vhost_user::{BackendListener, BackendReqHandler, Error as VhostUserError, Listener}; +use vm_memory::mmap::NewBitmap; +use vm_memory::{GuestMemoryAtomic, GuestMemoryMmap}; + +use self::handler::VhostUserHandler; + +mod backend; +pub use self::backend::{VhostUserBackend, VhostUserBackendMut}; + +mod event_loop; +pub use self::event_loop::VringEpollHandler; + +mod handler; +pub use self::handler::VhostUserHandlerError; + +pub mod bitmap; +use crate::bitmap::BitmapReplace; + +mod vring; +pub use self::vring::{ + VringMutex, VringRwLock, VringState, VringStateGuard, VringStateMutGuard, VringT, +}; + +// Due to the way `xen` handles memory mappings we can not combine it with +// `postcopy` feature which relies on persistent memory mappings. Thus we +// disallow enabling both features at the same time. +#[cfg(all( + not(RUSTDOC_disable_feature_compat_errors), + not(doc), + feature = "postcopy", + feature = "xen" +))] +compile_error!("Both `postcopy` and `xen` features can not be enabled at the same time."); + +/// An alias for `GuestMemoryAtomic>` to simplify code. +type GM = GuestMemoryAtomic>; + +#[derive(Debug)] +/// Errors related to vhost-user daemon. +pub enum Error { + /// Failed to create a new vhost-user handler. + NewVhostUserHandler(VhostUserHandlerError), + /// Failed creating vhost-user backend listener. + CreateBackendListener(VhostUserError), + /// Failed creating vhost-user backend handler. + CreateBackendReqHandler(VhostUserError), + /// Failed creating listener socket + CreateVhostUserListener(VhostUserError), + /// Failed starting daemon thread. + StartDaemon(std::io::Error), + /// Failed waiting for daemon thread. 
+ WaitDaemon(std::boxed::Box), + /// Failed handling a vhost-user request. + HandleRequest(VhostUserError), +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + match self { + Error::NewVhostUserHandler(e) => write!(f, "cannot create vhost user handler: {e}"), + Error::CreateBackendListener(e) => write!(f, "cannot create backend listener: {e}"), + Error::CreateBackendReqHandler(e) => { + write!(f, "cannot create backend req handler: {e}") + } + Error::CreateVhostUserListener(e) => { + write!(f, "cannot create vhost-user listener: {e}") + } + Error::StartDaemon(e) => write!(f, "failed to start daemon: {e}"), + Error::WaitDaemon(_e) => write!(f, "failed to wait for daemon exit"), + Error::HandleRequest(e) => write!(f, "failed to handle request: {e}"), + } + } +} + +/// Result of vhost-user daemon operations. +pub type Result = std::result::Result; + +/// Implement a simple framework to run a vhost-user service daemon. +/// +/// This structure is the public API the backend is allowed to interact with in order to run +/// a fully functional vhost-user daemon. +pub struct VhostUserDaemon { + name: String, + handler: Arc>>, + main_thread: Option>>, +} + +impl VhostUserDaemon +where + T: VhostUserBackend + Clone + 'static, + T::Bitmap: BitmapReplace + NewBitmap + Clone + Send + Sync, + T::Vring: Clone + Send + Sync, +{ + /// Create the daemon instance, providing the backend implementation of `VhostUserBackend`. + /// + /// Under the hood, this will start a dedicated thread responsible for listening onto + /// registered event. Those events can be vring events or custom events from the backend, + /// but they get to be registered later during the sequence. 
+ pub fn new( + name: String, + backend: T, + atomic_mem: GuestMemoryAtomic>, + ) -> Result { + let handler = Arc::new(Mutex::new( + VhostUserHandler::new(backend, atomic_mem).map_err(Error::NewVhostUserHandler)?, + )); + + Ok(VhostUserDaemon { + name, + handler, + main_thread: None, + }) + } + + /// Run a dedicated thread handling all requests coming through the socket. + /// This runs in an infinite loop that should be terminating once the other + /// end of the socket (the VMM) hangs up. + /// + /// This function is the common code for starting a new daemon, no matter if + /// it acts as a client or a server. + fn start_daemon( + &mut self, + mut handler: BackendReqHandler>>, + ) -> Result<()> { + let handle = thread::Builder::new() + .name(self.name.clone()) + .spawn(move || loop { + handler.handle_request().map_err(Error::HandleRequest)?; + }) + .map_err(Error::StartDaemon)?; + + self.main_thread = Some(handle); + + Ok(()) + } + + /// Connect to the vhost-user socket and run a dedicated thread handling + /// all requests coming through this socket. This runs in an infinite loop + /// that should be terminating once the other end of the socket (the VMM) + /// hangs up. + pub fn start_client(&mut self, socket_path: &str) -> Result<()> { + let backend_handler = BackendReqHandler::connect(socket_path, self.handler.clone()) + .map_err(Error::CreateBackendReqHandler)?; + self.start_daemon(backend_handler) + } + + /// Listen to the vhost-user socket and run a dedicated thread handling all requests coming + /// through this socket. + /// + /// This runs in an infinite loop that should be terminating once the other end of the socket + /// (the VMM) disconnects. + /// + /// *Note:* A convenience function [VhostUserDaemon::serve] exists that + /// may be a better option than this for simple use-cases. 
+ pub fn start(&mut self, listener: &mut Listener) -> Result<()> { + let mut backend_listener = BackendListener::new(listener, self.handler.clone()) + .map_err(Error::CreateBackendListener)?; + let backend_handler = self.accept(&mut backend_listener)?; + self.start_daemon(backend_handler) + } + + fn accept( + &self, + backend_listener: &mut BackendListener>>, + ) -> Result>>> { + loop { + match backend_listener.accept() { + Err(e) => return Err(Error::CreateBackendListener(e)), + Ok(Some(v)) => return Ok(v), + Ok(None) => continue, + } + } + } + + /// Wait for the thread handling the vhost-user socket connection to terminate. + /// + /// *Note:* A convenience function [VhostUserDaemon::serve] exists that + /// may be a better option than this for simple use-cases. + pub fn wait(&mut self) -> Result<()> { + if let Some(handle) = self.main_thread.take() { + match handle.join().map_err(Error::WaitDaemon)? { + Ok(()) => Ok(()), + Err(Error::HandleRequest(VhostUserError::SocketBroken(_))) => Ok(()), + Err(e) => Err(e), + } + } else { + Ok(()) + } + } + + /// Bind to socket, handle a single connection and shutdown + /// + /// This is a convenience function that provides an easy way to handle the + /// following actions without needing to call the low-level functions: + /// - Create a listener + /// - Start listening + /// - Handle a single event + /// - Send the exit event to all handler threads + /// + /// Internal `Err` results that indicate a device disconnect will be treated + /// as success and `Ok(())` will be returned in those cases. + /// + /// *Note:* See [VhostUserDaemon::start] and [VhostUserDaemon::wait] if you + /// need more flexibility. 
+ pub fn serve>(&mut self, socket: P) -> Result<()> { + let mut listener = Listener::new(socket, true).map_err(Error::CreateVhostUserListener)?; + + self.start(&mut listener)?; + let result = self.wait(); + + // Regardless of the result, we want to signal worker threads to exit + self.handler.lock().unwrap().send_exit_event(); + + // For this convenience function we are not treating certain "expected" + // outcomes as error. Disconnects and partial messages can be usual + // behaviour seen from quitting guests. + match &result { + Err(e) => match e { + Error::HandleRequest(VhostUserError::Disconnected) => Ok(()), + Error::HandleRequest(VhostUserError::PartialMessage) => Ok(()), + _ => result, + }, + _ => result, + } + } + + /// Retrieve the vring epoll handler. + /// + /// This is necessary to perform further actions like registering and unregistering some extra + /// event file descriptors. + pub fn get_epoll_handlers(&self) -> Vec>> { + // Do not expect poisoned lock. + self.handler.lock().unwrap().get_epoll_handlers() + } +} + +#[cfg(test)] +mod tests { + use super::backend::tests::MockVhostBackend; + use super::*; + use libc::EAGAIN; + use std::os::unix::net::{UnixListener, UnixStream}; + use std::sync::Barrier; + use std::time::Duration; + use vm_memory::{GuestAddress, GuestMemoryAtomic, GuestMemoryMmap}; + + #[test] + fn test_new_daemon() { + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ); + let backend = Arc::new(Mutex::new(MockVhostBackend::new())); + let mut daemon = VhostUserDaemon::new("test".to_owned(), backend, mem).unwrap(); + + let handlers = daemon.get_epoll_handlers(); + assert_eq!(handlers.len(), 2); + + let barrier = Arc::new(Barrier::new(2)); + let tmpdir = tempfile::tempdir().unwrap(); + let path = tmpdir.path().join("socket"); + + thread::scope(|s| { + s.spawn(|| { + barrier.wait(); + let socket = UnixStream::connect(&path).unwrap(); + barrier.wait(); + drop(socket) + 
}); + + let mut listener = Listener::new(&path, false).unwrap(); + barrier.wait(); + daemon.start(&mut listener).unwrap(); + barrier.wait(); + // Above process generates a `HandleRequest(PartialMessage)` error. + daemon.wait().unwrap_err(); + daemon.wait().unwrap(); + }); + } + + #[test] + fn test_new_daemon_client() { + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ); + let backend = Arc::new(Mutex::new(MockVhostBackend::new())); + let mut daemon = VhostUserDaemon::new("test".to_owned(), backend, mem).unwrap(); + + let handlers = daemon.get_epoll_handlers(); + assert_eq!(handlers.len(), 2); + + let barrier = Arc::new(Barrier::new(2)); + let tmpdir = tempfile::tempdir().unwrap(); + let path = tmpdir.path().join("socket"); + + thread::scope(|s| { + s.spawn(|| { + let listener = UnixListener::bind(&path).unwrap(); + barrier.wait(); + let (stream, _) = listener.accept().unwrap(); + barrier.wait(); + drop(stream) + }); + + barrier.wait(); + daemon + .start_client(path.as_path().to_str().unwrap()) + .unwrap(); + barrier.wait(); + // Above process generates a `HandleRequest(PartialMessage)` error. + daemon.wait().unwrap_err(); + daemon.wait().unwrap(); + }); + } + + #[test] + fn test_daemon_serve() { + let mem = GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ); + let backend = Arc::new(Mutex::new(MockVhostBackend::new())); + let mut daemon = VhostUserDaemon::new("test".to_owned(), backend.clone(), mem).unwrap(); + let tmpdir = tempfile::tempdir().unwrap(); + let socket_path = tmpdir.path().join("socket"); + + thread::scope(|s| { + s.spawn(|| { + let _ = daemon.serve(&socket_path); + }); + + // We have no way to wait for when the server becomes available... + // So we will have to spin! 
+ while !socket_path.exists() { + thread::sleep(Duration::from_millis(10)); + } + + // Check that no exit events got triggered yet + for thread_id in 0..backend.queues_per_thread().len() { + let fd = backend.exit_event(thread_id).unwrap(); + // Reading from exit fd should fail since nothing was written yet + assert_eq!( + fd.0.consume().unwrap_err().raw_os_error().unwrap(), + EAGAIN, + "exit event should not have been raised yet!" + ); + } + + let socket = UnixStream::connect(&socket_path).unwrap(); + // disconnect immediately again + drop(socket); + }); + + // Check that exit events got triggered + let backend = backend.lock().unwrap(); + for thread_id in 0..backend.queues_per_thread().len() { + let fd = backend.exit_event(thread_id).unwrap(); + assert!(fd.0.consume().is_ok(), "No exit event was raised!"); + } + } +} diff --git a/vhost-user-backend-patched/src/vring.rs b/vhost-user-backend-patched/src/vring.rs new file mode 100644 index 0000000..3b4284c --- /dev/null +++ b/vhost-user-backend-patched/src/vring.rs @@ -0,0 +1,581 @@ +// Copyright 2019 Intel Corporation. All Rights Reserved. +// Copyright 2021 Alibaba Cloud Computing. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +//! Struct to maintain state information and manipulate vhost-user queues. + +use std::fs::File; +use std::io; +use std::ops::{Deref, DerefMut}; +use std::os::unix::io::{FromRawFd, IntoRawFd}; +use std::result::Result; +use std::sync::atomic::Ordering; +use std::sync::{Arc, Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard}; + +use virtio_queue::{Error as VirtQueError, Queue, QueueT}; +use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap}; +use vmm_sys_util::event::{EventConsumer, EventNotifier}; + +/// Trait for objects returned by `VringT::get_ref()`. +pub trait VringStateGuard<'a, M: GuestAddressSpace> { + /// Type for guard returned by `VringT::get_ref()`. 
+ type G: Deref>; +} + +/// Trait for objects returned by `VringT::get_mut()`. +pub trait VringStateMutGuard<'a, M: GuestAddressSpace> { + /// Type for guard returned by `VringT::get_mut()`. + type G: DerefMut>; +} + +pub trait VringT: + for<'a> VringStateGuard<'a, M> + for<'a> VringStateMutGuard<'a, M> +{ + /// Create a new instance of Vring. + fn new(mem: M, max_queue_size: u16) -> Result + where + Self: Sized; + + /// Get an immutable reference to the kick event fd. + fn get_ref(&self) -> >::G; + + /// Get a mutable reference to the kick event fd. + fn get_mut(&self) -> >::G; + + /// Add an used descriptor into the used queue. + fn add_used(&self, desc_index: u16, len: u32) -> Result<(), VirtQueError>; + + /// Notify the vhost-user frontend that used descriptors have been put into the used queue. + fn signal_used_queue(&self) -> io::Result<()>; + + /// Enable event notification for queue. + fn enable_notification(&self) -> Result; + + /// Disable event notification for queue. + fn disable_notification(&self) -> Result<(), VirtQueError>; + + /// Check whether a notification to the guest is needed. + fn needs_notification(&self) -> Result; + + /// Set vring enabled state. + fn set_enabled(&self, enabled: bool); + + /// Set queue addresses for descriptor table, available ring and used ring. + fn set_queue_info( + &self, + desc_table: u64, + avail_ring: u64, + used_ring: u64, + ) -> Result<(), VirtQueError>; + + /// Get queue next avail head. + fn queue_next_avail(&self) -> u16; + + /// Set queue next avail head. + fn set_queue_next_avail(&self, base: u16); + + /// Set queue next used head. + fn set_queue_next_used(&self, idx: u16); + + /// Get queue next used head index from the guest memory. + fn queue_used_idx(&self) -> Result; + + /// Set configured queue size. + fn set_queue_size(&self, num: u16); + + /// Enable/disable queue event index feature. + fn set_queue_event_idx(&self, enabled: bool); + + /// Set queue enabled state. 
+ fn set_queue_ready(&self, ready: bool); + + /// Set `EventFd` for kick. + fn set_kick(&self, file: Option); + + /// Read event from the kick `EventFd`. + fn read_kick(&self) -> io::Result; + + /// Set `EventFd` for call. + fn set_call(&self, file: Option); + + /// Set `EventFd` for err. + fn set_err(&self, file: Option); +} + +/// Struct to maintain raw state information for a vhost-user queue. +/// +/// This struct maintains all information of a virito queue, and could be used as an `VringT` +/// object for single-threaded context. +pub struct VringState> { + queue: Queue, + kick: Option, + call: Option, + err: Option, + enabled: bool, + mem: M, +} + +impl VringState { + /// Create a new instance of Vring. + fn new(mem: M, max_queue_size: u16) -> Result { + Ok(VringState { + queue: Queue::new(max_queue_size)?, + kick: None, + call: None, + err: None, + enabled: false, + mem, + }) + } + + /// Get an immutable reference to the underlying raw `Queue` object. + pub fn get_queue(&self) -> &Queue { + &self.queue + } + + /// Get a mutable reference to the underlying raw `Queue` object. + pub fn get_queue_mut(&mut self) -> &mut Queue { + &mut self.queue + } + + /// Add an used descriptor into the used queue. + pub fn add_used(&mut self, desc_index: u16, len: u32) -> Result<(), VirtQueError> { + self.queue + .add_used(self.mem.memory().deref(), desc_index, len) + } + + /// Notify the vhost-user frontend that used descriptors have been put into the used queue. + pub fn signal_used_queue(&self) -> io::Result<()> { + if let Some(call) = self.call.as_ref() { + call.notify() + } else { + Ok(()) + } + } + + /// Enable event notification for queue. + pub fn enable_notification(&mut self) -> Result { + self.queue.enable_notification(self.mem.memory().deref()) + } + + /// Disable event notification for queue. 
+ pub fn disable_notification(&mut self) -> Result<(), VirtQueError> { + self.queue.disable_notification(self.mem.memory().deref()) + } + + /// Check whether a notification to the guest is needed. + pub fn needs_notification(&mut self) -> Result { + self.queue.needs_notification(self.mem.memory().deref()) + } + + /// Set vring enabled state. + pub fn set_enabled(&mut self, enabled: bool) { + self.enabled = enabled; + } + + /// Set queue addresses for descriptor table, available ring and used ring. + pub fn set_queue_info( + &mut self, + desc_table: u64, + avail_ring: u64, + used_ring: u64, + ) -> Result<(), VirtQueError> { + self.queue + .try_set_desc_table_address(GuestAddress(desc_table))?; + self.queue + .try_set_avail_ring_address(GuestAddress(avail_ring))?; + self.queue + .try_set_used_ring_address(GuestAddress(used_ring)) + } + + /// Get queue next avail head. + fn queue_next_avail(&self) -> u16 { + self.queue.next_avail() + } + + /// Set queue next avail head. + fn set_queue_next_avail(&mut self, base: u16) { + self.queue.set_next_avail(base); + } + + /// Set queue next used head. + fn set_queue_next_used(&mut self, idx: u16) { + self.queue.set_next_used(idx); + } + + /// Get queue next used head index from the guest memory. + fn queue_used_idx(&self) -> Result { + self.queue + .used_idx(self.mem.memory().deref(), Ordering::Relaxed) + .map(|idx| idx.0) + } + + /// Set configured queue size. + fn set_queue_size(&mut self, num: u16) { + self.queue.set_size(num); + } + + /// Enable/disable queue event index feature. + fn set_queue_event_idx(&mut self, enabled: bool) { + self.queue.set_event_idx(enabled); + } + + /// Set queue enabled state. + fn set_queue_ready(&mut self, ready: bool) { + self.queue.set_ready(ready); + } + + /// Get the `EventFd` for kick. + pub fn get_kick(&self) -> &Option { + &self.kick + } + + /// Set `EventFd` for kick. + fn set_kick(&mut self, file: Option) { + // SAFETY: + // EventFd requires that it has sole ownership of its fd. 
So does File, so this is safe. + // Ideally, we'd have a generic way to refer to a uniquely-owned fd, such as that proposed + // by Rust RFC #3128. + self.kick = file.map(|f| unsafe { EventConsumer::from_raw_fd(f.into_raw_fd()) }); + } + + /// Read event from the kick `EventFd`. + fn read_kick(&self) -> io::Result { + if let Some(kick) = &self.kick { + kick.consume()?; + } + + Ok(self.enabled) + } + + /// Set `EventFd` for call. + fn set_call(&mut self, file: Option) { + // SAFETY: see comment in set_kick() + self.call = file.map(|f| unsafe { EventNotifier::from_raw_fd(f.into_raw_fd()) }); + } + + /// Get the `EventFd` for call. + pub fn get_call(&self) -> &Option { + &self.call + } + + /// Set `EventFd` for err. + fn set_err(&mut self, file: Option) { + // SAFETY: see comment in set_kick() + self.err = file.map(|f| unsafe { EventConsumer::from_raw_fd(f.into_raw_fd()) }); + } +} + +/// A `VringState` object protected by Mutex for multi-threading context. +#[derive(Clone)] +pub struct VringMutex> { + state: Arc>>, +} + +impl VringMutex { + /// Get a mutable guard to the underlying raw `VringState` object. 
+    fn lock(&self) -> MutexGuard<'_, VringState<M>> {
+        self.state.lock().unwrap()
+    }
+}
+
+impl<'a, M: 'a + GuestAddressSpace> VringStateGuard<'a, M> for VringMutex<M> {
+    type G = MutexGuard<'a, VringState<M>>;
+}
+
+impl<'a, M: 'a + GuestAddressSpace> VringStateMutGuard<'a, M> for VringMutex<M> {
+    type G = MutexGuard<'a, VringState<M>>;
+}
+
+impl<M: 'static + GuestAddressSpace> VringT<M> for VringMutex<M> {
+    fn new(mem: M, max_queue_size: u16) -> Result<Self, VirtQueError> {
+        Ok(VringMutex {
+            state: Arc::new(Mutex::new(VringState::new(mem, max_queue_size)?)),
+        })
+    }
+
+    fn get_ref(&self) -> <Self as VringStateGuard<'_, M>>::G {
+        self.state.lock().unwrap() // Mutex has no read-only mode: same guard type as `get_mut()`
+    }
+
+    fn get_mut(&self) -> <Self as VringStateMutGuard<'_, M>>::G {
+        self.lock()
+    }
+
+    fn add_used(&self, desc_index: u16, len: u32) -> Result<(), VirtQueError> {
+        self.lock().add_used(desc_index, len)
+    }
+
+    fn signal_used_queue(&self) -> io::Result<()> {
+        self.get_ref().signal_used_queue()
+    }
+
+    fn enable_notification(&self) -> Result<bool, VirtQueError> {
+        self.lock().enable_notification()
+    }
+
+    fn disable_notification(&self) -> Result<(), VirtQueError> {
+        self.lock().disable_notification()
+    }
+
+    fn needs_notification(&self) -> Result<bool, VirtQueError> {
+        self.lock().needs_notification()
+    }
+
+    fn set_enabled(&self, enabled: bool) {
+        self.lock().set_enabled(enabled)
+    }
+
+    fn set_queue_info(
+        &self,
+        desc_table: u64,
+        avail_ring: u64,
+        used_ring: u64,
+    ) -> Result<(), VirtQueError> {
+        self.lock()
+            .set_queue_info(desc_table, avail_ring, used_ring)
+    }
+
+    fn queue_next_avail(&self) -> u16 {
+        self.get_ref().queue_next_avail()
+    }
+
+    fn set_queue_next_avail(&self, base: u16) {
+        self.lock().set_queue_next_avail(base)
+    }
+
+    fn set_queue_next_used(&self, idx: u16) {
+        self.lock().set_queue_next_used(idx)
+    }
+
+    fn queue_used_idx(&self) -> Result<u16, VirtQueError> {
+        self.lock().queue_used_idx()
+    }
+
+    fn set_queue_size(&self, num: u16) {
+        self.lock().set_queue_size(num);
+    }
+
+    fn set_queue_event_idx(&self, enabled: bool) {
+        self.lock().set_queue_event_idx(enabled);
+    }
+
+    fn set_queue_ready(&self, ready: bool) {
+        self.lock().set_queue_ready(ready);
+    }
+
+    fn set_kick(&self, file: Option<File>) {
+        self.lock().set_kick(file);
+    }
+
+    fn read_kick(&self) -> io::Result<bool> {
+        self.get_ref().read_kick()
+    }
+
+    fn set_call(&self, file: Option<File>) {
+        self.lock().set_call(file)
+    }
+
+    fn set_err(&self, file: Option<File>) {
+        self.lock().set_err(file)
+    }
+}
+
+/// A `VringState` object protected by an `RwLock` for use in a multi-threaded context.
+#[derive(Clone)]
+pub struct VringRwLock<M: GuestAddressSpace = GuestMemoryAtomic<GuestMemoryMmap>> {
+    state: Arc<RwLock<VringState<M>>>,
+}
+
+impl<M: GuestAddressSpace> VringRwLock<M> {
+    /// Get a mutable guard to the underlying raw `VringState` object.
+    fn write_lock(&self) -> RwLockWriteGuard<'_, VringState<M>> {
+        self.state.write().unwrap()
+    }
+}
+
+impl<'a, M: 'a + GuestAddressSpace> VringStateGuard<'a, M> for VringRwLock<M> {
+    type G = RwLockReadGuard<'a, VringState<M>>;
+}
+
+impl<'a, M: 'a + GuestAddressSpace> VringStateMutGuard<'a, M> for VringRwLock<M> {
+    type G = RwLockWriteGuard<'a, VringState<M>>;
+}
+
+impl<M: 'static + GuestAddressSpace> VringT<M> for VringRwLock<M> {
+    fn new(mem: M, max_queue_size: u16) -> Result<Self, VirtQueError> {
+        Ok(VringRwLock {
+            state: Arc::new(RwLock::new(VringState::new(mem, max_queue_size)?)),
+        })
+    }
+
+    fn get_ref(&self) -> <Self as VringStateGuard<'_, M>>::G {
+        self.state.read().unwrap()
+    }
+
+    fn get_mut(&self) -> <Self as VringStateMutGuard<'_, M>>::G {
+        self.write_lock()
+    }
+
+    fn add_used(&self, desc_index: u16, len: u32) -> Result<(), VirtQueError> {
+        self.write_lock().add_used(desc_index, len)
+    }
+
+    fn signal_used_queue(&self) -> io::Result<()> {
+        self.get_ref().signal_used_queue()
+    }
+
+    fn enable_notification(&self) -> Result<bool, VirtQueError> {
+        self.write_lock().enable_notification()
+    }
+
+    fn disable_notification(&self) -> Result<(), VirtQueError> {
+        self.write_lock().disable_notification()
+    }
+
+    fn needs_notification(&self) -> Result<bool, VirtQueError> {
+        self.write_lock().needs_notification() // write lock: presumably mutates state — confirm
+    }
+
+    fn set_enabled(&self, enabled: bool) {
+        self.write_lock().set_enabled(enabled)
+    }
+
+    fn set_queue_info(
+        &self,
+        desc_table: u64,
+        avail_ring: u64,
+        used_ring: u64,
+    ) -> Result<(), VirtQueError> {
+        self.write_lock()
+            .set_queue_info(desc_table, avail_ring, used_ring)
+    }
+
+    fn queue_next_avail(&self) -> u16 {
+        self.get_ref().queue_next_avail()
+    }
+
+    fn set_queue_next_avail(&self, base: u16) {
+        self.write_lock().set_queue_next_avail(base)
+    }
+
+    fn set_queue_next_used(&self, idx: u16) {
+        self.write_lock().set_queue_next_used(idx)
+    }
+
+    fn queue_used_idx(&self) -> Result<u16, VirtQueError> {
+        self.get_ref().queue_used_idx()
+    }
+
+    fn set_queue_size(&self, num: u16) {
+        self.write_lock().set_queue_size(num);
+    }
+
+    fn set_queue_event_idx(&self, enabled: bool) {
+        self.write_lock().set_queue_event_idx(enabled);
+    }
+
+    fn set_queue_ready(&self, ready: bool) {
+        self.write_lock().set_queue_ready(ready);
+    }
+
+    fn set_kick(&self, file: Option<File>) {
+        self.write_lock().set_kick(file);
+    }
+
+    fn read_kick(&self) -> io::Result<bool> {
+        self.get_ref().read_kick()
+    }
+
+    fn set_call(&self, file: Option<File>) {
+        self.write_lock().set_call(file)
+    }
+
+    fn set_err(&self, file: Option<File>) {
+        self.write_lock().set_err(file)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use vm_memory::bitmap::AtomicBitmap;
+    use vmm_sys_util::event::{new_event_consumer_and_notifier, EventFlag};
+
+    #[test]
+    fn test_new_vring() {
+        let mem = GuestMemoryAtomic::new(
+            GuestMemoryMmap::<AtomicBitmap>::from_ranges(&[(GuestAddress(0x100000), 0x10000)])
+                .unwrap(),
+        );
+        let vring = VringMutex::new(mem, 0x1000).unwrap();
+
+        assert!(vring.get_ref().get_kick().is_none());
+        assert!(!vring.get_mut().enabled);
+        assert!(!vring.lock().queue.ready());
+        assert!(!vring.lock().queue.event_idx_enabled());
+
+        vring.set_enabled(true);
+        assert!(vring.get_ref().enabled);
+
+        vring.set_queue_info(0x100100, 0x100200, 0x100300).unwrap();
+        assert_eq!(vring.lock().get_queue().desc_table(), 0x100100);
+        assert_eq!(vring.lock().get_queue().avail_ring(), 0x100200);
+        assert_eq!(vring.lock().get_queue().used_ring(), 0x100300);
+
+        assert_eq!(vring.queue_next_avail(), 0);
+        vring.set_queue_next_avail(0x20);
+        assert_eq!(vring.queue_next_avail(), 0x20);
+
+        vring.set_queue_size(0x200);
+        assert_eq!(vring.lock().queue.size(), 0x200);
+
+        vring.set_queue_event_idx(true);
+        assert!(vring.lock().queue.event_idx_enabled());
+
+        vring.set_queue_ready(true);
+        assert!(vring.lock().queue.ready());
+    }
+
+    #[test]
+    fn test_vring_set_fd() {
+        let mem = GuestMemoryAtomic::new(
+            GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(),
+        );
+        let vring = VringMutex::new(mem, 0x1000).unwrap();
+
+        vring.set_enabled(true);
+        assert!(vring.get_ref().enabled);
+
+        let (consumer, notifier) = new_event_consumer_and_notifier(EventFlag::empty()).unwrap();
+        // SAFETY: `unwrap()` above panics on failure, so `into_raw_fd()` yields a valid, owned fd.
+        let file = unsafe { File::from_raw_fd(consumer.into_raw_fd()) };
+        assert!(vring.get_mut().kick.is_none());
+        assert!(vring.read_kick().unwrap());
+        vring.set_kick(Some(file));
+        notifier.notify().unwrap();
+        assert!(vring.read_kick().unwrap());
+        assert!(vring.get_ref().kick.is_some());
+        vring.set_kick(None);
+        assert!(vring.get_ref().kick.is_none());
+
+        let (_consumer, notifier) = new_event_consumer_and_notifier(EventFlag::empty()).unwrap();
+        // SAFETY: `unwrap()` above panics on failure, so `into_raw_fd()` yields a valid, owned fd.
+        let file = unsafe { File::from_raw_fd(notifier.into_raw_fd()) };
+        assert!(vring.get_ref().call.is_none());
+        vring.set_call(Some(file));
+        assert!(vring.get_ref().call.is_some());
+        vring.set_call(None);
+        assert!(vring.get_ref().call.is_none());
+
+        let (consumer, _notifier) = new_event_consumer_and_notifier(EventFlag::empty()).unwrap();
+        // SAFETY: `unwrap()` above panics on failure, so `into_raw_fd()` yields a valid, owned fd.
+        let file = unsafe { File::from_raw_fd(consumer.into_raw_fd()) };
+        assert!(vring.get_ref().err.is_none());
+        vring.set_err(Some(file));
+        assert!(vring.get_ref().err.is_some());
+        vring.set_err(None);
+        assert!(vring.get_ref().err.is_none());
+    }
+}
diff --git a/vhost-user-backend-patched/tests/vhost-user-server.rs b/vhost-user-backend-patched/tests/vhost-user-server.rs
new file mode 100644
index 0000000..3c8205a
--- /dev/null
+++ b/vhost-user-backend-patched/tests/vhost-user-server.rs
@@ -0,0 +1,412 @@
+use std::fs::File;
+use std::io::Result;
+use std::os::unix::io::AsRawFd;
+use std::os::unix::net::UnixStream;
+use std::path::Path;
+use std::sync::{Arc, Barrier, Mutex};
+use std::thread;
+
+use uuid::Uuid;
+use vhost::vhost_user::message::{
+    VhostUserConfigFlags, VhostUserHeaderFlag, VhostUserInflight, VhostUserProtocolFeatures,
+    VhostUserSharedMsg,
+};
+use vhost::vhost_user::{Backend, Frontend, Listener, VhostUserFrontend};
+use vhost::{VhostBackend, VhostUserMemoryRegionInfo, VringConfigData};
+use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock};
+use vm_memory::{
+    FileOffset, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic, GuestMemoryMmap,
+};
+use vmm_sys_util::epoll::EventSet;
+use vmm_sys_util::event::{
+    new_event_consumer_and_notifier, EventConsumer, EventFlag, EventNotifier,
+};
+use vmm_sys_util::eventfd::EventFd;
+
+struct MockVhostBackend {
+    events: u64,
+    event_idx: bool,
+    acked_features: u64,
+}
+
+impl MockVhostBackend {
+    const SUPPORTED_FEATURES: u64 = 0xffff_ffff_ffff_ffff; // every feature bit set
+
+    fn new() -> Self {
+        MockVhostBackend {
+            events: 0,
+            event_idx: false,
+            acked_features: 0,
+        }
+    }
+}
+
+impl VhostUserBackendMut for MockVhostBackend {
+    type Bitmap = ();
+    type Vring = VringRwLock;
+
+    fn num_queues(&self) -> usize {
+        2
+    }
+
+    fn max_queue_size(&self) -> usize {
+        256
+    }
+
+    fn features(&self) -> u64 {
+        Self::SUPPORTED_FEATURES
+    }
+
+    fn acked_features(&mut self, features: u64) {
+        self.acked_features = features;
+    }
+
+    fn protocol_features(&self) -> VhostUserProtocolFeatures {
+        // Exclude REPLY_ACK to test that it is automatically added.
+        VhostUserProtocolFeatures::all() - VhostUserProtocolFeatures::REPLY_ACK
+    }
+
+    fn reset_device(&mut self) {
+        self.events = 0;
+        self.event_idx = false;
+        self.acked_features = 0;
+    }
+
+    fn set_event_idx(&mut self, enabled: bool) {
+        self.event_idx = enabled;
+    }
+
+    fn get_config(&self, offset: u32, size: u32) -> Vec<u8> {
+        assert_eq!(offset, 0x200);
+        assert_eq!(size, 8);
+
+        vec![0xa5u8; 8]
+    }
+
+    fn set_config(&mut self, offset: u32, buf: &[u8]) -> Result<()> {
+        assert_eq!(offset, 0x200);
+        assert_eq!(buf, &[0xa5u8; 8]);
+
+        Ok(())
+    }
+
+    fn update_memory(&mut self, atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>) -> Result<()> {
+        let mem = atomic_mem.memory();
+        let region = mem.find_region(GuestAddress(0x100000)).unwrap();
+        assert_eq!(region.size(), 0x100000);
+        Ok(())
+    }
+
+    fn set_backend_req_fd(&mut self, _backend: Backend) {}
+
+    fn get_shared_object(&mut self, _uuid: VhostUserSharedMsg) -> Result<File> {
+        let file = tempfile::tempfile().unwrap();
+        Ok(file)
+    }
+
+    fn queues_per_thread(&self) -> Vec<u64> {
+        vec![1, 1]
+    }
+
+    fn exit_event(&self, _thread_index: usize) -> Option<(EventConsumer, EventNotifier)> {
+        Some(
+            new_event_consumer_and_notifier(EventFlag::empty())
+                .expect("Failed to create EventConsumer and EventNotifier"),
+        )
+    }
+
+    fn handle_event(
+        &mut self,
+        _device_event: u16,
+        _evset: EventSet,
+        _vrings: &[VringRwLock],
+        _thread_id: usize,
+    ) -> Result<()> {
+        self.events += 1;
+
+        Ok(())
+    }
+}
+
+fn setup_frontend(path: &Path, barrier: Arc<Barrier>) -> Frontend {
+    barrier.wait();
+    let mut frontend = Frontend::connect(path, 1).unwrap();
+    frontend.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY);
+    // Wait before issuing service requests.
+    barrier.wait();
+
+    let features = frontend.get_features().unwrap();
+    let proto = frontend.get_protocol_features().unwrap();
+    frontend.set_features(features).unwrap();
+    frontend.set_protocol_features(proto).unwrap();
+    assert!(proto.contains(VhostUserProtocolFeatures::REPLY_ACK));
+
+    frontend
+}
+
+fn vhost_user_client(path: &Path, barrier: Arc<Barrier>) {
+    barrier.wait();
+    let mut frontend = Frontend::connect(path, 1).unwrap();
+    frontend.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY);
+    // Wait before issuing service requests.
+    barrier.wait();
+
+    let features = frontend.get_features().unwrap();
+    let proto = frontend.get_protocol_features().unwrap();
+    frontend.set_features(features).unwrap();
+    frontend.set_protocol_features(proto).unwrap();
+    assert!(proto.contains(VhostUserProtocolFeatures::REPLY_ACK));
+
+    let queue_num = frontend.get_queue_num().unwrap();
+    assert_eq!(queue_num, 2);
+
+    frontend.set_owner().unwrap();
+    //frontend.set_owner().unwrap_err();
+    frontend.reset_owner().unwrap();
+    frontend.reset_owner().unwrap();
+    frontend.set_owner().unwrap();
+
+    frontend.set_features(features).unwrap();
+    frontend.set_protocol_features(proto).unwrap();
+    assert!(proto.contains(VhostUserProtocolFeatures::REPLY_ACK));
+
+    let memfd = nix::sys::memfd::memfd_create("test", nix::sys::memfd::MFdFlags::empty()).unwrap();
+    let file = File::from(memfd);
+    file.set_len(0x100000).unwrap();
+    let file_offset = FileOffset::new(file, 0);
+    let mem = GuestMemoryMmap::<()>::from_ranges_with_files(&[(
+        GuestAddress(0x100000),
+        0x100000,
+        Some(file_offset),
+    )])
+    .unwrap();
+    let addr = mem.get_host_address(GuestAddress(0x100000)).unwrap() as u64;
+    let reg = mem.find_region(GuestAddress(0x100000)).unwrap();
+    let fd = reg.file_offset().unwrap();
+    let regions = [VhostUserMemoryRegionInfo::new(
+        0x100000,
+        0x100000,
+        addr,
+        0,
+        fd.file().as_raw_fd(),
+    )];
+    frontend.set_mem_table(&regions).unwrap();
+
+    frontend.set_vring_num(0, 256).unwrap();
+
+    let config = VringConfigData {
+        queue_max_size: 256,
+        queue_size: 256,
+        flags: 0,
+        desc_table_addr: addr,
+        used_ring_addr: addr + 0x10000,
+        avail_ring_addr: addr + 0x20000,
+        log_addr: None,
+    };
+    frontend.set_vring_addr(0, &config).unwrap();
+
+    let eventfd = EventFd::new(0).unwrap();
+    frontend.set_vring_kick(0, &eventfd).unwrap();
+    frontend.set_vring_call(0, &eventfd).unwrap();
+    frontend.set_vring_err(0, &eventfd).unwrap();
+    frontend.set_vring_enable(0, true).unwrap();
+
+    let buf = [0u8; 8];
+    let (_cfg, data) = frontend
+        .get_config(0x200, 8, VhostUserConfigFlags::empty(), &buf)
+        .unwrap();
+    assert_eq!(&data, &[0xa5u8; 8]);
+    frontend
+        .set_config(0x200, VhostUserConfigFlags::empty(), &data)
+        .unwrap();
+
+    let (tx, _rx) = UnixStream::pair().unwrap();
+    frontend.set_backend_request_fd(&tx).unwrap();
+
+    let state = frontend.get_vring_base(0).unwrap();
+    frontend.set_vring_base(0, state as u16).unwrap();
+
+    assert_eq!(frontend.get_max_mem_slots().unwrap(), 509);
+    let region = VhostUserMemoryRegionInfo::new(0x800000, 0x100000, addr, 0, fd.file().as_raw_fd());
+    frontend.add_mem_region(&region).unwrap();
+    frontend.remove_mem_region(&region).unwrap();
+}
+
+/// Provide a vhost-user back-end for front-end testing.
+///
+/// Set up a `MockVhostBackend` vhost-user back-end and run `cb` in a thread, passing the
+/// vhost-user socket's path and a barrier to await request processing. `cb` is supposed to run
+/// the front-end tests.
+///
+/// After request processing has begun, run `server_fn`, passing both a reference to the back-end
+/// and the same barrier as given to `cb`. `server_fn` may perform additional back-end tests while
+/// `cb` is still run in its thread.
+///
+/// After `server_fn` is done, await `cb` (joining its thread), and return.
+fn vhost_user_server_with_fn<F: FnOnce(Arc<Mutex<MockVhostBackend>>, Arc<Barrier>)>(
+    cb: fn(&Path, Arc<Barrier>),
+    server_fn: F,
+) {
+    let mem = GuestMemoryAtomic::new(GuestMemoryMmap::<()>::new());
+    let backend = Arc::new(Mutex::new(MockVhostBackend::new()));
+    let mut daemon = VhostUserDaemon::new("test".to_owned(), backend.clone(), mem).unwrap();
+
+    let barrier = Arc::new(Barrier::new(2));
+    let tmpdir = tempfile::tempdir().unwrap();
+    let mut path = tmpdir.path().to_path_buf();
+    path.push("socket");
+
+    let barrier2 = barrier.clone();
+    let path1 = path.clone();
+    let thread = thread::spawn(move || cb(&path1, barrier2));
+
+    let mut listener = Listener::new(&path, false).unwrap();
+    barrier.wait();
+    daemon.start(&mut listener).unwrap();
+    barrier.wait();
+
+    server_fn(backend, barrier);
+
+    // Wait for the client thread to finish issuing its service requests.
+    thread.join().unwrap();
+}
+
+fn vhost_user_server(cb: fn(&Path, Arc<Barrier>)) {
+    vhost_user_server_with_fn(cb, |_, _| {})
+}
+
+#[test]
+fn test_vhost_user_server() {
+    vhost_user_server(vhost_user_client);
+}
+
+fn vhost_user_enable(path: &Path, barrier: Arc<Barrier>) {
+    let frontend = setup_frontend(path, barrier);
+    frontend.set_owner().unwrap();
+    frontend.set_owner().unwrap_err();
+}
+
+#[test]
+fn test_vhost_user_enable() {
+    vhost_user_server(vhost_user_enable);
+}
+
+fn vhost_user_set_inflight(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier);
+    let eventfd = EventFd::new(0).unwrap();
+    // No implementation for inflight_fd yet, so the request is expected to fail.
+    let inflight = VhostUserInflight {
+        mmap_size: 0x100000,
+        mmap_offset: 0,
+        num_queues: 1,
+        queue_size: 256,
+    };
+    frontend
+        .set_inflight_fd(&inflight, eventfd.as_raw_fd())
+        .unwrap_err();
+}
+
+#[test]
+fn test_vhost_user_set_inflight() {
+    vhost_user_server(vhost_user_set_inflight);
+}
+
+fn vhost_user_get_inflight(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier);
+    // No implementation for inflight_fd yet, so the request is expected to fail.
+    let inflight = VhostUserInflight {
+        mmap_size: 0x100000,
+        mmap_offset: 0,
+        num_queues: 1,
+        queue_size: 256,
+    };
+    assert!(frontend.get_inflight_fd(&inflight).is_err());
+}
+
+#[test]
+fn test_vhost_user_get_shared_object() {
+    vhost_user_server(vhost_user_get_shared_object);
+}
+
+fn vhost_user_get_shared_object(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier);
+    frontend
+        .get_shared_object(&VhostUserSharedMsg::default())
+        .unwrap_err();
+    frontend
+        .get_shared_object(&VhostUserSharedMsg {
+            uuid: Uuid::new_v4(),
+        })
+        .unwrap();
+}
+
+#[test]
+fn test_vhost_user_get_inflight() {
+    vhost_user_server(vhost_user_get_inflight);
+}
+
+#[cfg(feature = "postcopy")]
+fn vhost_user_postcopy_advise(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier);
+    let _uffd_file = frontend.postcopy_advise().unwrap();
+}
+
+#[cfg(feature = "postcopy")]
+fn vhost_user_postcopy_listen(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier);
+    let _uffd_file = frontend.postcopy_advise().unwrap();
+    frontend.postcopy_listen().unwrap();
+}
+
+#[cfg(feature = "postcopy")]
+fn vhost_user_postcopy_end(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier);
+    let _uffd_file = frontend.postcopy_advise().unwrap();
+    frontend.postcopy_listen().unwrap();
+    frontend.postcopy_end().unwrap();
+}
+
+// These tests need access to `/dev/userfaultfd`
+// in order to pass.
+#[cfg(feature = "postcopy")]
+#[test]
+fn test_vhost_user_postcopy() {
+    vhost_user_server(vhost_user_postcopy_advise);
+    vhost_user_server(vhost_user_postcopy_listen);
+    vhost_user_server(vhost_user_postcopy_end);
+}
+
+fn vhost_user_reset_device(path: &Path, barrier: Arc<Barrier>) {
+    let mut frontend = setup_frontend(path, barrier.clone());
+
+    // Signal that we are about to reset
+    barrier.wait();
+    // Wait until the server has checked the non-reset state
+    barrier.wait();
+
+    frontend.reset_device().unwrap();
+
+    // Signal that the reset is done
+    barrier.wait();
+}
+
+#[test]
+fn test_vhost_user_reset_device() {
+    vhost_user_server_with_fn(vhost_user_reset_device, |backend, barrier| {
+        // Wait until `vhost_user_reset_device()` is about to reset
+        barrier.wait();
+        // Check non-reset state: the features acked during frontend setup are still recorded
+        assert_eq!(backend.lock().unwrap().acked_features, MockVhostBackend::SUPPORTED_FEATURES);
+        // Set up some arbitrary internal state that the reset must clear
+        backend.lock().unwrap().events = 42;
+
+        // Allow reset
+        barrier.wait();
+        // Wait for reset to be done
+        barrier.wait();
+
+        // Check reset state: `reset_device()` zeroes both fields
+        assert_eq!(backend.lock().unwrap().acked_features, 0);
+        assert_eq!(backend.lock().unwrap().events, 0);
+    });
+}