From ead845312031fb07386cd2692c0a8dd33ff80fac Mon Sep 17 00:00:00 2001 From: Rob Bradford Date: Fri, 27 Nov 2020 15:15:33 +0000 Subject: [PATCH] build: Remove vhost_user_fs This has been superseded by virtiofsd-rs. Fixes: #2013 Signed-off-by: Rob Bradford --- Cargo.lock | 177 --- Cargo.toml | 1 - vhost_user_fs/Cargo.toml | 23 - vhost_user_fs/src/descriptor_utils.rs | 1001 ------------ vhost_user_fs/src/file_traits.rs | 409 ----- vhost_user_fs/src/filesystem.rs | 1148 -------------- vhost_user_fs/src/fs_cache_req_handler.rs | 62 - vhost_user_fs/src/fuse.rs | 1165 -------------- vhost_user_fs/src/lib.rs | 60 - vhost_user_fs/src/main.rs | 423 ----- vhost_user_fs/src/multikey.rs | 274 ---- vhost_user_fs/src/passthrough.rs | 1724 --------------------- vhost_user_fs/src/sandbox.rs | 319 ---- vhost_user_fs/src/seccomp.rs | 141 -- vhost_user_fs/src/server.rs | 1419 ----------------- 15 files changed, 8346 deletions(-) delete mode 100644 vhost_user_fs/Cargo.toml delete mode 100644 vhost_user_fs/src/descriptor_utils.rs delete mode 100644 vhost_user_fs/src/file_traits.rs delete mode 100644 vhost_user_fs/src/filesystem.rs delete mode 100644 vhost_user_fs/src/fs_cache_req_handler.rs delete mode 100644 vhost_user_fs/src/fuse.rs delete mode 100644 vhost_user_fs/src/lib.rs delete mode 100644 vhost_user_fs/src/main.rs delete mode 100644 vhost_user_fs/src/multikey.rs delete mode 100644 vhost_user_fs/src/passthrough.rs delete mode 100644 vhost_user_fs/src/sandbox.rs delete mode 100644 vhost_user_fs/src/seccomp.rs delete mode 100644 vhost_user_fs/src/server.rs diff --git a/Cargo.lock b/Cargo.lock index 7436df67f..45eb58528 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -374,102 +374,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" -[[package]] -name = "futures" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b3b0c040a1fe6529d30b3c5944b280c7f0dcb2930d2c3062bca967b602583d0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b7109687aa4e177ef6fe84553af6280ef2778bdb7783ba44c9dc3399110fe64" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "847ce131b72ffb13b6109a221da9ad97a64cbe48feb1028356b836b47b8f1748" - -[[package]] -name = "futures-executor" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4caa2b2b68b880003057c1dd49f1ed937e38f22fcf6c212188a121f08cf40a65" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", - "num_cpus", -] - -[[package]] -name = "futures-io" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611834ce18aaa1bd13c4b374f5d653e1027cf99b6b502584ff8c9a64413b30bb" - -[[package]] -name = "futures-macro" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77408a692f1f97bcc61dc001d752e00643408fbc922e4d634c655df50d595556" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f878195a49cee50e006b02b93cf7e0a95a38ac7b776b4c4d9cc1207cd20fcb3d" - -[[package]] -name = "futures-task" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c554eb5bf48b2426c4771ab68c6b14468b6e76cc90996f528c3338d761a4d0d" -dependencies = [ - "once_cell", -] - -[[package]] -name = "futures-util" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d304cff4a7b99cfb7986f7d43fbe93d175e72e704a8860787cc95e9ffd85cbd2" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project", - "pin-utils", - "proc-macro-hack", - "proc-macro-nested", - "slab", -] - [[package]] name = "getrandom" version = "0.1.15" @@ -734,28 +638,12 @@ dependencies = [ "vmm-sys-util", ] -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3b63360ec3cb337817c2dbd47ab4a0f170d285d8e5a2064600f3def1402397" -[[package]] -name = "once_cell" -version = "1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" - [[package]] name = "openssl-sys" version = "0.9.58" @@ -824,32 +712,6 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" -[[package]] -name = "pin-project" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ccc2237c2c489783abd8c4c80e5450fc0e98644555b1364da68cc29aa151ca7" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e8d2bf0b23038a4424865103a4df472855692821aab4e4f5c3312d461d9e5f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "pkg-config" version = "0.3.19" @@ -950,18 +812,6 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro-nested" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eba180dafb9038b050a4c280019bbedf9f2467b61e5d892dcad585bb57aadc5a" - [[package]] name = "proc-macro2" version = "1.0.24" @@ -1240,12 +1090,6 @@ dependencies = [ "libc", ] -[[package]] -name = "slab" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" - [[package]] name = "smallvec" version = "1.5.0" @@ -1580,27 +1424,6 @@ dependencies = [ "vmm-sys-util", ] -[[package]] -name = "vhost_user_fs" -version = "0.1.0" -dependencies = [ - "bitflags 1.2.1", - "clap", - "epoll", - "futures", - "libc", - "log 0.4.11", - "seccomp", - "tempdir", - "vhost", - "vhost_user_backend", - "virtio-bindings", - "virtio-devices", - "vm-memory", - "vm-virtio", - "vmm-sys-util", -] - [[package]] name = "vhost_user_net" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 4925621bf..2d81c86db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,6 @@ members = [ "qcow", "vhost_user_backend", "vhost_user_block", - "vhost_user_fs", "vhost_user_net", "virtio-devices", "vmm", diff --git a/vhost_user_fs/Cargo.toml b/vhost_user_fs/Cargo.toml deleted file mode 100644 index 44bcf4764..000000000 --- a/vhost_user_fs/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "vhost_user_fs" -version = "0.1.0" -authors = ["The Cloud Hypervisor Authors"] -edition = "2018" - -[dependencies] -bitflags = "1.1.0" -clap = { version = "2.33.3", features=["wrap_help"] } -epoll = ">=4.0.1" -futures = { version = "0.3.8", features = ["thread-pool"] } -libc = "0.2.80" -log = "0.4.11" -# Match the version in vmm -seccomp = { git = "https://github.com/firecracker-microvm/firecracker", tag = "v0.22.0" } -tempdir = "0.3.7" -virtio-bindings = { version = "0.1", features = ["virtio-v5_0_0"]} -virtio-devices = { path = "../virtio-devices" } -vhost_rs = { git = "https://github.com/rust-vmm/vhost", branch = "master", package = "vhost", features = ["vhost-user-slave"] } -vhost_user_backend = { path = "../vhost_user_backend"} -vm-memory = "0.4.0" -vm-virtio = { path = "../vm-virtio" } -vmm-sys-util = "0.7.0" \ No newline at end of file diff --git a/vhost_user_fs/src/descriptor_utils.rs b/vhost_user_fs/src/descriptor_utils.rs deleted file mode 100644 index a95d06d70..000000000 --- a/vhost_user_fs/src/descriptor_utils.rs +++ /dev/null @@ -1,1001 +0,0 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use std::cmp; -use std::collections::VecDeque; -use std::fmt::{self, Display}; -use std::io::{self, Read, Write}; -use std::mem::{size_of, MaybeUninit}; -use std::ops::Deref; -use std::ptr::copy_nonoverlapping; -use std::result; - -use vm_memory::{ - Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryMmap, - GuestMemoryRegion, Le16, Le32, Le64, VolatileMemory, VolatileMemoryError, VolatileSlice, -}; -use vm_virtio::queue::Error as QueueError; -use vm_virtio::DescriptorChain; - -use crate::file_traits::{FileReadWriteAtVolatile, FileReadWriteVolatile}; - -#[derive(Debug)] -pub enum Error { - DescriptorChainOverflow, - FindMemoryRegion, - GuestMemoryError(GuestMemoryError), - InvalidChain, - ConvertIndirectDescriptor(QueueError), - IoError(io::Error), - SplitOutOfBounds(usize), - VolatileMemoryError(VolatileMemoryError), -} - -impl Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use self::Error::*; - - match self { - ConvertIndirectDescriptor(e) => write!(f, "invalid indirect descriptor: {}", e), - DescriptorChainOverflow => write!( - f, - "the combined length of all the buffers in a `DescriptorChain` would overflow" - ), - FindMemoryRegion => write!(f, "no memory region for this address range"), - GuestMemoryError(e) => write!(f, "descriptor guest memory error: {}", e), - InvalidChain => write!(f, "invalid descriptor chain"), - IoError(e) => write!(f, "descriptor I/O error: {}", e), - SplitOutOfBounds(off) => write!(f, "`DescriptorChain` split is out of bounds: {}", off), - VolatileMemoryError(e) => write!(f, "volatile memory error: {}", e), - } - } -} - -pub type Result = result::Result; - -impl std::error::Error for Error {} - -#[derive(Clone)] -struct DescriptorChainConsumer<'a> { - buffers: VecDeque>, - bytes_consumed: usize, -} - -impl<'a> DescriptorChainConsumer<'a> { - fn available_bytes(&self) -> usize { - // This is guaranteed not to overflow because the total length of the chain - // is checked during all creations of `DescriptorChainConsumer` (see - // `Reader::new()` and `Writer::new()`). - self.buffers - .iter() - .fold(0usize, |count, vs| count + vs.len() as usize) - } - - fn bytes_consumed(&self) -> usize { - self.bytes_consumed - } - - /// Consumes at most `count` bytes from the `DescriptorChain`. Callers must provide a function - /// that takes a `&[VolatileSlice]` and returns the total number of bytes consumed. This - /// function guarantees that the combined length of all the slices in the `&[VolatileSlice]` is - /// less than or equal to `count`. - /// - /// # Errors - /// - /// If the provided function returns any error then no bytes are consumed from the buffer and - /// the error is returned to the caller. - fn consume(&mut self, count: usize, f: F) -> io::Result - where - F: FnOnce(&[VolatileSlice]) -> io::Result, - { - let mut buflen = 0; - let mut bufs = Vec::with_capacity(self.buffers.len()); - for &vs in &self.buffers { - if buflen >= count { - break; - } - - bufs.push(vs); - - let rem = count - buflen; - if rem < vs.len() { - buflen += rem; - } else { - buflen += vs.len() as usize; - } - } - - if bufs.is_empty() { - return Ok(0); - } - - let bytes_consumed = f(&*bufs)?; - - // This can happen if a driver tricks a device into reading/writing more data than - // fits in a `usize`. - let total_bytes_consumed = - self.bytes_consumed - .checked_add(bytes_consumed) - .ok_or_else(|| { - io::Error::new(io::ErrorKind::InvalidData, Error::DescriptorChainOverflow) - })?; - - let mut rem = bytes_consumed; - while let Some(vs) = self.buffers.pop_front() { - if rem < vs.len() { - // Split the slice and push the remainder back into the buffer list. Safe because we - // know that `rem` is not out of bounds due to the check and we checked the bounds - // on `vs` when we added it to the buffer list. - self.buffers.push_front(vs.offset(rem).unwrap()); - break; - } - - // No need for checked math because we know that `vs.size() <= rem`. - rem -= vs.len(); - } - - self.bytes_consumed = total_bytes_consumed; - - Ok(bytes_consumed) - } - - fn split_at(&mut self, offset: usize) -> Result> { - let mut rem = offset; - let pos = self.buffers.iter().position(|vs| { - if rem < vs.len() { - true - } else { - rem -= vs.len(); - false - } - }); - - if let Some(at) = pos { - let mut other = self.buffers.split_off(at); - - if rem > 0 { - // There must be at least one element in `other` because we checked - // its `size` value in the call to `position` above. - let front = other.pop_front().expect("empty VecDeque after split"); - self.buffers - .push_back(front.offset(rem).map_err(Error::VolatileMemoryError)?); - other.push_front(front.offset(rem).map_err(Error::VolatileMemoryError)?); - } - - Ok(DescriptorChainConsumer { - buffers: other, - bytes_consumed: 0, - }) - } else if rem == 0 { - Ok(DescriptorChainConsumer { - buffers: VecDeque::new(), - bytes_consumed: 0, - }) - } else { - Err(Error::SplitOutOfBounds(offset)) - } - } -} - -/// Provides high-level interface over the sequence of memory regions -/// defined by readable descriptors in the descriptor chain. -/// -/// Note that virtio spec requires driver to place any device-writable -/// descriptors after any device-readable descriptors (2.6.4.2 in Virtio Spec v1.1). -/// Reader will skip iterating over descriptor chain when first writable -/// descriptor is encountered. -#[derive(Clone)] -pub struct Reader<'a> { - buffer: DescriptorChainConsumer<'a>, -} - -impl<'a> Reader<'a> { - /// Construct a new Reader wrapper over `desc_chain`. - pub fn new(mem: &'a GuestMemoryMmap, desc_chain: DescriptorChain<'a>) -> Result> { - let mut total_len: usize = 0; - let chain = if desc_chain.is_indirect() { - desc_chain - .new_from_indirect() - .map_err(Error::ConvertIndirectDescriptor)? - } else { - desc_chain - }; - let buffers = chain - .into_iter() - .readable() - .map(|desc| { - // Verify that summing the descriptor sizes does not overflow. - // This can happen if a driver tricks a device into reading more data than - // fits in a `usize`. - total_len = total_len - .checked_add(desc.len as usize) - .ok_or(Error::DescriptorChainOverflow)?; - - let region = mem.find_region(desc.addr).ok_or(Error::FindMemoryRegion)?; - let offset = desc - .addr - .checked_sub(region.start_addr().raw_value()) - .unwrap(); - region - .deref() - .get_slice(offset.raw_value() as usize, desc.len as usize) - .map_err(Error::VolatileMemoryError) - }) - .collect::>>>()?; - Ok(Reader { - buffer: DescriptorChainConsumer { - buffers, - bytes_consumed: 0, - }, - }) - } - - /// Reads an object from the descriptor chain buffer. - pub fn read_obj(&mut self) -> io::Result { - let mut obj = MaybeUninit::::uninit(); - - // Safe because `MaybeUninit` guarantees that the pointer is valid for - // `size_of::()` bytes. - let buf = unsafe { - ::std::slice::from_raw_parts_mut(obj.as_mut_ptr() as *mut u8, size_of::()) - }; - - self.read_exact(buf)?; - - // Safe because any type that implements `ByteValued` can be considered initialized - // even if it is filled with random data. - Ok(unsafe { obj.assume_init() }) - } - - /// Reads data from the descriptor chain buffer into a file descriptor. - /// Returns the number of bytes read from the descriptor chain buffer. - /// The number of bytes read can be less than `count` if there isn't - /// enough data in the descriptor chain buffer. - pub fn read_to( - &mut self, - mut dst: F, - count: usize, - ) -> io::Result { - self.buffer - .consume(count, |bufs| dst.write_vectored_volatile(bufs)) - } - - /// Reads data from the descriptor chain buffer into a File at offset `off`. - /// Returns the number of bytes read from the descriptor chain buffer. - /// The number of bytes read can be less than `count` if there isn't - /// enough data in the descriptor chain buffer. - pub fn read_to_at( - &mut self, - mut dst: F, - count: usize, - off: u64, - ) -> io::Result { - self.buffer - .consume(count, |bufs| dst.write_vectored_at_volatile(bufs, off)) - } - - pub fn read_exact_to( - &mut self, - mut dst: F, - mut count: usize, - ) -> io::Result<()> { - while count > 0 { - match self.read_to(&mut dst, count) { - Ok(0) => { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "failed to fill whole buffer", - )) - } - Ok(n) => count -= n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - - Ok(()) - } - - /// Returns number of bytes available for reading. May return an error if the combined - /// lengths of all the buffers in the DescriptorChain would cause an integer overflow. - pub fn available_bytes(&self) -> usize { - self.buffer.available_bytes() - } - - /// Returns number of bytes already read from the descriptor chain buffer. - pub fn bytes_read(&self) -> usize { - self.buffer.bytes_consumed() - } - - /// Splits this `Reader` into two at the given offset in the `DescriptorChain` buffer. - /// After the split, `self` will be able to read up to `offset` bytes while the returned - /// `Reader` can read up to `available_bytes() - offset` bytes. Returns an error if - /// `offset > self.available_bytes()`. - pub fn split_at(&mut self, offset: usize) -> Result> { - self.buffer.split_at(offset).map(|buffer| Reader { buffer }) - } -} - -impl<'a> io::Read for Reader<'a> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.buffer.consume(buf.len(), |bufs| { - let mut rem = buf; - let mut total = 0; - for vs in bufs { - let copy_len = cmp::min(rem.len(), vs.len()); - - // Safe because we have already verified that `vs` points to valid memory. - unsafe { - copy_nonoverlapping(vs.as_ptr() as *const u8, rem.as_mut_ptr(), copy_len); - } - rem = &mut rem[copy_len..]; - total += copy_len; - } - Ok(total) - }) - } -} - -/// Provides high-level interface over the sequence of memory regions -/// defined by writable descriptors in the descriptor chain. -/// -/// Note that virtio spec requires driver to place any device-writable -/// descriptors after any device-readable descriptors (2.6.4.2 in Virtio Spec v1.1). -/// Writer will start iterating the descriptors from the first writable one and will -/// assume that all following descriptors are writable. -#[derive(Clone)] -pub struct Writer<'a> { - buffer: DescriptorChainConsumer<'a>, -} - -impl<'a> Writer<'a> { - /// Construct a new Writer wrapper over `desc_chain`. - pub fn new(mem: &'a GuestMemoryMmap, desc_chain: DescriptorChain<'a>) -> Result> { - let mut total_len: usize = 0; - let chain = if desc_chain.is_indirect() { - desc_chain - .new_from_indirect() - .map_err(Error::ConvertIndirectDescriptor)? - } else { - desc_chain - }; - let buffers = chain - .into_iter() - .writable() - .map(|desc| { - // Verify that summing the descriptor sizes does not overflow. - // This can happen if a driver tricks a device into writing more data than - // fits in a `usize`. - total_len = total_len - .checked_add(desc.len as usize) - .ok_or(Error::DescriptorChainOverflow)?; - - let region = mem.find_region(desc.addr).ok_or(Error::FindMemoryRegion)?; - let offset = desc - .addr - .checked_sub(region.start_addr().raw_value()) - .unwrap(); - region - .deref() - .get_slice(offset.raw_value() as usize, desc.len as usize) - .map_err(Error::VolatileMemoryError) - }) - .collect::>>>()?; - - Ok(Writer { - buffer: DescriptorChainConsumer { - buffers, - bytes_consumed: 0, - }, - }) - } - - /// Writes an object to the descriptor chain buffer. - pub fn write_obj(&mut self, val: T) -> io::Result<()> { - self.write_all(val.as_slice()) - } - - /// Returns number of bytes available for writing. May return an error if the combined - /// lengths of all the buffers in the DescriptorChain would cause an overflow. - pub fn available_bytes(&self) -> usize { - self.buffer.available_bytes() - } - - /// Writes data to the descriptor chain buffer from a file descriptor. - /// Returns the number of bytes written to the descriptor chain buffer. - /// The number of bytes written can be less than `count` if - /// there isn't enough data in the descriptor chain buffer. - pub fn write_from( - &mut self, - mut src: F, - count: usize, - ) -> io::Result { - self.buffer - .consume(count, |bufs| src.read_vectored_volatile(bufs)) - } - - /// Writes data to the descriptor chain buffer from a File at offset `off`. - /// Returns the number of bytes written to the descriptor chain buffer. - /// The number of bytes written can be less than `count` if - /// there isn't enough data in the descriptor chain buffer. - pub fn write_from_at( - &mut self, - mut src: F, - count: usize, - off: u64, - ) -> io::Result { - self.buffer - .consume(count, |bufs| src.read_vectored_at_volatile(bufs, off)) - } - - pub fn write_all_from( - &mut self, - mut src: F, - mut count: usize, - ) -> io::Result<()> { - while count > 0 { - match self.write_from(&mut src, count) { - Ok(0) => { - return Err(io::Error::new( - io::ErrorKind::WriteZero, - "failed to write whole buffer", - )) - } - Ok(n) => count -= n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - - Ok(()) - } - - /// Returns number of bytes already written to the descriptor chain buffer. - pub fn bytes_written(&self) -> usize { - self.buffer.bytes_consumed() - } - - /// Splits this `Writer` into two at the given offset in the `DescriptorChain` buffer. - /// After the split, `self` will be able to write up to `offset` bytes while the returned - /// `Writer` can write up to `available_bytes() - offset` bytes. Returns an error if - /// `offset > self.available_bytes()`. - pub fn split_at(&mut self, offset: usize) -> Result> { - self.buffer.split_at(offset).map(|buffer| Writer { buffer }) - } -} - -impl<'a> io::Write for Writer<'a> { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.buffer.consume(buf.len(), |bufs| { - let mut rem = buf; - let mut total = 0; - for vs in bufs { - let copy_len = cmp::min(rem.len(), vs.len()); - - // Safe because we have already verified that `vs` points to valid memory. - unsafe { - copy_nonoverlapping(rem.as_ptr(), vs.as_ptr(), copy_len); - } - rem = &rem[copy_len..]; - total += copy_len; - } - Ok(total) - }) - } - - fn flush(&mut self) -> io::Result<()> { - // Nothing to flush since the writes go straight into the buffer. - Ok(()) - } -} - -const VIRTQ_DESC_F_NEXT: u16 = 0x1; -const VIRTQ_DESC_F_WRITE: u16 = 0x2; - -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum DescriptorType { - Readable, - Writable, -} - -#[derive(Copy, Clone, Debug, Default)] -#[repr(C)] -struct virtq_desc { - addr: Le64, - len: Le32, - flags: Le16, - next: Le16, -} - -// Safe because it only has data and has no implicit padding. -unsafe impl ByteValued for virtq_desc {} - -/// Test utility function to create a descriptor chain in guest memory. -pub fn create_descriptor_chain( - memory: &GuestMemoryMmap, - descriptor_array_addr: GuestAddress, - mut buffers_start_addr: GuestAddress, - descriptors: Vec<(DescriptorType, u32)>, - spaces_between_regions: u32, -) -> Result { - let descriptors_len = descriptors.len(); - for (index, (type_, size)) in descriptors.into_iter().enumerate() { - let mut flags = 0; - if let DescriptorType::Writable = type_ { - flags |= VIRTQ_DESC_F_WRITE; - } - if index + 1 < descriptors_len { - flags |= VIRTQ_DESC_F_NEXT; - } - - let index = index as u16; - let desc = virtq_desc { - addr: buffers_start_addr.raw_value().into(), - len: size.into(), - flags: flags.into(), - next: (index + 1).into(), - }; - - let offset = size + spaces_between_regions; - buffers_start_addr = buffers_start_addr - .checked_add(u64::from(offset)) - .ok_or(Error::InvalidChain)?; - - let _ = memory.write_obj( - desc, - descriptor_array_addr - .checked_add(u64::from(index) * std::mem::size_of::() as u64) - .ok_or(Error::InvalidChain)?, - ); - } - - DescriptorChain::checked_new(memory, descriptor_array_addr, 0x100, 0, None) - .ok_or(Error::InvalidChain) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn reader_test_simple_chain() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 8), - (Readable, 16), - (Readable, 18), - (Readable, 64), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - assert_eq!(reader.available_bytes(), 106); - assert_eq!(reader.bytes_read(), 0); - - let mut buffer = [0 as u8; 64]; - if let Err(e) = reader.read_exact(&mut buffer) { - panic!("read_exact should not fail here: {:?}", e); - } - - assert_eq!(reader.available_bytes(), 42); - assert_eq!(reader.bytes_read(), 64); - - match reader.read(&mut buffer) { - Err(e) => panic!("read should not fail here: {:?}", e), - Ok(length) => assert_eq!(length, 42), - } - - assert_eq!(reader.available_bytes(), 0); - assert_eq!(reader.bytes_read(), 106); - } - - #[test] - fn writer_test_simple_chain() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Writable, 8), - (Writable, 16), - (Writable, 18), - (Writable, 64), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut writer = Writer::new(&memory, chain).expect("failed to create Writer"); - assert_eq!(writer.available_bytes(), 106); - assert_eq!(writer.bytes_written(), 0); - - let buffer = [0 as u8; 64]; - if let Err(e) = writer.write_all(&buffer) { - panic!("write_all should not fail here: {:?}", e); - } - - assert_eq!(writer.available_bytes(), 42); - assert_eq!(writer.bytes_written(), 64); - - match writer.write(&buffer) { - Err(e) => panic!("write should not fail here {:?}", e), - Ok(length) => assert_eq!(length, 42), - } - - assert_eq!(writer.available_bytes(), 0); - assert_eq!(writer.bytes_written(), 106); - } - - #[test] - fn reader_test_incompatible_chain() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Writable, 8)], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - assert_eq!(reader.available_bytes(), 0); - assert_eq!(reader.bytes_read(), 0); - - assert!(reader.read_obj::().is_err()); - - assert_eq!(reader.available_bytes(), 0); - assert_eq!(reader.bytes_read(), 0); - } - - #[test] - fn writer_test_incompatible_chain() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Readable, 8)], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut writer = Writer::new(&memory, chain).expect("failed to create Writer"); - assert_eq!(writer.available_bytes(), 0); - assert_eq!(writer.bytes_written(), 0); - - assert!(writer.write_obj(0u8).is_err()); - - assert_eq!(writer.available_bytes(), 0); - assert_eq!(writer.bytes_written(), 0); - } - - #[test] - fn reader_writer_shared_chain() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 16), - (Readable, 16), - (Readable, 96), - (Writable, 64), - (Writable, 1), - (Writable, 3), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain.clone()).expect("failed to create Reader"); - let mut writer = Writer::new(&memory, chain).expect("failed to create Writer"); - - assert_eq!(reader.bytes_read(), 0); - assert_eq!(writer.bytes_written(), 0); - - let mut buffer = Vec::with_capacity(200); - - assert_eq!( - reader - .read_to_end(&mut buffer) - .expect("read should not fail here"), - 128 - ); - - // The writable descriptors are only 68 bytes long. - writer - .write_all(&buffer[..68]) - .expect("write should not fail here"); - - assert_eq!(reader.available_bytes(), 0); - assert_eq!(reader.bytes_read(), 128); - assert_eq!(writer.available_bytes(), 0); - assert_eq!(writer.bytes_written(), 68); - } - - #[test] - fn reader_writer_shattered_object() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let secret: Le32 = 0x1234_5678.into(); - - // Create a descriptor chain with memory regions that are properly separated. - let chain_writer = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Writable, 1), (Writable, 1), (Writable, 1), (Writable, 1)], - 123, - ) - .expect("create_descriptor_chain failed"); - let mut writer = Writer::new(&memory, chain_writer).expect("failed to create Writer"); - if let Err(e) = writer.write_obj(secret) { - panic!("write_obj should not fail here: {:?}", e); - } - - // Now create new descriptor chain pointing to the same memory and try to read it. - let chain_reader = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Readable, 1), (Readable, 1), (Readable, 1), (Readable, 1)], - 123, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain_reader).expect("failed to create Reader"); - match reader.read_obj::() { - Err(e) => panic!("read_obj should not fail here: {:?}", e), - Ok(read_secret) => assert_eq!(read_secret, secret), - } - } - - #[test] - fn reader_unexpected_eof() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Readable, 256), (Readable, 256)], - 0, - ) - .expect("create_descriptor_chain failed"); - - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - let mut buf = vec![0; 1024]; - - assert_eq!( - reader - .read_exact(&mut buf[..]) - .expect_err("read more bytes than available") - .kind(), - io::ErrorKind::UnexpectedEof - ); - } - - #[test] - fn split_border() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 16), - (Readable, 16), - (Readable, 96), - (Writable, 64), - (Writable, 1), - (Writable, 3), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - let other = reader.split_at(32).expect("failed to split Reader"); - assert_eq!(reader.available_bytes(), 32); - assert_eq!(other.available_bytes(), 96); - } - - #[test] - fn split_middle() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 16), - (Readable, 16), - (Readable, 96), - (Writable, 64), - (Writable, 1), - (Writable, 3), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - let other = reader.split_at(24).expect("failed to split Reader"); - assert_eq!(reader.available_bytes(), 24); - assert_eq!(other.available_bytes(), 104); - } - - #[test] - fn split_end() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 16), - (Readable, 16), - (Readable, 96), - (Writable, 64), - (Writable, 1), - (Writable, 3), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - let other = reader.split_at(128).expect("failed to split Reader"); - assert_eq!(reader.available_bytes(), 128); - assert_eq!(other.available_bytes(), 0); - } - - #[test] - fn split_beginning() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 16), - (Readable, 16), - (Readable, 96), - (Writable, 64), - (Writable, 1), - (Writable, 3), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - let other = reader.split_at(0).expect("failed to split Reader"); - assert_eq!(reader.available_bytes(), 0); - assert_eq!(other.available_bytes(), 128); - } - - #[test] - fn split_outofbounds() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![ - (Readable, 16), - (Readable, 16), - (Readable, 96), - (Writable, 64), - (Writable, 1), - (Writable, 3), - ], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - if reader.split_at(256).is_ok() { - panic!("successfully split Reader with out of bounds offset"); - } - } - - #[test] - fn read_full() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Readable, 16), (Readable, 16), (Readable, 16)], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut reader = Reader::new(&memory, chain).expect("failed to create Reader"); - - let mut buf = vec![0u8; 64]; - assert_eq!( - reader.read(&mut buf[..]).expect("failed to read to buffer"), - 48 - ); - } - - #[test] - fn write_full() { - use DescriptorType::*; - - let memory_start_addr = GuestAddress(0x0); - let memory = GuestMemoryMmap::from_ranges(&[(memory_start_addr, 0x10000)]).unwrap(); - - let chain = create_descriptor_chain( - &memory, - GuestAddress(0x0), - GuestAddress(0x100), - vec![(Writable, 16), (Writable, 16), (Writable, 16)], - 0, - ) - .expect("create_descriptor_chain failed"); - let mut writer = Writer::new(&memory, chain).expect("failed to create Writer"); - - let buf = vec![0xdeu8; 64]; - assert_eq!( - writer.write(&buf[..]).expect("failed to write from buffer"), - 48 - ); - } -} diff --git a/vhost_user_fs/src/file_traits.rs b/vhost_user_fs/src/file_traits.rs deleted file mode 100644 index 302bf244d..000000000 --- a/vhost_user_fs/src/file_traits.rs +++ /dev/null @@ -1,409 +0,0 @@ -// Copyright 2018 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use std::fs::File; -use std::io::{Error, ErrorKind, Result}; -use std::os::unix::io::AsRawFd; - -use vm_memory::VolatileSlice; - -use libc::{ - c_int, c_void, off64_t, pread64, preadv64, pwrite64, pwritev64, read, readv, size_t, write, - writev, -}; - -/// A trait for setting the size of a file. -/// This is equivalent to File's `set_len` method, but -/// wrapped in a trait so that it can be implemented for -/// other types. -pub trait FileSetLen { - // Set the size of this file. - // This is the moral equivalent of `ftruncate()`. - fn set_len(&self, _len: u64) -> Result<()>; -} - -impl FileSetLen for File { - fn set_len(&self, len: u64) -> Result<()> { - File::set_len(self, len) - } -} - -/// A trait similar to `Read` and `Write`, but uses volatile memory as buffers. -pub trait FileReadWriteVolatile { - /// Read bytes from this file into the given slice, returning the number of bytes read on - /// success. - fn read_volatile(&mut self, slice: VolatileSlice) -> Result; - - /// Like `read_volatile`, except it reads to a slice of buffers. Data is copied to fill each - /// buffer in order, with the final buffer written to possibly being only partially filled. This - /// method must behave as a single call to `read_volatile` with the buffers concatenated would. - /// The default implementation calls `read_volatile` with either the first nonempty buffer - /// provided, or returns `Ok(0)` if none exists. - fn read_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result { - bufs.iter() - .find(|b| !b.is_empty()) - .map(|&b| self.read_volatile(b)) - .unwrap_or(Ok(0)) - } - - /// Reads bytes from this into the given slice until all bytes in the slice are written, or an - /// error is returned. - fn read_exact_volatile(&mut self, mut slice: VolatileSlice) -> Result<()> { - while !slice.is_empty() { - let bytes_read = self.read_volatile(slice)?; - if bytes_read == 0 { - return Err(Error::from(ErrorKind::UnexpectedEof)); - } - // Will panic if read_volatile read more bytes than we gave it, which would be worthy of - // a panic. - slice = slice.offset(bytes_read).unwrap(); - } - Ok(()) - } - - /// Write bytes from the slice to the given file, returning the number of bytes written on - /// success. - fn write_volatile(&mut self, slice: VolatileSlice) -> Result; - - /// Like `write_volatile`, except that it writes from a slice of buffers. Data is copied from - /// each buffer in order, with the final buffer read from possibly being only partially - /// consumed. This method must behave as a call to `write_volatile` with the buffers - /// concatenated would. The default implementation calls `write_volatile` with either the first - /// nonempty buffer provided, or returns `Ok(0)` if none exists. - fn write_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result { - bufs.iter() - .find(|b| !b.is_empty()) - .map(|&b| self.write_volatile(b)) - .unwrap_or(Ok(0)) - } - - /// Write bytes from the slice to the given file until all the bytes from the slice have been - /// written, or an error is returned. - fn write_all_volatile(&mut self, mut slice: VolatileSlice) -> Result<()> { - while !slice.is_empty() { - let bytes_written = self.write_volatile(slice)?; - if bytes_written == 0 { - return Err(Error::from(ErrorKind::WriteZero)); - } - // Will panic if read_volatile read more bytes than we gave it, which would be worthy of - // a panic. - slice = slice.offset(bytes_written).unwrap(); - } - Ok(()) - } -} - -impl<'a, T: FileReadWriteVolatile + ?Sized> FileReadWriteVolatile for &'a mut T { - fn read_volatile(&mut self, slice: VolatileSlice) -> Result { - (**self).read_volatile(slice) - } - - fn read_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result { - (**self).read_vectored_volatile(bufs) - } - - fn read_exact_volatile(&mut self, slice: VolatileSlice) -> Result<()> { - (**self).read_exact_volatile(slice) - } - - fn write_volatile(&mut self, slice: VolatileSlice) -> Result { - (**self).write_volatile(slice) - } - - fn write_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result { - (**self).write_vectored_volatile(bufs) - } - - fn write_all_volatile(&mut self, slice: VolatileSlice) -> Result<()> { - (**self).write_all_volatile(slice) - } -} - -/// A trait similar to the unix `ReadExt` and `WriteExt` traits, but for volatile memory. -pub trait FileReadWriteAtVolatile { - /// Reads bytes from this file at `offset` into the given slice, returning the number of bytes - /// read on success. - fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result; - - /// Like `read_at_volatile`, except it reads to a slice of buffers. Data is copied to fill each - /// buffer in order, with the final buffer written to possibly being only partially filled. This - /// method must behave as a single call to `read_at_volatile` with the buffers concatenated - /// would. The default implementation calls `read_at_volatile` with either the first nonempty - /// buffer provided, or returns `Ok(0)` if none exists. - fn read_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result { - if let Some(&slice) = bufs.first() { - self.read_at_volatile(slice, offset) - } else { - Ok(0) - } - } - - /// Reads bytes from this file at `offset` into the given slice until all bytes in the slice are - /// read, or an error is returned. - fn read_exact_at_volatile(&mut self, mut slice: VolatileSlice, mut offset: u64) -> Result<()> { - while !slice.is_empty() { - match self.read_at_volatile(slice, offset) { - Ok(0) => return Err(Error::from(ErrorKind::UnexpectedEof)), - Ok(n) => { - slice = slice.offset(n).unwrap(); - offset = offset.checked_add(n as u64).unwrap(); - } - Err(ref e) if e.kind() == ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - Ok(()) - } - - /// Writes bytes from this file at `offset` into the given slice, returning the number of bytes - /// written on success. - fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result; - - /// Like `write_at_at_volatile`, except that it writes from a slice of buffers. Data is copied - /// from each buffer in order, with the final buffer read from possibly being only partially - /// consumed. This method must behave as a call to `write_at_volatile` with the buffers - /// concatenated would. The default implementation calls `write_at_volatile` with either the - /// first nonempty buffer provided, or returns `Ok(0)` if none exists. - fn write_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result { - if let Some(&slice) = bufs.first() { - self.write_at_volatile(slice, offset) - } else { - Ok(0) - } - } - - /// Writes bytes from this file at `offset` into the given slice until all bytes in the slice - /// are written, or an error is returned. - fn write_all_at_volatile(&mut self, mut slice: VolatileSlice, mut offset: u64) -> Result<()> { - while !slice.is_empty() { - match self.write_at_volatile(slice, offset) { - Ok(0) => return Err(Error::from(ErrorKind::WriteZero)), - Ok(n) => { - slice = slice.offset(n).unwrap(); - offset = offset.checked_add(n as u64).unwrap(); - } - Err(ref e) if e.kind() == ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - Ok(()) - } -} - -impl<'a, T: FileReadWriteAtVolatile + ?Sized> FileReadWriteAtVolatile for &'a mut T { - fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result { - (**self).read_at_volatile(slice, offset) - } - - fn read_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result { - (**self).read_vectored_at_volatile(bufs, offset) - } - - fn read_exact_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<()> { - (**self).read_exact_at_volatile(slice, offset) - } - - fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result { - (**self).write_at_volatile(slice, offset) - } - - fn write_vectored_at_volatile(&mut self, bufs: &[VolatileSlice], offset: u64) -> Result { - (**self).write_vectored_at_volatile(bufs, offset) - } - - fn write_all_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result<()> { - (**self).write_all_at_volatile(slice, offset) - } -} - -macro_rules! volatile_impl { - ($ty:ty) => { - impl FileReadWriteVolatile for $ty { - fn read_volatile(&mut self, slice: VolatileSlice) -> Result { - // Safe because only bytes inside the slice are accessed and the kernel is expected - // to handle arbitrary memory for I/O. - let ret = - unsafe { read(self.as_raw_fd(), slice.as_ptr() as *mut c_void, slice.len()) }; - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - - fn read_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result { - let iovecs: Vec = bufs - .iter() - .map(|s| libc::iovec { - iov_base: s.as_ptr() as *mut c_void, - iov_len: s.len() as size_t, - }) - .collect(); - - if iovecs.is_empty() { - return Ok(0); - } - - // Safe because only bytes inside the buffers are accessed and the kernel is - // expected to handle arbitrary memory for I/O. - let ret = unsafe { readv(self.as_raw_fd(), &iovecs[0], iovecs.len() as c_int) }; - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - - fn write_volatile(&mut self, slice: VolatileSlice) -> Result { - // Safe because only bytes inside the slice are accessed and the kernel is expected - // to handle arbitrary memory for I/O. - let ret = unsafe { - write( - self.as_raw_fd(), - slice.as_ptr() as *const c_void, - slice.len(), - ) - }; - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - - fn write_vectored_volatile(&mut self, bufs: &[VolatileSlice]) -> Result { - let iovecs: Vec = bufs - .iter() - .map(|s| libc::iovec { - iov_base: s.as_ptr() as *mut c_void, - iov_len: s.len() as size_t, - }) - .collect(); - - if iovecs.is_empty() { - return Ok(0); - } - - // Safe because only bytes inside the buffers are accessed and the kernel is - // expected to handle arbitrary memory for I/O. - let ret = unsafe { writev(self.as_raw_fd(), &iovecs[0], iovecs.len() as c_int) }; - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - } - - impl FileReadWriteAtVolatile for $ty { - fn read_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result { - // Safe because only bytes inside the slice are accessed and the kernel is expected - // to handle arbitrary memory for I/O. - let ret = unsafe { - pread64( - self.as_raw_fd(), - slice.as_ptr() as *mut c_void, - slice.len(), - offset as off64_t, - ) - }; - - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - - fn read_vectored_at_volatile( - &mut self, - bufs: &[VolatileSlice], - offset: u64, - ) -> Result { - let iovecs: Vec = bufs - .iter() - .map(|s| libc::iovec { - iov_base: s.as_ptr() as *mut c_void, - iov_len: s.len() as size_t, - }) - .collect(); - - if iovecs.is_empty() { - return Ok(0); - } - - // Safe because only bytes inside the buffers are accessed and the kernel is - // expected to handle arbitrary memory for I/O. - let ret = unsafe { - preadv64( - self.as_raw_fd(), - &iovecs[0], - iovecs.len() as c_int, - offset as off64_t, - ) - }; - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - - fn write_at_volatile(&mut self, slice: VolatileSlice, offset: u64) -> Result { - // Safe because only bytes inside the slice are accessed and the kernel is expected - // to handle arbitrary memory for I/O. - let ret = unsafe { - pwrite64( - self.as_raw_fd(), - slice.as_ptr() as *const c_void, - slice.len(), - offset as off64_t, - ) - }; - - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - - fn write_vectored_at_volatile( - &mut self, - bufs: &[VolatileSlice], - offset: u64, - ) -> Result { - let iovecs: Vec = bufs - .iter() - .map(|s| libc::iovec { - iov_base: s.as_ptr() as *mut c_void, - iov_len: s.len() as size_t, - }) - .collect(); - - if iovecs.is_empty() { - return Ok(0); - } - - // Safe because only bytes inside the buffers are accessed and the kernel is - // expected to handle arbitrary memory for I/O. - let ret = unsafe { - pwritev64( - self.as_raw_fd(), - &iovecs[0], - iovecs.len() as c_int, - offset as off64_t, - ) - }; - if ret >= 0 { - Ok(ret as usize) - } else { - Err(Error::last_os_error()) - } - } - } - }; -} - -volatile_impl!(File); diff --git a/vhost_user_fs/src/filesystem.rs b/vhost_user_fs/src/filesystem.rs deleted file mode 100644 index cad2d002f..000000000 --- a/vhost_user_fs/src/filesystem.rs +++ /dev/null @@ -1,1148 +0,0 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use std::convert::TryInto; -use std::ffi::CStr; -use std::fs::File; -use std::io; -use std::mem; -use std::time::Duration; - -use crate::fuse; - -use super::fs_cache_req_handler::FsCacheReqHandler; -pub use fuse::FsOptions; -pub use fuse::OpenOptions; -pub use fuse::RemovemappingOne; -pub use fuse::SetattrValid; -pub use fuse::ROOT_ID; - -/// Information about a path in the filesystem. -pub struct Entry { - /// An `Inode` that uniquely identifies this path. During `lookup`, setting this to `0` means a - /// negative entry. Returning `ENOENT` also means a negative entry but setting this to `0` - /// allows the kernel to cache the negative result for `entry_timeout`. The value should be - /// produced by converting a `FileSystem::Inode` into a `u64`. - pub inode: u64, - - /// The generation number for this `Entry`. Typically used for network file systems. An `inode` - /// / `generation` pair must be unique over the lifetime of the file system (rather than just - /// the lifetime of the mount). In other words, if a `FileSystem` implementation re-uses an - /// `Inode` after it has been deleted then it must assign a new, previously unused generation - /// number to the `Inode` at the same time. - pub generation: u64, - - /// Inode attributes. Even if `attr_timeout` is zero, `attr` must be correct. For example, for - /// `open()`, FUSE uses `attr.st_size` from `lookup()` to determine how many bytes to request. - /// If this value is not correct, incorrect data will be returned. - pub attr: libc::stat64, - - /// How long the values in `attr` should be considered valid. If the attributes of the `Entry` - /// are only modified by the FUSE client, then this should be set to a very large value. - pub attr_timeout: Duration, - - /// How long the name associated with this `Entry` should be considered valid. If directory - /// entries are only changed or deleted by the FUSE client, then this should be set to a very - /// large value. - pub entry_timeout: Duration, -} - -impl From for fuse::EntryOut { - fn from(entry: Entry) -> fuse::EntryOut { - fuse::EntryOut { - nodeid: entry.inode, - generation: entry.generation, - entry_valid: entry.entry_timeout.as_secs(), - attr_valid: entry.attr_timeout.as_secs(), - entry_valid_nsec: entry.entry_timeout.subsec_nanos(), - attr_valid_nsec: entry.attr_timeout.subsec_nanos(), - attr: entry.attr.into(), - } - } -} - -/// Represents information about an entry in a directory. -pub struct DirEntry<'a> { - /// The inode number for this entry. This does NOT have to be the same as the `Inode` for this - /// directory entry. However, it must be the same as the `attr.st_ino` field of the `Entry` that - /// would be returned by a `lookup` request in the parent directory for `name`. - pub ino: libc::ino64_t, - - /// Any non-zero value that the kernel can use to identify the current point in the directory - /// entry stream. It does not need to be the actual physical position. A value of `0` is - /// reserved to mean "from the beginning" and should never be used. The `offset` value of the - /// first entry in a stream should point to the beginning of the second entry and so on. - pub offset: u64, - - /// The type of this directory entry. Valid values are any of the `libc::DT_*` constants. - pub type_: u32, - - /// The name of this directory entry. There are no requirements for the contents of this field - /// and any sequence of bytes is considered valid. - pub name: &'a [u8], -} - -/// A reply to a `getxattr` method call. -pub enum GetxattrReply { - /// The value of the requested extended attribute. This can be arbitrary textual or binary data - /// and does not need to be nul-terminated. - Value(Vec), - - /// The size of the buffer needed to hold the value of the requested extended attribute. Should - /// be returned when the `size` parameter is 0. Callers should note that it is still possible - /// for the size of the value to change in between `getxattr` calls and should not assume that a - /// subsequent call to `getxattr` with the returned count will always succeed. - Count(u32), -} - -/// A reply to a `listxattr` method call. -pub enum ListxattrReply { - /// A buffer containing a nul-separated list of the names of all the extended attributes - /// associated with this `Inode`. This list of names may be unordered and includes a namespace - /// prefix. There may be several disjoint namespaces associated with a single `Inode`. - Names(Vec), - - /// This size of the buffer needed to hold the full list of extended attribute names associated - /// with this `Inode`. Should be returned when the `size` parameter is 0. Callers should note - /// that it is still possible for the set of extended attributes to change between `listxattr` - /// calls and so should not assume that a subsequent call to `listxattr` with the returned count - /// will always succeed. - Count(u32), -} - -/// A trait for directly copying data from the fuse transport into a `File` without first storing it -/// in an intermediate buffer. -pub trait ZeroCopyReader { - /// Copies at most `count` bytes from `self` directly into `f` at offset `off` without storing - /// it in any intermediate buffers. If the return value is `Ok(n)` then it must be guaranteed - /// that `0 <= n <= count`. If `n` is `0`, then it can indicate one of 3 possibilities: - /// - /// 1. There is no more data left in `self`. - /// 2. There is no more space in `f`. - /// 3. `count` was `0`. - /// - /// # Errors - /// - /// If any error is returned then the implementation must guarantee that no bytes were copied - /// from `self`. If the underlying write to `f` returns `0` then the implementation must return - /// an error of the kind `io::ErrorKind::WriteZero`. - fn read_to(&mut self, f: &mut File, count: usize, off: u64) -> io::Result; - - /// Copies exactly `count` bytes of data from `self` into `f` at offset `off`. `off + count` - /// must be less than `u64::MAX`. - /// - /// # Errors - /// - /// If an error is returned then the number of bytes copied from `self` is unspecified but it - /// will never be more than `count`. - fn read_exact_to(&mut self, f: &mut File, mut count: usize, mut off: u64) -> io::Result<()> { - let c = count - .try_into() - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - if off.checked_add(c).is_none() { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "`off` + `count` must be less than u64::MAX", - )); - } - - while count > 0 { - match self.read_to(f, count, off) { - Ok(0) => { - return Err(io::Error::new( - io::ErrorKind::WriteZero, - "failed to fill whole buffer", - )) - } - Ok(n) => { - count -= n; - off += n as u64; - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - - Ok(()) - } - - /// Copies all remaining bytes from `self` into `f` at offset `off`. Equivalent to repeatedly - /// calling `read_to` until it returns either `Ok(0)` or a non-`ErrorKind::Interrupted` error. - /// - /// # Errors - /// - /// If an error is returned then the number of bytes copied from `self` is unspecified. - fn copy_to_end(&mut self, f: &mut File, mut off: u64) -> io::Result { - let mut out = 0; - loop { - match self.read_to(f, ::std::usize::MAX, off) { - Ok(0) => return Ok(out), - Ok(n) => { - off = off.saturating_add(n as u64); - out += n; - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - } -} - -impl<'a, R: ZeroCopyReader> ZeroCopyReader for &'a mut R { - fn read_to(&mut self, f: &mut File, count: usize, off: u64) -> io::Result { - (**self).read_to(f, count, off) - } - fn read_exact_to(&mut self, f: &mut File, count: usize, off: u64) -> io::Result<()> { - (**self).read_exact_to(f, count, off) - } - fn copy_to_end(&mut self, f: &mut File, off: u64) -> io::Result { - (**self).copy_to_end(f, off) - } -} - -/// A trait for directly copying data from a `File` into the fuse transport without first storing -/// it in an intermediate buffer. -pub trait ZeroCopyWriter { - /// Copies at most `count` bytes from `f` at offset `off` directly into `self` without storing - /// it in any intermediate buffers. If the return value is `Ok(n)` then it must be guaranteed - /// that `0 <= n <= count`. If `n` is `0`, then it can indicate one of 3 possibilities: - /// - /// 1. There is no more data left in `f`. - /// 2. There is no more space in `self`. - /// 3. `count` was `0`. - /// - /// # Errors - /// - /// If any error is returned then the implementation must guarantee that no bytes were copied - /// from `f`. If the underlying read from `f` returns `0` then the implementation must return an - /// error of the kind `io::ErrorKind::UnexpectedEof`. - fn write_from(&mut self, f: &mut File, count: usize, off: u64) -> io::Result; - - /// Copies exactly `count` bytes of data from `f` at offset `off` into `self`. `off + count` - /// must be less than `u64::MAX`. - /// - /// # Errors - /// - /// If an error is returned then the number of bytes copied from `self` is unspecified but it - /// well never be more than `count`. - fn write_all_from(&mut self, f: &mut File, mut count: usize, mut off: u64) -> io::Result<()> { - let c = count - .try_into() - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - if off.checked_add(c).is_none() { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "`off` + `count` must be less than u64::MAX", - )); - } - - while count > 0 { - match self.write_from(f, count, off) { - Ok(0) => { - return Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "failed to write whole buffer", - )) - } - Ok(n) => { - // No need for checked math here because we verified that `off + count` will not - // overflow and `n` must be <= `count`. - count -= n; - off += n as u64; - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - - Ok(()) - } - - /// Copies all remaining bytes from `f` at offset `off` into `self`. Equivalent to repeatedly - /// calling `write_from` until it returns either `Ok(0)` or a non-`ErrorKind::Interrupted` - /// error. - /// - /// # Errors - /// - /// If an error is returned then the number of bytes copied from `f` is unspecified. - fn copy_to_end(&mut self, f: &mut File, mut off: u64) -> io::Result { - let mut out = 0; - loop { - match self.write_from(f, ::std::usize::MAX, off) { - Ok(0) => return Ok(out), - Ok(n) => { - off = off.saturating_add(n as u64); - out += n; - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - } - } -} - -impl<'a, W: ZeroCopyWriter> ZeroCopyWriter for &'a mut W { - fn write_from(&mut self, f: &mut File, count: usize, off: u64) -> io::Result { - (**self).write_from(f, count, off) - } - fn write_all_from(&mut self, f: &mut File, count: usize, off: u64) -> io::Result<()> { - (**self).write_all_from(f, count, off) - } - fn copy_to_end(&mut self, f: &mut File, off: u64) -> io::Result { - (**self).copy_to_end(f, off) - } -} - -/// Additional context associated with requests. -#[derive(Clone, Copy, Debug)] -pub struct Context { - /// The user ID of the calling process. - pub uid: libc::uid_t, - - /// The group ID of the calling process. - pub gid: libc::gid_t, - - /// The thread group ID of the calling process. - pub pid: libc::pid_t, -} - -impl From for Context { - fn from(source: fuse::InHeader) -> Self { - Context { - uid: source.uid, - gid: source.gid, - pid: source.pid as i32, - } - } -} - -/// The main trait that connects a file system with a transport. -#[allow(unused_variables)] -pub trait FileSystem { - /// Represents a location in the filesystem tree and can be used to perform operations that act - /// on the metadata of a file/directory (e.g., `getattr` and `setattr`). Can also be used as the - /// starting point for looking up paths in the filesystem tree. An `Inode` may support operating - /// directly on the content of the path that to which it points. `FileSystem` implementations - /// that support this should set the `FsOptions::ZERO_MESSAGE_OPEN` option in the return value - /// of the `init` function. On linux based systems, an `Inode` is equivalent to opening a file - /// or directory with the `libc::O_PATH` flag. - /// - /// # Lookup Count - /// - /// The `FileSystem` implementation is required to keep a "lookup count" for every `Inode`. - /// Every time an `Entry` is returned by a `FileSystem` trait method, this lookup count should - /// increase by 1. The lookup count for an `Inode` decreases when the kernel sends a `forget` - /// request. `Inode`s with a non-zero lookup count may receive requests from the kernel even - /// after calls to `unlink`, `rmdir` or (when overwriting an existing file) `rename`. - /// `FileSystem` implementations must handle such requests properly and it is recommended to - /// defer removal of the `Inode` until the lookup count reaches zero. Calls to `unlink`, `rmdir` - /// or `rename` will be followed closely by `forget` unless the file or directory is open, in - /// which case the kernel issues `forget` only after the `release` or `releasedir` calls. - /// - /// Note that if a file system will be exported over NFS the `Inode`'s lifetime must extend even - /// beyond `forget`. See the `generation` field in `Entry`. - type Inode: From + Into; - - /// Represents a file or directory that is open for reading/writing. - type Handle: From + Into; - - /// Initialize the file system. - /// - /// This method is called when a connection to the FUSE kernel module is first established. The - /// `capable` parameter indicates the features that are supported by the kernel module. The - /// implementation should return the options that it supports. Any options set in the returned - /// `FsOptions` that are not also set in `capable` are silently dropped. - fn init(&self, capable: FsOptions) -> io::Result { - Ok(FsOptions::empty()) - } - - /// Clean up the file system. - /// - /// Called when the filesystem exits. All open `Handle`s should be closed and the lookup count - /// for all open `Inode`s implicitly goes to zero. At this point the connection to the FUSE - /// kernel module may already be gone so implementations should not rely on being able to - /// communicate with the kernel. - fn destroy(&self) {} - - /// Look up a directory entry by name and get its attributes. - /// - /// If this call is successful then the lookup count of the `Inode` associated with the returned - /// `Entry` must be increased by 1. - fn lookup(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Forget about an inode. - /// - /// Called when the kernel removes an inode from its internal caches. `count` indicates the - /// amount by which the lookup count for the inode should be decreased. If reducing the lookup - /// count by `count` causes it to go to zero, then the implementation may delete the `Inode`. - fn forget(&self, ctx: Context, inode: Self::Inode, count: u64) {} - - /// Forget about multiple inodes. - /// - /// `requests` is a vector of `(inode, count)` pairs. See the documentation for `forget` for - /// more information. - fn batch_forget(&self, ctx: Context, requests: Vec<(Self::Inode, u64)>) { - for (inode, count) in requests { - self.forget(ctx, inode, count) - } - } - - /// Get attributes for a file / directory. - /// - /// If `handle` is not `None`, then it contains the handle previously returned by the - /// implementation after a call to `open` or `opendir`. However, implementations should still - /// take care to verify the handle if they do not trust the client (e.g., virtio-fs). - /// - /// If writeback caching is enabled (`FsOptions::WRITEBACK_CACHE`), then the kernel module - /// likely has a better idea of the length of the file than the file system (for - /// example, if there was a write that extended the size of the file but has not yet been - /// flushed). In this case, the `st_size` field of the returned struct is ignored. - /// - /// The returned `Duration` indicates how long the returned attributes should be considered - /// valid by the client. If the attributes are only changed via the FUSE kernel module (i.e., - /// the kernel module has exclusive access), then this should be a very large value. - fn getattr( - &self, - ctx: Context, - inode: Self::Inode, - handle: Option, - ) -> io::Result<(libc::stat64, Duration)> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Set attributes for a file / directory. - /// - /// If `handle` is not `None`, then it contains the handle previously returned by the - /// implementation after a call to `open` or `opendir`. However, implementations should still - /// take care to verify the handle if they do not trust the client (e.g., virtio-fs). - /// - /// The `valid` parameter indicates the fields of `attr` that may be considered valid and should - /// be set by the file system. The content of all other fields in `attr` is undefined. - /// - /// If the `FsOptions::HANDLE_KILLPRIV` was set during `init`, then the implementation is - /// expected to reset the setuid and setgid bits if the file size or owner is being changed. - /// - /// This method returns the new attributes after making the modifications requested by the - /// client. The returned `Duration` indicates how long the returned attributes should be - /// considered valid by the client. If the attributes are only changed via the FUSE kernel - /// module (i.e., the kernel module has exclusive access), then this should be a very large - /// value. - fn setattr( - &self, - ctx: Context, - inode: Self::Inode, - attr: libc::stat64, - handle: Option, - valid: SetattrValid, - ) -> io::Result<(libc::stat64, Duration)> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Read a symbolic link. - fn readlink(&self, ctx: Context, inode: Self::Inode) -> io::Result> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Create a symbolic link. - /// - /// The file system must create a symbolic link named `name` in the directory represented by - /// `parent`, which contains the string `linkname`. Returns an `Entry` for the newly created - /// symlink. - /// - /// If this call is successful then the lookup count of the `Inode` associated with the returned - /// `Entry` must be increased by 1. - fn symlink( - &self, - ctx: Context, - linkname: &CStr, - parent: Self::Inode, - name: &CStr, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Create a file node. - /// - /// Create a regular file, character device, block device, fifo, or socket node named `name` in - /// the directory represented by `inode`. Valid values for `mode` and `rdev` are the same as - /// those accepted by the `mknod(2)` system call. Returns an `Entry` for the newly created node. - /// - /// When the `FsOptions::DONT_MASK` feature is set, the file system is responsible for setting - /// the permissions of the created node to `mode & !umask`. - /// - /// If this call is successful then the lookup count of the `Inode` associated with the returned - /// `Entry` must be increased by 1. - fn mknod( - &self, - ctx: Context, - inode: Self::Inode, - name: &CStr, - mode: u32, - rdev: u32, - umask: u32, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Create a directory. - /// - /// When the `FsOptions::DONT_MASK` feature is set, the file system is responsible for setting - /// the permissions of the created directory to `mode & !umask`. Returns an `Entry` for the - /// newly created directory. - /// - /// If this call is successful then the lookup count of the `Inode` associated with the returned - /// `Entry` must be increased by 1. - fn mkdir( - &self, - ctx: Context, - parent: Self::Inode, - name: &CStr, - mode: u32, - umask: u32, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Remove a file. - /// - /// If the file's inode lookup count is non-zero, then the file system is expected to delay - /// removal of the inode until the lookup count goes to zero. See the documentation of the - /// `forget` function for more information. - fn unlink(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Remove a directory. - /// - /// If the directory's inode lookup count is non-zero, then the file system is expected to delay - /// removal of the inode until the lookup count goes to zero. See the documentation of the - /// `forget` function for more information. - fn rmdir(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Rename a file / directory. - /// - /// If the destination exists, it should be atomically replaced. If the destination's inode - /// lookup count is non-zero, then the file system is expected to delay removal of the inode - /// until the lookup count goes to zero. See the documentation of the `forget` function for more - /// information. - /// - /// `flags` may be `libc::RENAME_EXCHANGE` or `libc::RENAME_NOREPLACE`. If - /// `libc::RENAME_NOREPLACE` is specified, the implementation must not overwrite `newname` if it - /// exists and must return an error instead. If `libc::RENAME_EXCHANGE` is specified, the - /// implementation must atomically exchange the two files, i.e., both must exist and neither may - /// be deleted. - fn rename( - &self, - ctx: Context, - olddir: Self::Inode, - oldname: &CStr, - newdir: Self::Inode, - newname: &CStr, - flags: u32, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Create a hard link. - /// - /// Create a hard link from `inode` to `newname` in the directory represented by `newparent`. - /// - /// If this call is successful then the lookup count of the `Inode` associated with the returned - /// `Entry` must be increased by 1. - fn link( - &self, - ctx: Context, - inode: Self::Inode, - newparent: Self::Inode, - newname: &CStr, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Open a file. - /// - /// Open the file associated with `inode` for reading / writing. All values accepted by the - /// `open(2)` system call are valid values for `flags` and must be handled by the file system. - /// However, there are some additional rules: - /// - /// * Creation flags (`libc::O_CREAT`, `libc::O_EXCL`, `libc::O_NOCTTY`) will be filtered out - /// and handled by the kernel. - /// - /// * The file system should check the access modes (`libc::O_RDONLY`, `libc::O_WRONLY`, - /// `libc::O_RDWR`) to determine if the operation is permitted. If the file system was mounted - /// with the `-o default_permissions` mount option, then this check will also be carried out - /// by the kernel before sending the open request. - /// - /// * When writeback caching is enabled (`FsOptions::WRITEBACK_CACHE`) the kernel may send read - /// requests even for files opened with `libc::O_WRONLY`. The file system should be prepared - /// to handle this. - /// - /// * When writeback caching is enabled, the kernel will handle the `libc::O_APPEND` flag. - /// However, this will not work reliably unless the kernel has exclusive access to the file. - /// In this case the file system may either ignore the `libc::O_APPEND` flag or return an - /// error to indicate that reliable `libc::O_APPEND` handling is not available. - /// - /// * When writeback caching is disabled, the file system is expected to properly handle - /// `libc::O_APPEND` and ensure that each write is appended to the end of the file. - /// - /// The file system may choose to return a `Handle` to refer to the newly opened file. The - /// kernel will then use this `Handle` for all operations on the content of the file (`read`, - /// `write`, `flush`, `release`, `fsync`). If the file system does not return a - /// `Handle` then the kernel will use the `Inode` for the file to operate on its contents. In - /// this case the file system may wish to enable the `FsOptions::ZERO_MESSAGE_OPEN` feature if - /// it is supported by the kernel (see below). - /// - /// The returned `OpenOptions` allow the file system to change the way the opened file is - /// handled by the kernel. See the documentation of `OpenOptions` for more information. - /// - /// If the `FsOptions::ZERO_MESSAGE_OPEN` feature is enabled by both the file system - /// implementation and the kernel, then the file system may return an error of `ENOSYS`. This - /// will be interpreted by the kernel as success and future calls to `open` and `release` will - /// be handled by the kernel without being passed on to the file system. - fn open( - &self, - ctx: Context, - inode: Self::Inode, - flags: u32, - ) -> io::Result<(Option, OpenOptions)> { - // Matches the behavior of libfuse. - Ok((None, OpenOptions::empty())) - } - - /// Create and open a file. - /// - /// If the file does not already exist, the file system should create it with the specified - /// `mode`. When the `FsOptions::DONT_MASK` feature is set, the file system is responsible for - /// setting the permissions of the created file to `mode & !umask`. - /// - /// If the file system returns an `ENOSYS` error, then the kernel will treat this method as - /// unimplemented and all future calls to `create` will be handled by calling the `mknod` and - /// `open` methods instead. - /// - /// See the documentation for the `open` method for more information about opening the file. In - /// addition to the optional `Handle` and the `OpenOptions`, the file system must also return an - /// `Entry` for the file. This increases the lookup count for the `Inode` associated with the - /// file by 1. - fn create( - &self, - ctx: Context, - parent: Self::Inode, - name: &CStr, - mode: u32, - flags: u32, - umask: u32, - ) -> io::Result<(Entry, Option, OpenOptions)> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Read data from a file. - /// - /// Returns `size` bytes of data starting from offset `off` from the file associated with - /// `inode` or `handle`. - /// - /// `flags` contains the flags used to open the file. Similarly, `handle` is the `Handle` - /// returned by the file system from the `open` method, if any. If the file system - /// implementation did not return a `Handle` from `open` then the contents of `handle` are - /// undefined. - /// - /// This method should return exactly the number of bytes requested by the kernel, except in the - /// case of error or EOF. Otherwise, the kernel will substitute the rest of the data with - /// zeroes. An exception to this rule is if the file was opened with the "direct I/O" option - /// (`libc::O_DIRECT`), in which case the kernel will forward the return code from this method - /// to the userspace application that made the system call. - #[allow(clippy::too_many_arguments)] - fn read( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - w: W, - size: u32, - offset: u64, - lock_owner: Option, - flags: u32, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Write data to a file. - /// - /// Writes `size` bytes of data starting from offset `off` to the file associated with `inode` - /// or `handle`. - /// - /// `flags` contains the flags used to open the file. Similarly, `handle` is the `Handle` - /// returned by the file system from the `open` method, if any. If the file system - /// implementation did not return a `Handle` from `open` then the contents of `handle` are - /// undefined. - /// - /// If `delayed_write` is true then it indicates that this is a write for buffered data. - /// - /// If `kill_priv` is true then it indicates that the file system is expected to clear the - /// setuid and setgid bits. - /// - /// This method should return exactly the number of bytes requested by the kernel, except in the - /// case of error. An exception to this rule is if the file was opened with the "direct I/O" - /// option (`libc::O_DIRECT`), in which case the kernel will forward the return code from this - /// method to the userspace application that made the system call. - #[allow(clippy::too_many_arguments)] - fn write( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - r: R, - size: u32, - offset: u64, - lock_owner: Option, - delayed_write: bool, - kill_priv: bool, - flags: u32, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Flush the contents of a file. - /// - /// This method is called on every `close()` of a file descriptor. Since it is possible to - /// duplicate file descriptors there may be many `flush` calls for one call to `open`. - /// - /// File systems should not make any assumptions about when `flush` will be - /// called or even if it will be called at all. - /// - /// `handle` is the `Handle` returned by the file system from the `open` method, if any. If the - /// file system did not return a `Handle` from `open` then the contents of `handle` are - /// undefined. - /// - /// Unlike `fsync`, the file system is not required to flush pending writes. One reason to flush - /// data is if the file system wants to return write errors during close. However, this is not - /// portable because POSIX does not require `close` to wait for delayed I/O to complete. - /// - /// If the `FsOptions::POSIX_LOCKS` feature is enabled, then the file system must remove all - /// locks belonging to `lock_owner`. - /// - /// If this method returns an `ENOSYS` error then the kernel will treat it as success and all - /// subsequent calls to `flush` will be handled by the kernel without being forwarded to the - /// file system. - fn flush( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - lock_owner: u64, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Synchronize file contents. - /// - /// File systems must ensure that the file contents have been flushed to disk before returning - /// from this method. If `datasync` is true then only the file data (but not the metadata) needs - /// to be flushed. - /// - /// `handle` is the `Handle` returned by the file system from the `open` method, if any. If the - /// file system did not return a `Handle` from `open` then the contents of - /// `handle` are undefined. - /// - /// If this method returns an `ENOSYS` error then the kernel will treat it as success and all - /// subsequent calls to `fsync` will be handled by the kernel without being forwarded to the - /// file system. - fn fsync( - &self, - ctx: Context, - inode: Self::Inode, - datasync: bool, - handle: Self::Handle, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Allocate requested space for file data. - /// - /// If this function returns success, then the file system must guarantee that it is possible to - /// write up to `length` bytes of data starting at `offset` without failing due to a lack of - /// free space on the disk. - /// - /// `handle` is the `Handle` returned by the file system from the `open` method, if any. If the - /// file system did not return a `Handle` from `open` then the contents of `handle` are - /// undefined. - /// - /// If this method returns an `ENOSYS` error then the kernel will treat that as a permanent - /// failure: all future calls to `fallocate` will fail with `EOPNOTSUPP` without being forwarded - /// to the file system. - fn fallocate( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - mode: u32, - offset: u64, - length: u64, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Release an open file. - /// - /// This method is called when there are no more references to an open file: all file - /// descriptors are closed and all memory mappings are unmapped. - /// - /// For every `open` call there will be exactly one `release` call (unless the file system is - /// force-unmounted). - /// - /// The file system may reply with an error, but error values are not returned to the `close()` - /// or `munmap()` which triggered the release. - /// - /// `handle` is the `Handle` returned by the file system from the `open` method, if any. If the - /// file system did not return a `Handle` from `open` then the contents of - /// `handle` are undefined. - /// - /// If `flush` is `true` then the contents of the file should also be flushed to disk. - #[allow(clippy::too_many_arguments)] - fn release( - &self, - ctx: Context, - inode: Self::Inode, - flags: u32, - handle: Self::Handle, - flush: bool, - flock_release: bool, - lock_owner: Option, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Get information about the file system. - fn statfs(&self, ctx: Context, inode: Self::Inode) -> io::Result { - // Safe because we are zero-initializing a struct with only POD fields. - let mut st: libc::statvfs64 = unsafe { mem::zeroed() }; - - // This matches the behavior of libfuse as it returns these values if the - // filesystem doesn't implement this method. - st.f_namemax = 255; - st.f_bsize = 512; - - Ok(st) - } - - /// Set an extended attribute. - /// - /// If this method fails with an `ENOSYS` error, then the kernel will treat that as a permanent - /// failure. The kernel will return `EOPNOTSUPP` for all future calls to `setxattr` without - /// forwarding them to the file system. - /// - /// Valid values for flags are the same as those accepted by the `setxattr(2)` system call and - /// have the same behavior. - fn setxattr( - &self, - ctx: Context, - inode: Self::Inode, - name: &CStr, - value: &[u8], - flags: u32, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Get an extended attribute. - /// - /// If `size` is 0, then the file system should respond with `GetxattrReply::Count` and the - /// number of bytes needed to hold the value. If `size` is large enough to hold the value, then - /// the file system should reply with `GetxattrReply::Value` and the value of the extended - /// attribute. If `size` is not 0 but is also not large enough to hold the value, then the file - /// system should reply with an `ERANGE` error. - /// - /// If this method fails with an `ENOSYS` error, then the kernel will treat that as a permanent - /// failure. The kernel will return `EOPNOTSUPP` for all future calls to `getxattr` without - /// forwarding them to the file system. - fn getxattr( - &self, - ctx: Context, - inode: Self::Inode, - name: &CStr, - size: u32, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// List extended attribute names. - /// - /// If `size` is 0, then the file system should respond with `ListxattrReply::Count` and the - /// number of bytes needed to hold a `\0` byte separated list of the names of all the extended - /// attributes. If `size` is large enough to hold the `\0` byte separated list of the attribute - /// names, then the file system should reply with `ListxattrReply::Names` and the list. If - /// `size` is not 0 but is also not large enough to hold the list, then the file system should - /// reply with an `ERANGE` error. - /// - /// If this method fails with an `ENOSYS` error, then the kernel will treat that as a permanent - /// failure. The kernel will return `EOPNOTSUPP` for all future calls to `listxattr` without - /// forwarding them to the file system. - fn listxattr(&self, ctx: Context, inode: Self::Inode, size: u32) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Remove an extended attribute. - /// - /// If this method fails with an `ENOSYS` error, then the kernel will treat that as a permanent - /// failure. The kernel will return `EOPNOTSUPP` for all future calls to `removexattr` without - /// forwarding them to the file system. - fn removexattr(&self, ctx: Context, inode: Self::Inode, name: &CStr) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Open a directory for reading. - /// - /// The file system may choose to return a `Handle` to refer to the newly opened directory. The - /// kernel will then use this `Handle` for all operations on the content of the directory - /// (`readdir`, `readdirplus`, `fsyncdir`, `releasedir`). If the file system does not return a - /// `Handle` then the kernel will use the `Inode` for the directory to operate on its contents. - /// In this case the file system may wish to enable the `FsOptions::ZERO_MESSAGE_OPENDIR` - /// feature if it is supported by the kernel (see below). - /// - /// The returned `OpenOptions` allow the file system to change the way the opened directory is - /// handled by the kernel. See the documentation of `OpenOptions` for more information. - /// - /// If the `FsOptions::ZERO_MESSAGE_OPENDIR` feature is enabled by both the file system - /// implementation and the kernel, then the file system may return an error of `ENOSYS`. This - /// will be interpreted by the kernel as success and future calls to `opendir` and `releasedir` - /// will be handled by the kernel without being passed on to the file system. - fn opendir( - &self, - ctx: Context, - inode: Self::Inode, - flags: u32, - ) -> io::Result<(Option, OpenOptions)> { - // Matches the behavior of libfuse. - Ok((None, OpenOptions::empty())) - } - - /// Read a directory. - /// - /// `handle` is the `Handle` returned by the file system from the `opendir` method, if any. If - /// the file system did not return a `Handle` from `opendir` then the contents of `handle` are - /// undefined. - /// - /// `size` indicates the maximum number of bytes that should be returned by this method. - /// - /// If `offset` is non-zero then it corresponds to one of the `offset` values from a `DirEntry` - /// that was previously returned by a call to `readdir` for the same handle. In this case the - /// file system should skip over the entries before the position defined by the `offset` value. - /// If entries were added or removed while the `Handle` is open then the file system may still - /// include removed entries or skip newly created entries. However, adding or removing entries - /// should never cause the file system to skip over unrelated entries or include an entry more - /// than once. This means that `offset` cannot be a simple index and must include sufficient - /// information to uniquely determine the next entry in the list even when the set of entries is - /// being changed. - /// - /// The file system may return entries for the current directory (".") and parent directory - /// ("..") but is not required to do so. If the file system does not return these entries, then - /// they are implicitly added by the kernel. - /// - /// The lookup count for `Inode`s associated with the returned directory entries is **NOT** - /// affected by this method. - /// - // TODO(chirantan): Change method signature to return `Iterator` rather than using an - // `FnMut` for adding entries. - fn readdir( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - size: u32, - offset: u64, - add_entry: F, - ) -> io::Result<()> - where - F: FnMut(DirEntry) -> io::Result, - { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Read a directory with entry attributes. - /// - /// Like `readdir` but also includes the attributes for each directory entry. - /// - /// `handle` is the `Handle` returned by the file system from the `opendir` method, if any. If - /// the file system did not return a `Handle` from `opendir` then the contents of `handle` are - /// undefined. - /// - /// `size` indicates the maximum number of bytes that should be returned by this method. - /// - /// Unlike `readdir`, the lookup count for `Inode`s associated with the returned directory - /// entries **IS** affected by this method (since it returns an `Entry` for each `DirEntry`). - /// The count for each `Inode` should be increased by 1. - /// - /// File systems that implement this method should enable the `FsOptions::DO_READDIRPLUS` - /// feature when supported by the kernel. The kernel will not call this method unless that - /// feature is enabled. - /// - /// Additionally, file systems that implement both `readdir` and `readdirplus` should enable the - /// `FsOptions::READDIRPLUS_AUTO` feature to allow the kernel to issue both `readdir` and - /// `readdirplus` requests, depending on how much information is expected to be required. - /// - /// TODO(chirantan): Change method signature to return `Iterator<(DirEntry, Entry)>` rather than - /// using an `FnMut` for adding entries. - fn readdirplus( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - size: u32, - offset: u64, - add_entry: F, - ) -> io::Result<()> - where - F: FnMut(DirEntry, Entry) -> io::Result, - { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Synchronize the contents of a directory. - /// - /// File systems must ensure that the directory contents have been flushed to disk before - /// returning from this method. If `datasync` is true then only the directory data (but not the - /// metadata) needs to be flushed. - /// - /// `handle` is the `Handle` returned by the file system from the `opendir` method, if any. If - /// the file system did not return a `Handle` from `opendir` then the contents of - /// `handle` are undefined. - /// - /// If this method returns an `ENOSYS` error then the kernel will treat it as success and all - /// subsequent calls to `fsyncdir` will be handled by the kernel without being forwarded to the - /// file system. - fn fsyncdir( - &self, - ctx: Context, - inode: Self::Inode, - datasync: bool, - handle: Self::Handle, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Release an open directory. - /// - /// For every `opendir` call there will be exactly one `releasedir` call (unless the file system - /// is force-unmounted). - /// - /// `handle` is the `Handle` returned by the file system from the `opendir` method, if any. If - /// the file system did not return a `Handle` from `opendir` then the contents of `handle` are - /// undefined. - /// - /// `flags` contains used the flags used to open the directory in `opendir`. - fn releasedir( - &self, - ctx: Context, - inode: Self::Inode, - flags: u32, - handle: Self::Handle, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Setup a mapping so that guest can access files in DAX style. - #[allow(clippy::too_many_arguments)] - fn setupmapping( - &self, - _ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - foffset: u64, - len: u64, - flags: u64, - moffset: u64, - vu_req: &mut T, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - fn removemapping( - &self, - _ctx: Context, - requests: Vec, - vu_req: &mut T, - ) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Check file access permissions. - /// - /// This method is called when a userspace process in the client makes an `access()` or - /// `chdir()` system call. If the file system was mounted with the `-o default_permissions` - /// mount option, then the kernel will perform these checks itself and this method will not be - /// called. - /// - /// If this method returns an `ENOSYS` error, then the kernel will treat it as a permanent - /// success: all future calls to `access` will return success without being forwarded to the - /// file system. - fn access(&self, ctx: Context, inode: Self::Inode, mask: u32) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// Reposition read/write file offset. - fn lseek( - &self, - ctx: Context, - inode: Self::Inode, - handle: Self::Handle, - offset: u64, - whence: u32, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - #[allow(clippy::too_many_arguments)] - fn copyfilerange( - &self, - ctx: Context, - inode_in: Self::Inode, - handle_in: Self::Handle, - offset_in: u64, - inode_out: Self::Inode, - handle_out: Self::Handle, - offset_out: u64, - len: u64, - flags: u64, - ) -> io::Result { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn getlk(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn setlk(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn setlkw(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn ioctl(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn bmap(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn poll(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } - - /// TODO: support this - fn notify_reply(&self) -> io::Result<()> { - Err(io::Error::from_raw_os_error(libc::ENOSYS)) - } -} diff --git a/vhost_user_fs/src/fs_cache_req_handler.rs b/vhost_user_fs/src/fs_cache_req_handler.rs deleted file mode 100644 index f95ef8fd6..000000000 --- a/vhost_user_fs/src/fs_cache_req_handler.rs +++ /dev/null @@ -1,62 +0,0 @@ -use crate::fuse; -use std::io; -use std::os::unix::io::RawFd; -use vhost_rs::vhost_user::message::{ - VhostUserFSSlaveMsg, VhostUserFSSlaveMsgFlags, VHOST_USER_FS_SLAVE_ENTRIES, -}; -use vhost_rs::vhost_user::{SlaveFsCacheReq, VhostUserMasterReqHandler}; - -/// Trait for virtio-fs cache requests operations. This is mainly used to hide -/// vhost-user details from virtio-fs's fuse part. -pub trait FsCacheReqHandler: Send + Sync + 'static { - /// Setup a dedicated mapping so that guest can access file data in DAX style. - fn map( - &mut self, - foffset: u64, - moffset: u64, - len: u64, - flags: u64, - fd: RawFd, - ) -> io::Result<()>; - - /// Remove those mappings that provide the access to file data. - fn unmap(&mut self, requests: Vec) -> io::Result<()>; -} - -impl FsCacheReqHandler for SlaveFsCacheReq { - fn map( - &mut self, - foffset: u64, - moffset: u64, - len: u64, - flags: u64, - fd: RawFd, - ) -> io::Result<()> { - let mut msg: VhostUserFSSlaveMsg = Default::default(); - msg.fd_offset[0] = foffset; - msg.cache_offset[0] = moffset; - msg.len[0] = len; - msg.flags[0] = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { - VhostUserFSSlaveMsgFlags::MAP_W | VhostUserFSSlaveMsgFlags::MAP_R - } else { - VhostUserFSSlaveMsgFlags::MAP_R - }; - - self.fs_slave_map(&msg, fd)?; - Ok(()) - } - - fn unmap(&mut self, requests: Vec) -> io::Result<()> { - for chunk in requests.chunks(VHOST_USER_FS_SLAVE_ENTRIES) { - let mut msg: VhostUserFSSlaveMsg = Default::default(); - - for (ind, req) in chunk.iter().enumerate() { - msg.len[ind] = req.len; - msg.cache_offset[ind] = req.moffset; - } - - self.fs_slave_unmap(&msg)?; - } - Ok(()) - } -} diff --git a/vhost_user_fs/src/fuse.rs b/vhost_user_fs/src/fuse.rs deleted file mode 100644 index 585d46eb2..000000000 --- a/vhost_user_fs/src/fuse.rs +++ /dev/null @@ -1,1165 +0,0 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use std::mem; - -use bitflags::bitflags; -use vm_memory::ByteValued; - -/// Version number of this interface. -pub const KERNEL_VERSION: u32 = 7; - -/// Minor version number of this interface. -pub const KERNEL_MINOR_VERSION: u32 = 27; - -/// The ID of the inode corresponding to the root directory of the file system. -pub const ROOT_ID: u64 = 1; - -// Bitmasks for `fuse_setattr_in.valid`. -const FATTR_MODE: u32 = 1; -const FATTR_UID: u32 = 2; -const FATTR_GID: u32 = 4; -const FATTR_SIZE: u32 = 8; -const FATTR_ATIME: u32 = 16; -const FATTR_MTIME: u32 = 32; -pub const FATTR_FH: u32 = 64; -const FATTR_ATIME_NOW: u32 = 128; -const FATTR_MTIME_NOW: u32 = 256; -pub const FATTR_LOCKOWNER: u32 = 512; -const FATTR_CTIME: u32 = 1024; - -bitflags! { - pub struct SetattrValid: u32 { - const MODE = FATTR_MODE; - const UID = FATTR_UID; - const GID = FATTR_GID; - const SIZE = FATTR_SIZE; - const ATIME = FATTR_ATIME; - const MTIME = FATTR_MTIME; - const ATIME_NOW = FATTR_ATIME_NOW; - const MTIME_NOW = FATTR_MTIME_NOW; - const CTIME = FATTR_CTIME; - } -} - -// Flags returned by the OPEN request. - -/// Bypass page cache for this open file. -const FOPEN_DIRECT_IO: u32 = 1; - -/// Don't invalidate the data cache on open. -const FOPEN_KEEP_CACHE: u32 = 2; - -/// The file is not seekable. -const FOPEN_NONSEEKABLE: u32 = 4; - -/// Allow caching this directory. -const FOPEN_CACHE_DIR: u32 = 8; - -bitflags! { - /// Options controlling the behavior of files opened by the server in response - /// to an open or create request. - pub struct OpenOptions: u32 { - const DIRECT_IO = FOPEN_DIRECT_IO; - const KEEP_CACHE = FOPEN_KEEP_CACHE; - const NONSEEKABLE = FOPEN_NONSEEKABLE; - const CACHE_DIR = FOPEN_CACHE_DIR; - } -} - -// INIT request/reply flags. - -/// Asynchronous read requests. -const ASYNC_READ: u32 = 1; - -/// Remote locking for POSIX file locks. -const POSIX_LOCKS: u32 = 2; - -/// Kernel sends file handle for fstat, etc... (not yet supported). -const FILE_OPS: u32 = 4; - -/// Handles the O_TRUNC open flag in the filesystem. -const ATOMIC_O_TRUNC: u32 = 8; - -/// FileSystem handles lookups of "." and "..". -const EXPORT_SUPPORT: u32 = 16; - -/// FileSystem can handle write size larger than 4kB. -const BIG_WRITES: u32 = 32; - -/// Don't apply umask to file mode on create operations. -const DONT_MASK: u32 = 64; - -/// Kernel supports splice write on the device. -const SPLICE_WRITE: u32 = 128; - -/// Kernel supports splice move on the device. -const SPLICE_MOVE: u32 = 256; - -/// Kernel supports splice read on the device. -const SPLICE_READ: u32 = 512; - -/// Remote locking for BSD style file locks. -const FLOCK_LOCKS: u32 = 1024; - -/// Kernel supports ioctl on directories. -const HAS_IOCTL_DIR: u32 = 2048; - -/// Automatically invalidate cached pages. -const AUTO_INVAL_DATA: u32 = 4096; - -/// Do READDIRPLUS (READDIR+LOOKUP in one). -const DO_READDIRPLUS: u32 = 8192; - -/// Adaptive readdirplus. -const READDIRPLUS_AUTO: u32 = 16384; - -/// Asynchronous direct I/O submission. -const ASYNC_DIO: u32 = 32768; - -/// Use writeback cache for buffered writes. -const WRITEBACK_CACHE: u32 = 65536; - -/// Kernel supports zero-message opens. -const NO_OPEN_SUPPORT: u32 = 131_072; - -/// Allow parallel lookups and readdir. -const PARALLEL_DIROPS: u32 = 262_144; - -/// Fs handles killing suid/sgid/cap on write/chown/trunc. -const HANDLE_KILLPRIV: u32 = 524_288; - -/// FileSystem supports posix acls. -const POSIX_ACL: u32 = 1_048_576; - -/// Reading the device after abort returns ECONNABORTED. -const ABORT_ERROR: u32 = 2_097_152; - -/// Init_out.max_pages contains the max number of req pages. -const MAX_PAGES: u32 = 4_194_304; - -/// Cache READLINK responses -const CACHE_SYMLINKS: u32 = 8_388_608; - -/// Kernel supports zero-message opendir -const NO_OPENDIR_SUPPORT: u32 = 16_777_216; - -/// Only invalidate cached pages on explicit request -const EXPLICIT_INVAL_DATA: u32 = 33_554_432; - -bitflags! { - /// A bitfield passed in as a parameter to and returned from the `init` method of the - /// `FileSystem` trait. - pub struct FsOptions: u32 { - /// Indicates that the filesystem supports asynchronous read requests. - /// - /// If this capability is not requested/available, the kernel will ensure that there is at - /// most one pending read request per file-handle at any time, and will attempt to order - /// read requests by increasing offset. - /// - /// This feature is enabled by default when supported by the kernel. - const ASYNC_READ = ASYNC_READ; - - /// Indicates that the filesystem supports "remote" locking. - /// - /// This feature is not enabled by default and should only be set if the filesystem - /// implements the `getlk` and `setlk` methods of the `FileSystem` trait. - const POSIX_LOCKS = POSIX_LOCKS; - - /// Kernel sends file handle for fstat, etc... (not yet supported). - const FILE_OPS = FILE_OPS; - - /// Indicates that the filesystem supports the `O_TRUNC` open flag. If disabled, and an - /// application specifies `O_TRUNC`, fuse first calls `setattr` to truncate the file and - /// then calls `open` with `O_TRUNC` filtered out. - /// - /// This feature is enabled by default when supported by the kernel. - const ATOMIC_O_TRUNC = ATOMIC_O_TRUNC; - - /// Indicates that the filesystem supports lookups of "." and "..". - /// - /// This feature is disabled by default. - const EXPORT_SUPPORT = EXPORT_SUPPORT; - - /// FileSystem can handle write size larger than 4kB. - const BIG_WRITES = BIG_WRITES; - - /// Indicates that the kernel should not apply the umask to the file mode on create - /// operations. - /// - /// This feature is disabled by default. - const DONT_MASK = DONT_MASK; - - /// Indicates that the server should try to use `splice(2)` when writing to the fuse device. - /// This may improve performance. - /// - /// This feature is not currently supported. - const SPLICE_WRITE = SPLICE_WRITE; - - /// Indicates that the server should try to move pages instead of copying when writing to / - /// reading from the fuse device. This may improve performance. - /// - /// This feature is not currently supported. - const SPLICE_MOVE = SPLICE_MOVE; - - /// Indicates that the server should try to use `splice(2)` when reading from the fuse - /// device. This may improve performance. - /// - /// This feature is not currently supported. - const SPLICE_READ = SPLICE_READ; - - /// If set, then calls to `flock` will be emulated using POSIX locks and must - /// then be handled by the filesystem's `setlock()` handler. - /// - /// If not set, `flock` calls will be handled by the FUSE kernel module internally (so any - /// access that does not go through the kernel cannot be taken into account). - /// - /// This feature is disabled by default. - const FLOCK_LOCKS = FLOCK_LOCKS; - - /// Indicates that the filesystem supports ioctl's on directories. - /// - /// This feature is enabled by default when supported by the kernel. - const HAS_IOCTL_DIR = HAS_IOCTL_DIR; - - /// Traditionally, while a file is open the FUSE kernel module only asks the filesystem for - /// an update of the file's attributes when a client attempts to read beyond EOF. This is - /// unsuitable for e.g. network filesystems, where the file contents may change without the - /// kernel knowing about it. - /// - /// If this flag is set, FUSE will check the validity of the attributes on every read. If - /// the attributes are no longer valid (i.e., if the *attribute* timeout has expired) then - /// FUSE will first send another `getattr` request. If the new mtime differs from the - /// previous value, any cached file *contents* will be invalidated as well. - /// - /// This flag should always be set when available. If all file changes go through the - /// kernel, *attribute* validity should be set to a very large number to avoid unnecessary - /// `getattr()` calls. - /// - /// This feature is enabled by default when supported by the kernel. - const AUTO_INVAL_DATA = AUTO_INVAL_DATA; - - /// Indicates that the filesystem supports readdirplus. - /// - /// The feature is not enabled by default and should only be set if the filesystem - /// implements the `readdirplus` method of the `FileSystem` trait. - const DO_READDIRPLUS = DO_READDIRPLUS; - - /// Indicates that the filesystem supports adaptive readdirplus. - /// - /// If `DO_READDIRPLUS` is not set, this flag has no effect. - /// - /// If `DO_READDIRPLUS` is set and this flag is not set, the kernel will always issue - /// `readdirplus()` requests to retrieve directory contents. - /// - /// If `DO_READDIRPLUS` is set and this flag is set, the kernel will issue both `readdir()` - /// and `readdirplus()` requests, depending on how much information is expected to be - /// required. - /// - /// This feature is not enabled by default and should only be set if the file system - /// implements both the `readdir` and `readdirplus` methods of the `FileSystem` trait. - const READDIRPLUS_AUTO = READDIRPLUS_AUTO; - - /// Indicates that the filesystem supports asynchronous direct I/O submission. - /// - /// If this capability is not requested/available, the kernel will ensure that there is at - /// most one pending read and one pending write request per direct I/O file-handle at any - /// time. - /// - /// This feature is enabled by default when supported by the kernel. - const ASYNC_DIO = ASYNC_DIO; - - /// Indicates that writeback caching should be enabled. This means that individual write - /// request may be buffered and merged in the kernel before they are sent to the file - /// system. - /// - /// This feature is disabled by default. - const WRITEBACK_CACHE = WRITEBACK_CACHE; - - /// Indicates support for zero-message opens. If this flag is set in the `capable` parameter - /// of the `init` trait method, then the file system may return `ENOSYS` from the open() handler - /// to indicate success. Further attempts to open files will be handled in the kernel. (If - /// this flag is not set, returning ENOSYS will be treated as an error and signaled to the - /// caller). - /// - /// Setting (or not setting) the field in the `FsOptions` returned from the `init` method - /// has no effect. - const ZERO_MESSAGE_OPEN = NO_OPEN_SUPPORT; - - /// Indicates support for parallel directory operations. If this flag is unset, the FUSE - /// kernel module will ensure that lookup() and readdir() requests are never issued - /// concurrently for the same directory. - /// - /// This feature is enabled by default when supported by the kernel. - const PARALLEL_DIROPS = PARALLEL_DIROPS; - - /// Indicates that the file system is responsible for unsetting setuid and setgid bits when a - /// file is written, truncated, or its owner is changed. - /// - /// This feature is enabled by default when supported by the kernel. - const HANDLE_KILLPRIV = HANDLE_KILLPRIV; - - /// Indicates support for POSIX ACLs. - /// - /// If this feature is enabled, the kernel will cache and have responsibility for enforcing - /// ACLs. ACL will be stored as xattrs and passed to userspace, which is responsible for - /// updating the ACLs in the filesystem, keeping the file mode in sync with the ACL, and - /// ensuring inheritance of default ACLs when new filesystem nodes are created. Note that - /// this requires that the file system is able to parse and interpret the xattr - /// representation of ACLs. - /// - /// Enabling this feature implicitly turns on the `default_permissions` mount option (even - /// if it was not passed to mount(2)). - /// - /// This feature is disabled by default. - const POSIX_ACL = POSIX_ACL; - - /// Indicates that if the connection is gone because of sysfs abort, reading from the device - /// will return -ECONNABORTED. - /// - /// This feature is not currently supported. - const ABORT_ERROR = ABORT_ERROR; - - /// Indicates support for negotiating the maximum number of pages supported. - /// - /// If this feature is enabled, we can tell the kernel the maximum number of pages that we - /// support to transfer in a single request. - /// - /// This feature is enabled by default if supported by the kernel. - const MAX_PAGES = MAX_PAGES; - - /// Indicates that the kernel supports caching READLINK responses. - /// - /// This feature is not currently supported. - const CACHE_SYMLINKS = CACHE_SYMLINKS; - - /// Indicates support for zero-message opens. If this flag is set in the `capable` parameter - /// of the `init` trait method, then the file system may return `ENOSYS` from the opendir() handler - /// to indicate success. Further attempts to open directories will be handled in the kernel. (If - /// this flag is not set, returning ENOSYS will be treated as an error and signaled to the - /// caller). - /// - /// Setting (or not setting) the field in the `FsOptions` returned from the `init` method - /// has no effect. - const ZERO_MESSAGE_OPENDIR = NO_OPENDIR_SUPPORT; - - /// Indicates support for explicit data invalidation. If this feature is enabled, the - /// server is fully responsible for data cache invalidation, and the kernel won't - /// invalidate files data cache on size change and only truncate that cache to new size - /// in case the size decreased. - /// - /// This feature is not currently supported. - const EXPLICIT_INVAL_DATA = EXPLICIT_INVAL_DATA; - } -} - -// Release flags. -pub const RELEASE_FLUSH: u32 = 1; -pub const RELEASE_FLOCK_UNLOCK: u32 = 2; - -// Getattr flags. -pub const GETATTR_FH: u32 = 1; - -// Lock flags. -pub const LK_FLOCK: u32 = 1; - -// Write flags. - -/// Delayed write from page cache, file handle is guessed. -pub const WRITE_CACHE: u32 = 1; - -/// `lock_owner` field is valid. -pub const WRITE_LOCKOWNER: u32 = 2; - -/// Kill suid and sgid bits -pub const WRITE_KILL_PRIV: u32 = 4; - -// Read flags. -pub const READ_LOCKOWNER: u32 = 2; - -// Ioctl flags. - -/// 32bit compat ioctl on 64bit machine -const IOCTL_COMPAT: u32 = 1; - -/// Not restricted to well-formed ioctls, retry allowed -const IOCTL_UNRESTRICTED: u32 = 2; - -/// Retry with new iovecs -const IOCTL_RETRY: u32 = 4; - -/// 32bit ioctl -const IOCTL_32BIT: u32 = 8; - -/// Is a directory -const IOCTL_DIR: u32 = 16; - -/// x32 compat ioctl on 64bit machine (64bit time_t) -const IOCTL_COMPAT_X32: u32 = 32; - -/// Maximum of in_iovecs + out_iovecs -const IOCTL_MAX_IOV: u32 = 256; - -bitflags! { - pub struct IoctlFlags: u32 { - /// 32bit compat ioctl on 64bit machine - const IOCTL_COMPAT = IOCTL_COMPAT; - - /// Not restricted to well-formed ioctls, retry allowed - const IOCTL_UNRESTRICTED = IOCTL_UNRESTRICTED; - - /// Retry with new iovecs - const IOCTL_RETRY = IOCTL_RETRY; - - /// 32bit ioctl - const IOCTL_32BIT = IOCTL_32BIT; - - /// Is a directory - const IOCTL_DIR = IOCTL_DIR; - - /// x32 compat ioctl on 64bit machine (64bit time_t) - const IOCTL_COMPAT_X32 = IOCTL_COMPAT_X32; - - /// Maximum of in_iovecs + out_iovecs - const IOCTL_MAX_IOV = IOCTL_MAX_IOV; - } -} - -/// Request poll notify. -pub const POLL_SCHEDULE_NOTIFY: u32 = 1; - -/// The read buffer is required to be at least 8k, but may be much larger. -pub const FUSE_MIN_READ_BUFFER: u32 = 8192; - -pub const FUSE_COMPAT_ENTRY_OUT_SIZE: u32 = 120; -pub const FUSE_COMPAT_ATTR_OUT_SIZE: u32 = 96; -pub const FUSE_COMPAT_MKNOD_IN_SIZE: u32 = 8; -pub const FUSE_COMPAT_WRITE_IN_SIZE: u32 = 24; -pub const FUSE_COMPAT_STATFS_SIZE: u32 = 48; -pub const FUSE_COMPAT_INIT_OUT_SIZE: u32 = 8; -pub const FUSE_COMPAT_22_INIT_OUT_SIZE: u32 = 24; - -// Message definitions follow. It is safe to implement ByteValued for all of these -// because they are POD types. - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct Attr { - pub ino: u64, - pub size: u64, - pub blocks: u64, - pub atime: u64, - pub mtime: u64, - pub ctime: u64, - pub atimensec: u32, - pub mtimensec: u32, - pub ctimensec: u32, - pub mode: u32, - pub nlink: u32, - pub uid: u32, - pub gid: u32, - pub rdev: u32, - pub blksize: u32, - pub padding: u32, -} -unsafe impl ByteValued for Attr {} - -impl From for Attr { - fn from(st: libc::stat64) -> Attr { - Attr { - ino: st.st_ino, - size: st.st_size as u64, - blocks: st.st_blocks as u64, - atime: st.st_atime as u64, - mtime: st.st_mtime as u64, - ctime: st.st_ctime as u64, - atimensec: st.st_atime_nsec as u32, - mtimensec: st.st_mtime_nsec as u32, - ctimensec: st.st_ctime_nsec as u32, - mode: st.st_mode, - nlink: st.st_nlink as u32, - uid: st.st_uid, - gid: st.st_gid, - rdev: st.st_rdev as u32, - blksize: st.st_blksize as u32, - ..Default::default() - } - } -} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct Kstatfs { - pub blocks: u64, - pub bfree: u64, - pub bavail: u64, - pub files: u64, - pub ffree: u64, - pub bsize: u32, - pub namelen: u32, - pub frsize: u32, - pub padding: u32, - pub spare: [u32; 6], -} -unsafe impl ByteValued for Kstatfs {} - -impl From for Kstatfs { - fn from(st: libc::statvfs64) -> Self { - Kstatfs { - blocks: st.f_blocks, - bfree: st.f_bfree, - bavail: st.f_bavail, - files: st.f_files, - ffree: st.f_ffree, - bsize: st.f_bsize as u32, - namelen: st.f_namemax as u32, - frsize: st.f_frsize as u32, - ..Default::default() - } - } -} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct FileLock { - pub start: u64, - pub end: u64, - pub type_: u32, - pub pid: u32, /* tgid */ -} -unsafe impl ByteValued for FileLock {} - -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -pub enum Opcode { - Lookup = 1, - Forget = 2, /* No Reply */ - Getattr = 3, - Setattr = 4, - Readlink = 5, - Symlink = 6, - Mknod = 8, - Mkdir = 9, - Unlink = 10, - Rmdir = 11, - Rename = 12, - Link = 13, - Open = 14, - Read = 15, - Write = 16, - Statfs = 17, - Release = 18, - Fsync = 20, - Setxattr = 21, - Getxattr = 22, - Listxattr = 23, - Removexattr = 24, - Flush = 25, - Init = 26, - Opendir = 27, - Readdir = 28, - Releasedir = 29, - Fsyncdir = 30, - Getlk = 31, - Setlk = 32, - Setlkw = 33, - Access = 34, - Create = 35, - Interrupt = 36, - Bmap = 37, - Destroy = 38, - Ioctl = 39, - Poll = 40, - NotifyReply = 41, - BatchForget = 42, - Fallocate = 43, - Readdirplus = 44, - Rename2 = 45, - Lseek = 46, - CopyFileRange = 47, - SetupMapping = 48, - RemoveMapping = 49, -} - -#[repr(u32)] -#[derive(Debug, Copy, Clone)] -pub enum NotifyOpcode { - Poll = 1, - InvalInode = 2, - InvalEntry = 3, - Store = 4, - Retrieve = 5, - Delete = 6, - CodeMax = 7, -} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct EntryOut { - pub nodeid: u64, /* Inode ID */ - pub generation: u64, /* Inode generation: nodeid:gen must be unique for the fs's lifetime */ - pub entry_valid: u64, /* Cache timeout for the name */ - pub attr_valid: u64, /* Cache timeout for the attributes */ - pub entry_valid_nsec: u32, - pub attr_valid_nsec: u32, - pub attr: Attr, -} -unsafe impl ByteValued for EntryOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct ForgetIn { - pub nlookup: u64, -} -unsafe impl ByteValued for ForgetIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct ForgetOne { - pub nodeid: u64, - pub nlookup: u64, -} -unsafe impl ByteValued for ForgetOne {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct BatchForgetIn { - pub count: u32, - pub dummy: u32, -} -unsafe impl ByteValued for BatchForgetIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct GetattrIn { - pub flags: u32, - pub dummy: u32, - pub fh: u64, -} -unsafe impl ByteValued for GetattrIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct AttrOut { - pub attr_valid: u64, /* Cache timeout for the attributes */ - pub attr_valid_nsec: u32, - pub dummy: u32, - pub attr: Attr, -} -unsafe impl ByteValued for AttrOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct MknodIn { - pub mode: u32, - pub rdev: u32, - pub umask: u32, - pub padding: u32, -} -unsafe impl ByteValued for MknodIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct MkdirIn { - pub mode: u32, - pub umask: u32, -} -unsafe impl ByteValued for MkdirIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct RenameIn { - pub newdir: u64, -} -unsafe impl ByteValued for RenameIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct Rename2In { - pub newdir: u64, - pub flags: u32, - pub padding: u32, -} -unsafe impl ByteValued for Rename2In {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct LinkIn { - pub oldnodeid: u64, -} -unsafe impl ByteValued for LinkIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct SetattrIn { - pub valid: u32, - pub padding: u32, - pub fh: u64, - pub size: u64, - pub lock_owner: u64, - pub atime: u64, - pub mtime: u64, - pub ctime: u64, - pub atimensec: u32, - pub mtimensec: u32, - pub ctimensec: u32, - pub mode: u32, - pub unused4: u32, - pub uid: u32, - pub gid: u32, - pub unused5: u32, -} -unsafe impl ByteValued for SetattrIn {} - -impl Into for SetattrIn { - fn into(self) -> libc::stat64 { - // Safe because we are zero-initializing a struct with only POD fields. - let mut out: libc::stat64 = unsafe { mem::zeroed() }; - out.st_mode = self.mode; - out.st_uid = self.uid; - out.st_gid = self.gid; - out.st_size = self.size as i64; - out.st_atime = self.atime as i64; - out.st_mtime = self.mtime as i64; - out.st_ctime = self.ctime as i64; - out.st_atime_nsec = i64::from(self.atimensec); - out.st_mtime_nsec = i64::from(self.mtimensec); - out.st_ctime_nsec = i64::from(self.ctimensec); - - out - } -} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct OpenIn { - pub flags: u32, - pub unused: u32, -} -unsafe impl ByteValued for OpenIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct CreateIn { - pub flags: u32, - pub mode: u32, - pub umask: u32, - pub padding: u32, -} -unsafe impl ByteValued for CreateIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct OpenOut { - pub fh: u64, - pub open_flags: u32, - pub padding: u32, -} -unsafe impl ByteValued for OpenOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct ReleaseIn { - pub fh: u64, - pub flags: u32, - pub release_flags: u32, - pub lock_owner: u64, -} -unsafe impl ByteValued for ReleaseIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct FlushIn { - pub fh: u64, - pub unused: u32, - pub padding: u32, - pub lock_owner: u64, -} -unsafe impl ByteValued for FlushIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct ReadIn { - pub fh: u64, - pub offset: u64, - pub size: u32, - pub read_flags: u32, - pub lock_owner: u64, - pub flags: u32, - pub padding: u32, -} -unsafe impl ByteValued for ReadIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct WriteIn { - pub fh: u64, - pub offset: u64, - pub size: u32, - pub write_flags: u32, - pub lock_owner: u64, - pub flags: u32, - pub padding: u32, -} -unsafe impl ByteValued for WriteIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct WriteOut { - pub size: u32, - pub padding: u32, -} -unsafe impl ByteValued for WriteOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct StatfsOut { - pub st: Kstatfs, -} -unsafe impl ByteValued for StatfsOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct FsyncIn { - pub fh: u64, - pub fsync_flags: u32, - pub padding: u32, -} -unsafe impl ByteValued for FsyncIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct SetxattrIn { - pub size: u32, - pub flags: u32, -} -unsafe impl ByteValued for SetxattrIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct GetxattrIn { - pub size: u32, - pub padding: u32, -} -unsafe impl ByteValued for GetxattrIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct GetxattrOut { - pub size: u32, - pub padding: u32, -} -unsafe impl ByteValued for GetxattrOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct LkIn { - pub fh: u64, - pub owner: u64, - pub lk: FileLock, - pub lk_flags: u32, - pub padding: u32, -} -unsafe impl ByteValued for LkIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct LkOut { - pub lk: FileLock, -} -unsafe impl ByteValued for LkOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct AccessIn { - pub mask: u32, - pub padding: u32, -} -unsafe impl ByteValued for AccessIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct InitIn { - pub major: u32, - pub minor: u32, - pub max_readahead: u32, - pub flags: u32, -} -unsafe impl ByteValued for InitIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct InitOut { - pub major: u32, - pub minor: u32, - pub max_readahead: u32, - pub flags: u32, - pub max_background: u16, - pub congestion_threshold: u16, - pub max_write: u32, - pub time_gran: u32, - pub max_pages: u16, - pub padding: u16, - pub unused: [u32; 8], -} -unsafe impl ByteValued for InitOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct InterruptIn { - pub unique: u64, -} -unsafe impl ByteValued for InterruptIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct BmapIn { - pub block: u64, - pub blocksize: u32, - pub padding: u32, -} -unsafe impl ByteValued for BmapIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct BmapOut { - pub block: u64, -} -unsafe impl ByteValued for BmapOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct IoctlIn { - pub fh: u64, - pub flags: u32, - pub cmd: u32, - pub arg: u64, - pub in_size: u32, - pub out_size: u32, -} -unsafe impl ByteValued for IoctlIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct IoctlIovec { - pub base: u64, - pub len: u64, -} -unsafe impl ByteValued for IoctlIovec {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct IoctlOut { - pub result: i32, - pub flags: u32, - pub in_iovs: u32, - pub out_iovs: u32, -} -unsafe impl ByteValued for IoctlOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct PollIn { - pub fh: u64, - pub kh: u64, - pub flags: u32, - pub events: u32, -} -unsafe impl ByteValued for PollIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct PollOut { - pub revents: u32, - pub padding: u32, -} -unsafe impl ByteValued for PollOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct NotifyPollWakeupOut { - pub kh: u64, -} -unsafe impl ByteValued for NotifyPollWakeupOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct FallocateIn { - pub fh: u64, - pub offset: u64, - pub length: u64, - pub mode: u32, - pub padding: u32, -} -unsafe impl ByteValued for FallocateIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct InHeader { - pub len: u32, - pub opcode: u32, - pub unique: u64, - pub nodeid: u64, - pub uid: u32, - pub gid: u32, - pub pid: u32, - pub padding: u32, -} -unsafe impl ByteValued for InHeader {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct OutHeader { - pub len: u32, - pub error: i32, - pub unique: u64, -} -unsafe impl ByteValued for OutHeader {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct Dirent { - pub ino: u64, - pub off: u64, - pub namelen: u32, - pub type_: u32, - // char name[]; -} -unsafe impl ByteValued for Dirent {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct Direntplus { - pub entry_out: EntryOut, - pub dirent: Dirent, -} -unsafe impl ByteValued for Direntplus {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct NotifyInvalInodeOut { - pub ino: u64, - pub off: i64, - pub len: i64, -} -unsafe impl ByteValued for NotifyInvalInodeOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct NotifyInvalEntryOut { - pub parent: u64, - pub namelen: u32, - pub padding: u32, -} -unsafe impl ByteValued for NotifyInvalEntryOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct NotifyDeleteOut { - pub parent: u64, - pub child: u64, - pub namelen: u32, - pub padding: u32, -} -unsafe impl ByteValued for NotifyDeleteOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct NotifyStoreOut { - pub nodeid: u64, - pub offset: u64, - pub size: u32, - pub padding: u32, -} -unsafe impl ByteValued for NotifyStoreOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct Notify_Retrieve_Out { - pub notify_unique: u64, - pub nodeid: u64, - pub offset: u64, - pub size: u32, - pub padding: u32, -} -unsafe impl ByteValued for Notify_Retrieve_Out {} - -/* Matches the size of fuse_write_in */ -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct NotifyRetrieveIn { - pub dummy1: u64, - pub offset: u64, - pub size: u32, - pub dummy2: u32, - pub dummy3: u64, - pub dummy4: u64, -} -unsafe impl ByteValued for NotifyRetrieveIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct LseekIn { - pub fh: u64, - pub offset: u64, - pub whence: u32, - pub padding: u32, -} -unsafe impl ByteValued for LseekIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct LseekOut { - pub offset: u64, -} -unsafe impl ByteValued for LseekOut {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct CopyfilerangeIn { - pub fh_in: u64, - pub off_in: u64, - pub nodeid_out: u64, - pub fh_out: u64, - pub off_out: u64, - pub len: u64, - pub flags: u64, -} -unsafe impl ByteValued for CopyfilerangeIn {} - -bitflags! { - pub struct SetupmappingFlags: u64 { - const WRITE = 0x1; - const READ = 0x2; - } -} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct SetupmappingIn { - pub fh: u64, - pub foffset: u64, - pub len: u64, - pub flags: u64, - pub moffset: u64, -} - -unsafe impl ByteValued for SetupmappingIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct RemovemappingIn { - pub count: u32, -} - -unsafe impl ByteValued for RemovemappingIn {} - -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct RemovemappingOne { - pub moffset: u64, - pub len: u64, -} - -unsafe impl ByteValued for RemovemappingOne {} diff --git a/vhost_user_fs/src/lib.rs b/vhost_user_fs/src/lib.rs deleted file mode 100644 index 2dd43224f..000000000 --- a/vhost_user_fs/src/lib.rs +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright © 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause - -#[macro_use] -extern crate log; - -pub mod descriptor_utils; -pub mod file_traits; -pub mod filesystem; -pub mod fs_cache_req_handler; -pub mod fuse; -pub mod multikey; -pub mod passthrough; -pub mod sandbox; -pub mod seccomp; -pub mod server; - -use std::ffi::FromBytesWithNulError; -use std::{error, fmt, io}; - -#[derive(Debug)] -pub enum Error { - /// Failed to decode protocol messages. - DecodeMessage(io::Error), - /// Failed to encode protocol messages. - EncodeMessage(io::Error), - /// One or more parameters are missing. - MissingParameter, - /// A C string parameter is invalid. - InvalidCString(FromBytesWithNulError), - /// The `len` field of the header is too small. - InvalidHeaderLength, - /// The `size` field of the `SetxattrIn` message does not match the length - /// of the decoded value. - InvalidXattrSize((u32, usize)), -} - -impl error::Error for Error {} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use Error::*; - match self { - DecodeMessage(err) => write!(f, "failed to decode fuse message: {}", err), - EncodeMessage(err) => write!(f, "failed to encode fuse message: {}", err), - MissingParameter => write!(f, "one or more parameters are missing"), - InvalidHeaderLength => write!(f, "the `len` field of the header is too small"), - InvalidCString(err) => write!(f, "a c string parameter is invalid: {}", err), - InvalidXattrSize((size, len)) => write!( - f, - "The `size` field of the `SetxattrIn` message does not match the length of the\ - decoded value: size = {}, value.len() = {}", - size, len - ), - } - } -} - -pub type Result = ::std::result::Result; diff --git a/vhost_user_fs/src/main.rs b/vhost_user_fs/src/main.rs deleted file mode 100644 index 632afd508..000000000 --- a/vhost_user_fs/src/main.rs +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright 2019 Intel Corporation. All Rights Reserved. -// -// SPDX-License-Identifier: (Apache-2.0 AND BSD-3-Clause) - -#[macro_use(crate_version, crate_authors)] -extern crate clap; -extern crate log; -extern crate vhost_rs; -extern crate vhost_user_backend; -extern crate virtio_devices; - -use clap::{App, Arg}; -use futures::executor::{ThreadPool, ThreadPoolBuilder}; -use libc::EFD_NONBLOCK; -use log::*; -use seccomp::SeccompAction; -use std::num::Wrapping; -use std::sync::{Arc, Mutex, RwLock}; -use std::{convert, error, fmt, io, process}; - -use vhost_rs::vhost_user::message::*; -use vhost_rs::vhost_user::{Listener, SlaveFsCacheReq}; -use vhost_user_backend::{VhostUserBackend, VhostUserDaemon, Vring}; -use vhost_user_fs::descriptor_utils::Error as VufDescriptorError; -use vhost_user_fs::descriptor_utils::{Reader, Writer}; -use vhost_user_fs::filesystem::FileSystem; -use vhost_user_fs::passthrough::{self, PassthroughFs}; -use vhost_user_fs::sandbox::Sandbox; -use vhost_user_fs::seccomp::enable_seccomp; -use vhost_user_fs::server::Server; -use vhost_user_fs::Error as VhostUserFsError; -use virtio_bindings::bindings::virtio_net::*; -use virtio_bindings::bindings::virtio_ring::{ - VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, -}; -use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap}; -use vm_virtio::queue::DescriptorChain; -use vmm_sys_util::eventfd::EventFd; - -const QUEUE_SIZE: usize = 1024; -const NUM_QUEUES: usize = 2; -const THREAD_POOL_SIZE: usize = 64; - -// The guest queued an available buffer for the high priority queue. -const HIPRIO_QUEUE_EVENT: u16 = 0; -// The guest queued an available buffer for the request queue. -const REQ_QUEUE_EVENT: u16 = 1; -// The device has been dropped. -const KILL_EVENT: u16 = 2; - -type Result = std::result::Result; -type VhostUserBackendResult = std::result::Result; - -#[derive(Debug)] -enum Error { - /// Failed to create kill eventfd. - CreateKillEventFd(io::Error), - /// Failed to create thread pool. - CreateThreadPool(io::Error), - /// Failed to handle event other than input event. - HandleEventNotEpollIn, - /// Failed to handle unknown event. - HandleEventUnknownEvent, - /// No memory configured. - NoMemoryConfigured, - /// Processing queue failed. - ProcessQueue(VhostUserFsError), - /// Creating a queue reader failed. - QueueReader(VufDescriptorError), - /// Creating a queue writer failed. - QueueWriter(VufDescriptorError), -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "vhost_user_fs_error: {:?}", self) - } -} - -impl error::Error for Error {} - -impl convert::From for io::Error { - fn from(e: Error) -> Self { - io::Error::new(io::ErrorKind::Other, e) - } -} - -struct VhostUserFsThread { - mem: Option>, - kill_evt: EventFd, - server: Arc>, - // handle request from slave to master - vu_req: Option, - event_idx: bool, - pool: ThreadPool, -} - -impl Clone for VhostUserFsThread { - fn clone(&self) -> Self { - VhostUserFsThread { - mem: self.mem.clone(), - kill_evt: self.kill_evt.try_clone().unwrap(), - server: self.server.clone(), - vu_req: self.vu_req.clone(), - event_idx: self.event_idx, - pool: self.pool.clone(), - } - } -} - -impl VhostUserFsThread { - fn new(fs: F, thread_pool_size: usize) -> Result { - Ok(VhostUserFsThread { - mem: None, - kill_evt: EventFd::new(EFD_NONBLOCK).map_err(Error::CreateKillEventFd)?, - server: Arc::new(Server::new(fs)), - vu_req: None, - event_idx: false, - pool: ThreadPoolBuilder::new() - .pool_size(thread_pool_size) - .create() - .map_err(Error::CreateThreadPool)?, - }) - } - - fn process_queue(&mut self, vring_lock: Arc>) -> Result { - let mut used_any = false; - let (atomic_mem, mem) = match &self.mem { - Some(m) => (m, m.memory()), - None => return Err(Error::NoMemoryConfigured), - }; - let mut vring = vring_lock.write().unwrap(); - - while let Some(avail_desc) = vring.mut_queue().iter(&mem).next() { - used_any = true; - - // Prepare a set of objects that can be moved to the worker thread. - let desc_head = avail_desc.get_head(); - let atomic_mem = atomic_mem.clone(); - let server = self.server.clone(); - let mut vu_req = self.vu_req.clone(); - let event_idx = self.event_idx; - let vring_lock = vring_lock.clone(); - - self.pool.spawn_ok(async move { - let mem = atomic_mem.memory(); - let desc = DescriptorChain::new_from_head(&mem, desc_head).unwrap(); - let head_index = desc.index; - - let reader = Reader::new(&mem, desc.clone()) - .map_err(Error::QueueReader) - .unwrap(); - let writer = Writer::new(&mem, desc.clone()) - .map_err(Error::QueueWriter) - .unwrap(); - - server - .handle_message(reader, writer, vu_req.as_mut()) - .map_err(Error::ProcessQueue) - .unwrap(); - - let mut vring = vring_lock.write().unwrap(); - - if event_idx { - let queue = vring.mut_queue(); - if let Some(used_idx) = queue.add_used(&mem, head_index, 0) { - if queue.needs_notification(&mem, Wrapping(used_idx)) { - vring.signal_used_queue().unwrap(); - } - } - } else { - vring.mut_queue().add_used(&mem, head_index, 0); - vring.signal_used_queue().unwrap(); - } - }); - } - - Ok(used_any) - } -} - -struct VhostUserFsBackend { - thread: Mutex>, -} - -impl VhostUserFsBackend { - fn new(fs: F, thread_pool_size: usize) -> Result { - let thread = Mutex::new(VhostUserFsThread::new(fs, thread_pool_size)?); - Ok(VhostUserFsBackend { thread }) - } -} - -impl VhostUserBackend for VhostUserFsBackend { - fn num_queues(&self) -> usize { - NUM_QUEUES - } - - fn max_queue_size(&self) -> usize { - QUEUE_SIZE - } - - fn features(&self) -> u64 { - 1 << VIRTIO_F_VERSION_1 - | 1 << VIRTIO_RING_F_INDIRECT_DESC - | 1 << VIRTIO_RING_F_EVENT_IDX - | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() - } - - fn protocol_features(&self) -> VhostUserProtocolFeatures { - VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ - } - - fn set_event_idx(&mut self, enabled: bool) { - self.thread.lock().unwrap().event_idx = enabled; - } - - fn update_memory(&mut self, mem: GuestMemoryMmap) -> VhostUserBackendResult<()> { - self.thread.lock().unwrap().mem = Some(GuestMemoryAtomic::new(mem)); - Ok(()) - } - - fn handle_event( - &self, - device_event: u16, - evset: epoll::Events, - vrings: &[Arc>], - _thread_id: usize, - ) -> VhostUserBackendResult { - if evset != epoll::Events::EPOLLIN { - return Err(Error::HandleEventNotEpollIn.into()); - } - - let mut thread = self.thread.lock().unwrap(); - let mem = match &thread.mem { - Some(m) => m.memory(), - None => return Err(Error::NoMemoryConfigured.into()), - }; - - let vring_lock = match device_event { - HIPRIO_QUEUE_EVENT => { - debug!("HIPRIO_QUEUE_EVENT"); - vrings[0].clone() - } - REQ_QUEUE_EVENT => { - debug!("QUEUE_EVENT"); - vrings[1].clone() - } - _ => return Err(Error::HandleEventUnknownEvent.into()), - }; - - if thread.event_idx { - // vm-virtio's Queue implementation only checks avail_index - // once, so to properly support EVENT_IDX we need to keep - // calling process_queue() until it stops finding new - // requests on the queue. - loop { - { - let mut vring = vring_lock.write().unwrap(); - vring.mut_queue().update_avail_event(&mem); - } - if !thread.process_queue(vring_lock.clone())? { - break; - } - } - } else { - // Without EVENT_IDX, a single call is enough. - thread.process_queue(vring_lock)?; - } - - Ok(false) - } - - fn exit_event(&self, _thread_index: usize) -> Option<(EventFd, Option)> { - Some(( - self.thread.lock().unwrap().kill_evt.try_clone().unwrap(), - Some(KILL_EVENT), - )) - } - - fn set_slave_req_fd(&mut self, vu_req: SlaveFsCacheReq) { - self.thread.lock().unwrap().vu_req = Some(vu_req); - } -} - -fn main() { - let cmd_arguments = App::new("vhost-user-fs backend") - .version(crate_version!()) - .author(crate_authors!()) - .about("Launch a vhost-user-fs backend.") - .arg( - Arg::with_name("shared-dir") - .long("shared-dir") - .help("Shared directory path") - .takes_value(true) - .min_values(1), - ) - .arg( - Arg::with_name("sock") - .long("sock") - .help("vhost-user socket path (deprecated)") - .takes_value(true) - .min_values(1), - ) - .arg( - Arg::with_name("socket") - .long("socket") - .help("vhost-user socket path") - .takes_value(true) - .min_values(1), - ) - .arg( - Arg::with_name("thread-pool-size") - .long("thread-pool-size") - .help("thread pool size (default 64)") - .takes_value(true) - .min_values(1), - ) - .arg( - Arg::with_name("disable-xattr") - .long("disable-xattr") - .help("Disable support for extended attributes"), - ) - .arg( - Arg::with_name("disable-sandbox") - .long("disable-sandbox") - .help("Don't set up a sandbox for the daemon"), - ) - .arg( - Arg::with_name("seccomp") - .long("seccomp") - .help("Disable/debug seccomp security") - .possible_values(&["kill", "log", "trap", "none"]) - .default_value("kill"), - ) - .get_matches(); - - // Retrieve arguments - let shared_dir = cmd_arguments - .value_of("shared-dir") - .expect("Failed to retrieve shared directory path"); - let socket = match cmd_arguments.value_of("socket") { - Some(path) => path, - None => { - println!("warning: use of deprecated parameter '--sock': Please use the '--socket' option instead."); - cmd_arguments - .value_of("sock") - .expect("Failed to retrieve vhost-user socket path") - } - }; - - let thread_pool_size: usize = match cmd_arguments.value_of("thread-pool-size") { - Some(size) => size.parse().expect("Invalid argument for thread-pool-size"), - None => THREAD_POOL_SIZE, - }; - let xattr: bool = !cmd_arguments.is_present("disable-xattr"); - let create_sandbox: bool = !cmd_arguments.is_present("disable-sandbox"); - let seccomp_mode: SeccompAction = match cmd_arguments.value_of("seccomp").unwrap() { - "none" => SeccompAction::Allow, // i.e. no seccomp - "kill" => SeccompAction::Kill, - "log" => SeccompAction::Log, - "trap" => SeccompAction::Trap, - _ => unreachable!(), // We told Arg possible_values - }; - - let listener = Listener::new(socket, true).unwrap(); - - let fs_cfg = if create_sandbox { - let mut sandbox = Sandbox::new(shared_dir.to_string()); - match sandbox.enter().unwrap() { - Some(child_pid) => { - unsafe { libc::waitpid(child_pid, std::ptr::null_mut(), 0) }; - return; - } - None => passthrough::Config { - root_dir: "/".to_string(), - xattr, - proc_sfd_rawfd: sandbox.get_proc_self_fd(), - ..Default::default() - }, - } - } else { - passthrough::Config { - root_dir: shared_dir.to_string(), - xattr, - ..Default::default() - } - }; - - // Must happen before we start the thread pool - if seccomp_mode != SeccompAction::Allow { - enable_seccomp(seccomp_mode).unwrap(); - }; - - let fs = PassthroughFs::new(fs_cfg).unwrap(); - let fs_backend = Arc::new(RwLock::new( - VhostUserFsBackend::new(fs, thread_pool_size).unwrap(), - )); - - let mut daemon = - VhostUserDaemon::new(String::from("vhost-user-fs-backend"), fs_backend.clone()).unwrap(); - - if let Err(e) = daemon.start(listener) { - error!("Failed to start daemon: {:?}", e); - process::exit(1); - } - - if let Err(e) = daemon.wait() { - error!("Waiting for daemon failed: {:?}", e); - } - - let kill_evt = fs_backend - .read() - .unwrap() - .thread - .lock() - .unwrap() - .kill_evt - .try_clone() - .unwrap(); - if let Err(e) = kill_evt.write(1) { - error!("Error shutting down worker thread: {:?}", e) - } -} diff --git a/vhost_user_fs/src/multikey.rs b/vhost_user_fs/src/multikey.rs deleted file mode 100644 index 8dc35a447..000000000 --- a/vhost_user_fs/src/multikey.rs +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use std::borrow::Borrow; -use std::collections::BTreeMap; - -/// A BTreeMap that supports 2 types of keys per value. All the usual restrictions and warnings for -/// `std::collections::BTreeMap` also apply to this struct. Additionally, there is a 1:1 -/// relationship between the 2 key types. In other words, for each `K1` in the map, there is exactly -/// one `K2` in the map and vice versa. -#[derive(Default)] -pub struct MultikeyBTreeMap -where - K1: Ord, - K2: Ord, -{ - // We need to keep a copy of the second key in the main map so that we can remove entries using - // just the main key. Otherwise we would require the caller to provide both keys when calling - // `remove`. - main: BTreeMap, - alt: BTreeMap, -} - -impl MultikeyBTreeMap -where - K1: Clone + Ord, - K2: Clone + Ord, -{ - /// Create a new empty MultikeyBTreeMap. - pub fn new() -> Self { - MultikeyBTreeMap { - main: BTreeMap::default(), - alt: BTreeMap::default(), - } - } - - /// Returns a reference to the value corresponding to the key. - /// - /// The key may be any borrowed form of `K1``, but the ordering on the borrowed form must match - /// the ordering on `K1`. - pub fn get(&self, key: &Q) -> Option<&V> - where - K1: Borrow, - Q: Ord + ?Sized, - { - self.main.get(key).map(|(_, v)| v) - } - - /// Returns a reference to the value corresponding to the alternate key. - /// - /// The key may be any borrowed form of the `K2``, but the ordering on the borrowed form must - /// match the ordering on `K2`. - /// - /// Note that this method performs 2 lookups: one to get the main key and another to get the - /// value associated with that key. For best performance callers should prefer the `get` method - /// over this method whenever possible as `get` only needs to perform one lookup. - pub fn get_alt(&self, key: &Q2) -> Option<&V> - where - K2: Borrow, - Q2: Ord + ?Sized, - { - if let Some(k) = self.alt.get(key) { - self.get(k) - } else { - None - } - } - - /// Inserts a new entry into the map with the given keys and value. - /// - /// Returns `None` if the map did not have an entry with `k1` or `k2` present. If exactly one - /// key was present, then the value associated with that key is updated, the other key is - /// removed, and the old value is returned. If **both** keys were present then the value - /// associated with the main key is updated, the value associated with the alternate key is - /// removed, and the old value associated with the main key is returned. - pub fn insert(&mut self, k1: K1, k2: K2, v: V) -> Option { - let oldval = if let Some(oldkey) = self.alt.insert(k2.clone(), k1.clone()) { - self.main.remove(&oldkey) - } else { - None - }; - self.main - .insert(k1, (k2.clone(), v)) - .or(oldval) - .map(|(oldk2, v)| { - if oldk2 != k2 { - self.alt.remove(&oldk2); - } - v - }) - } - - /// Remove a key from the map, returning the value associated with that key if it was previously - /// in the map. - /// - /// The key may be any borrowed form of `K1``, but the ordering on the borrowed form must match - /// the ordering on `K1`. - pub fn remove(&mut self, key: &Q) -> Option - where - K1: Borrow, - Q: Ord + ?Sized, - { - self.main.remove(key).map(|(k2, v)| { - self.alt.remove(&k2); - v - }) - } - - /// Clears the map, removing all values. - pub fn clear(&mut self) { - self.alt.clear(); - self.main.clear() - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn get() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - assert_eq!(*m.get(&k1).expect("failed to look up main key"), val); - assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val); - } - - #[test] - fn update_main_key() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - let new_k1 = 0x3add_f8f8_c7c5_df5e; - let val2 = 0x7389_f8a7; - assert_eq!( - m.insert(new_k1, k2, val2) - .expect("failed to update main key"), - val - ); - - assert!(m.get(&k1).is_none()); - assert_eq!(*m.get(&new_k1).expect("failed to look up main key"), val2); - assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val2); - } - - #[test] - fn update_alt_key() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - let new_k2 = 0x6825_a60b_61ac_b333; - let val2 = 0xbb14_8f2c; - assert_eq!( - m.insert(k1, new_k2, val2) - .expect("failed to update alt key"), - val - ); - - assert!(m.get_alt(&k2).is_none()); - assert_eq!(*m.get(&k1).expect("failed to look up main key"), val2); - assert_eq!( - *m.get_alt(&new_k2).expect("failed to look up alt key"), - val2 - ); - } - - #[test] - fn update_value() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - let val2 = 0xe42d_79ba; - assert_eq!( - m.insert(k1, k2, val2).expect("failed to update alt key"), - val - ); - - assert_eq!(*m.get(&k1).expect("failed to look up main key"), val2); - assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val2); - } - - #[test] - fn update_both_keys_main() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - let new_k1 = 0xc980_587a_24b3_ae30; - let new_k2 = 0x2773_c5ee_8239_45a2; - let val2 = 0x31f4_33f9; - assert!(m.insert(new_k1, new_k2, val2).is_none()); - - let val3 = 0x8da1_9cf7; - assert_eq!( - m.insert(k1, new_k2, val3) - .expect("failed to update main key"), - val - ); - - // Both new_k1 and k2 should now be gone from the map. - assert!(m.get(&new_k1).is_none()); - assert!(m.get_alt(&k2).is_none()); - - assert_eq!(*m.get(&k1).expect("failed to look up main key"), val3); - assert_eq!( - *m.get_alt(&new_k2).expect("failed to look up alt key"), - val3 - ); - } - - #[test] - fn update_both_keys_alt() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - let new_k1 = 0xc980_587a_24b3_ae30; - let new_k2 = 0x2773_c5ee_8239_45a2; - let val2 = 0x31f4_33f9; - assert!(m.insert(new_k1, new_k2, val2).is_none()); - - let val3 = 0x8da1_9cf7; - assert_eq!( - m.insert(new_k1, k2, val3) - .expect("failed to update main key"), - val2 - ); - - // Both k1 and new_k2 should now be gone from the map. - assert!(m.get(&k1).is_none()); - assert!(m.get_alt(&new_k2).is_none()); - - assert_eq!(*m.get(&new_k1).expect("failed to look up main key"), val3); - assert_eq!(*m.get_alt(&k2).expect("failed to look up alt key"), val3); - } - - #[test] - fn remove() { - let mut m = MultikeyBTreeMap::::new(); - - let k1 = 0xc6c8_f5e0_b13e_ed40; - let k2 = 0x1a04_ce4b_8329_14fe; - let val = 0xf4e3_c360; - assert!(m.insert(k1, k2, val).is_none()); - - assert_eq!(m.remove(&k1).expect("failed to remove entry"), val); - assert!(m.get(&k1).is_none()); - assert!(m.get_alt(&k2).is_none()); - } -} diff --git a/vhost_user_fs/src/passthrough.rs b/vhost_user_fs/src/passthrough.rs deleted file mode 100644 index 011b3afde..000000000 --- a/vhost_user_fs/src/passthrough.rs +++ /dev/null @@ -1,1724 +0,0 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use super::fs_cache_req_handler::FsCacheReqHandler; -use crate::filesystem::{ - Context, DirEntry, Entry, FileSystem, FsOptions, GetxattrReply, ListxattrReply, OpenOptions, - SetattrValid, ZeroCopyReader, ZeroCopyWriter, -}; -use crate::fuse; -use crate::multikey::MultikeyBTreeMap; -use std::collections::btree_map; -use std::collections::BTreeMap; -use std::ffi::{CStr, CString}; -use std::fs::File; -use std::io; -use std::mem::{self, size_of, MaybeUninit}; -use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; -use std::str::FromStr; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::{Arc, RwLock}; -use std::time::Duration; -use vm_memory::ByteValued; - -const CURRENT_DIR_CSTR: &[u8] = b".\0"; -const PARENT_DIR_CSTR: &[u8] = b"..\0"; -const EMPTY_CSTR: &[u8] = b"\0"; -const PROC_CSTR: &[u8] = b"/proc/self/fd\0"; - -type Inode = u64; -type Handle = u64; - -#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq)] -struct InodeAltKey { - ino: libc::ino64_t, - dev: libc::dev_t, -} - -struct InodeData { - inode: Inode, - // Most of these aren't actually files but ¯\_(ツ)_/¯. - file: File, - refcount: AtomicU64, -} - -struct HandleData { - inode: Inode, - file: RwLock, -} - -#[repr(C, packed)] -#[derive(Clone, Copy, Debug, Default)] -struct LinuxDirent64 { - d_ino: libc::ino64_t, - d_off: libc::off64_t, - d_reclen: libc::c_ushort, - d_ty: libc::c_uchar, -} -unsafe impl ByteValued for LinuxDirent64 {} - -macro_rules! scoped_cred { - ($name:ident, $ty:ty, $syscall_nr:expr) => { - #[derive(Debug)] - struct $name; - - impl $name { - // Changes the effective uid/gid of the current thread to `val`. Changes - // the thread's credentials back to root when the returned struct is dropped. - fn new(val: $ty) -> io::Result> { - if val == 0 { - // Nothing to do since we are already uid 0. - return Ok(None); - } - - // We want credential changes to be per-thread because otherwise - // we might interfere with operations being carried out on other - // threads with different uids/gids. However, posix requires that - // all threads in a process share the same credentials. To do this - // libc uses signals to ensure that when one thread changes its - // credentials the other threads do the same thing. - // - // So instead we invoke the syscall directly in order to get around - // this limitation. Another option is to use the setfsuid and - // setfsgid systems calls. However since those calls have no way to - // return an error, it's preferable to do this instead. - - // This call is safe because it doesn't modify any memory and we - // check the return value. - let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) }; - if res == 0 { - Ok(Some($name)) - } else { - Err(io::Error::last_os_error()) - } - } - } - - impl Drop for $name { - fn drop(&mut self) { - let res = unsafe { libc::syscall($syscall_nr, -1, 0, -1) }; - if res < 0 { - error!( - "failed to change credentials back to root: {}", - io::Error::last_os_error(), - ); - } - } - } - }; -} -scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid); -scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid); - -fn set_creds( - uid: libc::uid_t, - gid: libc::gid_t, -) -> io::Result<(Option, Option)> { - // We have to change the gid before we change the uid because if we change the uid first then we - // lose the capability to change the gid. However changing back can happen in any order. - ScopedGid::new(gid).and_then(|gid| Ok((ScopedUid::new(uid)?, gid))) -} - -fn ebadf() -> io::Error { - io::Error::from_raw_os_error(libc::EBADF) -} - -fn stat(f: &File) -> io::Result { - let mut st = MaybeUninit::::zeroed(); - - // Safe because this is a constant value and a valid C string. - let pathname = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; - - // Safe because the kernel will only write data in `st` and we check the return - // value. - let res = unsafe { - libc::fstatat64( - f.as_raw_fd(), - pathname.as_ptr(), - st.as_mut_ptr(), - libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, - ) - }; - if res >= 0 { - // Safe because the kernel guarantees that the struct is now fully initialized. - Ok(unsafe { st.assume_init() }) - } else { - Err(io::Error::last_os_error()) - } -} - -/// The caching policy that the file system should report to the FUSE client. By default the FUSE -/// protocol uses close-to-open consistency. This means that any cached contents of the file are -/// invalidated the next time that file is opened. -#[derive(Debug, Clone)] -pub enum CachePolicy { - /// The client should never cache file data and all I/O should be directly forwarded to the - /// server. This policy must be selected when file contents may change without the knowledge of - /// the FUSE client (i.e., the file system does not have exclusive access to the directory). - Never, - - /// The client is free to choose when and how to cache file data. This is the default policy and - /// uses close-to-open consistency as described in the enum documentation. - Auto, - - /// The client should always cache file data. This means that the FUSE client will not - /// invalidate any cached data that was returned by the file system the last time the file was - /// opened. This policy should only be selected when the file system has exclusive access to the - /// directory. - Always, -} - -impl FromStr for CachePolicy { - type Err = &'static str; - - fn from_str(s: &str) -> Result { - match s { - "never" | "Never" | "NEVER" => Ok(CachePolicy::Never), - "auto" | "Auto" | "AUTO" => Ok(CachePolicy::Auto), - "always" | "Always" | "ALWAYS" => Ok(CachePolicy::Always), - _ => Err("invalid cache policy"), - } - } -} - -impl Default for CachePolicy { - fn default() -> Self { - CachePolicy::Auto - } -} - -/// Options that configure the behavior of the file system. -#[derive(Debug, Clone)] -pub struct Config { - /// How long the FUSE client should consider directory entries to be valid. If the contents of a - /// directory can only be modified by the FUSE client (i.e., the file system has exclusive - /// access), then this should be a large value. - /// - /// The default value for this option is 5 seconds. - pub entry_timeout: Duration, - - /// How long the FUSE client should consider file and directory attributes to be valid. If the - /// attributes of a file or directory can only be modified by the FUSE client (i.e., the file - /// system has exclusive access), then this should be set to a large value. - /// - /// The default value for this option is 5 seconds. - pub attr_timeout: Duration, - - /// The caching policy the file system should use. See the documentation of `CachePolicy` for - /// more details. - pub cache_policy: CachePolicy, - - /// Whether the file system should enabled writeback caching. This can improve performance as it - /// allows the FUSE client to cache and coalesce multiple writes before sending them to the file - /// system. However, enabling this option can increase the risk of data corruption if the file - /// contents can change without the knowledge of the FUSE client (i.e., the server does **NOT** - /// have exclusive access). Additionally, the file system should have read access to all files - /// in the directory it is serving as the FUSE client may send read requests even for files - /// opened with `O_WRONLY`. - /// - /// Therefore callers should only enable this option when they can guarantee that: 1) the file - /// system has exclusive access to the directory and 2) the file system has read permissions for - /// all files in that directory. - /// - /// The default value for this option is `false`. - pub writeback: bool, - - /// The path of the root directory. - /// - /// The default is `/`. - pub root_dir: String, - - /// Whether the file system should support Extended Attributes (xattr). Enabling this feature may - /// have a significant impact on performance, especially on write parallelism. This is the result - /// of FUSE attempting to remove the special file privileges after each write request. - /// - /// The default value for this options is `false`. - pub xattr: bool, - - /// Optional file descriptor for /proc/self/fd. Callers can obtain a file descriptor and pass it - /// here, so there's no need to open it in PassthroughFs::new(). This is specially useful for - /// sandboxing. - /// - /// The default is `None`. - pub proc_sfd_rawfd: Option, -} - -impl Default for Config { - fn default() -> Self { - Config { - entry_timeout: Duration::from_secs(5), - attr_timeout: Duration::from_secs(5), - cache_policy: Default::default(), - writeback: false, - root_dir: String::from("/"), - xattr: false, - proc_sfd_rawfd: None, - } - } -} - -/// A file system that simply "passes through" all requests it receives to the underlying file -/// system. To keep the implementation simple it servers the contents of its root directory. Users -/// that wish to serve only a specific directory should set up the environment so that that -/// directory ends up as the root of the file system process. One way to accomplish this is via a -/// combination of mount namespaces and the pivot_root system call. -pub struct PassthroughFs { - // File descriptors for various points in the file system tree. These fds are always opened with - // the `O_PATH` option so they cannot be used for reading or writing any data. See the - // documentation of the `O_PATH` flag in `open(2)` for more details on what one can and cannot - // do with an fd opened with this flag. - inodes: RwLock>>, - next_inode: AtomicU64, - - // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be - // used for reading and writing data. - handles: RwLock>>, - next_handle: AtomicU64, - - // File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from - // `inodes` into one that can go into `handles`. This is accomplished by reading the - // `/proc/self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are - // meant to be serving doesn't have access to `/proc/self/fd`. - proc_self_fd: File, - - // Whether writeback caching is enabled for this directory. This will only be true when - // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`. - writeback: AtomicBool, - - cfg: Config, -} - -impl PassthroughFs { - pub fn new(cfg: Config) -> io::Result { - let fd = if let Some(fd) = cfg.proc_sfd_rawfd { - fd - } else { - // Safe because this is a constant value and a valid C string. - let proc_cstr = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_CSTR) }; - - // Safe because this doesn't modify any memory and we check the return value. - let fd = unsafe { - libc::openat( - libc::AT_FDCWD, - proc_cstr.as_ptr(), - libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, - ) - }; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - fd - }; - - // Safe because we just opened this fd or it was provided by our caller. - let proc_self_fd = unsafe { File::from_raw_fd(fd) }; - - Ok(PassthroughFs { - inodes: RwLock::new(MultikeyBTreeMap::new()), - next_inode: AtomicU64::new(fuse::ROOT_ID + 1), - - handles: RwLock::new(BTreeMap::new()), - next_handle: AtomicU64::new(0), - - proc_self_fd, - - writeback: AtomicBool::new(false), - cfg, - }) - } - - pub fn keep_fds(&self) -> Vec { - vec![self.proc_self_fd.as_raw_fd()] - } - - fn open_inode(&self, inode: Inode, mut flags: i32) -> io::Result { - let data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let pathname = CString::new(format!("{}", data.file.as_raw_fd())) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - - // When writeback caching is enabled, the kernel may send read requests even if the - // userspace program opened the file write-only. So we need to ensure that we have opened - // the file for reading as well as writing. - let writeback = self.writeback.load(Ordering::Relaxed); - if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY { - flags &= !libc::O_ACCMODE; - flags |= libc::O_RDWR; - } - - // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`. - // However, this breaks atomicity as the file may have changed on disk, invalidating the - // cached copy of the data in the kernel and the offset that the kernel thinks is the end of - // the file. Just allow this for now as it is the user's responsibility to enable writeback - // caching only for directories that are not shared. It also means that we need to clear the - // `O_APPEND` flag. - if writeback && flags & libc::O_APPEND != 0 { - flags &= !libc::O_APPEND; - } - - // Safe because this doesn't modify any memory and we check the return value. We don't - // really check `flags` because if the kernel can't handle poorly specified flags then we - // have much bigger problems. Also, clear the `O_NOFOLLOW` flag if it is set since we need - // to follow the `/proc/self/fd` symlink to get the file. - let fd = unsafe { - libc::openat( - self.proc_self_fd.as_raw_fd(), - pathname.as_ptr(), - (flags | libc::O_CLOEXEC) & (!libc::O_NOFOLLOW), - ) - }; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we just opened this fd. - Ok(unsafe { File::from_raw_fd(fd) }) - } - - fn do_lookup(&self, parent: Inode, name: &CStr) -> io::Result { - let p = self - .inodes - .read() - .unwrap() - .get(&parent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. - let fd = unsafe { - libc::openat( - p.file.as_raw_fd(), - name.as_ptr(), - libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, - ) - }; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we just opened this fd. - let f = unsafe { File::from_raw_fd(fd) }; - - let st = stat(&f)?; - - let altkey = InodeAltKey { - ino: st.st_ino, - dev: st.st_dev, - }; - let data = self.inodes.read().unwrap().get_alt(&altkey).map(Arc::clone); - - let inode = if let Some(data) = data { - // Matches with the release store in `forget`. - data.refcount.fetch_add(1, Ordering::Acquire); - data.inode - } else { - // There is a possible race here where 2 threads end up adding the same file - // into the inode list. However, since each of those will get a unique Inode - // value and unique file descriptors this shouldn't be that much of a problem. - let inode = self.next_inode.fetch_add(1, Ordering::Relaxed); - self.inodes.write().unwrap().insert( - inode, - InodeAltKey { - ino: st.st_ino, - dev: st.st_dev, - }, - Arc::new(InodeData { - inode, - file: f, - refcount: AtomicU64::new(1), - }), - ); - - inode - }; - - Ok(Entry { - inode, - generation: 0, - attr: st, - attr_timeout: self.cfg.attr_timeout, - entry_timeout: self.cfg.entry_timeout, - }) - } - - fn do_readdir( - &self, - inode: Inode, - handle: Handle, - size: u32, - offset: u64, - mut add_entry: F, - ) -> io::Result<()> - where - F: FnMut(DirEntry) -> io::Result, - { - if size == 0 { - return Ok(()); - } - - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let mut buf = vec![0; size as usize]; - - { - // Since we are going to work with the kernel offset, we have to acquire the file lock - // for both the `lseek64` and `getdents64` syscalls to ensure that no other thread - // changes the kernel offset while we are using it. - let dir = data.file.write().unwrap(); - - // Safe because this doesn't modify any memory and we check the return value. - let res = - unsafe { libc::lseek64(dir.as_raw_fd(), offset as libc::off64_t, libc::SEEK_SET) }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because the kernel guarantees that it will only write to `buf` and we check the - // return value. - let res = unsafe { - libc::syscall( - libc::SYS_getdents64, - dir.as_raw_fd(), - buf.as_mut_ptr() as *mut LinuxDirent64, - size as libc::c_int, - ) - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - buf.resize(res as usize, 0); - - // Explicitly drop the lock so that it's not held while we fill in the fuse buffer. - mem::drop(dir); - } - - let mut rem = &buf[..]; - while !rem.is_empty() { - // We only use debug asserts here because these values are coming from the kernel and we - // trust them implicitly. - debug_assert!( - rem.len() >= size_of::(), - "not enough space left in `rem`" - ); - - let (front, back) = rem.split_at(size_of::()); - - let dirent64 = - LinuxDirent64::from_slice(front).expect("unable to get LinuxDirent64 from slice"); - - let namelen = dirent64.d_reclen as usize - size_of::(); - debug_assert!(namelen <= back.len(), "back is smaller than `namelen`"); - - let name = &back[..namelen]; - let res = if name.starts_with(CURRENT_DIR_CSTR) || name.starts_with(PARENT_DIR_CSTR) { - // We don't want to report the "." and ".." entries. However, returning `Ok(0)` will - // break the loop so return `Ok` with a non-zero value instead. - Ok(1) - } else { - add_entry(DirEntry { - ino: dirent64.d_ino, - offset: dirent64.d_off as u64, - type_: u32::from(dirent64.d_ty), - name, - }) - }; - - debug_assert!( - rem.len() >= dirent64.d_reclen as usize, - "rem is smaller than `d_reclen`" - ); - - match res { - Ok(0) => break, - Ok(_) => rem = &rem[dirent64.d_reclen as usize..], - Err(e) => return Err(e), - } - } - - Ok(()) - } - - fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option, OpenOptions)> { - let file = RwLock::new(self.open_inode(inode, flags as i32)?); - - let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); - let data = HandleData { inode, file }; - - self.handles.write().unwrap().insert(handle, Arc::new(data)); - - let mut opts = OpenOptions::empty(); - match self.cfg.cache_policy { - // We only set the direct I/O option on files. - CachePolicy::Never => opts.set( - OpenOptions::DIRECT_IO, - flags & (libc::O_DIRECTORY as u32) == 0, - ), - CachePolicy::Always => { - if flags & (libc::O_DIRECTORY as u32) == 0 { - opts |= OpenOptions::KEEP_CACHE; - } else { - opts |= OpenOptions::CACHE_DIR; - } - } - _ => {} - }; - - Ok((Some(handle), opts)) - } - - fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> { - let mut handles = self.handles.write().unwrap(); - - if let btree_map::Entry::Occupied(e) = handles.entry(handle) { - if e.get().inode == inode { - // We don't need to close the file here because that will happen automatically when - // the last `Arc` is dropped. - e.remove(); - return Ok(()); - } - } - - Err(ebadf()) - } - - fn do_getattr(&self, inode: Inode) -> io::Result<(libc::stat64, Duration)> { - let data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let st = stat(&data.file)?; - - Ok((st, self.cfg.attr_timeout)) - } - - fn do_unlink(&self, parent: Inode, name: &CStr, flags: libc::c_int) -> io::Result<()> { - let data = self - .inodes - .read() - .unwrap() - .get(&parent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { libc::unlinkat(data.file.as_raw_fd(), name.as_ptr(), flags) }; - if res == 0 { - Ok(()) - } else { - Err(io::Error::last_os_error()) - } - } -} - -fn forget_one( - inodes: &mut MultikeyBTreeMap>, - inode: Inode, - count: u64, -) { - if let Some(data) = inodes.get(&inode) { - // Acquiring the write lock on the inode map prevents new lookups from incrementing the - // refcount but there is the possibility that a previous lookup already acquired a - // reference to the inode data and is in the process of updating the refcount so we need - // to loop here until we can decrement successfully. - loop { - let refcount = data.refcount.load(Ordering::Relaxed); - - // Saturating sub because it doesn't make sense for a refcount to go below zero and - // we don't want misbehaving clients to cause integer overflow. - let new_count = refcount.saturating_sub(count); - - // Synchronizes with the acquire load in `do_lookup`. - if data - .refcount - .compare_and_swap(refcount, new_count, Ordering::Release) - == refcount - { - if new_count == 0 { - // We just removed the last refcount for this inode. There's no need for an - // acquire fence here because we hold a write lock on the inode map and any - // thread that is waiting to do a forget on the same inode will have to wait - // until we release the lock. So there's is no other release store for us to - // synchronize with before deleting the entry. - inodes.remove(&inode); - } - break; - } - } - } -} - -impl FileSystem for PassthroughFs { - type Inode = Inode; - type Handle = Handle; - - fn init(&self, capable: FsOptions) -> io::Result { - let root = CString::new(self.cfg.root_dir.as_str()).expect("CString::new failed"); - - // Safe because this doesn't modify any memory and we check the return value. - // We use `O_PATH` because we just want this for traversing the directory tree - // and not for actually reading the contents. - let fd = unsafe { - libc::openat( - libc::AT_FDCWD, - root.as_ptr(), - libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, - ) - }; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we just opened this fd above. - let f = unsafe { File::from_raw_fd(fd) }; - - let st = stat(&f)?; - - // Safe because this doesn't modify any memory and there is no need to check the return - // value because this system call always succeeds. We need to clear the umask here because - // we want the client to be able to set all the bits in the mode. - unsafe { libc::umask(0o000) }; - - let mut inodes = self.inodes.write().unwrap(); - - // Not sure why the root inode gets a refcount of 2 but that's what libfuse does. - inodes.insert( - fuse::ROOT_ID, - InodeAltKey { - ino: st.st_ino, - dev: st.st_dev, - }, - Arc::new(InodeData { - inode: fuse::ROOT_ID, - file: f, - refcount: AtomicU64::new(2), - }), - ); - - let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO; - if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { - opts |= FsOptions::WRITEBACK_CACHE; - self.writeback.store(true, Ordering::Relaxed); - } - Ok(opts) - } - - fn destroy(&self) { - self.handles.write().unwrap().clear(); - self.inodes.write().unwrap().clear(); - } - - fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result { - let data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let mut out = MaybeUninit::::zeroed(); - - // Safe because this will only modify `out` and we check the return value. - let res = unsafe { libc::fstatvfs64(data.file.as_raw_fd(), out.as_mut_ptr()) }; - if res == 0 { - // Safe because the kernel guarantees that `out` has been initialized. - Ok(unsafe { out.assume_init() }) - } else { - Err(io::Error::last_os_error()) - } - } - - fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result { - self.do_lookup(parent, name) - } - - fn forget(&self, _ctx: Context, inode: Inode, count: u64) { - let mut inodes = self.inodes.write().unwrap(); - - forget_one(&mut inodes, inode, count) - } - - fn batch_forget(&self, _ctx: Context, requests: Vec<(Inode, u64)>) { - let mut inodes = self.inodes.write().unwrap(); - - for (inode, count) in requests { - forget_one(&mut inodes, inode, count) - } - } - - fn opendir( - &self, - _ctx: Context, - inode: Inode, - flags: u32, - ) -> io::Result<(Option, OpenOptions)> { - self.do_open(inode, flags | (libc::O_DIRECTORY as u32)) - } - - fn releasedir( - &self, - _ctx: Context, - inode: Inode, - _flags: u32, - handle: Handle, - ) -> io::Result<()> { - self.do_release(inode, handle) - } - - fn mkdir( - &self, - ctx: Context, - parent: Inode, - name: &CStr, - mode: u32, - umask: u32, - ) -> io::Result { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - let data = self - .inodes - .read() - .unwrap() - .get(&parent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { libc::mkdirat(data.file.as_raw_fd(), name.as_ptr(), mode & !umask) }; - if res == 0 { - self.do_lookup(parent, name) - } else { - Err(io::Error::last_os_error()) - } - } - - fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { - self.do_unlink(parent, name, libc::AT_REMOVEDIR) - } - - fn readdir( - &self, - _ctx: Context, - inode: Inode, - handle: Handle, - size: u32, - offset: u64, - add_entry: F, - ) -> io::Result<()> - where - F: FnMut(DirEntry) -> io::Result, - { - self.do_readdir(inode, handle, size, offset, add_entry) - } - - fn readdirplus( - &self, - _ctx: Context, - inode: Inode, - handle: Handle, - size: u32, - offset: u64, - mut add_entry: F, - ) -> io::Result<()> - where - F: FnMut(DirEntry, Entry) -> io::Result, - { - self.do_readdir(inode, handle, size, offset, |dir_entry| { - // Safe because the kernel guarantees that the buffer is nul-terminated. Additionally, - // the kernel will pad the name with '\0' bytes up to 8-byte alignment and there's no - // way for us to know exactly how many padding bytes there are. This would cause - // `CStr::from_bytes_with_nul` to return an error because it would think there are - // interior '\0' bytes. We trust the kernel to provide us with properly formatted data - // so we'll just skip the checks here. - let name = unsafe { CStr::from_bytes_with_nul_unchecked(dir_entry.name) }; - let entry = self.do_lookup(inode, name)?; - - add_entry(dir_entry, entry) - }) - } - - fn open( - &self, - _ctx: Context, - inode: Inode, - flags: u32, - ) -> io::Result<(Option, OpenOptions)> { - self.do_open(inode, flags) - } - - fn release( - &self, - _ctx: Context, - inode: Inode, - _flags: u32, - handle: Handle, - _flush: bool, - _flock_release: bool, - _lock_owner: Option, - ) -> io::Result<()> { - self.do_release(inode, handle) - } - - fn create( - &self, - ctx: Context, - parent: Inode, - name: &CStr, - mode: u32, - flags: u32, - umask: u32, - ) -> io::Result<(Entry, Option, OpenOptions)> { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - let data = self - .inodes - .read() - .unwrap() - .get(&parent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. We don't - // really check `flags` because if the kernel can't handle poorly specified flags then we - // have much bigger problems. - let fd = unsafe { - libc::openat( - data.file.as_raw_fd(), - name.as_ptr(), - flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, - mode & !(umask & 0o777), - ) - }; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we just opened this fd. - let file = RwLock::new(unsafe { File::from_raw_fd(fd) }); - - let entry = self.do_lookup(parent, name)?; - - let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); - let data = HandleData { - inode: entry.inode, - file, - }; - - self.handles.write().unwrap().insert(handle, Arc::new(data)); - - let mut opts = OpenOptions::empty(); - match self.cfg.cache_policy { - CachePolicy::Never => opts |= OpenOptions::DIRECT_IO, - CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE, - _ => {} - }; - - Ok((entry, Some(handle), opts)) - } - - fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { - self.do_unlink(parent, name, 0) - } - - fn setupmapping( - &self, - _ctx: Context, - inode: Inode, - _handle: Handle, - foffset: u64, - len: u64, - flags: u64, - moffset: u64, - vu_req: &mut T, - ) -> io::Result<()> { - debug!( - "setupmapping: ino {:?} foffset {} len {} flags {} moffset {}", - inode, foffset, len, flags, moffset - ); - - let open_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { - libc::O_RDWR - } else { - libc::O_RDONLY - }; - - let file = self.open_inode(inode, open_flags as i32)?; - (*vu_req).map(foffset, moffset, len, flags, file.as_raw_fd()) - } - - fn removemapping( - &self, - _ctx: Context, - requests: Vec, - vu_req: &mut T, - ) -> io::Result<()> { - (*vu_req).unmap(requests) - } - - fn read( - &self, - _ctx: Context, - inode: Inode, - handle: Handle, - mut w: W, - size: u32, - offset: u64, - _lock_owner: Option, - _flags: u32, - ) -> io::Result { - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // This is safe because write_from uses preadv64, so the underlying file descriptor - // offset is not affected by this operation. - let mut f = data.file.read().unwrap().try_clone().unwrap(); - w.write_from(&mut f, size as usize, offset) - } - - fn write( - &self, - ctx: Context, - inode: Inode, - handle: Handle, - mut r: R, - size: u32, - offset: u64, - _lock_owner: Option, - _delayed_write: bool, - kill_priv: bool, - _flags: u32, - ) -> io::Result { - if kill_priv { - // We need to change credentials during a write so that the kernel will remove setuid - // or setgid bits from the file if it was written to by someone other than the owner. - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - } - - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // This is safe because read_to uses pwritev64, so the underlying file descriptor - // offset is not affected by this operation. - let mut f = data.file.read().unwrap().try_clone().unwrap(); - r.read_to(&mut f, size as usize, offset) - } - - fn getattr( - &self, - _ctx: Context, - inode: Inode, - _handle: Option, - ) -> io::Result<(libc::stat64, Duration)> { - self.do_getattr(inode) - } - - fn setattr( - &self, - _ctx: Context, - inode: Inode, - attr: libc::stat64, - handle: Option, - valid: SetattrValid, - ) -> io::Result<(libc::stat64, Duration)> { - let inode_data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - enum Data { - Handle(Arc, RawFd), - ProcPath(CString), - } - - // If we have a handle then use it otherwise get a new fd from the inode. - let data = if let Some(handle) = handle { - let hd = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let fd = hd.file.write().unwrap().as_raw_fd(); - Data::Handle(hd, fd) - } else { - let pathname = CString::new(format!("{}", inode_data.file.as_raw_fd())) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - Data::ProcPath(pathname) - }; - - if valid.contains(SetattrValid::MODE) { - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - match data { - Data::Handle(_, fd) => libc::fchmod(fd, attr.st_mode), - Data::ProcPath(ref p) => { - libc::fchmodat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), attr.st_mode, 0) - } - } - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - } - - if valid.intersects(SetattrValid::UID | SetattrValid::GID) { - let uid = if valid.contains(SetattrValid::UID) { - attr.st_uid - } else { - // Cannot use -1 here because these are unsigned values. - ::std::u32::MAX - }; - let gid = if valid.contains(SetattrValid::GID) { - attr.st_gid - } else { - // Cannot use -1 here because these are unsigned values. - ::std::u32::MAX - }; - - // Safe because this is a constant value and a valid C string. - let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - libc::fchownat( - inode_data.file.as_raw_fd(), - empty.as_ptr(), - uid, - gid, - libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, - ) - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - } - - if valid.contains(SetattrValid::SIZE) { - // Safe because this doesn't modify any memory and we check the return value. - let res = match data { - Data::Handle(_, fd) => unsafe { libc::ftruncate(fd, attr.st_size) }, - _ => { - // There is no `ftruncateat` so we need to get a new fd and truncate it. - let f = self.open_inode(inode, libc::O_NONBLOCK | libc::O_RDWR)?; - unsafe { libc::ftruncate(f.as_raw_fd(), attr.st_size) } - } - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - } - - if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) { - let mut tvs = [ - libc::timespec { - tv_sec: 0, - tv_nsec: libc::UTIME_OMIT, - }, - libc::timespec { - tv_sec: 0, - tv_nsec: libc::UTIME_OMIT, - }, - ]; - - if valid.contains(SetattrValid::ATIME_NOW) { - tvs[0].tv_nsec = libc::UTIME_NOW; - } else if valid.contains(SetattrValid::ATIME) { - tvs[0].tv_sec = attr.st_atime; - tvs[0].tv_nsec = attr.st_atime_nsec; - } - - if valid.contains(SetattrValid::MTIME_NOW) { - tvs[1].tv_nsec = libc::UTIME_NOW; - } else if valid.contains(SetattrValid::MTIME) { - tvs[1].tv_sec = attr.st_mtime; - tvs[1].tv_nsec = attr.st_mtime_nsec; - } - - // Safe because this doesn't modify any memory and we check the return value. - let res = match data { - Data::Handle(_, fd) => unsafe { libc::futimens(fd, tvs.as_ptr()) }, - Data::ProcPath(ref p) => unsafe { - libc::utimensat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), tvs.as_ptr(), 0) - }, - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - } - - self.do_getattr(inode) - } - - fn rename( - &self, - _ctx: Context, - olddir: Inode, - oldname: &CStr, - newdir: Inode, - newname: &CStr, - flags: u32, - ) -> io::Result<()> { - let old_inode = self - .inodes - .read() - .unwrap() - .get(&olddir) - .map(Arc::clone) - .ok_or_else(ebadf)?; - let new_inode = self - .inodes - .read() - .unwrap() - .get(&newdir) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. - // TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands - // and we have glibc 2.28. - let res = unsafe { - libc::syscall( - libc::SYS_renameat2, - old_inode.file.as_raw_fd(), - oldname.as_ptr(), - new_inode.file.as_raw_fd(), - newname.as_ptr(), - flags, - ) - }; - if res == 0 { - Ok(()) - } else { - Err(io::Error::last_os_error()) - } - } - - fn mknod( - &self, - ctx: Context, - parent: Inode, - name: &CStr, - mode: u32, - rdev: u32, - umask: u32, - ) -> io::Result { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - let data = self - .inodes - .read() - .unwrap() - .get(&parent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - libc::mknodat( - data.file.as_raw_fd(), - name.as_ptr(), - (mode & !umask) as libc::mode_t, - u64::from(rdev), - ) - }; - - if res < 0 { - Err(io::Error::last_os_error()) - } else { - self.do_lookup(parent, name) - } - } - - fn link( - &self, - _ctx: Context, - inode: Inode, - newparent: Inode, - newname: &CStr, - ) -> io::Result { - let data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - let new_inode = self - .inodes - .read() - .unwrap() - .get(&newparent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this is a constant value and a valid C string. - let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - libc::linkat( - data.file.as_raw_fd(), - empty.as_ptr(), - new_inode.file.as_raw_fd(), - newname.as_ptr(), - libc::AT_EMPTY_PATH, - ) - }; - if res == 0 { - self.do_lookup(newparent, newname) - } else { - Err(io::Error::last_os_error()) - } - } - - fn symlink( - &self, - ctx: Context, - linkname: &CStr, - parent: Inode, - name: &CStr, - ) -> io::Result { - let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; - let data = self - .inodes - .read() - .unwrap() - .get(&parent) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Safe because this doesn't modify any memory and we check the return value. - let res = - unsafe { libc::symlinkat(linkname.as_ptr(), data.file.as_raw_fd(), name.as_ptr()) }; - if res == 0 { - self.do_lookup(parent, name) - } else { - Err(io::Error::last_os_error()) - } - } - - fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result> { - let data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let mut buf = vec![0; libc::PATH_MAX as usize]; - - // Safe because this is a constant value and a valid C string. - let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) }; - - // Safe because this will only modify the contents of `buf` and we check the return value. - let res = unsafe { - libc::readlinkat( - data.file.as_raw_fd(), - empty.as_ptr(), - buf.as_mut_ptr() as *mut libc::c_char, - buf.len(), - ) - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - - buf.resize(res as usize, 0); - Ok(buf) - } - - fn flush( - &self, - _ctx: Context, - inode: Inode, - handle: Handle, - _lock_owner: u64, - ) -> io::Result<()> { - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Since this method is called whenever an fd is closed in the client, we can emulate that - // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe - // because this doesn't modify any memory and we check the return values. - unsafe { - let newfd = libc::dup(data.file.write().unwrap().as_raw_fd()); - if newfd < 0 { - return Err(io::Error::last_os_error()); - } - - if libc::close(newfd) < 0 { - Err(io::Error::last_os_error()) - } else { - Ok(()) - } - } - } - - fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> { - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let fd = data.file.write().unwrap().as_raw_fd(); - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - if datasync { - libc::fdatasync(fd) - } else { - libc::fsync(fd) - } - }; - - if res == 0 { - Ok(()) - } else { - Err(io::Error::last_os_error()) - } - } - - fn fsyncdir( - &self, - ctx: Context, - inode: Inode, - datasync: bool, - handle: Handle, - ) -> io::Result<()> { - self.fsync(ctx, inode, datasync, handle) - } - - fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> { - let data = self - .inodes - .read() - .unwrap() - .get(&inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let st = stat(&data.file)?; - let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); - - if mode == libc::F_OK { - // The file exists since we were able to call `stat(2)` on it. - return Ok(()); - } - - if (mode & libc::R_OK) != 0 - && ctx.uid != 0 - && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0) - && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0) - && st.st_mode & 0o004 == 0 - { - return Err(io::Error::from_raw_os_error(libc::EACCES)); - } - - if (mode & libc::W_OK) != 0 - && ctx.uid != 0 - && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0) - && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0) - && st.st_mode & 0o002 == 0 - { - return Err(io::Error::from_raw_os_error(libc::EACCES)); - } - - // root can only execute something if it is executable by one of the owner, the group, or - // everyone. - if (mode & libc::X_OK) != 0 - && (ctx.uid != 0 || st.st_mode & 0o111 == 0) - && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0) - && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0) - && st.st_mode & 0o001 == 0 - { - return Err(io::Error::from_raw_os_error(libc::EACCES)); - } - - Ok(()) - } - - fn setxattr( - &self, - _ctx: Context, - inode: Inode, - name: &CStr, - value: &[u8], - flags: u32, - ) -> io::Result<()> { - if !self.cfg.xattr { - return Err(io::Error::from_raw_os_error(libc::ENOSYS)); - } - - // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we - // need to get a new fd. - let file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - libc::fsetxattr( - file.as_raw_fd(), - name.as_ptr(), - value.as_ptr() as *const libc::c_void, - value.len(), - flags as libc::c_int, - ) - }; - if res == 0 { - Ok(()) - } else { - Err(io::Error::last_os_error()) - } - } - - fn getxattr( - &self, - _ctx: Context, - inode: Inode, - name: &CStr, - size: u32, - ) -> io::Result { - if !self.cfg.xattr { - return Err(io::Error::from_raw_os_error(libc::ENOSYS)); - } - - // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we - // need to get a new fd. - let file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?; - - let mut buf = vec![0; size as usize]; - - // Safe because this will only modify the contents of `buf`. - let res = unsafe { - libc::fgetxattr( - file.as_raw_fd(), - name.as_ptr(), - buf.as_mut_ptr() as *mut libc::c_void, - size as libc::size_t, - ) - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - - if size == 0 { - Ok(GetxattrReply::Count(res as u32)) - } else { - buf.resize(res as usize, 0); - Ok(GetxattrReply::Value(buf)) - } - } - - fn listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result { - if !self.cfg.xattr { - return Err(io::Error::from_raw_os_error(libc::ENOSYS)); - } - - // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we - // need to get a new fd. - let file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?; - - let mut buf = vec![0; size as usize]; - - // Safe because this will only modify the contents of `buf`. - let res = unsafe { - libc::flistxattr( - file.as_raw_fd(), - buf.as_mut_ptr() as *mut libc::c_char, - size as libc::size_t, - ) - }; - if res < 0 { - return Err(io::Error::last_os_error()); - } - - if size == 0 { - Ok(ListxattrReply::Count(res as u32)) - } else { - buf.resize(res as usize, 0); - Ok(ListxattrReply::Names(buf)) - } - } - - fn removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> { - if !self.cfg.xattr { - return Err(io::Error::from_raw_os_error(libc::ENOSYS)); - } - - // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we - // need to get a new fd. - let file = self.open_inode(inode, libc::O_RDONLY | libc::O_NONBLOCK)?; - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { libc::fremovexattr(file.as_raw_fd(), name.as_ptr()) }; - - if res == 0 { - Ok(()) - } else { - Err(io::Error::last_os_error()) - } - } - - fn fallocate( - &self, - _ctx: Context, - inode: Inode, - handle: Handle, - mode: u32, - offset: u64, - length: u64, - ) -> io::Result<()> { - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let fd = data.file.write().unwrap().as_raw_fd(); - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { - libc::fallocate64( - fd, - mode as libc::c_int, - offset as libc::off64_t, - length as libc::off64_t, - ) - }; - if res == 0 { - Ok(()) - } else { - Err(io::Error::last_os_error()) - } - } - - fn lseek( - &self, - _ctx: Context, - inode: Inode, - handle: Handle, - offset: u64, - whence: u32, - ) -> io::Result { - let data = self - .handles - .read() - .unwrap() - .get(&handle) - .filter(|hd| hd.inode == inode) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - let fd = data.file.write().unwrap().as_raw_fd(); - - // Safe because this doesn't modify any memory and we check the return value. - let res = unsafe { libc::lseek(fd, offset as libc::off64_t, whence as libc::c_int) }; - if res < 0 { - Err(io::Error::last_os_error()) - } else { - Ok(res as u64) - } - } - - fn copyfilerange( - &self, - _ctx: Context, - inode_in: Inode, - handle_in: Handle, - offset_in: u64, - inode_out: Inode, - handle_out: Handle, - offset_out: u64, - len: u64, - flags: u64, - ) -> io::Result { - let data_in = self - .handles - .read() - .unwrap() - .get(&handle_in) - .filter(|hd| hd.inode == inode_in) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Take just a read lock as we're not going to alter the file descriptor offset. - let fd_in = data_in.file.read().unwrap().as_raw_fd(); - - let data_out = self - .handles - .read() - .unwrap() - .get(&handle_out) - .filter(|hd| hd.inode == inode_out) - .map(Arc::clone) - .ok_or_else(ebadf)?; - - // Take just a read lock as we're not going to alter the file descriptor offset. - let fd_out = data_out.file.read().unwrap().as_raw_fd(); - - // Safe because this will only modify `offset_in` and `offset_out` and we check - // the return value. - let res = unsafe { - libc::syscall( - libc::SYS_copy_file_range, - fd_in, - &mut (offset_in as i64) as &mut _ as *mut _, - fd_out, - &mut (offset_out as i64) as &mut _ as *mut _, - len, - flags, - ) - }; - if res < 0 { - Err(io::Error::last_os_error()) - } else { - Ok(res as usize) - } - } -} diff --git a/vhost_user_fs/src/sandbox.rs b/vhost_user_fs/src/sandbox.rs deleted file mode 100644 index 76e9c56d7..000000000 --- a/vhost_user_fs/src/sandbox.rs +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright 2020 Red Hat, Inc. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use std::ffi::CString; -use std::os::unix::io::RawFd; -use std::{fmt, fs, io}; - -use tempdir::TempDir; - -#[derive(Debug)] -pub enum Error { - /// Failed to bind mount `/proc/self/fd` into a temporary directory. - BindMountProcSelfFd(io::Error), - /// Failed to bind mount shared directory. - BindMountSharedDir(io::Error), - /// Failed to change to the old root directory. - ChdirOldRoot(io::Error), - /// Failed to change to the new root directory. - ChdirNewRoot(io::Error), - /// Failed to clean the properties of the mount point. - CleanMount(io::Error), - /// Failed to create a temporary directory. - CreateTempDir(io::Error), - /// Call to libc::fork returned an error. - Fork(io::Error), - /// Error bind-mounting a directory. - MountBind(io::Error), - /// Failed to mount old root. - MountOldRoot(io::Error), - /// Error mounting proc. - MountProc(io::Error), - /// Failed to mount new root. - MountNewRoot(io::Error), - /// Error mounting target directory. - MountTarget(io::Error), - /// Failed to open new root. - OpenNewRoot(io::Error), - /// Failed to open old root. - OpenOldRoot(io::Error), - /// Failed to open `/proc/self/fd`. - OpenProcSelfFd(io::Error), - /// Error switching root directory. - PivotRoot(io::Error), - /// Failed to remove temporary directory. - RmdirTempDir(io::Error), - /// Failed to lazily unmount old root. - UmountOldRoot(io::Error), - /// Failed to lazily unmount temporary directory. - UmountTempDir(io::Error), - /// Call to libc::unshare returned an error. - Unshare(io::Error), - /// Failed to read from procfs. - ReadProc(io::Error), - /// Failed to parse `/proc/sys/fs/nr_open`. - InvalidNrOpen(std::num::ParseIntError), - /// Failed to set rlimit. - SetRlimit(io::Error), -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "vhost_user_fs_sandbox_error: {:?}", self) - } -} - -/// A helper for creating a sandbox for isolating the service. -pub struct Sandbox { - /// The directory that is going to be shared with the VM. The sandbox will be constructed on top - /// of this directory. - shared_dir: String, - /// A file descriptor for `/proc/self/fd` obtained from the sandboxed context. - proc_self_fd: Option, -} - -impl Sandbox { - pub fn new(shared_dir: String) -> Self { - Sandbox { - shared_dir, - proc_self_fd: None, - } - } - - // Make `self.shared_dir` our root directory, and get an isolated file descriptor for - // `/proc/self/fd`. - // - // This is based on virtiofsd's setup_namespaces() and setup_mounts(), and it's very similar to - // the strategy used in containers. Consists on a careful sequence of mounts and bind-mounts to - // ensure it's not possible to escape the sandbox through `self.shared_dir` nor the file - // descriptor obtained for `/proc/self/fd`. - // - // It's ugly, but it's the only way until Linux implements a proper containerization API. - fn setup_mounts(&mut self) -> Result<(), Error> { - // Ensure our mount changes don't affect the parent mount namespace. - let c_root_dir = CString::new("/").unwrap(); - let ret = unsafe { - libc::mount( - std::ptr::null(), - c_root_dir.as_ptr(), - std::ptr::null(), - libc::MS_SLAVE | libc::MS_REC, - std::ptr::null(), - ) - }; - if ret != 0 { - return Err(Error::CleanMount(std::io::Error::last_os_error())); - } - - // Mount `/proc` in this context. - let c_proc_dir = CString::new("/proc").unwrap(); - let c_proc_fs = CString::new("proc").unwrap(); - let ret = unsafe { - libc::mount( - c_proc_fs.as_ptr(), - c_proc_dir.as_ptr(), - c_proc_fs.as_ptr(), - libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_NOSUID | libc::MS_RELATIME, - std::ptr::null(), - ) - }; - if ret != 0 { - return Err(Error::MountProc(std::io::Error::last_os_error())); - } - - // Bind-mount `/proc/self/fd` info a temporary directory, preventing access to ancestor - // directories. - let c_proc_self_fd = CString::new("/proc/self/fd").unwrap(); - let tmp_dir = TempDir::new("vhostuserfs-") - .map_err(|_| Error::CreateTempDir(std::io::Error::last_os_error()))?; - let c_tmp_dir = CString::new(tmp_dir.into_path().to_str().unwrap()).unwrap(); - let ret = unsafe { - libc::mount( - c_proc_self_fd.as_ptr(), - c_tmp_dir.as_ptr(), - std::ptr::null(), - libc::MS_BIND, - std::ptr::null(), - ) - }; - if ret < 0 { - return Err(Error::BindMountProcSelfFd(std::io::Error::last_os_error())); - } - - // Obtain a file descriptor for `/proc/self/fd` through the bind-mounted temporary directory. - let proc_self_fd = unsafe { libc::open(c_tmp_dir.as_ptr(), libc::O_PATH) }; - if proc_self_fd < 0 { - return Err(Error::OpenProcSelfFd(std::io::Error::last_os_error())); - } - self.proc_self_fd = Some(proc_self_fd); - - // Now that we have a file descriptor for `/proc/self/fd`, we no longer need the bind-mount. - // Unmount it and remove the temporary directory. - let ret = unsafe { libc::umount2(c_tmp_dir.as_ptr(), libc::MNT_DETACH) }; - if ret < 0 { - return Err(Error::UmountTempDir(std::io::Error::last_os_error())); - } - let ret = unsafe { libc::rmdir(c_tmp_dir.as_ptr()) }; - if ret < 0 { - return Err(Error::RmdirTempDir(std::io::Error::last_os_error())); - } - - // Bind-mount `self.shared_dir` on itself so we can use as new root on `pivot_root` syscall. - let c_shared_dir = CString::new(self.shared_dir.clone()).unwrap(); - let ret = unsafe { - libc::mount( - c_shared_dir.as_ptr(), - c_shared_dir.as_ptr(), - std::ptr::null(), - libc::MS_BIND | libc::MS_REC, - std::ptr::null(), - ) - }; - if ret < 0 { - return Err(Error::BindMountSharedDir(std::io::Error::last_os_error())); - } - - // Get a file descriptor to our old root so we can reference it after switching root. - let oldroot_fd = unsafe { - libc::open( - c_root_dir.as_ptr(), - libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC, - ) - }; - if oldroot_fd < 0 { - return Err(Error::OpenOldRoot(std::io::Error::last_os_error())); - } - - // Get a file descriptor to the new root so we can reference it after switching root. - let newroot_fd = unsafe { - libc::open( - c_shared_dir.as_ptr(), - libc::O_DIRECTORY | libc::O_RDONLY | libc::O_CLOEXEC, - ) - }; - if newroot_fd < 0 { - return Err(Error::OpenNewRoot(std::io::Error::last_os_error())); - } - - // Change to new root directory to prepare for `pivot_root` syscall. - let ret = unsafe { libc::fchdir(newroot_fd) }; - if ret < 0 { - return Err(Error::ChdirNewRoot(std::io::Error::last_os_error())); - } - - // Call to `pivot_root` using `.` as both new and old root. - let c_current_dir = CString::new(".").unwrap(); - let ret = unsafe { - libc::syscall( - libc::SYS_pivot_root, - c_current_dir.as_ptr(), - c_current_dir.as_ptr(), - ) - }; - if ret < 0 { - return Err(Error::PivotRoot(std::io::Error::last_os_error())); - } - - // Change to old root directory to prepare for cleaning up and unmounting it. - let ret = unsafe { libc::fchdir(oldroot_fd) }; - if ret < 0 { - return Err(Error::ChdirOldRoot(std::io::Error::last_os_error())); - } - - // Clean up old root to avoid mount namespace propagation. - let c_empty = CString::new("").unwrap(); - let ret = unsafe { - libc::mount( - c_empty.as_ptr(), - c_current_dir.as_ptr(), - c_empty.as_ptr(), - libc::MS_SLAVE | libc::MS_REC, - std::ptr::null(), - ) - }; - if ret != 0 { - return Err(Error::CleanMount(std::io::Error::last_os_error())); - } - - // Lazily unmount old root. - let ret = unsafe { libc::umount2(c_current_dir.as_ptr(), libc::MNT_DETACH) }; - if ret < 0 { - return Err(Error::UmountOldRoot(std::io::Error::last_os_error())); - } - - // Change to new root. - let ret = unsafe { libc::fchdir(newroot_fd) }; - if ret < 0 { - return Err(Error::ChdirNewRoot(std::io::Error::last_os_error())); - } - - // We no longer need these file descriptors, so close them. - unsafe { libc::close(newroot_fd) }; - unsafe { libc::close(oldroot_fd) }; - - Ok(()) - } - - /// Sets the limit of open files to the max possible. - fn setup_nofile_rlimit(&self) -> Result<(), Error> { - // /proc/sys/fs/nr_open is a sysctl file that shows the maximum number - // of file-handles a process can allocate. - let path = "/proc/sys/fs/nr_open"; - let max_str = fs::read_to_string(path).map_err(Error::ReadProc)?; - let max = max_str.trim().parse().map_err(Error::InvalidNrOpen)?; - - let limit = libc::rlimit { - rlim_cur: max, - rlim_max: max, - }; - let ret = unsafe { libc::setrlimit(libc::RLIMIT_NOFILE, &limit) }; - if ret < 0 { - Err(Error::SetRlimit(std::io::Error::last_os_error())) - } else { - Ok(()) - } - } - - /// Set up sandbox, fork and jump into it. - /// - /// On success, the returned value will be the PID of the child for the parent and `None` for - /// the child itself, with the latter running isolated in `self.shared_dir`. - pub fn enter(&mut self) -> Result, Error> { - let uid = unsafe { libc::geteuid() }; - - let flags = if uid == 0 { - libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET - } else { - // If running as an unprivileged user, rely on user_namespaces(7) for isolation. The - // main limitation of this strategy is that only the current uid/gid are mapped into - // the new namespace, so most operations on permissions will fail. - libc::CLONE_NEWPID | libc::CLONE_NEWNS | libc::CLONE_NEWNET | libc::CLONE_NEWUSER - }; - - let ret = unsafe { libc::unshare(flags) }; - if ret != 0 { - return Err(Error::Unshare(std::io::Error::last_os_error())); - } - - let child = unsafe { libc::fork() }; - match child { - 0 => { - // This is the child. Request to receive SIGTERM on parent's death. - unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) }; - self.setup_nofile_rlimit()?; - self.setup_mounts()?; - Ok(None) - } - x if x > 0 => { - // This is the parent. - Ok(Some(child)) - } - _ => Err(Error::Fork(std::io::Error::last_os_error())), - } - } - - pub fn get_proc_self_fd(&self) -> Option { - self.proc_self_fd - } -} diff --git a/vhost_user_fs/src/seccomp.rs b/vhost_user_fs/src/seccomp.rs deleted file mode 100644 index 86f91e7ed..000000000 --- a/vhost_user_fs/src/seccomp.rs +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2020 Red Hat, Inc. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use seccomp::{allow_syscall, BpfProgram, SeccompAction, SeccompFilter}; -use std::convert::TryInto; -use std::{convert, fmt}; - -#[derive(Debug)] -pub enum Error { - /// Cannot create seccomp filter - CreateSeccompFilter(seccomp::SeccompError), - - /// Cannot apply seccomp filter - ApplySeccompFilter(seccomp::Error), -} - -impl convert::From for Error { - fn from(e: seccomp::Error) -> Self { - Error::ApplySeccompFilter(e) - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "vhost_user_fs_seccomp_error: {:?}", self) - } -} - -fn vuf_filter(action: SeccompAction) -> Result { - Ok(SeccompFilter::new( - vec![ - allow_syscall(libc::SYS_accept4), - allow_syscall(libc::SYS_brk), - allow_syscall(libc::SYS_capget), // For CAP_FSETID - allow_syscall(libc::SYS_capset), - allow_syscall(libc::SYS_clock_gettime), - allow_syscall(libc::SYS_clone), - allow_syscall(libc::SYS_close), - allow_syscall(libc::SYS_copy_file_range), - allow_syscall(libc::SYS_dup), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_epoll_create), - allow_syscall(libc::SYS_epoll_create1), - allow_syscall(libc::SYS_epoll_ctl), - allow_syscall(libc::SYS_epoll_pwait), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_epoll_wait), - allow_syscall(libc::SYS_eventfd2), - allow_syscall(libc::SYS_exit), - allow_syscall(libc::SYS_exit_group), - allow_syscall(libc::SYS_fallocate), - allow_syscall(libc::SYS_fchdir), - allow_syscall(libc::SYS_fchmod), - allow_syscall(libc::SYS_fchmodat), - allow_syscall(libc::SYS_fchownat), - allow_syscall(libc::SYS_fcntl), - allow_syscall(libc::SYS_fdatasync), - allow_syscall(libc::SYS_fgetxattr), - allow_syscall(libc::SYS_flistxattr), - allow_syscall(libc::SYS_flock), - allow_syscall(libc::SYS_fremovexattr), - allow_syscall(libc::SYS_fsetxattr), - allow_syscall(libc::SYS_fstat), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_fstatfs), - allow_syscall(libc::SYS_fsync), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_ftruncate), - allow_syscall(libc::SYS_futex), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_getdents), - allow_syscall(libc::SYS_getdents64), - allow_syscall(libc::SYS_getegid), - allow_syscall(libc::SYS_geteuid), - allow_syscall(libc::SYS_getpid), - allow_syscall(libc::SYS_gettid), - allow_syscall(libc::SYS_gettimeofday), - allow_syscall(libc::SYS_getxattr), - allow_syscall(libc::SYS_linkat), - allow_syscall(libc::SYS_listxattr), - allow_syscall(libc::SYS_lseek), - allow_syscall(libc::SYS_madvise), - allow_syscall(libc::SYS_mkdirat), - allow_syscall(libc::SYS_mknodat), - allow_syscall(libc::SYS_mmap), - allow_syscall(libc::SYS_mprotect), - allow_syscall(libc::SYS_mremap), - allow_syscall(libc::SYS_munmap), - allow_syscall(libc::SYS_newfstatat), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_open), - allow_syscall(libc::SYS_openat), - allow_syscall(libc::SYS_prctl), // TODO restrict to just PR_SET_NAME? - allow_syscall(libc::SYS_preadv), - allow_syscall(libc::SYS_pread64), - allow_syscall(libc::SYS_pwritev), - allow_syscall(libc::SYS_pwrite64), - allow_syscall(libc::SYS_read), - allow_syscall(libc::SYS_readlinkat), - allow_syscall(libc::SYS_recvmsg), - allow_syscall(libc::SYS_renameat), - allow_syscall(libc::SYS_renameat2), - allow_syscall(libc::SYS_removexattr), - allow_syscall(libc::SYS_rt_sigaction), - allow_syscall(libc::SYS_rt_sigprocmask), - allow_syscall(libc::SYS_rt_sigreturn), - allow_syscall(libc::SYS_sched_getaffinity), // used by thread_pool - allow_syscall(libc::SYS_sendmsg), - allow_syscall(libc::SYS_setresgid), - allow_syscall(libc::SYS_setresuid), - //allow_syscall(libc::SYS_setresgid32), Needed on some platforms, - //allow_syscall(libc::SYS_setresuid32), Needed on some platforms - allow_syscall(libc::SYS_set_robust_list), - allow_syscall(libc::SYS_setxattr), - allow_syscall(libc::SYS_sigaltstack), - allow_syscall(libc::SYS_statx), - allow_syscall(libc::SYS_symlinkat), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_time), // Rarely needed, except on static builds - allow_syscall(libc::SYS_tgkill), - allow_syscall(libc::SYS_umask), - #[cfg(target_arch = "x86_64")] - allow_syscall(libc::SYS_unlink), - allow_syscall(libc::SYS_unlinkat), - allow_syscall(libc::SYS_unshare), - allow_syscall(libc::SYS_utimensat), - allow_syscall(libc::SYS_write), - allow_syscall(libc::SYS_writev), - ] - .into_iter() - .collect(), - action, - )?) -} - -pub fn enable_seccomp(action: SeccompAction) -> Result<(), Error> { - let scfilter = vuf_filter(action)?; - let bpfprog: BpfProgram = scfilter.try_into()?; - SeccompFilter::apply(bpfprog).map_err(Error::ApplySeccompFilter) -} diff --git a/vhost_user_fs/src/server.rs b/vhost_user_fs/src/server.rs deleted file mode 100644 index 9fcd8019c..000000000 --- a/vhost_user_fs/src/server.rs +++ /dev/null @@ -1,1419 +0,0 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -use super::fs_cache_req_handler::FsCacheReqHandler; -use crate::descriptor_utils::{Reader, Writer}; -use crate::filesystem::{ - Context, DirEntry, Entry, FileSystem, GetxattrReply, ListxattrReply, ZeroCopyReader, - ZeroCopyWriter, -}; -use crate::fuse::*; -use crate::{Error, Result}; -use std::convert::TryInto; -use std::ffi::CStr; -use std::fs::File; -use std::io::{self, Read, Write}; -use std::mem::size_of; -use vm_memory::ByteValued; - -const MAX_BUFFER_SIZE: u32 = 1 << 20; -const DIRENT_PADDING: [u8; 8] = [0; 8]; - -struct ZCReader<'a>(Reader<'a>); - -impl<'a> ZeroCopyReader for ZCReader<'a> { - fn read_to(&mut self, f: &mut File, count: usize, off: u64) -> io::Result { - self.0.read_to_at(f, count, off) - } -} - -impl<'a> io::Read for ZCReader<'a> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.0.read(buf) - } -} - -struct ZCWriter<'a>(Writer<'a>); - -impl<'a> ZeroCopyWriter for ZCWriter<'a> { - fn write_from(&mut self, f: &mut File, count: usize, off: u64) -> io::Result { - self.0.write_from_at(f, count, off) - } -} - -impl<'a> io::Write for ZCWriter<'a> { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.0.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.0.flush() - } -} - -pub struct Server { - fs: F, -} - -impl Server { - pub fn new(fs: F) -> Server { - Server { fs } - } - - #[allow(clippy::cognitive_complexity)] - pub fn handle_message( - &self, - mut r: Reader, - w: Writer, - vu_req: Option<&mut T>, - ) -> Result { - let in_header: InHeader = r.read_obj().map_err(Error::DecodeMessage)?; - - if in_header.len > MAX_BUFFER_SIZE { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - match in_header.opcode { - x if x == Opcode::Lookup as u32 => self.lookup(in_header, r, w), - x if x == Opcode::Forget as u32 => self.forget(in_header, r), // No reply. - x if x == Opcode::Getattr as u32 => self.getattr(in_header, r, w), - x if x == Opcode::Setattr as u32 => self.setattr(in_header, r, w), - x if x == Opcode::Readlink as u32 => self.readlink(in_header, w), - x if x == Opcode::Symlink as u32 => self.symlink(in_header, r, w), - x if x == Opcode::Mknod as u32 => self.mknod(in_header, r, w), - x if x == Opcode::Mkdir as u32 => self.mkdir(in_header, r, w), - x if x == Opcode::Unlink as u32 => self.unlink(in_header, r, w), - x if x == Opcode::Rmdir as u32 => self.rmdir(in_header, r, w), - x if x == Opcode::Rename as u32 => self.rename(in_header, r, w), - x if x == Opcode::Link as u32 => self.link(in_header, r, w), - x if x == Opcode::Open as u32 => self.open(in_header, r, w), - x if x == Opcode::Read as u32 => self.read(in_header, r, w), - x if x == Opcode::Write as u32 => self.write(in_header, r, w), - x if x == Opcode::Statfs as u32 => self.statfs(in_header, w), - x if x == Opcode::Release as u32 => self.release(in_header, r, w), - x if x == Opcode::Fsync as u32 => self.fsync(in_header, r, w), - x if x == Opcode::Setxattr as u32 => self.setxattr(in_header, r, w), - x if x == Opcode::Getxattr as u32 => self.getxattr(in_header, r, w), - x if x == Opcode::Listxattr as u32 => self.listxattr(in_header, r, w), - x if x == Opcode::Removexattr as u32 => self.removexattr(in_header, r, w), - x if x == Opcode::Flush as u32 => self.flush(in_header, r, w), - x if x == Opcode::Init as u32 => self.init(in_header, r, w), - x if x == Opcode::Opendir as u32 => self.opendir(in_header, r, w), - x if x == Opcode::Readdir as u32 => self.readdir(in_header, r, w), - x if x == Opcode::Releasedir as u32 => self.releasedir(in_header, r, w), - x if x == Opcode::Fsyncdir as u32 => self.fsyncdir(in_header, r, w), - x if x == Opcode::Getlk as u32 => self.getlk(in_header, r, w), - x if x == Opcode::Setlk as u32 => self.setlk(in_header, r, w), - x if x == Opcode::Setlkw as u32 => self.setlkw(in_header, r, w), - x if x == Opcode::Access as u32 => self.access(in_header, r, w), - x if x == Opcode::Create as u32 => self.create(in_header, r, w), - x if x == Opcode::Interrupt as u32 => self.interrupt(in_header), - x if x == Opcode::Bmap as u32 => self.bmap(in_header, r, w), - x if x == Opcode::Destroy as u32 => self.destroy(), - x if x == Opcode::Ioctl as u32 => self.ioctl(in_header, r, w), - x if x == Opcode::Poll as u32 => self.poll(in_header, r, w), - x if x == Opcode::NotifyReply as u32 => self.notify_reply(in_header, r, w), - x if x == Opcode::BatchForget as u32 => self.batch_forget(in_header, r, w), - x if x == Opcode::Fallocate as u32 => self.fallocate(in_header, r, w), - x if x == Opcode::Readdirplus as u32 => self.readdirplus(in_header, r, w), - x if x == Opcode::Rename2 as u32 => self.rename2(in_header, r, w), - x if x == Opcode::Lseek as u32 => self.lseek(in_header, r, w), - x if x == Opcode::CopyFileRange as u32 => self.copyfilerange(in_header, r, w), - x if x == Opcode::SetupMapping as u32 => self.setupmapping(in_header, r, w, vu_req), - x if x == Opcode::RemoveMapping as u32 => self.removemapping(in_header, r, w, vu_req), - _ => reply_error( - io::Error::from_raw_os_error(libc::ENOSYS), - in_header.unique, - w, - ), - } - } - - fn setupmapping( - &self, - in_header: InHeader, - mut r: Reader, - w: Writer, - vu_req: Option<&mut T>, - ) -> Result { - if let Some(req) = vu_req { - let SetupmappingIn { - fh, - foffset, - len, - flags, - moffset, - } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self.fs.setupmapping( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - foffset, - len, - flags, - moffset, - req, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } else { - reply_error( - io::Error::from_raw_os_error(libc::EINVAL), - in_header.unique, - w, - ) - } - } - - fn removemapping( - &self, - in_header: InHeader, - mut r: Reader, - w: Writer, - vu_req: Option<&mut T>, - ) -> Result { - if let Some(req) = vu_req { - let RemovemappingIn { count } = r.read_obj().map_err(Error::DecodeMessage)?; - - if let Some(size) = (count as usize).checked_mul(size_of::()) { - if size > MAX_BUFFER_SIZE as usize { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - } else { - return reply_error( - io::Error::from_raw_os_error(libc::EOVERFLOW), - in_header.unique, - w, - ); - } - - let mut requests = Vec::with_capacity(count as usize); - for _ in 0..count { - requests.push( - r.read_obj::() - .map_err(Error::DecodeMessage)?, - ); - } - - match self - .fs - .removemapping(Context::from(in_header), requests, req) - { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } else { - reply_error( - io::Error::from_raw_os_error(libc::EINVAL), - in_header.unique, - w, - ) - } - } - - fn lookup(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .ok_or(Error::InvalidHeaderLength)?; - - let mut buf = vec![0u8; namelen]; - - r.read_exact(&mut buf).map_err(Error::DecodeMessage)?; - - let name = bytes_to_cstr(buf.as_ref())?; - - match self - .fs - .lookup(Context::from(in_header), in_header.nodeid.into(), &name) - { - Ok(entry) => { - let out = EntryOut::from(entry); - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn forget(&self, in_header: InHeader, mut r: Reader) -> Result { - let ForgetIn { nlookup } = r.read_obj().map_err(Error::DecodeMessage)?; - - self.fs - .forget(Context::from(in_header), in_header.nodeid.into(), nlookup); - - // There is no reply for forget messages. - Ok(0) - } - - fn getattr(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let GetattrIn { flags, fh, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - let handle = if (flags & GETATTR_FH) != 0 { - Some(fh.into()) - } else { - None - }; - - match self - .fs - .getattr(Context::from(in_header), in_header.nodeid.into(), handle) - { - Ok((st, timeout)) => { - let out = AttrOut { - attr_valid: timeout.as_secs(), - attr_valid_nsec: timeout.subsec_nanos(), - dummy: 0, - attr: st.into(), - }; - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn setattr(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let setattr_in: SetattrIn = r.read_obj().map_err(Error::DecodeMessage)?; - - let handle = if setattr_in.valid & FATTR_FH != 0 { - Some(setattr_in.fh.into()) - } else { - None - }; - - let valid = SetattrValid::from_bits_truncate(setattr_in.valid); - - let st: libc::stat64 = setattr_in.into(); - - match self.fs.setattr( - Context::from(in_header), - in_header.nodeid.into(), - st, - handle, - valid, - ) { - Ok((st, timeout)) => { - let out = AttrOut { - attr_valid: timeout.as_secs(), - attr_valid_nsec: timeout.subsec_nanos(), - dummy: 0, - attr: st.into(), - }; - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn readlink(&self, in_header: InHeader, w: Writer) -> Result { - match self - .fs - .readlink(Context::from(in_header), in_header.nodeid.into()) - { - Ok(linkname) => { - // We need to disambiguate the option type here even though it is `None`. - reply_ok(None::, Some(&linkname), in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn symlink(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - // Unfortunately the name and linkname are encoded one after another and - // separated by a nul character. - let len = (in_header.len as usize) - .checked_sub(size_of::()) - .ok_or(Error::InvalidHeaderLength)?; - let mut buf = vec![0; len]; - - r.read_exact(&mut buf).map_err(Error::DecodeMessage)?; - - // We want to include the '\0' byte in the first slice. - let split_pos = buf - .iter() - .position(|c| *c == b'\0') - .map(|p| p + 1) - .ok_or(Error::MissingParameter)?; - - let (name, linkname) = buf.split_at(split_pos); - - match self.fs.symlink( - Context::from(in_header), - bytes_to_cstr(linkname)?, - in_header.nodeid.into(), - bytes_to_cstr(name)?, - ) { - Ok(entry) => { - let out = EntryOut::from(entry); - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn mknod(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let MknodIn { - mode, rdev, umask, .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(size_of::())) - .ok_or(Error::InvalidHeaderLength)?; - let mut name = vec![0; namelen]; - - r.read_exact(&mut name).map_err(Error::DecodeMessage)?; - - match self.fs.mknod( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(&name)?, - mode, - rdev, - umask, - ) { - Ok(entry) => { - let out = EntryOut::from(entry); - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn mkdir(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let MkdirIn { mode, umask } = r.read_obj().map_err(Error::DecodeMessage)?; - - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(size_of::())) - .ok_or(Error::InvalidHeaderLength)?; - let mut name = vec![0; namelen]; - - r.read_exact(&mut name).map_err(Error::DecodeMessage)?; - - match self.fs.mkdir( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(&name)?, - mode, - umask, - ) { - Ok(entry) => { - let out = EntryOut::from(entry); - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn unlink(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .ok_or(Error::InvalidHeaderLength)?; - let mut name = vec![0; namelen]; - - r.read_exact(&mut name).map_err(Error::DecodeMessage)?; - - match self.fs.unlink( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(&name)?, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn rmdir(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .ok_or(Error::InvalidHeaderLength)?; - let mut name = vec![0; namelen]; - - r.read_exact(&mut name).map_err(Error::DecodeMessage)?; - - match self.fs.rmdir( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(&name)?, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn do_rename( - &self, - in_header: InHeader, - msg_size: usize, - newdir: u64, - flags: u32, - mut r: Reader, - w: Writer, - ) -> Result { - let buflen = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(msg_size)) - .ok_or(Error::InvalidHeaderLength)?; - let mut buf = vec![0; buflen]; - - r.read_exact(&mut buf).map_err(Error::DecodeMessage)?; - - // We want to include the '\0' byte in the first slice. - let split_pos = buf - .iter() - .position(|c| *c == b'\0') - .map(|p| p + 1) - .ok_or(Error::MissingParameter)?; - - let (oldname, newname) = buf.split_at(split_pos); - - match self.fs.rename( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(oldname)?, - newdir.into(), - bytes_to_cstr(newname)?, - flags, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn rename(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let RenameIn { newdir } = r.read_obj().map_err(Error::DecodeMessage)?; - - self.do_rename(in_header, size_of::(), newdir, 0, r, w) - } - - fn rename2(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let Rename2In { newdir, flags, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - let flags = flags & (libc::RENAME_EXCHANGE | libc::RENAME_NOREPLACE) as u32; - - self.do_rename(in_header, size_of::(), newdir, flags, r, w) - } - - fn link(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let LinkIn { oldnodeid } = r.read_obj().map_err(Error::DecodeMessage)?; - - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(size_of::())) - .ok_or(Error::InvalidHeaderLength)?; - let mut name = vec![0; namelen]; - - r.read_exact(&mut name).map_err(Error::DecodeMessage)?; - - match self.fs.link( - Context::from(in_header), - oldnodeid.into(), - in_header.nodeid.into(), - bytes_to_cstr(&name)?, - ) { - Ok(entry) => { - let out = EntryOut::from(entry); - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn open(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let OpenIn { flags, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self - .fs - .open(Context::from(in_header), in_header.nodeid.into(), flags) - { - Ok((handle, opts)) => { - let out = OpenOut { - fh: handle.map(Into::into).unwrap_or(0), - open_flags: opts.bits(), - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn read(&self, in_header: InHeader, mut r: Reader, mut w: Writer) -> Result { - let ReadIn { - fh, - offset, - size, - read_flags, - lock_owner, - flags, - .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - if size > MAX_BUFFER_SIZE { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - - let owner = if read_flags & READ_LOCKOWNER != 0 { - Some(lock_owner) - } else { - None - }; - - // Split the writer into 2 pieces: one for the `OutHeader` and the rest for the data. - let data_writer = ZCWriter(w.split_at(size_of::()).unwrap()); - - match self.fs.read( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - data_writer, - size, - offset, - owner, - flags, - ) { - Ok(count) => { - // Don't use `reply_ok` because we need to set a custom size length for the - // header. - let out = OutHeader { - len: (size_of::() + count) as u32, - error: 0, - unique: in_header.unique, - }; - - w.write_all(out.as_slice()).map_err(Error::EncodeMessage)?; - Ok(out.len as usize) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn write(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let WriteIn { - fh, - offset, - size, - write_flags, - lock_owner, - flags, - .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - if size > MAX_BUFFER_SIZE { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - - let owner = if write_flags & WRITE_LOCKOWNER != 0 { - Some(lock_owner) - } else { - None - }; - - let delayed_write = write_flags & WRITE_CACHE != 0; - let kill_priv = write_flags & WRITE_KILL_PRIV != 0; - - let data_reader = ZCReader(r); - - match self.fs.write( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - data_reader, - size, - offset, - owner, - delayed_write, - kill_priv, - flags, - ) { - Ok(count) => { - let out = WriteOut { - size: count as u32, - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn statfs(&self, in_header: InHeader, w: Writer) -> Result { - match self - .fs - .statfs(Context::from(in_header), in_header.nodeid.into()) - { - Ok(st) => reply_ok(Some(Kstatfs::from(st)), None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn release(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let ReleaseIn { - fh, - flags, - release_flags, - lock_owner, - } = r.read_obj().map_err(Error::DecodeMessage)?; - - let flush = release_flags & RELEASE_FLUSH != 0; - let flock_release = release_flags & RELEASE_FLOCK_UNLOCK != 0; - let lock_owner = if flush || flock_release { - Some(lock_owner) - } else { - None - }; - - match self.fs.release( - Context::from(in_header), - in_header.nodeid.into(), - flags, - fh.into(), - flush, - flock_release, - lock_owner, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn fsync(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let FsyncIn { - fh, fsync_flags, .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - let datasync = fsync_flags & 0x1 != 0; - - match self.fs.fsync( - Context::from(in_header), - in_header.nodeid.into(), - datasync, - fh.into(), - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn setxattr(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let SetxattrIn { size, flags } = r.read_obj().map_err(Error::DecodeMessage)?; - - // The name and value and encoded one after another and separated by a '\0' character. - let len = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(size_of::())) - .ok_or(Error::InvalidHeaderLength)?; - let mut buf = vec![0; len]; - - r.read_exact(&mut buf).map_err(Error::DecodeMessage)?; - - // We want to include the '\0' byte in the first slice. - let split_pos = buf - .iter() - .position(|c| *c == b'\0') - .map(|p| p + 1) - .ok_or(Error::MissingParameter)?; - - let (name, value) = buf.split_at(split_pos); - - if size != value.len() as u32 { - return Err(Error::InvalidXattrSize((size, value.len()))); - } - - match self.fs.setxattr( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(name)?, - value, - flags, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn getxattr(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let GetxattrIn { size, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(size_of::())) - .ok_or(Error::InvalidHeaderLength)?; - let mut name = vec![0; namelen]; - - r.read_exact(&mut name).map_err(Error::DecodeMessage)?; - - if size > MAX_BUFFER_SIZE { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - - match self.fs.getxattr( - Context::from(in_header), - in_header.nodeid.into(), - bytes_to_cstr(&name)?, - size, - ) { - Ok(GetxattrReply::Value(val)) => reply_ok(None::, Some(&val), in_header.unique, w), - Ok(GetxattrReply::Count(count)) => { - let out = GetxattrOut { - size: count, - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn listxattr(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let GetxattrIn { size, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - if size > MAX_BUFFER_SIZE { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - - match self - .fs - .listxattr(Context::from(in_header), in_header.nodeid.into(), size) - { - Ok(ListxattrReply::Names(val)) => reply_ok(None::, Some(&val), in_header.unique, w), - Ok(ListxattrReply::Count(count)) => { - let out = GetxattrOut { - size: count, - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn removexattr(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .ok_or(Error::InvalidHeaderLength)?; - - let mut buf = vec![0; namelen]; - - r.read_exact(&mut buf).map_err(Error::DecodeMessage)?; - - let name = bytes_to_cstr(&buf)?; - - match self - .fs - .removexattr(Context::from(in_header), in_header.nodeid.into(), name) - { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn flush(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let FlushIn { fh, lock_owner, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self.fs.flush( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - lock_owner, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn init(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let InitIn { - major, - minor, - max_readahead, - flags, - } = r.read_obj().map_err(Error::DecodeMessage)?; - - if major < KERNEL_VERSION { - error!("Unsupported fuse protocol version: {}.{}", major, minor); - return reply_error( - io::Error::from_raw_os_error(libc::EPROTO), - in_header.unique, - w, - ); - } - - if major > KERNEL_VERSION { - // Wait for the kernel to reply back with a 7.X version. - let out = InitOut { - major: KERNEL_VERSION, - minor: KERNEL_MINOR_VERSION, - ..Default::default() - }; - - return reply_ok(Some(out), None, in_header.unique, w); - } - - if minor < KERNEL_MINOR_VERSION { - error!( - "Unsupported fuse protocol minor version: {}.{}", - major, minor - ); - return reply_error( - io::Error::from_raw_os_error(libc::EPROTO), - in_header.unique, - w, - ); - } - - // These fuse features are supported by this server by default. - let supported = FsOptions::ASYNC_READ - | FsOptions::PARALLEL_DIROPS - | FsOptions::BIG_WRITES - | FsOptions::AUTO_INVAL_DATA - | FsOptions::HANDLE_KILLPRIV - | FsOptions::ASYNC_DIO - | FsOptions::HAS_IOCTL_DIR - | FsOptions::ATOMIC_O_TRUNC - | FsOptions::MAX_PAGES; - - let capable = FsOptions::from_bits_truncate(flags); - - let page_size: u32 = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; - let max_pages = ((MAX_BUFFER_SIZE - 1) / page_size) + 1; - - match self.fs.init(capable) { - Ok(want) => { - let enabled = capable & (want | supported); - - let out = InitOut { - major: KERNEL_VERSION, - minor: KERNEL_MINOR_VERSION, - max_readahead, - flags: enabled.bits(), - max_background: ::std::u16::MAX, - congestion_threshold: (::std::u16::MAX / 4) * 3, - max_write: MAX_BUFFER_SIZE, - time_gran: 1, // nanoseconds - max_pages: max_pages.try_into().unwrap(), - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn opendir(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let OpenIn { flags, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self - .fs - .opendir(Context::from(in_header), in_header.nodeid.into(), flags) - { - Ok((handle, opts)) => { - let out = OpenOut { - fh: handle.map(Into::into).unwrap_or(0), - open_flags: opts.bits(), - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn do_readdir( - &self, - in_header: InHeader, - mut r: Reader, - mut w: Writer, - plus: bool, - ) -> Result { - let ReadIn { - fh, offset, size, .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - if size > MAX_BUFFER_SIZE { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - - let available_bytes = w.available_bytes(); - if available_bytes < size as usize { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - - // Skip over enough bytes for the header. - let mut cursor = w.split_at(size_of::()).unwrap(); - - let res = if plus { - self.fs.readdirplus( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - size, - offset, - |d, e| add_dirent(&mut cursor, size, d, Some(e)), - ) - } else { - self.fs.readdir( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - size, - offset, - |d| add_dirent(&mut cursor, size, d, None), - ) - }; - - if let Err(e) = res { - reply_error(e, in_header.unique, w) - } else { - // Don't use `reply_ok` because we need to set a custom size length for the - // header. - let out = OutHeader { - len: (size_of::() + cursor.bytes_written()) as u32, - error: 0, - unique: in_header.unique, - }; - - w.write_all(out.as_slice()).map_err(Error::EncodeMessage)?; - Ok(out.len as usize) - } - } - - fn readdir(&self, in_header: InHeader, r: Reader, w: Writer) -> Result { - self.do_readdir(in_header, r, w, false) - } - - fn readdirplus(&self, in_header: InHeader, r: Reader, w: Writer) -> Result { - self.do_readdir(in_header, r, w, true) - } - - fn releasedir(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let ReleaseIn { fh, flags, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self.fs.releasedir( - Context::from(in_header), - in_header.nodeid.into(), - flags, - fh.into(), - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn fsyncdir(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let FsyncIn { - fh, fsync_flags, .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - let datasync = fsync_flags & 0x1 != 0; - - match self.fs.fsyncdir( - Context::from(in_header), - in_header.nodeid.into(), - datasync, - fh.into(), - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn getlk(&self, in_header: InHeader, mut _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.getlk() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn setlk(&self, in_header: InHeader, mut _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.setlk() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn setlkw(&self, in_header: InHeader, mut _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.setlkw() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn access(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let AccessIn { mask, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self - .fs - .access(Context::from(in_header), in_header.nodeid.into(), mask) - { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn create(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let CreateIn { - flags, mode, umask, .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - let namelen = (in_header.len as usize) - .checked_sub(size_of::()) - .and_then(|l| l.checked_sub(size_of::())) - .ok_or(Error::InvalidHeaderLength)?; - - let mut buf = vec![0; namelen]; - - r.read_exact(&mut buf).map_err(Error::DecodeMessage)?; - - let name = bytes_to_cstr(&buf)?; - - match self.fs.create( - Context::from(in_header), - in_header.nodeid.into(), - name, - mode, - flags, - umask, - ) { - Ok((entry, handle, opts)) => { - let entry_out = EntryOut { - nodeid: entry.inode, - generation: entry.generation, - entry_valid: entry.entry_timeout.as_secs(), - attr_valid: entry.attr_timeout.as_secs(), - entry_valid_nsec: entry.entry_timeout.subsec_nanos(), - attr_valid_nsec: entry.attr_timeout.subsec_nanos(), - attr: entry.attr.into(), - }; - let open_out = OpenOut { - fh: handle.map(Into::into).unwrap_or(0), - open_flags: opts.bits(), - ..Default::default() - }; - - // Kind of a hack to write both structs. - reply_ok( - Some(entry_out), - Some(open_out.as_slice()), - in_header.unique, - w, - ) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn interrupt(&self, _in_header: InHeader) -> Result { - Ok(0) - } - - fn bmap(&self, in_header: InHeader, mut _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.bmap() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn destroy(&self) -> Result { - // No reply to this function. - self.fs.destroy(); - - Ok(0) - } - - fn ioctl(&self, in_header: InHeader, _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.ioctl() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn poll(&self, in_header: InHeader, mut _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.poll() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn notify_reply(&self, in_header: InHeader, mut _r: Reader, w: Writer) -> Result { - if let Err(e) = self.fs.notify_reply() { - reply_error(e, in_header.unique, w) - } else { - Ok(0) - } - } - - fn batch_forget(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let BatchForgetIn { count, .. } = r.read_obj().map_err(Error::DecodeMessage)?; - - if let Some(size) = (count as usize).checked_mul(size_of::()) { - if size > MAX_BUFFER_SIZE as usize { - return reply_error( - io::Error::from_raw_os_error(libc::ENOMEM), - in_header.unique, - w, - ); - } - } else { - return reply_error( - io::Error::from_raw_os_error(libc::EOVERFLOW), - in_header.unique, - w, - ); - } - - let mut requests = Vec::with_capacity(count as usize); - for _ in 0..count { - requests.push( - r.read_obj::() - .map(|f| (f.nodeid.into(), f.nlookup)) - .map_err(Error::DecodeMessage)?, - ); - } - - self.fs.batch_forget(Context::from(in_header), requests); - - // No reply for forget messages. - Ok(0) - } - - fn fallocate(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let FallocateIn { - fh, - offset, - length, - mode, - .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self.fs.fallocate( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - mode, - offset, - length, - ) { - Ok(()) => reply_ok(None::, None, in_header.unique, w), - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn lseek(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let LseekIn { - fh, offset, whence, .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self.fs.lseek( - Context::from(in_header), - in_header.nodeid.into(), - fh.into(), - offset, - whence, - ) { - Ok(offset) => { - let out = LseekOut { offset }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } - - fn copyfilerange(&self, in_header: InHeader, mut r: Reader, w: Writer) -> Result { - let CopyfilerangeIn { - fh_in, - off_in, - nodeid_out, - fh_out, - off_out, - len, - flags, - .. - } = r.read_obj().map_err(Error::DecodeMessage)?; - - match self.fs.copyfilerange( - Context::from(in_header), - in_header.nodeid.into(), - fh_in.into(), - off_in, - nodeid_out.into(), - fh_out.into(), - off_out, - len, - flags, - ) { - Ok(count) => { - let out = WriteOut { - size: count as u32, - ..Default::default() - }; - - reply_ok(Some(out), None, in_header.unique, w) - } - Err(e) => reply_error(e, in_header.unique, w), - } - } -} - -fn reply_ok( - out: Option, - data: Option<&[u8]>, - unique: u64, - mut w: Writer, -) -> Result { - let mut len = size_of::(); - - if out.is_some() { - len += size_of::(); - } - - if let Some(ref data) = data { - len += data.len(); - } - - let header = OutHeader { - len: len as u32, - error: 0, - unique, - }; - - w.write_all(header.as_slice()) - .map_err(Error::EncodeMessage)?; - - if let Some(out) = out { - w.write_all(out.as_slice()).map_err(Error::EncodeMessage)?; - } - - if let Some(data) = data { - w.write_all(data).map_err(Error::EncodeMessage)?; - } - - debug_assert_eq!(len, w.bytes_written()); - Ok(w.bytes_written()) -} - -fn reply_error(e: io::Error, unique: u64, mut w: Writer) -> Result { - let header = OutHeader { - len: size_of::() as u32, - error: -e.raw_os_error().unwrap_or(libc::EIO), - unique, - }; - - w.write_all(header.as_slice()) - .map_err(Error::EncodeMessage)?; - - debug_assert_eq!(header.len as usize, w.bytes_written()); - Ok(w.bytes_written()) -} - -fn bytes_to_cstr(buf: &[u8]) -> Result<&CStr> { - // Convert to a `CStr` first so that we can drop the '\0' byte at the end - // and make sure there are no interior '\0' bytes. - CStr::from_bytes_with_nul(buf).map_err(Error::InvalidCString) -} - -fn add_dirent( - cursor: &mut Writer, - max: u32, - d: DirEntry, - entry: Option, -) -> io::Result { - if d.name.len() > ::std::u32::MAX as usize { - return Err(io::Error::from_raw_os_error(libc::EOVERFLOW)); - } - - let dirent_len = size_of::() - .checked_add(d.name.len()) - .ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))?; - - // Directory entries must be padded to 8-byte alignment. If adding 7 causes - // an overflow then this dirent cannot be properly padded. - let padded_dirent_len = dirent_len - .checked_add(7) - .map(|l| l & !7) - .ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))?; - - let total_len = if entry.is_some() { - padded_dirent_len - .checked_add(size_of::()) - .ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))? - } else { - padded_dirent_len - }; - - if (max as usize).saturating_sub(cursor.bytes_written()) < total_len { - Ok(0) - } else { - if let Some(entry) = entry { - cursor.write_all(EntryOut::from(entry).as_slice())?; - } - - let dirent = Dirent { - ino: d.ino, - off: d.offset, - namelen: d.name.len() as u32, - type_: d.type_, - }; - - cursor.write_all(dirent.as_slice())?; - cursor.write_all(d.name)?; - - // We know that `dirent_len` <= `padded_dirent_len` due to the check above - // so there's no need for checked arithmetic. - let padding = padded_dirent_len - dirent_len; - if padding > 0 { - cursor.write_all(&DIRENT_PADDING[..padding])?; - } - - Ok(total_len) - } -}