diff --git a/Cargo.lock b/Cargo.lock index feb65da67..6c85f5502 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -321,6 +321,7 @@ version = "0.1.0" dependencies = [ "byteorder", "crc-any", + "flate2", "io-uring", "libc", "log", @@ -334,6 +335,7 @@ dependencies = [ "vm-memory", "vm-virtio", "vmm-sys-util", + "zstd", ] [[package]] @@ -368,6 +370,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -728,6 +732,16 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +[[package]] +name = "flate2" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "flume" version = "0.11.1" @@ -1076,6 +1090,16 @@ dependencies = [ "syn", ] +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -1235,6 +1259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -1965,6 +1990,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "slab" version = "0.4.11" @@ -2809,6 +2840,34 @@ dependencies = [ "syn", ] +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zvariant" version = "5.7.0" diff --git a/block/Cargo.toml b/block/Cargo.toml index 9823c1f81..e6ca61bd4 100644 --- a/block/Cargo.toml +++ b/block/Cargo.toml @@ -11,6 +11,7 @@ io_uring = ["dep:io-uring"] [dependencies] byteorder = { workspace = true } crc-any = "2.5.0" +flate2 = "1.0" io-uring = { version = "0.7.10", optional = true } libc = { workspace = true } log = { workspace = true } @@ -28,6 +29,7 @@ vm-memory = { workspace = true, features = [ ] } vm-virtio = { path = "../vm-virtio" } vmm-sys-util = { workspace = true } +zstd = "0.13" [lints] workspace = true diff --git a/block/src/qcow/decoder.rs b/block/src/qcow/decoder.rs new file mode 100644 index 000000000..f1237aae7 --- /dev/null +++ b/block/src/qcow/decoder.rs @@ -0,0 +1,87 @@ +// Copyright 2025 The Cloud Hypervisor Authors. All rights reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("Zlib decompress error")] + ZlibDecompress(#[source] flate2::DecompressError), + #[error("Zlib unexpected status: {0:?}")] + ZlibUnexpectedStatus(flate2::Status), + #[error("Zstd decompress error")] + ZstdDecompress(#[source] std::io::Error), + #[error("Zstd: failed to fill buffer")] + ZstdFillBuffer(#[source] std::io::Error), +} + +pub type Result = std::result::Result; + +/// Generic trait for decoding zlib/zstd formats +pub trait Decoder { + fn decode(&self, input: &[u8], output: &mut [u8]) -> Result; +} + +#[derive(Default)] +pub struct ZlibDecoder {} + +impl Decoder for ZlibDecoder { + fn decode(&self, input: &[u8], output: &mut [u8]) -> Result { + use flate2::{Decompress, FlushDecompress, Status}; + + let mut decompressor = Decompress::new(false); + let status = decompressor + .decompress(input, output, FlushDecompress::Finish) + .map_err(Error::ZlibDecompress)?; + if status == Status::StreamEnd { + Ok(decompressor.total_out() as usize) + } else { + Err(Error::ZlibUnexpectedStatus(status)) + } + } +} + +#[derive(Default)] +pub struct ZstdDecoder {} + +impl Decoder for ZstdDecoder { + fn decode(&self, input: &[u8], output: &mut [u8]) -> Result { + use std::io::Read; + + let mut decoder = zstd::stream::read::Decoder::new(input).map_err(Error::ZstdDecompress)?; + let decoded_size = decoder.read(output).map_err(Error::ZstdFillBuffer)?; + Ok(decoded_size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_zlib_decode() { + let d = ZlibDecoder::default(); + let valid_input = vec![99, 96, 100, 98, 6, 0]; + let mut output1 = vec![0; 4]; + d.decode(&valid_input, &mut output1).unwrap(); + assert_eq!(&output1, b"\x00\x01\x02\x03"); + + let invalid_input = vec![1, 2, 3, 4]; + let mut output2 = vec![0; 1024]; + d.decode(&invalid_input, &mut output2).unwrap_err(); + } + + #[test] + fn test_zstd_decode() { + let d = ZstdDecoder::default(); + let valid_input = vec![40, 181, 47, 253, 32, 2, 17, 0, 0, 1, 254]; + let mut output1 = vec![0; 2]; + d.decode(&valid_input, &mut output1).unwrap(); + assert_eq!(&output1, b"\x01\xfe"); + + let invalid_input = vec![1, 2, 3, 4]; + let mut output2 = vec![0; 1024]; + d.decode(&invalid_input, &mut output2).unwrap_err(); + } +} diff --git a/block/src/qcow/mod.rs b/block/src/qcow/mod.rs index e4f9790c9..0e0c972ab 100644 --- a/block/src/qcow/mod.rs +++ b/block/src/qcow/mod.rs @@ -4,6 +4,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +mod decoder; mod qcow_raw_file; mod raw_file; mod refcount; @@ -17,7 +18,7 @@ use std::os::fd::{AsRawFd, RawFd}; use std::str; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; -use libc::{EINVAL, ENOSPC, ENOTSUP}; +use libc::{EINVAL, EIO, ENOSPC}; use remain::sorted; use thiserror::Error; use vmm_sys_util::file_traits::{FileSetLen, FileSync}; @@ -25,6 +26,7 @@ use vmm_sys_util::seek_hole::SeekHole; use vmm_sys_util::write_zeroes::{PunchHole, WriteZeroesAt}; use crate::BlockBackend; +use crate::qcow::decoder::{Decoder, ZlibDecoder, ZstdDecoder}; use crate::qcow::qcow_raw_file::QcowRawFile; pub use crate::qcow::raw_file::RawFile; use crate::qcow::refcount::RefCount; @@ -42,8 +44,6 @@ pub enum Error { BackingFileOpen(#[source] Box), #[error("Backing file name is too long: {0} bytes over")] BackingFileTooLong(usize), - #[error("Compressed blocks not supported")] - CompressedBlocksNotSupported, #[error("Failed to evict cache")] EvictingCache(#[source] io::Error), #[error("File larger than max of {MAX_QCOW_FILE_SIZE}: {0}")] @@ -110,6 +110,8 @@ pub enum Error { TooManyL1Entries(u64), #[error("Ref count table too large: {0}")] TooManyRefcounts(u64), + #[error("Unsupported compression type")] + UnsupportedCompressionType, #[error("Unsupported refcount order")] UnsupportedRefcountOrder, #[error("Unsupported version: {0}")] @@ -127,6 +129,12 @@ pub enum ImageType { Qcow2, } +#[derive(Clone, Debug)] +pub enum CompressionType { + Zlib, + Zstd, +} + // Maximum data size supported. const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB. @@ -153,15 +161,50 @@ const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; // Flags const COMPRESSED_FLAG: u64 = 1 << 62; +const COMPRESSED_SECTOR_SIZE: u64 = 512; const CLUSTER_USED_FLAG: u64 = 1 << 63; const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1; +// Compression types as defined in https://www.qemu.org/docs/master/interop/qcow2.html +const COMPRESSION_TYPE_ZLIB: u64 = 0; // zlib/deflate +const COMPRESSION_TYPE_ZSTD: u64 = 1; // zstd + // The format supports a "header extension area", that crosvm does not use. const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8; // Defined by the specification const MAX_BACKING_FILE_SIZE: u32 = 1023; +fn l2_entry_is_empty(l2_entry: u64) -> bool { + l2_entry == 0 +} + +fn l2_entry_is_compressed(l2_entry: u64) -> bool { + l2_entry & COMPRESSED_FLAG != 0 +} + +// Get file offset and size of compressed cluster data +fn l2_entry_compressed_cluster_layout(l2_entry: u64, cluster_bits: u32) -> (u64, usize) { + let compressed_size_shift = 62 - (cluster_bits - 8); + let compressed_size_mask = (1 << (cluster_bits - 8)) - 1; + let compressed_cluster_addr = l2_entry & ((1 << compressed_size_shift) - 1); + let nsectors = (l2_entry >> compressed_size_shift & compressed_size_mask) + 1; + let compressed_cluster_size = ((nsectors * COMPRESSED_SECTOR_SIZE) + - (compressed_cluster_addr & (COMPRESSED_SECTOR_SIZE - 1))) + as usize; + (compressed_cluster_addr, compressed_cluster_size) +} + +// Get file offset of standard (non-compressed) cluster +fn l2_entry_std_cluster_addr(l2_entry: u64) -> u64 { + l2_entry & L2_TABLE_OFFSET_MASK +} + +// Make L2 entry for standard (non-compressed) cluster +fn l2_entry_make_std(cluster_addr: u64) -> u64 { + (cluster_addr & L2_TABLE_OFFSET_MASK) | CLUSTER_USED_FLAG +} + /// Contains the information from the header of a qcow file. #[derive(Clone, Debug)] pub struct QcowHeader { @@ -190,6 +233,7 @@ pub struct QcowHeader { pub autoclear_features: u64, pub refcount_order: u32, pub header_size: u32, + pub compression_type: CompressionType, // Post-header entries pub backing_file_path: Option, @@ -255,8 +299,19 @@ impl QcowHeader { } else { read_u32_from_file(f)? }, + compression_type: CompressionType::Zlib, backing_file_path: None, }; + if version == 3 && header.header_size > V3_BARE_HEADER_SIZE { + let raw_compression_type = read_u64_from_file(f)? >> (64 - 8); + header.compression_type = if raw_compression_type == COMPRESSION_TYPE_ZLIB { + Ok(CompressionType::Zlib) + } else if raw_compression_type == COMPRESSION_TYPE_ZSTD { + Ok(CompressionType::Zstd) + } else { + Err(Error::UnsupportedCompressionType) + }?; + } if header.backing_file_size > MAX_BACKING_FILE_SIZE { return Err(Error::BackingFileTooLong(header.backing_file_size as usize)); } @@ -274,6 +329,13 @@ impl QcowHeader { Ok(header) } + pub fn get_decoder(&self) -> Box { + match self.compression_type { + CompressionType::Zlib => Box::new(ZlibDecoder {}), + CompressionType::Zstd => Box::new(ZstdDecoder {}), + } + } + pub fn create_for_size_and_path( version: u32, size: u64, @@ -337,6 +399,7 @@ impl QcowHeader { autoclear_features: 0, refcount_order: DEFAULT_REFCOUNT_ORDER, header_size, + compression_type: CompressionType::Zlib, backing_file_path: backing_file.map(String::from), }) } @@ -588,17 +651,6 @@ impl QcowFile { let l2_entries = cluster_size / size_of::() as u64; - // Check for compressed blocks - for l2_addr_disk in l1_table.get_values() { - if *l2_addr_disk != 0 - && let Err(e) = Self::read_l2_cluster(&mut raw_file, *l2_addr_disk) - && let Some(os_error) = e.raw_os_error() - && os_error == ENOTSUP - { - return Err(Error::CompressedBlocksNotSupported); - } - } - let mut qcow = QcowFile { raw_file, header, @@ -714,11 +766,7 @@ impl QcowFile { let raw_file = &mut self.raw_file; self.l2_cache .insert(l1_index, table, |index, evicted| { - raw_file.write_pointer_table( - l1_table[index], - evicted.get_values(), - CLUSTER_USED_FLAG, - ) + raw_file.write_pointer_table(l1_table[index], evicted.get_values(), 0) }) .map_err(Error::EvictingCache)?; } @@ -1050,11 +1098,40 @@ impl QcowFile { (address / self.raw_file.cluster_size()) % self.l2_entries } - // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have - // yet to be allocated, return None. - fn file_offset_read(&mut self, address: u64) -> std::io::Result> { + // Decompress the cluster, return EIO on failure + fn decompress_l2_cluster(&mut self, l2_entry: u64) -> std::io::Result> { + let (compressed_cluster_addr, compressed_cluster_size) = + l2_entry_compressed_cluster_layout(l2_entry, self.header.cluster_bits); + // Read compressed cluster from raw file + self.raw_file + .file_mut() + .seek(SeekFrom::Start(compressed_cluster_addr))?; + let mut compressed_cluster = vec![0; compressed_cluster_size]; + self.raw_file + .file_mut() + .read_exact(&mut compressed_cluster)?; + let decoder = self.header.get_decoder(); + // Decompress + let cluster_size = self.raw_file.cluster_size() as usize; + let mut decompressed_cluster = vec![0; cluster_size]; + let decompressed_size = decoder + .decode(&compressed_cluster, &mut decompressed_cluster) + .map_err(|_| std::io::Error::from_raw_os_error(EIO))?; + if decompressed_size as u64 != self.raw_file.cluster_size() { + return Err(std::io::Error::from_raw_os_error(EIO)); + } + Ok(decompressed_cluster) + } + + fn file_read( + &mut self, + address: u64, + count: usize, + buf: &mut [u8], + ) -> std::io::Result> { + let err_inval = std::io::Error::from_raw_os_error(EINVAL); if address >= self.virtual_size() { - return Err(std::io::Error::from_raw_os_error(EINVAL)); + return Err(err_inval); } let l1_index = self.l1_table_index(address) as usize; @@ -1072,11 +1149,28 @@ impl QcowFile { self.cache_l2_cluster(l1_index, l2_addr_disk, false)?; - let cluster_addr = self.l2_cache.get(l1_index).unwrap()[l2_index]; - if cluster_addr == 0 { + let l2_entry = self.l2_cache.get(l1_index).unwrap()[l2_index]; + if l2_entry_is_empty(l2_entry) { + // Reading from an unallocated cluster will return zeros. return Ok(None); + } else if l2_entry_is_compressed(l2_entry) { + // Compressed cluster. + // Read it, decompress, then return slice from decompressed data. + let mut decompressed_cluster = self.decompress_l2_cluster(l2_entry)?; + decompressed_cluster.resize(self.raw_file.cluster_size() as usize, 0); + let start = self.raw_file.cluster_offset(address) as usize; + let end = start.checked_add(count); + if end.is_none() || end.unwrap() > decompressed_cluster.len() { + return Err(err_inval); + } + buf[..count].copy_from_slice(&decompressed_cluster[start..end.unwrap()]); + } else { + let start = l2_entry_std_cluster_addr(l2_entry) + self.raw_file.cluster_offset(address); + let raw_file = self.raw_file.file_mut(); + raw_file.seek(SeekFrom::Start(start))?; + raw_file.read_exact(buf)?; } - Ok(Some(cluster_addr + self.raw_file.cluster_offset(address))) + Ok(Some(())) } // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need @@ -1100,24 +1194,39 @@ impl QcowFile { set_refcounts.push((new_addr, 1)); } - let cluster_addr = match self.l2_cache.get(l1_index).unwrap()[l2_index] { - 0 => { - let initial_data = if let Some(backing) = self.backing_file.as_mut() { - let cluster_size = self.raw_file.cluster_size(); - let cluster_begin = address - (address % cluster_size); - let mut cluster_data = vec![0u8; cluster_size as usize]; - backing.seek(SeekFrom::Start(cluster_begin))?; - backing.read_exact(&mut cluster_data)?; - Some(cluster_data) - } else { - None - }; - // Need to allocate a data cluster - let cluster_addr = self.append_data_cluster(initial_data)?; - self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?; - cluster_addr + let l2_entry = self.l2_cache.get(l1_index).unwrap()[l2_index]; + let cluster_addr = if l2_entry_is_compressed(l2_entry) { + // Writing to compressed cluster. + // Allocate new cluster, decompress into new cluster, then use + // offset of new cluster. + let decompressed_cluster = self.decompress_l2_cluster(l2_entry)?; + let cluster_addr = self.append_data_cluster(None)?; + self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?; + self.raw_file + .file_mut() + .seek(SeekFrom::Start(cluster_addr))?; + let nwritten = self.raw_file.file_mut().write(&decompressed_cluster)?; + if nwritten != decompressed_cluster.len() { + return Err(std::io::Error::from_raw_os_error(EIO)); } - a => a, + cluster_addr + } else if l2_entry_is_empty(l2_entry) { + let initial_data = if let Some(backing) = self.backing_file.as_mut() { + let cluster_size = self.raw_file.cluster_size(); + let cluster_begin = address - (address % cluster_size); + let mut cluster_data = vec![0u8; cluster_size as usize]; + backing.seek(SeekFrom::Start(cluster_begin))?; + backing.read_exact(&mut cluster_data)?; + Some(cluster_data) + } else { + None + }; + // Need to allocate a data cluster + let cluster_addr = self.append_data_cluster(initial_data)?; + self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?; + cluster_addr + } else { + l2_entry_std_cluster_addr(l2_entry) }; for (addr, count) in set_refcounts { @@ -1157,7 +1266,7 @@ impl QcowFile { self.l1_table[l1_index] = new_addr; } // 'unwrap' is OK because it was just added. - self.l2_cache.get_mut(l1_index).unwrap()[l2_index] = cluster_addr; + self.l2_cache.get_mut(l1_index).unwrap()[l2_index] = l2_entry_make_std(cluster_addr); Ok(()) } @@ -1334,10 +1443,9 @@ impl QcowFile { // Partial cluster - zero out the relevant bytes if it was allocated. // Any space in unallocated clusters can be left alone, since // unallocated clusters already read back as zeroes. - if let Some(offset) = self.file_offset_read(curr_addr)? { - // Partial cluster - zero it out. - self.raw_file.file_mut().write_zeroes_at(offset, count)?; - } + let offset = self.file_offset_write(curr_addr)?; + // Partial cluster - zero it out. + self.raw_file.file_mut().write_zeroes_at(offset, count)?; } nwritten += count; @@ -1348,14 +1456,8 @@ impl QcowFile { // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any // cluster is compressed. fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result> { - let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?; - if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) { - return Err(std::io::Error::from_raw_os_error(ENOTSUP)); - } - Ok(file_values - .iter() - .map(|entry| *entry & L2_TABLE_OFFSET_MASK) - .collect()) + let l2_table = raw_file.read_pointer_cluster(cluster_addr, None)?; + Ok(l2_table) } // Put an L2 cluster to the cache with evicting less-used cluster @@ -1383,11 +1485,7 @@ impl QcowFile { let l1_table = &self.l1_table; let raw_file = &mut self.raw_file; self.l2_cache.insert(l1_index, l2_table, |index, evicted| { - raw_file.write_pointer_table( - l1_table[index], - evicted.get_values(), - CLUSTER_USED_FLAG, - ) + raw_file.write_pointer_table(l1_table[index], evicted.get_values(), 0) })?; } Ok(new_cluster) @@ -1456,11 +1554,8 @@ impl QcowFile { // The index must be valid from when we inserted it. let addr = self.l1_table[*l1_index]; if addr != 0 { - self.raw_file.write_pointer_table( - addr, - l2_table.get_values(), - CLUSTER_USED_FLAG, - )?; + self.raw_file + .write_pointer_table(addr, l2_table.get_values(), 0)?; } else { return Err(std::io::Error::from_raw_os_error(EINVAL)); } @@ -1512,14 +1607,10 @@ impl Read for QcowFile { let mut nread: usize = 0; while nread < read_count { let curr_addr = address + nread as u64; - let file_offset = self.file_offset_read(curr_addr)?; let count = self.limit_range_cluster(curr_addr, read_count - nread); - if let Some(offset) = file_offset { - self.raw_file.file_mut().seek(SeekFrom::Start(offset))?; - self.raw_file - .file_mut() - .read_exact(&mut buf[nread..(nread + count)])?; + if (self.file_read(curr_addr, count, &mut buf[nread..(nread + count)])?).is_some() { + // Data is successfully read from the cluster } else if let Some(backing) = self.backing_file.as_mut() { backing.seek(SeekFrom::Start(curr_addr))?; backing.read_exact(&mut buf[nread..(nread + count)])?;