From 4dc4a9b61ffece7b0524c980d8b207f0b658067c Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 18 Sep 2024 11:03:33 +0100 Subject: [PATCH 1/3] feat(virtio): split `add_used()` into 2 separate methods `Queue::add_used()` method first writes a Descriptor head in the used descriptor ring buffer and then advances the index of this buffer to let the guest know we used one or more Descriptors. Carve out each one of these steps in their own function so that we can add multiple descriptors in the used ring and advance the index only once we finish handling descriptors in one step. This will be useful when, in later commits, we will implement VIRTIO_NET_F_MRG_RXBUF for the RX queue of the network device. Signed-off-by: Egor Lazarchuk --- src/vmm/src/devices/virtio/queue.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/vmm/src/devices/virtio/queue.rs b/src/vmm/src/devices/virtio/queue.rs index c183e8b2c00..b80b2571c12 100644 --- a/src/vmm/src/devices/virtio/queue.rs +++ b/src/vmm/src/devices/virtio/queue.rs @@ -574,8 +574,16 @@ impl Queue { self.next_avail -= Wrapping(1); } - /// Puts an available descriptor head into the used ring for use by the guest. - pub fn add_used(&mut self, desc_index: u16, len: u32) -> Result<(), QueueError> { + /// Write used element into the used ring. + /// - [`ring_index_offset`] is an offset added to + /// the current [`self.next_used`] to obtain actual index + /// into used_ring. 
+ pub fn write_used_element( + &mut self, + ring_index_offset: u16, + desc_index: u16, + len: u32, + ) -> Result<(), QueueError> { if self.actual_size() <= desc_index { error!( "attempted to add out of bounds descriptor to used ring: {}", @@ -584,7 +592,7 @@ impl Queue { return Err(QueueError::DescIndexOutOfBounds(desc_index)); } - let next_used = self.next_used.0 % self.actual_size(); + let next_used = (self.next_used + Wrapping(ring_index_offset)).0 % self.actual_size(); let used_element = UsedElement { id: u32::from(desc_index), len, @@ -594,14 +602,24 @@ impl Queue { unsafe { self.used_ring_ring_set(usize::from(next_used), used_element); } + Ok(()) + } - self.num_added += Wrapping(1); - self.next_used += Wrapping(1); + /// Advance queue and used ring by `n` elements. + pub fn advance_used_ring(&mut self, n: u16) { + self.num_added += Wrapping(n); + self.next_used += Wrapping(n); // This fence ensures all descriptor writes are visible before the index update is. fence(Ordering::Release); self.used_ring_idx_set(self.next_used.0); + } + + /// Puts an available descriptor head into the used ring for use by the guest. + pub fn add_used(&mut self, desc_index: u16, len: u32) -> Result<(), QueueError> { + self.write_used_element(0, desc_index, len)?; + self.advance_used_ring(1); Ok(()) } From afc647efdceb1589f3874e147940d5d1660e600c Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 2 Oct 2024 16:12:59 +0000 Subject: [PATCH 2/3] feat(net): add VIRTIO_NET_F_MRG_RXBUF support to virtio-net device Implement VIRTIO_NET_F_MRG_RXBUF feature for VirtIO network device. It allows receiving a single network frame into multiple descriptor chains. The amount of descriptor chains (also known as heads) that were used is written into the `virtio_net_hdr_v1` structure which is located at the beginning of the guest buffer. 
Signed-off-by: Egor Lazarchuk --- src/vmm/src/devices/virtio/net/device.rs | 425 ++++++++++++++----- src/vmm/src/devices/virtio/net/persist.rs | 14 +- src/vmm/src/devices/virtio/net/test_utils.rs | 26 +- 3 files changed, 344 insertions(+), 121 deletions(-) diff --git a/src/vmm/src/devices/virtio/net/device.rs b/src/vmm/src/devices/virtio/net/device.rs index 8e71cefee29..deb6976b2af 100755 --- a/src/vmm/src/devices/virtio/net/device.rs +++ b/src/vmm/src/devices/virtio/net/device.rs @@ -6,11 +6,11 @@ // found in the THIRD-PARTY file. use std::collections::VecDeque; -use std::mem; +use std::mem::{self}; use std::net::Ipv4Addr; use std::sync::{Arc, Mutex}; -use libc::EAGAIN; +use libc::{iovec, EAGAIN}; use log::error; use vmm_sys_util::eventfd::EventFd; @@ -19,7 +19,7 @@ use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1; use crate::devices::virtio::gen::virtio_net::{ virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, - VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, + VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, }; use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::iovec::{ @@ -108,7 +108,8 @@ pub struct RxBuffers { // A map of which part of the memory belongs to which `DescriptorChain` object pub parsed_descriptors: VecDeque<ParsedDescriptorChain>, // Buffers that we have used and they are ready to be given back to the guest. - pub deferred_descriptor: Option<ParsedDescriptorChain>, + pub used_descriptors: u16, + pub used_bytes: u32, } impl RxBuffers { @@ -118,7 +119,8 @@ impl RxBuffers { min_buffer_size: 0, iovec: IoVecBufferMut::new()?, parsed_descriptors: VecDeque::with_capacity(FIRECRACKER_MAX_QUEUE_SIZE.into()), - deferred_descriptor: None, + used_descriptors: 0, + used_bytes: 0, }) } @@ -141,10 +143,10 @@ impl RxBuffers { Ok(()) } - /// Returns `true` if there buffer is emply. 
+ /// Returns the total size of available space in the buffer. #[inline(always)] - fn is_empty(&self) -> bool { - self.iovec.len() == 0 + fn capacity(&self) -> u32 { + self.iovec.len() } /// Mark the first `size` bytes of available memory as used. @@ -154,18 +156,36 @@ impl RxBuffers { /// * The `RxBuffers` should include at least one parsed `DescriptorChain`. /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored /// in the `RxBuffers`. - unsafe fn mark_used(&mut self, size: u32) -> ParsedDescriptorChain { - // Since we were able to write a frame in guest memory, we should have at least one - // descriptor chain here. If not, we have a bug, so fail fast, since the device is - // fundamentally broken. - let mut parsed_dc = self.parsed_descriptors.pop_front().expect( - "net: internal bug. Mismatch between written frame size and available descriptors", - ); + unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) { + self.used_bytes = bytes_written; + + let mut used_heads: u16 = 0; + for parsed_dc in self.parsed_descriptors.iter() { + let used_bytes = bytes_written.min(parsed_dc.length); + // Safe because we know head_index isn't out of bounds + rx_queue + .write_used_element(self.used_descriptors, parsed_dc.head_index, used_bytes) + .unwrap(); + bytes_written -= used_bytes; + self.used_descriptors += 1; + used_heads += 1; + + if bytes_written == 0 { + break; + } + } - self.header_set_num_buffers(1); - self.iovec.drop_chain_front(&parsed_dc); - parsed_dc.length = size; - parsed_dc + // We need to set num_buffers before dropping chains from `self.iovec`. Otherwise + // when we set headers, we will iterate over new, yet unused chains instead of the ones + // we need. 
+ self.header_set_num_buffers(used_heads); + for _ in 0..used_heads { + let parsed_dc = self + .parsed_descriptors + .pop_front() + .expect("This should never happen if write to the buffer succeeded."); + self.iovec.drop_chain_front(&parsed_dc); + } } /// Write the number of descriptors used in VirtIO header @@ -184,14 +204,22 @@ impl RxBuffers { /// This will let the guest know that about all the `DescriptorChain` object that has been /// used to receive a frame from the TAP. - fn finish_frame(&mut self, dc: &ParsedDescriptorChain, rx_queue: &mut Queue) { - // It is fine to `.unrap()` here. The only reason why `add_used` can fail is if the - // `head_index` is not a valid descriptor id. `head_index` here is a valid - // `DescriptorChain` index. We got it from `queue.pop_or_enable_notification()` which - // checks for its validity. In other words, if this unwrap() fails there's a bug in our - // emulation logic which, most likely, we can't recover from. So, let's crash here - // instead of logging an error and continuing. - rx_queue.add_used(dc.head_index, dc.length).unwrap(); + fn finish_frame(&mut self, rx_queue: &mut Queue) { + rx_queue.advance_used_ring(self.used_descriptors); + self.used_descriptors = 0; + self.used_bytes = 0; + } + + /// Return a slice of iovecs for the first slice in the buffer. + /// Panics if there are no parsed descriptors. + fn single_chain_slice_mut(&mut self) -> &mut [iovec] { + let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize; + &mut self.iovec.as_iovec_mut_slice()[..nr_iovecs] + } + + /// Return a slice of iovecs for all descriptor chains in the buffer. 
+ fn all_chains_slice_mut(&mut self) -> &mut [iovec] { + self.iovec.as_iovec_mut_slice() } } @@ -254,6 +282,7 @@ impl Net { | 1 << VIRTIO_NET_F_HOST_TSO6 | 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_NET_F_MRG_RXBUF | 1 << VIRTIO_RING_F_EVENT_IDX; let mut config_space = ConfigSpace::default(); @@ -401,27 +430,31 @@ impl Net { // Attempts to copy a single frame into the guest if there is enough // rate limiting budget. // Returns true on successful frame delivery. - fn rate_limited_rx_single_frame(&mut self, dc: &ParsedDescriptorChain) -> bool { + pub fn rate_limited_rx_single_frame(&mut self, frame_size: u32) -> bool { let rx_queue = &mut self.queues[RX_INDEX]; - if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, dc.length as u64) { + if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, frame_size as u64) { self.metrics.rx_rate_limiter_throttled.inc(); return false; } - self.rx_buffer.finish_frame(dc, rx_queue); + self.rx_buffer.finish_frame(rx_queue); true } /// Returns the minimum size of buffer we expect the guest to provide us depending on the /// features we have negotiated with it fn minimum_rx_buffer_size(&self) -> u32 { - if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64) - || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64) - || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64) - { - 65562 + if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) { + if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64) + || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64) + || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64) + { + MAX_BUFFER_SIZE.try_into().unwrap() + } else { + 1526 + } } else { - 1526 + vnet_hdr_len().try_into().unwrap() } } @@ -445,13 +478,15 @@ impl Net { } error!("net: Could not parse an RX descriptor: {err}"); - // Try to add the bad descriptor to the used ring. 
- if let Err(err) = queue.add_used(index, 0) { - error!( - "net: Failed to add available RX descriptor {index} while handling a \ - parsing error: {err}", - ); - } + + // Add this broken chain to the used_ring. It will be + // reported to the guest on the next `rx_buffer.finish_frame` call. + // SAFETY: + // index is verified on `DescriptorChain` creation. + queue + .write_used_element(self.rx_buffer.used_descriptors, index, 0) + .unwrap(); + self.rx_buffer.used_descriptors += 1; } } } @@ -531,16 +566,18 @@ impl Net { } // We currently prioritize packets from the MMDS over regular network packets. - fn read_from_mmds_or_tap(&mut self) -> Result<Option<ParsedDescriptorChain>, NetError> { - // If we don't have any buffers available try to parse more from the RX queue. There might - // be some buffers we didn't get the chance to process, because we got to handle the TAP - // event before the RX queue event. - if self.rx_buffer.is_empty() { + fn read_from_mmds_or_tap(&mut self) -> Result<Option<u32>, NetError> { + // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as + // this is the max size of 1 packet. + // SAFETY: + // * MAX_BUFFER_SIZE is constant and fits into u32 + #[allow(clippy::cast_possible_truncation)] + if self.rx_buffer.capacity() < MAX_BUFFER_SIZE as u32 { self.parse_rx_descriptors(); - // If after parsing the RX queue we still don't have any buffers stop processing RX + // If after parsing the RX queue we still don't have enough capacity, stop processing RX // frames. - if self.rx_buffer.is_empty() { + if self.rx_buffer.capacity() < MAX_BUFFER_SIZE as u32 { return Ok(None); } } @@ -556,29 +593,37 @@ impl Net { self.rx_buffer .iovec .write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?; - // SAFETY: This is safe: + // SAFETY: + // * len will never be bigger than u32::MAX because mmds is bound + // by the size of `self.rx_frame_buf` which is MAX_BUFFER_SIZE size. 
+ let len: u32 = (vnet_hdr_len() + len).try_into().unwrap(); + + // SAFETY: // * We checked that `rx_buffer` includes at least one `DescriptorChain` // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects // are at least that big. - let dc = unsafe { - self.rx_buffer - .mark_used((vnet_hdr_len() + len).try_into().unwrap()) - }; - - return Ok(Some(dc)); + unsafe { + self.rx_buffer.mark_used(len, &mut self.queues[RX_INDEX]); + } + return Ok(Some(len)); } } - // SAFETY: this is safe because we ensured that `self.rx_buffer` has at least one - // DescriptorChain parsed in it. + // SAFETY: + // * We ensured that `self.rx_buffer` has at least one DescriptorChain parsed in it. let len = unsafe { self.read_tap().map_err(NetError::IO) }?; + // SAFETY: + // * len will never be bigger that u32::MAX + let len: u32 = len.try_into().unwrap(); - // SAFETY: This is safe, + // SAFETY: // * `rx_buffer` has at least one `DescriptorChain` // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more // bytes than its capacity. - let dc = unsafe { self.rx_buffer.mark_used(len.try_into().unwrap()) }; - Ok(Some(dc)) + unsafe { + self.rx_buffer.mark_used(len, &mut self.queues[RX_INDEX]); + } + Ok(Some(len)) } /// Read as many frames as possible. 
@@ -589,12 +634,11 @@ impl Net { self.metrics.no_rx_avail_buffer.inc(); break; } - Ok(Some(dc)) => { + Ok(Some(bytes)) => { self.metrics.rx_count.inc(); - self.metrics.rx_bytes_count.add(dc.length as u64); + self.metrics.rx_bytes_count.add(bytes as u64); self.metrics.rx_packets_count.inc(); - if !self.rate_limited_rx_single_frame(&dc) { - self.rx_buffer.deferred_descriptor = Some(dc); + if !self.rate_limited_rx_single_frame(bytes) { break; } } @@ -622,11 +666,10 @@ impl Net { fn resume_rx(&mut self) -> Result<(), DeviceError> { // First try to handle any deferred frame - if let Some(deferred_descriptor) = self.rx_buffer.deferred_descriptor.take() { + if self.rx_buffer.used_bytes != 0 { // If can't finish sending this frame, re-set it as deferred and return; we can't // process any more frames from the TAP. - if !self.rate_limited_rx_single_frame(&deferred_descriptor) { - self.rx_buffer.deferred_descriptor = Some(deferred_descriptor); + if !self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes) { return Ok(()); } } @@ -692,7 +735,7 @@ impl Net { &self.metrics, ) .unwrap_or(false); - if frame_consumed_by_mmds && self.rx_buffer.deferred_descriptor.is_none() { + if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 { // MMDS consumed this frame/request, let's also try to process the response. 
process_rx_for_mmds = true; } @@ -777,9 +820,12 @@ impl Net { /// /// `self.rx_buffer` needs to have at least one descriptor chain parsed pub unsafe fn read_tap(&mut self) -> std::io::Result { - let nr_iovecs = self.rx_buffer.parsed_descriptors[0].nr_iovecs as usize; - self.tap - .read_iovec(&mut self.rx_buffer.iovec.as_iovec_mut_slice()[..nr_iovecs]) + let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) { + self.rx_buffer.all_chains_slice_mut() + } else { + self.rx_buffer.single_chain_slice_mut() + }; + self.tap.read_iovec(slice) } fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result { @@ -981,6 +1027,7 @@ impl VirtioDevice for Net { #[cfg(test)] #[macro_use] +#[allow(clippy::cast_possible_truncation)] pub mod tests { use std::net::Ipv4Addr; use std::os::fd::AsRawFd; @@ -988,6 +1035,8 @@ pub mod tests { use std::time::Duration; use std::{mem, thread}; + use vm_memory::GuestAddress; + use super::*; use crate::check_metric_after_block; use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX; @@ -1002,6 +1051,7 @@ pub mod tests { }; use crate::devices::virtio::net::NET_QUEUE_SIZES; use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE; + use crate::devices::virtio::test_utils::VirtQueue; use crate::dumbo::pdu::arp::{EthIPv4ArpFrame, ETH_IPV4_FRAME_LEN}; use crate::dumbo::pdu::ethernet::ETHERTYPE_ARP; use crate::dumbo::EthernetFrame; @@ -1011,6 +1061,12 @@ pub mod tests { use crate::utils::net::mac::{MacAddr, MAC_ADDR_LEN}; use crate::vstate::memory::{Address, GuestMemory}; + impl Net { + pub fn finish_frame(&mut self) { + self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]); + } + } + /// Write the number of descriptors used in VirtIO header fn header_set_num_buffers(frame: &mut [u8], nr_descs: u16) { let bytes = nr_descs.to_le_bytes(); @@ -1069,6 +1125,7 @@ pub mod tests { | 1 << VIRTIO_NET_F_HOST_TSO6 | 1 << VIRTIO_NET_F_HOST_UFO | 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_NET_F_MRG_RXBUF | 1 << VIRTIO_RING_F_EVENT_IDX; 
assert_eq!( @@ -1190,10 +1247,7 @@ pub mod tests { assert_eq!(th.rxq.used.idx.get(), 0); } - #[test] - fn test_rx_read_only_descriptor() { - let mem = single_region_mem(2 * MAX_BUFFER_SIZE); - let mut th = TestHelper::get_default(&mem); + fn rx_read_only_descriptor(mut th: TestHelper) { th.activate_net(); th.add_desc_chain( @@ -1217,12 +1271,25 @@ pub mod tests { } #[test] - fn test_rx_short_writable_descriptor() { + fn test_rx_read_only_descriptor() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_read_only_descriptor(th); + } + + #[test] + fn test_rx_read_only_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_read_only_descriptor(th); + } + + fn rx_short_descriptor(mut th: TestHelper) { th.activate_net(); - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 100, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain(NetQueue::Rx, 0, &[(0, 10, VIRTQ_DESC_F_WRITE)]); let mut frame = th.check_rx_discarded_buffer(1000); th.rxq.check_used_elem(0, 0, 0); @@ -1231,9 +1298,22 @@ pub mod tests { } #[test] - fn test_rx_partial_write() { + fn test_rx_short_descriptor() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_short_descriptor(th); + } + + #[test] + fn test_rx_short_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_short_descriptor(th); + } + + fn rx_invalid_descriptor(mut th: TestHelper) { th.activate_net(); // The descriptor chain is created so that the last descriptor doesn't fit in the @@ -1256,9 +1336,22 @@ pub mod tests { } #[test] - fn test_rx_retry() { + fn test_rx_invalid_descriptor() { + let mem = single_region_mem(2 * 
MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_invalid_descriptor(th); + } + + #[test] + fn test_rx_invalid_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_invalid_descriptor(th); + } + + fn rx_retry(mut th: TestHelper) { th.activate_net(); // Add invalid descriptor chain - read only descriptor. @@ -1272,7 +1365,7 @@ pub mod tests { ], ); // Add invalid descriptor chain - too short. - th.add_desc_chain(NetQueue::Rx, 1200, &[(3, 100, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain(NetQueue::Rx, 1200, &[(3, 10, VIRTQ_DESC_F_WRITE)]); // Add invalid descriptor chain - invalid memory offset. th.add_desc_chain( NetQueue::Rx, @@ -1282,7 +1375,11 @@ pub mod tests { // Add valid descriptor chain. TestHelper does not negotiate any feature offloading so the // buffers need to be at least 1526 bytes long. - th.add_desc_chain(NetQueue::Rx, 1300, &[(5, 1526, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain( + NetQueue::Rx, + 1300, + &[(5, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); // Inject frame to tap and run epoll. let mut frame = inject_tap_tx_frame(&th.net(), 1000); @@ -1300,7 +1397,7 @@ pub mod tests { th.rxq.check_used_elem(1, 3, 0); th.rxq.check_used_elem(2, 4, 0); // Check that the frame wasn't deferred. - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); + assert!(th.net().rx_buffer.used_descriptors == 0); // Check that the frame has been written successfully to the valid Rx descriptor chain. 
th.rxq .check_used_elem(3, 5, frame.len().try_into().unwrap()); @@ -1309,9 +1406,22 @@ pub mod tests { } #[test] - fn test_rx_complex_desc_chain() { + fn test_rx_retry() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_retry(th); + } + + #[test] + fn test_rx_retry_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_retry(th); + } + + fn rx_complex_desc_chain(mut th: TestHelper) { th.activate_net(); // Create a valid Rx avail descriptor chain with multiple descriptors. @@ -1323,7 +1433,7 @@ pub mod tests { &[ (3, 100, VIRTQ_DESC_F_WRITE), (5, 50, VIRTQ_DESC_F_WRITE), - (11, 4096, VIRTQ_DESC_F_WRITE), + (11, MAX_BUFFER_SIZE as u32 - 100 - 50, VIRTQ_DESC_F_WRITE), ], ); // Inject frame to tap and run epoll. @@ -1335,7 +1445,7 @@ pub mod tests { ); // Check that the frame wasn't deferred. - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); + assert!(th.net().rx_buffer.used_descriptors == 0); // Check that the used queue has advanced. assert_eq!(th.rxq.used.idx.get(), 1); assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); @@ -1349,9 +1459,22 @@ pub mod tests { } #[test] - fn test_rx_multiple_frames() { + fn test_rx_complex_desc_chain() { + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_complex_desc_chain(th); + } + + #[test] + fn test_rx_complex_desc_chain_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_complex_desc_chain(th); + } + + fn rx_multiple_frames(mut th: TestHelper) { th.activate_net(); // Create 2 valid Rx avail descriptor chains. 
Each one has enough space to fit the @@ -1362,16 +1485,17 @@ pub mod tests { &[ (0, 500, VIRTQ_DESC_F_WRITE), (1, 500, VIRTQ_DESC_F_WRITE), - (2, 526, VIRTQ_DESC_F_WRITE), + (2, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE), ], ); + // Second chain needs at least MAX_BUFFER_SIZE offset th.add_desc_chain( NetQueue::Rx, - 2000, + MAX_BUFFER_SIZE as u64 + 1000, &[ (3, 500, VIRTQ_DESC_F_WRITE), (4, 500, VIRTQ_DESC_F_WRITE), - (5, 526, VIRTQ_DESC_F_WRITE), + (5, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE), ], ); // Inject 2 frames to tap and run epoll. @@ -1384,7 +1508,7 @@ pub mod tests { ); // Check that the frames weren't deferred. - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); + assert!(th.net().rx_buffer.used_bytes == 0); // Check that the used queue has advanced. assert_eq!(th.rxq.used.idx.get(), 2); assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); @@ -1394,14 +1518,85 @@ pub mod tests { .check_used_elem(0, 0, frame_1.len().try_into().unwrap()); th.rxq.dtable[0].check_data(&frame_1); th.rxq.dtable[1].check_data(&[0; 500]); - th.rxq.dtable[2].check_data(&[0; 526]); + th.rxq.dtable[2].check_data(&[0; MAX_BUFFER_SIZE - 1000]); // Check that the 2nd frame was written successfully to the 2nd Rx descriptor chain. 
header_set_num_buffers(frame_2.as_mut_slice(), 1); th.rxq .check_used_elem(1, 3, frame_2.len().try_into().unwrap()); th.rxq.dtable[3].check_data(&frame_2); th.rxq.dtable[4].check_data(&[0; 500]); - th.rxq.dtable[2].check_data(&[0; 526]); + th.rxq.dtable[5].check_data(&[0; MAX_BUFFER_SIZE - 1000]); + } + + #[test] + fn test_rx_multiple_frames() { + let mem = single_region_mem(3 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_multiple_frames(th); + } + + #[test] + fn test_rx_multiple_frames_mrg() { + let mem = single_region_mem(3 * MAX_BUFFER_SIZE); + let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_multiple_frames(th); + } + + fn rx_mrg_rxbuf_only(mut th: TestHelper) { + th.activate_net(); + + // Create 2 valid Rx avail descriptor chains. The total size should + // be at least 64K to pass the capacity check for rx_buffers. + // First chain is intentionally small, so non VIRTIO_NET_F_MRG_RXBUF + // version will skip it. + th.add_desc_chain(NetQueue::Rx, 0, &[(0, 500, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain( + NetQueue::Rx, + 1000, + &[(1, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); + // Inject frame to tap and run epoll. + let mut frame = inject_tap_tx_frame(&th.net(), 1000); + check_metric_after_block!( + th.net().metrics.rx_packets_count, + 1, + th.event_manager.run_with_timeout(100).unwrap() + ); + + // Check that the frame wasn't deferred. + assert!(th.net().rx_buffer.used_bytes == 0); + // Check that the used queue has advanced. + assert_eq!(th.rxq.used.idx.get(), 2); + assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); + // 2 chains should be used for the packet. + header_set_num_buffers(frame.as_mut_slice(), 2); + + // Here non VIRTIO_NET_F_MRG_RXBUF version should panic as + // first descriptor will be discarded by it. 
+ th.rxq.check_used_elem(0, 0, 500); + + th.rxq.check_used_elem(1, 1, 500); + th.rxq.dtable[0].check_data(&frame[0..500]); + th.rxq.dtable[1].check_data(&frame[500..]); + } + + #[test] + #[should_panic] + fn test_rx_mrg_rxbuf_only() { + let mem = single_region_mem(3 * MAX_BUFFER_SIZE); + let th = TestHelper::get_default(&mem); + rx_mrg_rxbuf_only(th); + } + + #[test] + fn test_rx_mrg_rxbuf_only_mrg() { + let mem = single_region_mem(3 * MAX_BUFFER_SIZE); + let mut th = TestHelper::get_default(&mem); + // VIRTIO_NET_F_MRG_RXBUF is not enabled by default + th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; + rx_mrg_rxbuf_only(th); } #[test] @@ -1688,9 +1883,13 @@ pub mod tests { fn test_mmds_detour_and_injection() { let mut net = default_net(); + let mem = single_region_mem(2 * MAX_BUFFER_SIZE); + let rxq = VirtQueue::new(GuestAddress(0), &mem, 16); + net.queues[RX_INDEX] = rxq.create_queue(); + // Inject a fake buffer in the devices buffers, otherwise we won't be able to receive the // MMDS frame. One iovec will be just fine. - let mut fake_buffer = vec![0u8; 1024]; + let mut fake_buffer = vec![0u8; MAX_BUFFER_SIZE]; let iov_buffer = IoVecBufferMut::from(fake_buffer.as_mut_slice()); net.rx_buffer.iovec = iov_buffer; net.rx_buffer @@ -1818,11 +2017,8 @@ pub mod tests { unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) }; // The RX queue is empty and there is a deferred frame. - th.net().rx_buffer.deferred_descriptor = Some(ParsedDescriptorChain { - head_index: 1, - length: 100, - nr_iovecs: 1, - }); + th.net().rx_buffer.used_descriptors = 1; + th.net().rx_buffer.used_bytes = 100; check_metric_after_block!( th.net().metrics.no_rx_avail_buffer, 1, @@ -1832,9 +2028,14 @@ pub mod tests { // We need to set this here to false, otherwise the device will try to // handle a deferred frame, it will fail and will never try to read from // the tap. 
- th.net().rx_buffer.deferred_descriptor = None; + th.net().rx_buffer.used_descriptors = 0; + th.net().rx_buffer.used_bytes = 0; - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); + th.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); check_metric_after_block!( th.net().metrics.tap_read_fails, 1, @@ -1954,8 +2155,12 @@ pub mod tests { let mut rl = RateLimiter::new(1000, 0, 500, 0, 0, 0).unwrap(); // set up RX - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); + assert!(th.net().rx_buffer.used_descriptors == 0); + th.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); let mut frame = inject_tap_tx_frame(&th.net(), 1000); @@ -1974,7 +2179,7 @@ pub mod tests { // assert that limiter is blocked assert!(th.net().rx_rate_limiter.is_blocked()); assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 1); - assert!(th.net().rx_buffer.deferred_descriptor.is_some()); + assert!(th.net().rx_buffer.used_descriptors != 0); // assert that no operation actually completed (limiter blocked it) assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); // make sure the data is still queued for processing @@ -2073,8 +2278,12 @@ pub mod tests { let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 500).unwrap(); // set up RX - assert!(th.net().rx_buffer.deferred_descriptor.is_none()); - th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); + assert!(th.net().rx_buffer.used_descriptors == 0); + th.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); let mut frame = inject_tap_tx_frame(&th.net(), 1234); // use up the initial budget @@ -2095,7 +2304,7 @@ pub mod tests { // assert that limiter is blocked assert!(th.net().rx_rate_limiter.is_blocked()); assert!(th.net().metrics.rx_rate_limiter_throttled.count() >= 1); - 
assert!(th.net().rx_buffer.deferred_descriptor.is_some()); + assert!(th.net().rx_buffer.used_descriptors != 0); // assert that no operation actually completed (limiter blocked it) assert!(&th.net().irq_trigger.has_pending_irq(IrqType::Vring)); // make sure the data is still queued for processing diff --git a/src/vmm/src/devices/virtio/net/persist.rs b/src/vmm/src/devices/virtio/net/persist.rs index c7918f07ab3..51394af2d4e 100644 --- a/src/vmm/src/devices/virtio/net/persist.rs +++ b/src/vmm/src/devices/virtio/net/persist.rs @@ -12,7 +12,6 @@ use serde::{Deserialize, Serialize}; use super::device::{Net, RxBuffers}; use super::{TapError, NET_NUM_QUEUES, RX_INDEX}; use crate::devices::virtio::device::DeviceState; -use crate::devices::virtio::iovec::ParsedDescriptorChain; use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::devices::virtio::TYPE_NET; @@ -37,14 +36,18 @@ pub struct NetConfigSpaceState { pub struct RxBufferState { // Number of iovecs we have parsed from the guest parsed_descriptor_chains_nr: u16, - deferred_descriptor: Option, + // Number of used descriptors + used_descriptors: u16, + // Number of used bytes + used_bytes: u32, } impl RxBufferState { fn from_rx_buffers(rx_buffer: &RxBuffers) -> Self { RxBufferState { parsed_descriptor_chains_nr: rx_buffer.parsed_descriptors.len().try_into().unwrap(), - deferred_descriptor: rx_buffer.deferred_descriptor.clone(), + used_descriptors: rx_buffer.used_descriptors, + used_bytes: rx_buffer.used_bytes, } } } @@ -162,9 +165,8 @@ impl Persist<'_> for Net { // rolling back `next_avail` in the RX queue and call `parse_rx_descriptors`. 
net.queues[RX_INDEX].next_avail -= state.rx_buffers_state.parsed_descriptor_chains_nr; net.parse_rx_descriptors(); - net.rx_buffer - .deferred_descriptor - .clone_from(&state.rx_buffers_state.deferred_descriptor); + net.rx_buffer.used_descriptors = state.rx_buffers_state.used_descriptors; + net.rx_buffer.used_bytes = state.rx_buffers_state.used_bytes; } Ok(net) diff --git a/src/vmm/src/devices/virtio/net/test_utils.rs b/src/vmm/src/devices/virtio/net/test_utils.rs index eb1c6f6e883..ffe7bbc7279 100644 --- a/src/vmm/src/devices/virtio/net/test_utils.rs +++ b/src/vmm/src/devices/virtio/net/test_utils.rs @@ -295,8 +295,9 @@ pub fn assign_queues(net: &mut Net, rxq: Queue, txq: Queue) { } #[cfg(test)] +#[allow(clippy::cast_possible_truncation)] +#[allow(clippy::undocumented_unsafe_blocks)] pub mod test { - #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::ffi::OsStrExt; use std::sync::{Arc, Mutex, MutexGuard}; use std::{cmp, fmt}; @@ -310,7 +311,7 @@ pub mod test { use crate::devices::virtio::net::test_utils::{ assign_queues, default_net, inject_tap_tx_frame, NetEvent, NetQueue, }; - use crate::devices::virtio::net::{Net, RX_INDEX, TX_INDEX}; + use crate::devices::virtio::net::{Net, MAX_BUFFER_SIZE, RX_INDEX, TX_INDEX}; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, VirtqDesc}; use crate::logger::IncMetric; @@ -433,7 +434,7 @@ pub mod test { /// Generate a tap frame of `frame_len` and check that it is not read and /// the descriptor chain has been discarded pub fn check_rx_discarded_buffer(&mut self, frame_len: usize) -> Vec { - let used_idx = self.rxq.used.idx.get(); + let old_used_descriptors = self.net().rx_buffer.used_descriptors; // Inject frame to tap and run epoll. let frame = inject_tap_tx_frame(&self.net(), frame_len); @@ -443,7 +444,11 @@ pub mod test { self.event_manager.run_with_timeout(100).unwrap() ); // Check that the descriptor chain has been discarded. 
- assert_eq!(self.rxq.used.idx.get(), used_idx + 1); + assert_eq!( + self.net().rx_buffer.used_descriptors, + old_used_descriptors + 1 + ); + assert!(&self.net().irq_trigger.has_pending_irq(IrqType::Vring)); frame @@ -452,10 +457,17 @@ pub mod test { /// Check that after adding a valid Rx queue descriptor chain a previously deferred frame /// is eventually received by the guest pub fn check_rx_queue_resume(&mut self, expected_frame: &[u8]) { + // Need to call this to flush all previous frames + // and advance RX queue. + self.net().finish_frame(); + let used_idx = self.rxq.used.idx.get(); - // Add a valid Rx avail descriptor chain and run epoll. We do not negotiate any feature - // offloading so the buffers need to be at least 1526 bytes long. - self.add_desc_chain(NetQueue::Rx, 0, &[(0, 1526, VIRTQ_DESC_F_WRITE)]); + // Add a valid Rx avail descriptor chain and run epoll. + self.add_desc_chain( + NetQueue::Rx, + 0, + &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], + ); check_metric_after_block!( self.net().metrics.rx_packets_count, 1, From e4fd7533c6f01c34c3efd26ee0c16430eba9bdb1 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Mon, 14 Oct 2024 15:20:09 +0100 Subject: [PATCH 3/3] chore: update CHANGELOG with network improvements Add a note about new `VIRTIO_NET_F_MRG_RXBUF` feature in the virtio-net device. Signed-off-by: Egor Lazarchuk --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bb1e1ae429..f7f50678de4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to ### Added +- [#4834](https://github.com/firecracker-microvm/firecracker/pull/4834): Add + `VIRTIO_NET_F_MRG_RXBUF` support to the `virtio-net` device. When this feature + is negotiated, guest `virtio-net` driver can perform more efficient memory + management which in turn improves RX and TX performance. + ### Changed ### Deprecated