From e472d33b1252dcdf3958ce05130ce3adf7fd3078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Mon, 22 Jul 2024 18:51:36 +0300 Subject: [PATCH] Add `pread` and `pwrite` shims --- src/shims/unix/fd.rs | 93 ++++++++++++++++++++++++++------- src/shims/unix/foreign_items.rs | 44 +++++++++++++++- src/shims/unix/fs.rs | 48 +++++++++++++++++ tests/pass/shims/fs.rs | 41 +++++++++++++++ 4 files changed, 205 insertions(+), 21 deletions(-) diff --git a/src/shims/unix/fd.rs b/src/shims/unix/fd.rs index 8fb046b5e6..de6d27f529 100644 --- a/src/shims/unix/fd.rs +++ b/src/shims/unix/fd.rs @@ -36,6 +36,30 @@ pub trait FileDescription: std::fmt::Debug + Any { throw_unsup_format!("cannot write to {}", self.name()); } + /// Reads as much as possible into the given buffer from a given offset, + /// and returns the number of bytes read. + fn pread<'tcx>( + &mut self, + _communicate_allowed: bool, + _bytes: &mut [u8], + _offset: u64, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result> { + throw_unsup_format!("cannot pread from {}", self.name()); + } + + /// Writes as much as possible from the given buffer starting at a given offset, + /// and returns the number of bytes written. + fn pwrite<'tcx>( + &mut self, + _communicate_allowed: bool, + _bytes: &[u8], + _offset: u64, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result> { + throw_unsup_format!("cannot pwrite to {}", self.name()); + } + /// Seeks to the given offset (which can be relative to the beginning, end, or current position). /// Returns the new position from the start of the stream. fn seek<'tcx>( @@ -380,7 +404,18 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { Ok((-1).into()) } - fn read(&mut self, fd: i32, buf: Pointer, count: u64) -> InterpResult<'tcx, i64> { + /// Read data from `fd` into buffer specified by `buf` and `count`. + /// + /// If `offset` is `None`, reads data from current cursor position associated with `fd` + /// and updates cursor position on completion. Otherwise, reads from the specified offset + /// and keeps the cursor unchanged. + fn read( + &mut self, + fd: i32, + buf: Pointer, + count: u64, + offset: Option, + ) -> InterpResult<'tcx, i64> { let this = self.eval_context_mut(); // Isolation check is done via `FileDescriptor` trait. @@ -398,25 +433,31 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let communicate = this.machine.communicate(); // We temporarily dup the FD to be able to retain mutable access to `this`. - let Some(file_descriptor) = this.machine.fds.dup(fd) else { + let Some(fd) = this.machine.fds.dup(fd) else { trace!("read: FD not found"); return this.fd_not_found(); }; - trace!("read: FD mapped to {:?}", file_descriptor); + trace!("read: FD mapped to {fd:?}"); // We want to read at most `count` bytes. We are sure that `count` is not negative // because it was a target's `usize`. Also we are sure that its smaller than // `usize::MAX` because it is bounded by the host's `isize`. let mut bytes = vec![0; usize::try_from(count).unwrap()]; - // `File::read` never returns a value larger than `count`, - // so this cannot fail. - let result = file_descriptor - .borrow_mut() - .read(communicate, &mut bytes, this)? - .map(|c| i64::try_from(c).unwrap()); - drop(file_descriptor); - - match result { + let result = match offset { + None => fd.borrow_mut().read(communicate, &mut bytes, this), + Some(offset) => { + let Ok(offset) = u64::try_from(offset) else { + let einval = this.eval_libc("EINVAL"); + this.set_last_error(einval)?; + return Ok(-1); + }; + fd.borrow_mut().pread(communicate, &mut bytes, offset, this) + } + }; + drop(fd); + + // `File::read` never returns a value larger than `count`, so this cannot fail. + match result?.map(|c| i64::try_from(c).unwrap()) { Ok(read_bytes) => { // If reading to `bytes` did not fail, we write those bytes to the buffer. // Crucially, if fewer than `bytes.len()` bytes were read, only write @@ -434,7 +475,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } } - fn write(&mut self, fd: i32, buf: Pointer, count: u64) -> InterpResult<'tcx, i64> { + fn write( + &mut self, + fd: i32, + buf: Pointer, + count: u64, + offset: Option, + ) -> InterpResult<'tcx, i64> { let this = self.eval_context_mut(); // Isolation check is done via `FileDescriptor` trait. @@ -451,16 +498,24 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let bytes = this.read_bytes_ptr_strip_provenance(buf, Size::from_bytes(count))?.to_owned(); // We temporarily dup the FD to be able to retain mutable access to `this`. - let Some(file_descriptor) = this.machine.fds.dup(fd) else { + let Some(fd) = this.machine.fds.dup(fd) else { return this.fd_not_found(); }; - let result = file_descriptor - .borrow_mut() - .write(communicate, &bytes, this)? - .map(|c| i64::try_from(c).unwrap()); - drop(file_descriptor); + let result = match offset { + None => fd.borrow_mut().write(communicate, &bytes, this), + Some(offset) => { + let Ok(offset) = u64::try_from(offset) else { + let einval = this.eval_libc("EINVAL"); + this.set_last_error(einval)?; + return Ok(-1); + }; + fd.borrow_mut().pwrite(communicate, &bytes, offset, this) + } + }; + drop(fd); + let result = result?.map(|c| i64::try_from(c).unwrap()); this.try_unwrap_io_result(result) } } diff --git a/src/shims/unix/foreign_items.rs b/src/shims/unix/foreign_items.rs index 3a18d62203..966e590fcc 100644 --- a/src/shims/unix/foreign_items.rs +++ b/src/shims/unix/foreign_items.rs @@ -92,7 +92,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let fd = this.read_scalar(fd)?.to_i32()?; let buf = this.read_pointer(buf)?; let count = this.read_target_usize(count)?; - let result = this.read(fd, buf, count)?; + let result = this.read(fd, buf, count, None)?; this.write_scalar(Scalar::from_target_isize(result, this), dest)?; } "write" => { @@ -101,7 +101,47 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let buf = this.read_pointer(buf)?; let count = this.read_target_usize(n)?; trace!("Called write({:?}, {:?}, {:?})", fd, buf, count); - let result = this.write(fd, buf, count)?; + let result = this.write(fd, buf, count, None)?; + // Now, `result` is the value we return back to the program. + this.write_scalar(Scalar::from_target_isize(result, this), dest)?; + } + "pread" => { + let [fd, buf, count, offset] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let fd = this.read_scalar(fd)?.to_i32()?; + let buf = this.read_pointer(buf)?; + let count = this.read_target_usize(count)?; + let offset = this.read_scalar(offset)?.to_int(this.libc_ty_layout("off_t").size)?; + let result = this.read(fd, buf, count, Some(offset))?; + this.write_scalar(Scalar::from_target_isize(result, this), dest)?; + } + "pwrite" => { + let [fd, buf, n, offset] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let fd = this.read_scalar(fd)?.to_i32()?; + let buf = this.read_pointer(buf)?; + let count = this.read_target_usize(n)?; + let offset = this.read_scalar(offset)?.to_int(this.libc_ty_layout("off_t").size)?; + trace!("Called pwrite({:?}, {:?}, {:?}, {:?})", fd, buf, count, offset); + let result = this.write(fd, buf, count, Some(offset))?; + // Now, `result` is the value we return back to the program. + this.write_scalar(Scalar::from_target_isize(result, this), dest)?; + } + "pread64" => { + let [fd, buf, count, offset] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let fd = this.read_scalar(fd)?.to_i32()?; + let buf = this.read_pointer(buf)?; + let count = this.read_target_usize(count)?; + let offset = this.read_scalar(offset)?.to_int(this.libc_ty_layout("off64_t").size)?; + let result = this.read(fd, buf, count, Some(offset))?; + this.write_scalar(Scalar::from_target_isize(result, this), dest)?; + } + "pwrite64" => { + let [fd, buf, n, offset] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let fd = this.read_scalar(fd)?.to_i32()?; + let buf = this.read_pointer(buf)?; + let count = this.read_target_usize(n)?; + let offset = this.read_scalar(offset)?.to_int(this.libc_ty_layout("off64_t").size)?; + trace!("Called pwrite64({:?}, {:?}, {:?}, {:?})", fd, buf, count, offset); + let result = this.write(fd, buf, count, Some(offset))?; // Now, `result` is the value we return back to the program. this.write_scalar(Scalar::from_target_isize(result, this), dest)?; } diff --git a/src/shims/unix/fs.rs b/src/shims/unix/fs.rs index e34aa5c09d..80268ced48 100644 --- a/src/shims/unix/fs.rs +++ b/src/shims/unix/fs.rs @@ -49,6 +49,54 @@ impl FileDescription for FileHandle { Ok(self.file.write(bytes)) } + fn pread<'tcx>( + &mut self, + communicate_allowed: bool, + bytes: &mut [u8], + offset: u64, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result> { + assert!(communicate_allowed, "isolation should have prevented even opening a file"); + // Emulates pread using seek + read + seek to restore cursor position. + // Correctness of this emulation relies on sequential nature of Miri execution. + // The closure is used to emulate `try` block, since we "bubble" `io::Error` using `?`. + let mut f = || { + let cursor_pos = self.file.stream_position()?; + self.file.seek(SeekFrom::Start(offset))?; + let res = self.file.read(bytes); + // Attempt to restore cursor position even if the read has failed + self.file + .seek(SeekFrom::Start(cursor_pos)) + .expect("failed to restore file position, this shouldn't be possible"); + res + }; + Ok(f()) + } + + fn pwrite<'tcx>( + &mut self, + communicate_allowed: bool, + bytes: &[u8], + offset: u64, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result> { + assert!(communicate_allowed, "isolation should have prevented even opening a file"); + // Emulates pwrite using seek + write + seek to restore cursor position. + // Correctness of this emulation relies on sequential nature of Miri execution. + // The closure is used to emulate `try` block, since we "bubble" `io::Error` using `?`. + let mut f = || { + let cursor_pos = self.file.stream_position()?; + self.file.seek(SeekFrom::Start(offset))?; + let res = self.file.write(bytes); + // Attempt to restore cursor position even if the write has failed + self.file + .seek(SeekFrom::Start(cursor_pos)) + .expect("failed to restore file position, this shouldn't be possible"); + res + }; + Ok(f()) + } + fn seek<'tcx>( &mut self, communicate_allowed: bool, diff --git a/tests/pass/shims/fs.rs b/tests/pass/shims/fs.rs index 16d3e8cab3..70e375b098 100644 --- a/tests/pass/shims/fs.rs +++ b/tests/pass/shims/fs.rs @@ -30,6 +30,8 @@ fn main() { test_directory(); test_canonicalize(); test_from_raw_os_error(); + #[cfg(unix)] + test_pread_pwrite(); } fn test_path_conversion() { @@ -303,3 +305,42 @@ fn test_from_raw_os_error() { // Make sure we can also format this. let _ = format!("{error:?}"); } + +#[cfg(unix)] +fn test_pread_pwrite() { + use std::os::unix::fs::FileExt; + + let bytes = b"hello world"; + let path = utils::prepare_with_content("miri_test_fs_pread_pwrite.txt", bytes); + let mut f = OpenOptions::new().read(true).write(true).open(path).unwrap(); + + let mut buf1 = [0u8; 3]; + f.seek(SeekFrom::Start(5)).unwrap(); + + // Check that we get expected result after seek + f.read_exact(&mut buf1).unwrap(); + assert_eq!(&buf1, b" wo"); + f.seek(SeekFrom::Start(5)).unwrap(); + + // Check pread + f.read_exact_at(&mut buf1, 2).unwrap(); + assert_eq!(&buf1, b"llo"); + f.read_exact_at(&mut buf1, 6).unwrap(); + assert_eq!(&buf1, b"wor"); + + // Ensure that cursor position is not changed + f.read_exact(&mut buf1).unwrap(); + assert_eq!(&buf1, b" wo"); + f.seek(SeekFrom::Start(5)).unwrap(); + + // Check pwrite + f.write_all_at(b" mo", 6).unwrap(); + + let mut buf2 = [0u8; 11]; + f.read_exact_at(&mut buf2, 0).unwrap(); + assert_eq!(&buf2, b"hello mold"); + + // Ensure that cursor position is not changed + f.read_exact(&mut buf1).unwrap(); + assert_eq!(&buf1, b" m"); +}