Skip to content

Commit

Permalink
avoid excessive initialization when copying to a Vec
Browse files Browse the repository at this point in the history
The Vec<u8> copy specialization now keeps track of the spare-capacity bytes
it has already initialized, so they are not reinitialized on every iteration.
It also keeps track of read sizes to avoid initializing more bytes
than the reader needs. This is important when passing a huge vector to a
Read implementation that only has a few bytes to offer and doesn't implement read_buf().
  • Loading branch information
the8472 committed Nov 4, 2023
1 parent 9c20ddd commit 8d8f06b
Showing 1 changed file with 47 additions and 17 deletions.
64 changes: 47 additions & 17 deletions library/std/src/io/copy.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::{BorrowedBuf, BufReader, BufWriter, Read, Result, Write, DEFAULT_BUF_SIZE};
use crate::alloc::Allocator;
use crate::cmp;
use crate::cmp::min;
use crate::collections::VecDeque;
use crate::io::IoSlice;
use crate::mem::MaybeUninit;
Expand Down Expand Up @@ -271,28 +272,57 @@ impl<A: Allocator> BufferedWriterSpec for Vec<u8, A> {
}
}

// don't immediately offer the vec's whole spare capacity, otherwise
// we might have to fully initialize it if the reader doesn't have a custom read_buf() impl
let mut max_read_size = DEFAULT_BUF_SIZE;

loop {
self.reserve(DEFAULT_BUF_SIZE);
let mut buf: BorrowedBuf<'_> = self.spare_capacity_mut().into();
match reader.read_buf(buf.unfilled()) {
Ok(()) => {}
Err(e) if e.is_interrupted() => continue,
Err(e) => return Err(e),
};
let mut initialized_spare_capacity = 0;

let read = buf.filled().len();
if read == 0 {
break;
}
loop {
let buf = self.spare_capacity_mut();
let read_size = min(max_read_size, buf.len());
let mut buf = BorrowedBuf::from(&mut buf[..read_size]);
// SAFETY: init is either 0 or the init_len from the previous iteration.
unsafe {
buf.set_init(initialized_spare_capacity);
}
match reader.read_buf(buf.unfilled()) {
Ok(()) => {
let bytes_read = buf.len();

// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
// and the number of read bytes can't exceed the spare capacity since
// that's what the buffer is borrowing from.
unsafe { self.set_len(self.len() + read) };
bytes += read as u64;
}
// EOF
if bytes_read == 0 {
return Ok(bytes);
}

Ok(bytes)
// the reader is returning short reads but it doesn't call ensure_init()
if buf.init_len() < buf.capacity() {
max_read_size = usize::MAX;
}
// the reader hasn't returned short reads so far
if bytes_read == buf.capacity() {
max_read_size *= 2;
}

initialized_spare_capacity = buf.init_len() - bytes_read;
bytes += bytes_read as u64;
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
// and the number of read bytes can't exceed the spare capacity since
// that's what the buffer is borrowing from.
unsafe { self.set_len(self.len() + bytes_read) };

// spare capacity full, reserve more
if self.len() == self.capacity() {
break;
}
}
Err(e) if e.is_interrupted() => continue,
Err(e) => return Err(e),
}
}
}
}
}

Expand Down

0 comments on commit 8d8f06b

Please sign in to comment.