From 54c88f0fe0f935fa5c20298ba3b442cacdf12118 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Wed, 21 Feb 2024 11:00:16 -0500 Subject: [PATCH] Provides implementation of `Vec::extend_from_slice` optimized for `T: Copy` (#236) * Adds `extend_from_slice_copy` inherent impl * Updated comments * Extends benchmarks to test a variety of slice lengths * Updates doc comment for extend_from_slice_copy * Removes comment with open question about alignment * Changes Vec's impl of io::Write to use extend_from_slice_copy * Removes changes to .gitignore * adds non-power-of-2 sizes to the benchmark --------- Co-authored-by: Zack Slayton --- benches/benches.rs | 71 +++++++++++++++++++++++++++++++++++++++ src/collections/string.rs | 26 +------------- src/collections/vec.rs | 65 +++++++++++++++++++++++++++++++++-- 3 files changed, 135 insertions(+), 27 deletions(-) diff --git a/benches/benches.rs b/benches/benches.rs index ed70e97..7326823 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -104,8 +104,78 @@ fn string_push_str(bump: &bumpalo::Bump, str: &str) { criterion::black_box(s); } +#[cfg(feature = "collections")] +fn extend_u8(bump: &bumpalo::Bump, slice: &[u8]) { + let slice = criterion::black_box(slice); + let mut vec = bumpalo::collections::Vec::::with_capacity_in(slice.len(), bump); + vec.extend(slice.iter().copied()); + criterion::black_box(vec); +} + +#[cfg(feature = "collections")] +fn extend_from_slice_u8(bump: &bumpalo::Bump, slice: &[u8]) { + let slice = criterion::black_box(slice); + let mut vec = bumpalo::collections::Vec::::with_capacity_in(slice.len(), bump); + vec.extend_from_slice(slice); + criterion::black_box(vec); +} + +#[cfg(feature = "collections")] +fn extend_from_slice_copy_u8(bump: &bumpalo::Bump, slice: &[u8]) { + let slice = criterion::black_box(slice); + let mut vec = bumpalo::collections::Vec::::with_capacity_in(slice.len(), bump); + vec.extend_from_slice_copy(slice); + criterion::black_box(vec); +} + const ALLOCATIONS: usize = 10_000; +fn bench_extend_from_slice_copy(c: &mut Criterion) { + let lengths = &[ + 4usize, + 5, + 8, + 11, + 16, + 64, + 128, + 331, + 1024, + 4 * 1024, + 16 * 1024, + ]; + + for len in lengths.iter().copied() { + let str = "x".repeat(len); + let mut group = c.benchmark_group(format!("extend {len} bytes")); + group.throughput(Throughput::Elements(len as u64)); + group.bench_function("extend", |b| { + let mut bump = bumpalo::Bump::with_capacity(len); + b.iter(|| { + bump.reset(); + extend_u8(&bump, str.as_bytes()); + }); + }); + group.bench_function("extend_from_slice", |b| { + let mut bump = bumpalo::Bump::with_capacity(len); + let str = "x".repeat(len); + b.iter(|| { + bump.reset(); + extend_from_slice_u8(&bump, str.as_bytes()); + }); + }); + group.bench_function("extend_from_slice_copy", |b| { + let mut bump = bumpalo::Bump::with_capacity(len); + let str = "x".repeat(len); + b.iter(|| { + bump.reset(); + extend_from_slice_copy_u8(&bump, str.as_bytes()); + }); + }); + group.finish(); + } +} + fn bench_alloc(c: &mut Criterion) { let mut group = c.benchmark_group("alloc"); group.throughput(Throughput::Elements(ALLOCATIONS as u64)); @@ -249,6 +319,7 @@ fn bench_string_push_str(c: &mut Criterion) { criterion_group!( benches, + bench_extend_from_slice_copy, bench_alloc, bench_alloc_with, bench_alloc_try_with, diff --git a/src/collections/string.rs b/src/collections/string.rs index 8d79211..e9fafbf 100644 --- a/src/collections/string.rs +++ b/src/collections/string.rs @@ -936,31 +936,7 @@ impl<'bump> String<'bump> { /// ``` #[inline] pub fn push_str(&mut self, string: &str) { - // Reserve space in the Vec for the string to be added - let old_len = self.vec.len(); - self.vec.reserve(string.len()); - - let new_len = old_len + string.len(); - debug_assert!(new_len <= self.vec.capacity()); - - // Copy string into space just reserved - // SAFETY: - // * `src` is valid for reads of `string.len()` bytes by virtue of being an allocated `&str`. - // * `dst` is valid for writes of `string.len()` bytes as `self.vec.reserve(string.len())` - // above guarantees that. - // * Alignment is not relevant as `u8` has no alignment requirements. - // * Source and destination ranges cannot overlap as we just reserved the destination - // range from the bump. - unsafe { - let src = string.as_ptr(); - let dst = self.vec.as_mut_ptr().add(old_len); - ptr::copy_nonoverlapping(src, dst, string.len()); - } - - // Update length of Vec to include string just pushed - // SAFETY: We reserved sufficent capacity for the string above. - // The elements at `old_len..new_len` were initialized by `copy_nonoverlapping` above. - unsafe { self.vec.set_len(new_len) }; + self.vec.extend_from_slice_copy(string.as_bytes()) } /// Returns this `String`'s capacity, in bytes. diff --git a/src/collections/vec.rs b/src/collections/vec.rs index 66c821b..bef6484 100644 --- a/src/collections/vec.rs +++ b/src/collections/vec.rs @@ -1777,6 +1777,67 @@ impl<'bump, T: 'bump + Clone> Vec<'bump, T> { } } +impl<'bump, T: 'bump + Copy> Vec<'bump, T> { + /// Copies all elements in the slice `other` and appends them to the `Vec`. + /// + /// Note that this function is same as [`extend_from_slice`] except that it is optimized for + /// slices of types that implement the `Copy` trait. If and when Rust gets specialization + /// this function will likely be deprecated (but still available). + /// + /// # Examples + /// + /// ``` + /// use bumpalo::{Bump, collections::Vec}; + /// + /// let b = Bump::new(); + /// + /// let mut vec = bumpalo::vec![in &b; 1]; + /// vec.extend_from_slice_copy(&[2, 3, 4]); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// ``` + /// use bumpalo::{Bump, collections::Vec}; + /// + /// let b = Bump::new(); + /// + /// let mut vec = bumpalo::vec![in &b; 'H' as u8]; + /// vec.extend_from_slice_copy("ello, world!".as_bytes()); + /// assert_eq!(vec, "Hello, world!".as_bytes()); + /// ``` + /// + /// [`extend`]: #method.extend_from_slice + pub fn extend_from_slice_copy(&mut self, other: &[T]) { + + // Reserve space in the Vec for the values to be added + let old_len = self.len(); + self.reserve(other.len()); + + let new_len = old_len + other.len(); + debug_assert!(new_len <= self.capacity()); + + // Copy values into the space that was just reserved + // SAFETY: + // * `src` is valid for reads of `other.len()` values by virtue of being a `&[T]`. + // * `dst` is valid for writes of `other.len()` bytes as `self.reserve(other.len())` + // above guarantees that. + // * Because `src` is a `&[T]` and dst is a `&[T]` within the `Vec`, + // `copy_nonoverlapping`'s alignment requirements are met. + // * Source and destination ranges cannot overlap as we just reserved the destination + // range from the bump. + unsafe { + let src = other.as_ptr(); + let dst = self.as_mut_ptr().add(old_len); + ptr::copy_nonoverlapping(src, dst, other.len()); + } + + // Update length of Vec to include values just pushed + // SAFETY: We reserved sufficient capacity for the values above. + // The elements at `old_len..new_len` were initialized by `copy_nonoverlapping` above. + unsafe { self.set_len(new_len) }; + } +} + // This code generalises `extend_with_{element,default}`. trait ExtendWith { fn next(&mut self) -> T; @@ -2619,13 +2680,13 @@ where impl<'bump> io::Write for Vec<'bump, u8> { #[inline] fn write(&mut self, buf: &[u8]) -> io::Result { - self.extend_from_slice(buf); + self.extend_from_slice_copy(buf); Ok(buf.len()) } #[inline] fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { - self.extend_from_slice(buf); + self.extend_from_slice_copy(buf); Ok(()) }