From 38013e708eb6ec5abe9992078857b147ff3b58d5 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 26 Mar 2021 10:38:32 -0400 Subject: [PATCH 1/2] Use iter::from_fn in String::remove_matches --- library/alloc/src/string.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index dbe5bc1da460d..4aa2be2b1787c 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -48,7 +48,7 @@ use core::fmt; use core::hash; #[cfg(not(no_global_oom_handling))] use core::iter::FromIterator; -use core::iter::FusedIterator; +use core::iter::{from_fn, FusedIterator}; #[cfg(not(no_global_oom_handling))] use core::ops::Add; #[cfg(not(no_global_oom_handling))] @@ -1290,15 +1290,9 @@ impl String { { use core::str::pattern::Searcher; - let matches = { + let matches: Vec<_> = { let mut searcher = pat.into_searcher(self); - let mut matches = Vec::new(); - - while let Some(m) = searcher.next_match() { - matches.push(m); - } - - matches + from_fn(|| searcher.next_match()).collect() }; let len = self.len(); From 977903bb1135e22dc7c33aee0096c383f93aa719 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Fri, 26 Mar 2021 10:39:42 -0400 Subject: [PATCH 2/2] String::remove_matches O(n^2) -> O(n) Copy only non-matching bytes. --- library/alloc/src/string.rs | 53 ++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index 4aa2be2b1787c..1fb645a302e76 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -1290,26 +1290,49 @@ impl String { { use core::str::pattern::Searcher; - let matches: Vec<_> = { + let rejections = { let mut searcher = pat.into_searcher(self); - from_fn(|| searcher.next_match()).collect() + // Per Searcher::next: + // + // A Match result needs to contain the whole matched pattern, + // however Reject results may be split up into arbitrary many + // adjacent fragments. Both ranges may have zero length. + // + // In practice the implementation of Searcher::next_match tends to + // be more efficient, so we use it here and do some work to invert + // matches into rejections since that's what we want to copy below. + let mut front = 0; + let rejections: Vec<_> = from_fn(|| { + let (start, end) = searcher.next_match()?; + let prev_front = front; + front = end; + Some((prev_front, start)) + }) + .collect(); + rejections.into_iter().chain(core::iter::once((front, self.len()))) }; - let len = self.len(); - let mut shrunk_by = 0; + let mut len = 0; + let ptr = self.vec.as_mut_ptr(); + + for (start, end) in rejections { + let count = end - start; + if start != len { + // SAFETY: per Searcher::next: + // + // The stream of Match and Reject values up to a Done will + // contain index ranges that are adjacent, non-overlapping, + // covering the whole haystack, and laying on utf8 + // boundaries. + unsafe { + ptr::copy(ptr.add(start), ptr.add(len), count); + } + } + len += count; + } - // SAFETY: start and end will be on utf8 byte boundaries per - // the Searcher docs unsafe { - for (start, end) in matches { - ptr::copy( - self.vec.as_mut_ptr().add(end - shrunk_by), - self.vec.as_mut_ptr().add(start - shrunk_by), - len - end, - ); - shrunk_by += end - start; - } - self.vec.set_len(len - shrunk_by); + self.vec.set_len(len); } }