From 53b4ae0cd2919f8f7a67622720841c3dd2fe2490 Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Tue, 13 Jan 2015 22:50:56 -0800 Subject: [PATCH 1/5] Add fmt size_hints --- text/0000-fmt-size-hint.md | 106 +++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 text/0000-fmt-size-hint.md diff --git a/text/0000-fmt-size-hint.md b/text/0000-fmt-size-hint.md new file mode 100644 index 00000000000..7242db43f69 --- /dev/null +++ b/text/0000-fmt-size-hint.md @@ -0,0 +1,106 @@ +- Start Date: 2015-01-13 +- RFC PR: (leave this empty) +- Rust Issue: (leave this empty) + +# Summary + +Add a `size_hint` method to each of the `fmt` traits, allowing a buffer to allocate with the correct size before writing. + +# Motivation + +Using the `fmt` traits is slower than a straight `memcpy` of data. The removal of `std::is_utf8` has helped some. The other low-hanging fruit is to add a size hint, so as to reduce or prevent unnecessary allocations during writing to the output buffer. My initial implementation includes [benchmarks][], which looked like this on machine: + +[benchmarks]: https://gist.github.com/seanmonstar/8fb7aa6b0512b80522f9#file-size_hint-rs-L91-L162 + +``` +running 11 tests +test bench_long ... bench: 133 ns/iter (+/- 18) +test bench_long_hint ... bench: 72 ns/iter (+/- 10) +test bench_long_memcpy ... bench: 44 ns/iter (+/- 2) +test bench_med ... bench: 112 ns/iter (+/- 10) +test bench_med_hint ... bench: 59 ns/iter (+/- 7) +test bench_med_memcpy ... bench: 32 ns/iter (+/- 6) +test bench_nested ... bench: 248 ns/iter (+/- 19) +test bench_nested_hint ... bench: 134 ns/iter (+/- 6) +test bench_short ... bench: 96 ns/iter (+/- 13) +test bench_short_hint ... bench: 60 ns/iter (+/- 3) +test bench_short_memcpy ... bench: 33 ns/iter (+/- 3) +``` + +# Detailed design + +Add a `size_hint` method to all of the `fmt` traits, with a default implementation so no one is broken. Opting in simply means better performance. All traits should have their own size_hint implementation, since the trait used can change the length of the output written. + +```rust +trait Show { + fn fmt(&self, &mut Formatter) -> Result; + fn size_hint(&self) -> SizeHint { + SizeHint { min: 0, max: None } + } +} +``` + +Add a `SizeHint` type, with named properties, instead of using tuple indexing. Include an `Add` implementation for `SizeHint`, so they can be easily added together from nested properties. + +```rust +struct SizeHint { + min: usize, + max: Option +} + +impl Add for SizeHint { + type Output = SizeHint; + fn add(self, other: SizeHint) -> SizeHint { + SizeHint { + min: self.min + other.min, + max: match (self.max, other.max) { + (Some(left), Some(right)) => Some(left + right), + // if either is None, we can't assume a max + _ => None + } + } + } +} +``` + +Some example implementations: + +```rust +impl fmt::String for str { + // fn fmt ... + fn size_hint(&self) -> SizeHint { + let len = self.len(); + SizeHint { min: len, max: Some(len) } + } + +} + +struct Foo(String, String); + +impl fmt::Show for Foo { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Foo({:?}, {:?})", self.0, self.1) + } + + fn size_hint(&self) -> SizeHint { + Show::size_hint(&self.0) + Show::size_hint(&self.1) + SizeHint { + min: 7, + max: Some(7) + } + } +} +``` + +Deriving `Show` would also be able to implement `size_hint` meaning most everyone just gets this for free. + +# Drawbacks + +I can't think of a reason to stop this. + +# Alternatives + +The impact of not doing this is that `"foo".to_string()` stays at its current speed. Adding the size hints knocks the time down by around half in each case. + +# Unresolved questions + +This RFC proposes a `SizeHint` that has both a lower and upper bound. It's not immediately clear to me how to intelligently make use of both. From 84071262a4ec24aff360db5d7a89f39fc83f0a67 Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Thu, 15 Jan 2015 10:28:40 -0800 Subject: [PATCH 2/5] update Add implementation to handle overflows --- text/0000-fmt-size-hint.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/text/0000-fmt-size-hint.md b/text/0000-fmt-size-hint.md index 7242db43f69..884b264476f 100644 --- a/text/0000-fmt-size-hint.md +++ b/text/0000-fmt-size-hint.md @@ -52,11 +52,11 @@ impl Add for SizeHint { type Output = SizeHint; fn add(self, other: SizeHint) -> SizeHint { SizeHint { - min: self.min + other.min, - max: match (self.max, other.max) { - (Some(left), Some(right)) => Some(left + right), - // if either is None, we can't assume a max - _ => None + min: self.min.saturating_add(other.min), + max: if let (Some(left), Some(right)) = (self.max, other.max) { + Some(left.checked_add(right)), + } else { + None } } } From 5e0ad266b011c14ce76f65ee3d698cb024fc2348 Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Thu, 15 Jan 2015 12:25:01 -0800 Subject: [PATCH 3/5] add details about SizeHint, implementation inside std::fmt::format, and an alternative --- text/0000-fmt-size-hint.md | 88 +++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/text/0000-fmt-size-hint.md b/text/0000-fmt-size-hint.md index 884b264476f..e53a9ff724c 100644 --- a/text/0000-fmt-size-hint.md +++ b/text/0000-fmt-size-hint.md @@ -29,6 +29,8 @@ test bench_short_memcpy ... bench: 33 ns/iter (+/- 3) # Detailed design +## size_hint method + Add a `size_hint` method to all of the `fmt` traits, with a default implementation so no one is broken. Opting in simply means better performance. All traits should have their own size_hint implementation, since the trait used can change the length of the output written. ```rust @@ -40,6 +42,8 @@ trait Show { } ``` +## SizeHint type + Add a `SizeHint` type, with named properties, instead of using tuple indexing. Include an `Add` implementation for `SizeHint`, so they can be easily added together from nested properties. ```rust @@ -63,6 +67,80 @@ impl Add for SizeHint { } ``` +This type differs from `Iter::size_hint`, primarily to provide an `Add` implementation that doesn't interfere with `(usize, Option)` globally. Since using our own internal type, a struct with named properties is more expressive than a tuple-struct using tuple indexing. + +It's possible that `Iter::size_hint` could adopt the same type, but that seems out of scope of this RFC. + +## std::fmt::format + +There are 2 ways that the format traits are used: through `std::fmt::format`, and `std::string::ToString`. The `ToString` blanket implementation will be adjusted to simply wrap `std::fmt::format`, so there is no longer duplicated code. + +```rust +impl ToString for T { + fn to_string(&self) -> String { + format!("{}", self) + } +} +``` + +The size hint will be accessed in `std::fmt::format` to provide the initial capacity to the `fmt::Writer`. Since calls to `write!` use a pre-existing `Writer`, use of a size hint there is up to the creator of said `Writer`. + +Here is where we could be clever with `SizeHint`'s `min` and `max` values. Perhaps if difference is large enough, some value in between could be more efficient. This is left in the Unresolved Questions section. + +```rust +pub fn format(args: Arguments) -> string::String { + let mut output = string::String::with_capacity(args.size_hint().min); + let _ = write!(&mut output, "{}", args); + output +} +``` + +This involves implementing `size_hint` for `Arguments`: + +```rust +impl String for Arguments { + //fn fmt(&self, ...) + fn size_hint(&self) -> SizeHint { + let pieces = self.pieces.iter().fold(0, |sum, &piece| sum.saturating_add(piece.len())); + let args = self.args.iter().fold(SizeHint { min: 0, max: None }, |sum, arg| { + sum + String::size_hint(arg) + }); + args + SizeHint { min: pieces, max: Some(pieces) } + } +} + +``` + +Each `Argument` includes a `fmt` function, and reference to the object to format, with its type erased. In order to get the `SizeHint`, the appropriate `size_hint` function will need to be included in the `Argument` struct. + + +```rust +pub struct Argument<'a> { + value: &'a Void, + formatter: fn(&Void, &mut Formatter) -> Result, + hint: fn(&Void) -> SizeHint, +} + +impl<'a> String for Argument<'a> { + // fn fmt ... + fn size_hint(&self) -> SizeHint { + (self.hint)(self.value) + } +} +``` + +The public facing constructor of `Argument` would be altered to this: + +```rust +pub fn argument<'a, T>(f: fn(&T, &mut Formatter) -> Result, + s: fn(&T) -> SizeHint, + t: &'a T) -> Argument<'a> { + Argument::new(t, f, s) +} +``` + +## Examples + Some example implementations: ```rust @@ -95,11 +173,17 @@ Deriving `Show` would also be able to implement `size_hint` meaning most everyon # Drawbacks -I can't think of a reason to stop this. +Added complexity may conflict with landing more critical things for 1.0, but that should only mean a possible postponing, vs rejection. # Alternatives -The impact of not doing this is that `"foo".to_string()` stays at its current speed. Adding the size hints knocks the time down by around half in each case. +An alternative proposed by @kballard: + +> I've got a different approach that does not change the API of `Show` or `String` (or anything else that third-party code is expected to implement). It's focused around optimizing the case of `"foo".to_string()`. The approach here is to add a parameter to `std::fmt::Writer::write_str()` called more_coming that is true if the caller believes it will be calling `write_str()` again. The flag is tracked in the Formatter struct so calls to `write!()` inside an implementation of `Show/String` don't incorrectly claim there will be no writes if their caller still has more to write. Ultimately, the flag is used in the implementation of `fmt::Writer` on `String` to use `reserve_exact()` if nothing more is expected to be written. + +A drawback of this alternative is that it focuses improvements only when the object is a String, or at least does not contain properties that will be be formatted as well. The proposal in this RFC provides improvements for all types. + +The impact of not doing this at all is that `"foo".to_string()` stays at its current speed. Adding the size hints knocks the time down by around half in each case. # Unresolved questions From c93d35ce9ae6be36272a7772c7da56886e924afd Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Fri, 16 Jan 2015 12:39:27 -0800 Subject: [PATCH 4/5] add unstable attributes to SizeHint and size_hint --- text/0000-fmt-size-hint.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/text/0000-fmt-size-hint.md b/text/0000-fmt-size-hint.md index e53a9ff724c..c67a921afbc 100644 --- a/text/0000-fmt-size-hint.md +++ b/text/0000-fmt-size-hint.md @@ -36,6 +36,7 @@ Add a `size_hint` method to all of the `fmt` traits, with a default implementati ```rust trait Show { fn fmt(&self, &mut Formatter) -> Result; + #[unstable] fn size_hint(&self) -> SizeHint { SizeHint { min: 0, max: None } } @@ -47,6 +48,7 @@ trait Show { Add a `SizeHint` type, with named properties, instead of using tuple indexing. Include an `Add` implementation for `SizeHint`, so they can be easily added together from nested properties. ```rust +#[unstable] struct SizeHint { min: usize, max: Option From 732de9c43ec2678924059e7bbfffa02efef8930d Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Thu, 5 Feb 2015 16:11:29 -0800 Subject: [PATCH 5/5] add drawback regarding keeping methods in sync --- text/0000-fmt-size-hint.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/text/0000-fmt-size-hint.md b/text/0000-fmt-size-hint.md index c67a921afbc..97e2ba19a1f 100644 --- a/text/0000-fmt-size-hint.md +++ b/text/0000-fmt-size-hint.md @@ -177,6 +177,8 @@ Deriving `Show` would also be able to implement `size_hint` meaning most everyon Added complexity may conflict with landing more critical things for 1.0, but that should only mean a possible postponing, vs rejection. +Additionally, this requires more work to keep `fmt` and `size_hint` in sync for all manual implementations. Deriving will be able to build the method automatically. + # Alternatives An alternative proposed by @kballard: