From 3701661658bb14675b2be1510897aa744ccd02b7 Mon Sep 17 00:00:00 2001 From: tormol Date: Sat, 19 Sep 2020 23:16:12 +0200 Subject: [PATCH 1/7] Switch back to short-circuiting || and && operators in const fn Enabled by Rust 1.46 as part of const if/else. --- .github/workflows/ci.yml | 2 +- README.md | 4 ++-- src/ascii_char.rs | 24 ++++++++++++------------ src/lib.rs | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8cabbe..8aed29d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.41.1, stable, beta, nightly] + rust: [1.46.0, stable, beta, nightly] steps: - uses: actions/checkout@v2 - uses: hecrj/setup-rust-action@v1 diff --git a/README.md b/README.md index 690fd59..feb4bec 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ ascii = { version = "1.1", default-features = false, features = ["alloc"] } ## Minimum supported Rust version -The minimum Rust version for 1.1.\* releases is 1.41.1. +The minimum Rust version for 1.2.\* releases is 1.46.0. Later 1.y.0 releases might require newer Rust versions, but the three most recent stable releases at the time of publishing will always be supported. For example this means that if the current stable Rust version is 1.70 when -ascii 1.2.0 is released, then ascii 1.2.\* will not require a newer +ascii 1.3.0 is released, then ascii 1.3.\* will not require a newer Rust version than 1.68. ## History diff --git a/src/ascii_char.rs b/src/ascii_char.rs index 5011949..19610ee 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -411,7 +411,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_alphabetic(self) -> bool { - (self.to_not_upper() >= b'a') & (self.to_not_upper() <= b'z') + (self.to_not_upper() >= b'a') && (self.to_not_upper() <= b'z') } /// Check if the character is a letter (a-z, A-Z). 
@@ -457,14 +457,14 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_digit(&self) -> bool { - (*self as u8 >= b'0') & (*self as u8 <= b'9') + (*self as u8 >= b'0') && (*self as u8 <= b'9') } /// Check if the character is a letter or number #[inline] #[must_use] pub const fn is_alphanumeric(self) -> bool { - self.is_alphabetic() | self.is_ascii_digit() + self.is_alphabetic() || self.is_ascii_digit() } /// Check if the character is a letter or number @@ -491,7 +491,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_blank(&self) -> bool { - (*self as u8 == b' ') | (*self as u8 == b'\t') + (*self as u8 == b' ') || (*self as u8 == b'\t') } /// Check if the character one of ' ', '\t', '\n', '\r', @@ -500,7 +500,7 @@ impl AsciiChar { #[must_use] pub const fn is_whitespace(self) -> bool { let b = self as u8; - self.is_ascii_blank() | (b == b'\n') | (b == b'\r') | (b == 0x0b) | (b == 0x0c) + self.is_ascii_blank() || (b == b'\n') || (b == b'\r') || (b == 0x0b) || (b == 0x0c) } /// Check if the character is a ' ', '\t', '\n', '\r' or '\0xc' (form feed). 
@@ -510,9 +510,9 @@ impl AsciiChar { #[must_use] pub const fn is_ascii_whitespace(&self) -> bool { self.is_ascii_blank() - | (*self as u8 == b'\n') - | (*self as u8 == b'\r') - | (*self as u8 == 0x0c/*form feed*/) + || (*self as u8 == b'\n') + || (*self as u8 == b'\r') + || (*self as u8 == 0x0c/*form feed*/) } /// Check if the character is a control character @@ -530,7 +530,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_control(&self) -> bool { - ((*self as u8) < b' ') | (*self as u8 == 127) + ((*self as u8) < b' ') || (*self as u8 == 127) } /// Checks if the character is printable (except space) @@ -624,7 +624,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_punctuation(&self) -> bool { - self.is_ascii_graphic() & !self.is_alphanumeric() + self.is_ascii_graphic() && !self.is_alphanumeric() } /// Checks if the character is a valid hex digit @@ -641,7 +641,7 @@ impl AsciiChar { #[inline] #[must_use] pub const fn is_ascii_hexdigit(&self) -> bool { - self.is_ascii_digit() | ((*self as u8 | 0x20_u8).wrapping_sub(b'a') < 6) + self.is_ascii_digit() || ((*self as u8 | 0x20u8).wrapping_sub(b'a') < 6) } /// Unicode has printable versions of the ASCII control codes, like '␛'. @@ -728,7 +728,7 @@ impl AsciiChar { #[must_use] pub const fn eq_ignore_ascii_case(&self, other: &Self) -> bool { (self.as_byte() == other.as_byte()) - | (self.is_alphabetic() & (self.to_not_upper() == other.to_not_upper())) + || (self.is_alphabetic() && (self.to_not_upper() == other.to_not_upper())) } } diff --git a/src/lib.rs b/src/lib.rs index 5eacc16..df584ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,11 +15,11 @@ //! //! # Minimum supported Rust version //! -//! The minimum Rust version for 1.1.\* releases is 1.41.1. +//! The minimum Rust version for 1.2.\* releases is 1.46.0. //! Later 1.y.0 releases might require newer Rust versions, but the three most //! recent stable releases at the time of publishing will always be supported. //! 
For example this means that if the current stable Rust version is 1.70 when -//! ascii 1.2.0 is released, then ascii 1.2.\* will not require a newer +//! ascii 1.3.0 is released, then ascii 1.3.\* will not require a newer //! Rust version than 1.68. //! //! # History From 43ba1a3c6eba6fb4127297325761843eb628969d Mon Sep 17 00:00:00 2001 From: tormol Date: Sun, 5 Jun 2022 20:54:01 +0200 Subject: [PATCH 2/7] Constify methods that transmute * AsciiChar::from_ascii_unchecked() * AsciiChar::as_printable_char() --- .github/workflows/ci.yml | 2 +- README.md | 2 +- src/ascii_char.rs | 9 +++++---- src/lib.rs | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8aed29d..6da4f94 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - rust: [1.46.0, stable, beta, nightly] + rust: [1.56.1, stable, beta, nightly] steps: - uses: actions/checkout@v2 - uses: hecrj/setup-rust-action@v1 diff --git a/README.md b/README.md index feb4bec..fa960cc 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ ascii = { version = "1.1", default-features = false, features = ["alloc"] } ## Minimum supported Rust version -The minimum Rust version for 1.2.\* releases is 1.46.0. +The minimum Rust version for 1.2.\* releases is 1.56.1. Later 1.y.0 releases might require newer Rust versions, but the three most recent stable releases at the time of publishing will always be supported. For example this means that if the current stable Rust version is 1.70 when diff --git a/src/ascii_char.rs b/src/ascii_char.rs index 19610ee..709b8de 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -375,9 +375,9 @@ impl AsciiChar { /// and `Some(AsciiChar::from_ascii_unchecked(128))` might be `None`. 
#[inline] #[must_use] - pub unsafe fn from_ascii_unchecked(ch: u8) -> Self { + pub const unsafe fn from_ascii_unchecked(ch: u8) -> Self { // SAFETY: Caller guarantees `ch` is within bounds of ascii. - unsafe { ch.to_ascii_char_unchecked() } + unsafe { mem::transmute(ch) } } /// Converts an ASCII character into a `u8`. @@ -659,14 +659,15 @@ impl AsciiChar { /// assert_eq!(AsciiChar::new('p').as_printable_char(), 'p'); /// ``` #[must_use] - pub fn as_printable_char(self) -> char { + pub const fn as_printable_char(self) -> char { + #![allow(clippy::transmute_int_to_char)] // char::from_u32_unchecked() is not const fn yet. match self as u8 { // Non printable characters // SAFETY: From codepoint 0x2400 ('␀') to 0x241f (`␟`), there are characters representing // the unprintable characters from 0x0 to 0x1f, ordered correctly. // As `b` is guaranteed to be within 0x0 to 0x1f, the conversion represents a // valid character. - b @ 0x0..=0x1f => unsafe { char::from_u32_unchecked(u32::from('␀') + u32::from(b)) }, + b @ 0x0..=0x1f => unsafe { mem::transmute('␀' as u32 + b as u32) }, // 0x7f (delete) has it's own character at codepoint 0x2420, not 0x247f, so it is special // cased to return it's character diff --git a/src/lib.rs b/src/lib.rs index df584ee..2147d77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ //! //! # Minimum supported Rust version //! -//! The minimum Rust version for 1.2.\* releases is 1.46.0. +//! The minimum Rust version for 1.2.\* releases is 1.56.1. //! Later 1.y.0 releases might require newer Rust versions, but the three most //! recent stable releases at the time of publishing will always be supported. //! 
For example this means that if the current stable Rust version is 1.70 when From 41d4a009909955533a6a9a4cd5a8376213c1d456 Mon Sep 17 00:00:00 2001 From: tormol Date: Sat, 19 Sep 2020 23:43:11 +0200 Subject: [PATCH 3/7] Add AsciiChar::try_new() --- src/ascii_char.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/ascii_char.rs b/src/ascii_char.rs index 709b8de..266718d 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -360,6 +360,32 @@ impl AsciiChar { ALL[ch as usize] } + /// Create an `AsciiChar` from a `char`, in a `const fn` way. + /// + /// Within non-`const fn` functions the more general + /// [`from_ascii()`](#method.from_ascii) should be used instead. + /// + /// # Examples + /// ``` + /// # use ascii::AsciiChar; + /// assert!(AsciiChar::try_new('-').is_ok()); + /// assert!(AsciiChar::try_new('—').is_err()); + /// assert_eq!(AsciiChar::try_new('\x7f'), Ok(AsciiChar::DEL)); + /// ``` + /// + /// # Errors + /// + /// Fails for non-ASCII characters. + #[inline] + pub const fn try_new(ch: char) -> Result<Self, ToAsciiCharError> { + unsafe { + match ch as u32 { + 0..=127 => Ok(mem::transmute(ch as u8)), + _ => Err(ToAsciiCharError(())), + } + } + } + /// Constructs an ASCII character from a `u8`, `char` or other character /// type without any checks. 
/// From 875d06a1e1f58577744bbb257832e6bbb733e30a Mon Sep 17 00:00:00 2001 From: tormol Date: Sun, 20 Sep 2020 14:45:13 +0200 Subject: [PATCH 4/7] Add back AsciiStr::new(), now as const fn --- src/ascii_str.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index e8a6e12..d5eac7f 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -2,7 +2,7 @@ use alloc::borrow::ToOwned; #[cfg(feature = "alloc")] use alloc::boxed::Box; -use core::fmt; +use core::{fmt, mem}; use core::ops::{Index, IndexMut}; use core::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}; use core::slice::{self, Iter, IterMut, SliceIndex}; @@ -28,6 +28,23 @@ pub struct AsciiStr { } impl AsciiStr { + /// Coerces into an `AsciiStr` slice. + /// + /// # Examples + /// ``` + /// # use ascii::{AsciiChar, AsciiStr}; + /// const HELLO: &AsciiStr = AsciiStr::new( + /// &[AsciiChar::H, AsciiChar::e, AsciiChar::l, AsciiChar::l, AsciiChar::o] + /// ); + /// + /// assert_eq!(HELLO.as_str(), "Hello"); + /// ``` + #[inline] + #[must_use] + pub const fn new(s: &[AsciiChar]) -> &Self { + unsafe { mem::transmute(s) } + } + /// Converts `&self` to a `&str` slice. #[inline] #[must_use] From 8198f0493545f9f877e4a1f2a27e4f009cfc9c71 Mon Sep 17 00:00:00 2001 From: tormol Date: Mon, 6 Jun 2022 13:30:42 +0200 Subject: [PATCH 5/7] Add AsciiStr::from_ascii_str() and from_ascii_bytes() As `const fn` alternatives to the trait-based from_ascii(). Closes #84 --- src/ascii_str.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index d5eac7f..ee530d0 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -125,6 +125,53 @@ impl AsciiStr { bytes.as_ref().as_ascii_str() } + /// Convert a byte slice into an `AsciiStr`. 
+ /// + /// [`from_ascii()`](#method.from_ascii) should be preferred outside of `const` contexts + /// as it might be faster due to using functions that are not `const fn`. + /// + /// # Errors + /// Returns `Err` if not all bytes are valid ASCII values. + /// + /// # Examples + /// ``` + /// # use ascii::AsciiStr; + /// assert!(AsciiStr::from_ascii_bytes(b"\x00\x22\x44").is_ok()); + /// assert!(AsciiStr::from_ascii_bytes(b"\x66\x77\x88").is_err()); + /// ``` + pub const fn from_ascii_bytes(b: &[u8]) -> Result<&Self, AsAsciiStrError> { + #![allow(clippy::indexing_slicing)] // .get() is not const yet (as of Rust 1.61) + let mut valid = 0; + loop { + if valid == b.len() { + // SAFETY: `is_ascii` having returned true for all bytes guarantees all bytes are within ascii range. + return unsafe { Ok(mem::transmute(b)) }; + } else if b[valid].is_ascii() { + valid += 1; + } else { + return Err(AsAsciiStrError(valid)); + } + } + } + + /// Convert a `str` into an `AsciiStr`. + /// + /// [`from_ascii()`](#method.from_ascii) should be preferred outside of `const` contexts + /// as it might be faster due to using functions that are not `const fn`. + /// + /// # Errors + /// Returns `Err` if it contains non-ASCII codepoints. + /// + /// # Examples + /// ``` + /// # use ascii::AsciiStr; + /// assert!(AsciiStr::from_ascii_str("25 C").is_ok()); + /// assert!(AsciiStr::from_ascii_str("35°C").is_err()); + /// ``` + pub const fn from_ascii_str(s: &str) -> Result<&Self, AsAsciiStrError> { + Self::from_ascii_bytes(s.as_bytes()) + } + /// Converts anything that can be represented as a byte slice to an `AsciiStr` without checking /// for non-ASCII characters.. 
/// From aa5c79568bcbe7879785936d8d369f063da2c6c1 Mon Sep 17 00:00:00 2001 From: tormol Date: Mon, 6 Jun 2022 12:50:52 +0200 Subject: [PATCH 6/7] Constify methods that previously "dereferenced" raw pointers * AsciiStr::as_str() * AsciiStr::as_bytes() Dereferencing raw pointers in `const fn` requires Rust 1.58, while transmute only requires Rust 1.56. --- src/ascii_str.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index ee530d0..661a62e 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -48,17 +48,17 @@ impl AsciiStr { /// Converts `&self` to a `&str` slice. #[inline] #[must_use] - pub fn as_str(&self) -> &str { + pub const fn as_str(&self) -> &str { // SAFETY: All variants of `AsciiChar` are valid bytes for a `str`. - unsafe { &*(self as *const AsciiStr as *const str) } + unsafe { mem::transmute(self) } } /// Converts `&self` into a byte slice. #[inline] #[must_use] - pub fn as_bytes(&self) -> &[u8] { + pub const fn as_bytes(&self) -> &[u8] { // SAFETY: All variants of `AsciiChar` are valid `u8`, given they're `repr(u8)`. - unsafe { &*(self as *const AsciiStr as *const [u8]) } + unsafe { mem::transmute(self) } } /// Returns the entire string as slice of `AsciiChar`s. 
From 94b84704183f6551917834184be2f4d4854ab479 Mon Sep 17 00:00:00 2001 From: tormol Date: Sun, 5 Jun 2022 20:54:01 +0200 Subject: [PATCH 7/7] Constify AsciiStr::trim methods * AsciiStr::trim_start() * AsciiStr::trim_end() * AsciiStr::trim() --- src/ascii_str.rs | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 661a62e..bfdf54e 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -278,7 +278,7 @@ impl AsciiStr { /// assert_eq!("white \tspace", example.trim()); /// ``` #[must_use] - pub fn trim(&self) -> &Self { + pub const fn trim(&self) -> &Self { self.trim_start().trim_end() } @@ -291,14 +291,16 @@ impl AsciiStr { /// assert_eq!("white \tspace \t", example.trim_start()); /// ``` #[must_use] - pub fn trim_start(&self) -> &Self { - let whitespace_len = self - .chars() - .position(|ch| !ch.is_whitespace()) - .unwrap_or_else(|| self.len()); - - // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice. - unsafe { self.as_slice().get_unchecked(whitespace_len..).into() } + pub const fn trim_start(&self) -> &Self { + let mut trimmed = &self.slice; + while let Some((first, rest)) = trimmed.split_first() { + if first.is_whitespace() { + trimmed = rest; + } else { + break; + } + } + AsciiStr::new(trimmed) } /// Returns an ASCII string slice with trailing whitespace removed. @@ -310,20 +312,16 @@ impl AsciiStr { /// assert_eq!(" \twhite \tspace", example.trim_end()); /// ``` #[must_use] - pub fn trim_end(&self) -> &Self { - // Number of whitespace characters counting from the end - let whitespace_len = self - .chars() - .rev() - .position(|ch| !ch.is_whitespace()) - .unwrap_or_else(|| self.len()); - - // SAFETY: `whitespace_len` is `0..=len`, which is at most `len`, which is a valid empty slice, and at least `0`, which is the whole slice. 
- unsafe { - self.as_slice() - .get_unchecked(..self.len() - whitespace_len) - .into() + pub const fn trim_end(&self) -> &Self { + let mut trimmed = &self.slice; + while let Some((last, rest)) = trimmed.split_last() { + if last.is_whitespace() { + trimmed = rest; + } else { + break; + } } + AsciiStr::new(trimmed) } /// Compares two strings case-insensitively.