diff --git a/src/tools/unicode-table-generator/Cargo.toml b/src/tools/unicode-table-generator/Cargo.toml
index f8a500922d052..3ca6e9e316f1d 100644
--- a/src/tools/unicode-table-generator/Cargo.toml
+++ b/src/tools/unicode-table-generator/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "unicode-table-generator"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
diff --git a/src/tools/unicode-table-generator/src/cascading_map.rs b/src/tools/unicode-table-generator/src/cascading_map.rs
index 1eb35e819c07c..78a7bba320870 100644
--- a/src/tools/unicode-table-generator/src/cascading_map.rs
+++ b/src/tools/unicode-table-generator/src/cascading_map.rs
@@ -21,7 +21,7 @@ impl RawEmitter {
 
         let points = ranges
             .iter()
-            .flat_map(|r| (r.start..r.end).into_iter().collect::<Vec<u32>>())
+            .flat_map(|r| (r.start..r.end).collect::<Vec<u32>>())
             .collect::<Vec<u32>>();
 
         println!("there are {} points", points.len());
@@ -32,21 +32,20 @@ impl RawEmitter {
             // assert that there is no whitespace over the 0x3000 range.
             assert!(point <= 0x3000, "the highest unicode whitespace value has changed");
             let high_bytes = point as usize >> 8;
-            let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_insert_with(Vec::new);
+            let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_default();
             codepoints.push(point);
         }
 
         let mut bit_for_high_byte = 1u8;
         let mut arms = Vec::<String>::new();
 
-        let mut high_bytes: Vec<usize> =
-            codepoints_by_high_bytes.keys().map(|k| k.clone()).collect();
+        let mut high_bytes: Vec<usize> = codepoints_by_high_bytes.keys().copied().collect();
         high_bytes.sort();
         for high_byte in high_bytes {
             let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap();
             if codepoints.len() == 1 {
                 let ch = codepoints.pop().unwrap();
-                arms.push(format!("{} => c as u32 == {:#04x}", high_byte, ch));
+                arms.push(format!("{high_byte} => c as u32 == {ch:#04x}"));
                 continue;
             }
             // more than 1 codepoint in this arm
@@ -54,8 +53,7 @@ impl RawEmitter {
                 map[(*codepoint & 0xff) as usize] |= bit_for_high_byte;
             }
             arms.push(format!(
-                "{} => WHITESPACE_MAP[c as usize & 0xff] & {} != 0",
-                high_byte, bit_for_high_byte
+                "{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0"
             ));
             bit_for_high_byte <<= 1;
         }
@@ -68,7 +66,7 @@ impl RawEmitter {
         writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
         writeln!(&mut self.file, "    match c as u32 >> 8 {{").unwrap();
         for arm in arms {
-            writeln!(&mut self.file, "        {},", arm).unwrap();
+            writeln!(&mut self.file, "        {arm},").unwrap();
         }
         writeln!(&mut self.file, "        _ => false,").unwrap();
         writeln!(&mut self.file, "    }}").unwrap();
diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs
index 00241b7ee0eb5..9c6454492e7e0 100644
--- a/src/tools/unicode-table-generator/src/case_mapping.rs
+++ b/src/tools/unicode-table-generator/src/case_mapping.rs
@@ -9,7 +9,7 @@ const INDEX_MASK: u32 = 1 << 22;
 
 pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
     let mut file = String::new();
-    write!(file, "const INDEX_MASK: u32 = 0x{:x};", INDEX_MASK).unwrap();
+    write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap();
     file.push_str("\n\n");
     file.push_str(HEADER.trim_start());
     file.push('\n');
diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs
index 415db2c4dbc05..6cdb82a87bdff 100644
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@@ -160,15 +160,15 @@ fn load_data() -> UnicodeData {
                 .push(Codepoints::Single(row.codepoint));
         }
 
-        if let Some(mapped) = row.simple_lowercase_mapping {
-            if mapped != row.codepoint {
-                to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
-            }
+        if let Some(mapped) = row.simple_lowercase_mapping
+            && mapped != row.codepoint
+        {
+            to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
         }
-        if let Some(mapped) = row.simple_uppercase_mapping {
-            if mapped != row.codepoint {
-                to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
-            }
+        if let Some(mapped) = row.simple_uppercase_mapping
+            && mapped != row.codepoint
+        {
+            to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
         }
     }
 
@@ -196,12 +196,12 @@ fn load_data() -> UnicodeData {
                 .flat_map(|codepoints| match codepoints {
                     Codepoints::Single(c) => c
                         .scalar()
-                        .map(|ch| (ch as u32..ch as u32 + 1))
+                        .map(|ch| ch as u32..ch as u32 + 1)
                        .into_iter()
                         .collect::<Vec<_>>(),
                     Codepoints::Range(c) => c
                         .into_iter()
-                        .flat_map(|c| c.scalar().map(|ch| (ch as u32..ch as u32 + 1)))
+                        .flat_map(|c| c.scalar().map(|ch| ch as u32..ch as u32 + 1))
                         .collect::<Vec<_>>(),
                 })
                 .collect::<Vec<Range<u32>>>(),
@@ -236,7 +236,7 @@ fn main() {
     let ranges_by_property = &unicode_data.ranges;
 
     if let Some(path) = test_path {
-        std::fs::write(&path, generate_tests(&write_location, &ranges_by_property)).unwrap();
+        std::fs::write(&path, generate_tests(&write_location, ranges_by_property)).unwrap();
     }
 
     let mut total_bytes = 0;
@@ -246,9 +246,9 @@ fn main() {
 
         let mut emitter = RawEmitter::new();
         if property == &"White_Space" {
-            emit_whitespace(&mut emitter, &ranges);
+            emit_whitespace(&mut emitter, ranges);
         } else {
-            emit_codepoints(&mut emitter, &ranges);
+            emit_codepoints(&mut emitter, ranges);
         }
 
         modules.push((property.to_lowercase().to_string(), emitter.file));
@@ -288,7 +288,7 @@ fn main() {
         for line in contents.lines() {
             if !line.trim().is_empty() {
                 table_file.push_str("    ");
-                table_file.push_str(&line);
+                table_file.push_str(line);
             }
             table_file.push('\n');
         }
@@ -312,7 +312,7 @@ fn version() -> String {
     let start = readme.find(prefix).unwrap() + prefix.len();
     let end = readme.find(" of the Unicode Standard.").unwrap();
     let version =
-        readme[start..end].split('.').map(|v| v.parse::<u32>().expect(&v)).collect::<Vec<_>>();
+        readme[start..end].split('.').map(|v| v.parse::<u32>().expect(v)).collect::<Vec<_>>();
     let [major, minor, micro] = [version[0], version[1], version[2]];
 
     out.push_str(&format!("({major}, {minor}, {micro});\n"));
@@ -320,7 +320,7 @@
 }
 
 fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
-    let pieces = values.into_iter().map(|b| format!("{:?}, ", b)).collect::<Vec<_>>();
+    let pieces = values.into_iter().map(|b| format!("{b:?}, ")).collect::<Vec<_>>();
     let mut out = String::new();
     let mut line = String::from("\n    ");
     for piece in pieces {
@@ -348,7 +348,7 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
 
     s.push_str("\nfn main() {\n");
     for (property, ranges) in ranges {
-        s.push_str(&format!(r#"    println!("Testing {}");"#, property));
+        s.push_str(&format!(r#"    println!("Testing {property}");"#));
         s.push('\n');
         s.push_str(&format!("    {}_true();\n", property.to_lowercase()));
         s.push_str(&format!("    {}_false();\n", property.to_lowercase()));
@@ -373,7 +373,7 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
         s.push_str("    }\n\n");
     }
 
-    s.push_str("}");
+    s.push('}');
     s
 }
 
@@ -388,7 +388,7 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool
                 range.start,
             ));
         } else {
-            s.push_str(&format!("        for chn in {:?}u32 {{\n", range));
+            s.push_str(&format!("        for chn in {range:?}u32 {{\n"));
             s.push_str(&format!(
                 "            assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \"{{:?}}\", chn);\n",
                 if truthy { "" } else { "!" },
@@ -439,7 +439,7 @@ fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
     let mut last_end = None;
     for range in ranges {
         if let Some(last) = last_end {
-            assert!(range.start > last, "{:?}", range);
+            assert!(range.start > last, "{range:?}");
         }
         last_end = Some(range.end);
     }
diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs
index ee94d3c93a6cb..e9e0efc459442 100644
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@@ -156,10 +156,10 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
     emitter.blank_line();
 
     let mut bitset = emitter.clone();
-    let bitset_ok = bitset.emit_bitset(&ranges).is_ok();
+    let bitset_ok = bitset.emit_bitset(ranges).is_ok();
 
     let mut skiplist = emitter.clone();
-    skiplist.emit_skiplist(&ranges);
+    skiplist.emit_skiplist(ranges);
 
     if bitset_ok && bitset.bytes_used <= skiplist.bytes_used {
         *emitter = bitset;
@@ -174,7 +174,7 @@ pub fn emit_whitespace(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
     emitter.blank_line();
 
     let mut cascading = emitter.clone();
-    cascading.emit_cascading_map(&ranges);
+    cascading.emit_cascading_map(ranges);
     *emitter = cascading;
     emitter.desc = String::from("cascading");
 }
@@ -272,7 +272,7 @@ impl Canonicalized {
         // for canonical when possible.
         while let Some((&to, _)) = mappings
             .iter()
-            .find(|(&to, _)| to == 0)
+            .find(|&(&to, _)| to == 0)
             .or_else(|| mappings.iter().max_by_key(|m| m.1.len()))
         {
             // Get the mapping with the most entries. Currently, no mapping can
@@ -311,10 +311,9 @@ impl Canonicalized {
                     }
                 }
             }
-            assert!(
-                unique_mapping
-                    .insert(to, UniqueMapping::Canonical(canonical_words.len()))
-                    .is_none()
+            assert_eq!(
+                unique_mapping.insert(to, UniqueMapping::Canonical(canonical_words.len())),
+                None
            );
             canonical_words.push(to);
 
@@ -340,14 +339,10 @@ impl Canonicalized {
         // We'll probably always have some slack though so this loop will still
         // be needed.
         for &w in unique_words {
-            if !unique_mapping.contains_key(&w) {
-                assert!(
-                    unique_mapping
-                        .insert(w, UniqueMapping::Canonical(canonical_words.len()))
-                        .is_none()
-                );
+            unique_mapping.entry(w).or_insert_with(|| {
                 canonical_words.push(w);
-            }
+                UniqueMapping::Canonical(canonical_words.len())
+            });
         }
         assert_eq!(canonicalized_words.len() + canonical_words.len(), unique_words.len());
         assert_eq!(unique_mapping.len(), unique_words.len());