diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 51ac441a7a425..dbe6f466a2a58 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -698,28 +698,49 @@ impl WorkItem { /// Generate a short description of this work item suitable for use as a thread name. fn short_description(&self) -> String { - // `pthread_setname()` on *nix is limited to 15 characters and longer names are ignored. - // Use very short descriptions in this case to maximize the space available for the module name. - // Windows does not have that limitation so use slightly more descriptive names there. + // `pthread_setname()` on *nix ignores anything beyond the first 15 + // bytes. Use short descriptions to maximize the space available for + // the module name. + #[cfg(not(windows))] + fn desc(short: &str, _long: &str, name: &str) -> String { + // The short label is three bytes, and is followed by a space. That + // leaves 11 bytes for the CGU name. How we obtain those 11 bytes + // depends on the the CGU name form. + // + // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part + // before the `-cgu.0` is the same for every CGU, so use the + // `cgu.0` part. The number suffix will be different for each + // CGU. + // + // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole + // name because each CGU will have a unique ASCII hash, and the + // first 11 bytes will be enough to identify it. + // + // - Incremental (with `-Zhuman-readable-cgu-names`), e.g. + // `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole + // name. The first 11 bytes won't be enough to uniquely identify + // it, but no obvious substring will, and this is a rarely used + // option so it doesn't matter much. + // + assert_eq!(short.len(), 3); + let name = if let Some(index) = name.find("-cgu.") { + &name[index + 1..] // +1 skips the leading '-'. + } else { + name + }; + format!("{short} {name}") + } + + // Windows has no thread name length limit, so use more descriptive names. + #[cfg(windows)] + fn desc(_short: &str, long: &str, name: &str) -> String { + format!("{long} {name}") + } + match self { - WorkItem::Optimize(m) => { - #[cfg(windows)] - return format!("optimize module {}", m.name); - #[cfg(not(windows))] - return format!("opt {}", m.name); - } - WorkItem::CopyPostLtoArtifacts(m) => { - #[cfg(windows)] - return format!("copy LTO artifacts for {}", m.name); - #[cfg(not(windows))] - return format!("copy {}", m.name); - } - WorkItem::LTO(m) => { - #[cfg(windows)] - return format!("LTO module {}", m.name()); - #[cfg(not(windows))] - return format!("LTO {}", m.name()); - } + WorkItem::Optimize(m) => desc("opt", "optimize module {}", &m.name), + WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for {}", &m.name), + WorkItem::LTO(m) => desc("lto", "LTO module {}", m.name()), } } } diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index 97e3748b8b826..e663f4486f7fe 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -368,6 +368,7 @@ fn merge_codegen_units<'tcx>( let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); + // Rename the newly merged CGUs. if cx.tcx.sess.opts.incremental.is_some() { // If we are doing incremental compilation, we want CGU names to // reflect the path of the source level module they correspond to. @@ -404,18 +405,41 @@ fn merge_codegen_units<'tcx>( } } } + + // A sorted order here ensures what follows can be deterministic. + codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); } else { - // If we are compiling non-incrementally we just generate simple CGU - // names containing an index. + // When compiling non-incrementally, we rename the CGUS so they have + // identical names except for the numeric suffix, something like + // `regex.f10ba03eb5ec7975-cgu.N`, where `N` varies. + // + // It is useful for debugging and profiling purposes if the resulting + // CGUs are sorted by name *and* reverse sorted by size. (CGU 0 is the + // biggest, CGU 1 is the second biggest, etc.) + // + // So first we reverse sort by size. Then we generate the names with + // zero-padded suffixes, which means they are automatically sorted by + // names. The numeric suffix width depends on the number of CGUs, which + // is always greater than zero: + // - [1,9] CGUS: `0`, `1`, `2`, ... + // - [10,99] CGUS: `00`, `01`, `02`, ... + // - [100,999] CGUS: `000`, `001`, `002`, ... + // - etc. + // + // If we didn't zero-pad the sorted-by-name order would be `XYZ-cgu.0`, + // `XYZ-cgu.1`, `XYZ-cgu.10`, `XYZ-cgu.11`, ..., `XYZ-cgu.2`, etc. + codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate())); + let num_digits = codegen_units.len().ilog10() as usize + 1; for (index, cgu) in codegen_units.iter_mut().enumerate() { + // Note: `WorkItem::short_description` depends on this name ending + // with `-cgu.` followed by a numeric suffix. Please keep it in + // sync with this code. + let suffix = format!("{index:0num_digits$}"); let numbered_codegen_unit_name = - cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index)); + cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(suffix)); cgu.set_name(numbered_codegen_unit_name); } } - - // A sorted order here ensures what follows can be deterministic. - codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); } fn internalize_symbols<'tcx>(