Skip to content

Commit 4492b0b

Browse files
committed
Add the unstable option symbol_mangling_digest to reduce the binary size of dynamic library based on service requirements
Enrich test cases and cover all possible values of new option
1 parent 59096cd commit 4492b0b

File tree

9 files changed

+230
-4
lines changed

9 files changed

+230
-4
lines changed

compiler/rustc_interface/src/tests.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ use rustc_session::config::{
88
FunctionReturn, InliningThreshold, Input, InstrumentCoverage, InstrumentXRay,
99
LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, MirSpanview, NextSolverConfig,
1010
OomStrategy, Options, OutFileName, OutputType, OutputTypes, PAuthKey, PacRet, Passes, Polonius,
11-
ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingVersion, WasiExecModel,
11+
ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingDigest,
12+
SymbolManglingVersion, WasiExecModel,
1213
};
1314
use rustc_session::lint::Level;
1415
use rustc_session::search_paths::SearchPath;
@@ -820,6 +821,7 @@ fn test_unstable_options_tracking_hash() {
820821
tracked!(split_lto_unit, Some(true));
821822
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
822823
tracked!(stack_protector, StackProtector::All);
824+
tracked!(symbol_mangling_digest, SymbolManglingDigest::new(true));
823825
tracked!(teach, true);
824826
tracked!(thinlto, Some(true));
825827
tracked!(thir_unsafeck, true);

compiler/rustc_session/src/config.rs

+132-3
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ use rustc_target::abi::Align;
2121
use rustc_target::spec::LinkSelfContainedComponents;
2222
use rustc_target::spec::{PanicStrategy, RelocModel, SanitizerSet, SplitDebuginfo};
2323
use rustc_target::spec::{Target, TargetTriple, TargetWarnings, TARGETS};
24+
use std::cmp::PartialEq;
2425
use std::collections::btree_map::{
2526
Iter as BTreeMapIter, Keys as BTreeMapKeysIter, Values as BTreeMapValuesIter,
2627
};
2728
use std::collections::{BTreeMap, BTreeSet};
2829
use std::ffi::OsStr;
2930
use std::fmt;
3031
use std::fs;
31-
use std::hash::Hash;
32+
use std::hash::{Hash, Hasher};
3233
use std::iter;
3334
use std::path::{Path, PathBuf};
3435
use std::str::{self, FromStr};
@@ -364,6 +365,127 @@ pub enum SymbolManglingVersion {
364365
V0,
365366
}
366367

368+
#[derive(Clone, Debug)]
369+
pub struct SymbolManglingDigest {
370+
fulls: FxHashSet<String>,
371+
prefixes: Vec<String>,
372+
salt: String,
373+
level: u8,
374+
excluded: bool,
375+
}
376+
377+
impl SymbolManglingDigest {
378+
pub fn new(excluded: bool) -> Self {
379+
Self {
380+
fulls: FxHashSet::default(),
381+
prefixes: Vec::new(),
382+
salt: String::new(),
383+
level: 2,
384+
excluded,
385+
}
386+
}
387+
388+
pub fn enabled(&self) -> bool {
389+
!self.fulls.is_empty() || !self.prefixes.is_empty() || self.excluded
390+
}
391+
392+
pub fn hasher_enable(&mut self, args: &str) -> bool {
393+
let cloned = self.clone();
394+
if self.hasher_reinit(args) {
395+
return true;
396+
}
397+
self.fulls = cloned.fulls;
398+
self.prefixes = cloned.prefixes;
399+
self.level = cloned.level;
400+
self.salt = cloned.salt;
401+
self.excluded = cloned.excluded;
402+
false
403+
}
404+
405+
pub fn hasher_args(&self) -> (&str, u8) {
406+
(&self.salt, self.level)
407+
}
408+
409+
pub fn hasher_contains(&self, val: &str) -> bool {
410+
if self.fulls.contains(val) {
411+
return self.excluded ^ true;
412+
}
413+
for prefix in self.prefixes.iter() {
414+
if val.starts_with(prefix) {
415+
return self.excluded ^ true;
416+
}
417+
}
418+
self.excluded ^ false
419+
}
420+
421+
fn hasher_reinit(&mut self, args: &str) -> bool {
422+
for arg in args.split(',') {
423+
let mut it = arg.split('=');
424+
let Some(name) = it.next() else {
425+
continue;
426+
};
427+
if let Some(value) = it.next() {
428+
match name {
429+
"salt" => self.salt = value.to_string(),
430+
"level" => match value {
431+
"1" => self.level = 1,
432+
"2" => self.level = 2,
433+
_ => return false,
434+
},
435+
"excluded" => match value {
436+
"true" => self.excluded = true,
437+
"false" => self.excluded = false,
438+
_ => return false,
439+
},
440+
_ => return false,
441+
}
442+
} else if name.ends_with("*") {
443+
let _ = self.prefixes.push(name[..name.len() - 1].to_string());
444+
} else {
445+
let _ = self.fulls.insert(name.to_string());
446+
}
447+
}
448+
true
449+
}
450+
451+
fn to_vec(&self) -> Vec<&str> {
452+
let mut ret = Vec::with_capacity(self.fulls.len() + self.prefixes.len());
453+
#[allow(rustc::potential_query_instability)]
454+
self.fulls.iter().for_each(|val| ret.push(val.as_str()));
455+
ret.sort();
456+
self.prefixes.iter().for_each(|val| ret.push(val.as_str()));
457+
ret[self.fulls.len()..].sort();
458+
ret
459+
}
460+
}
461+
462+
impl Hash for SymbolManglingDigest {
463+
fn hash<H>(&self, hasher: &mut H)
464+
where
465+
H: Hasher,
466+
{
467+
for val in self.to_vec() {
468+
val.hash(hasher);
469+
}
470+
self.fulls.len().hash(hasher);
471+
self.prefixes.len().hash(hasher);
472+
self.salt.hash(hasher);
473+
self.level.hash(hasher);
474+
self.excluded.hash(hasher);
475+
}
476+
}
477+
478+
impl PartialEq for SymbolManglingDigest {
479+
fn eq(&self, other: &Self) -> bool {
480+
self.excluded == other.excluded
481+
&& self.level == other.level
482+
&& self.salt == other.salt
483+
&& self.fulls.len() == other.fulls.len()
484+
&& self.prefixes.len() == other.prefixes.len()
485+
&& self.to_vec() == other.to_vec()
486+
}
487+
}
488+
367489
#[derive(Clone, Copy, Debug, PartialEq, Hash)]
368490
pub enum DebugInfo {
369491
None,
@@ -2763,6 +2885,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
27632885
);
27642886
}
27652887

2888+
if unstable_opts.symbol_mangling_digest.enabled() {
2889+
early_dcx.early_error(
2890+
"option `-C instrument-coverage` is not compatible with `-Z symbol_mangling_digest`"
2891+
);
2892+
}
2893+
27662894
// `-C instrument-coverage` implies `-C symbol-mangling-version=v0` - to ensure consistent
27672895
// and reversible name mangling. Note, LLVM coverage tools can analyze coverage over
27682896
// multiple runs, including some changes to source code; so mangled names must be consistent
@@ -3226,8 +3354,8 @@ pub(crate) mod dep_tracking {
32263354
ErrorOutputType, FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay,
32273355
LinkerPluginLto, LocationDetail, LtoCli, NextSolverConfig, OomStrategy, OptLevel,
32283356
OutFileName, OutputType, OutputTypes, Polonius, RemapPathScopeComponents, ResolveDocLinks,
3229-
SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, SymbolManglingVersion,
3230-
TrimmedDefPaths, WasiExecModel,
3357+
SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, SymbolManglingDigest,
3358+
SymbolManglingVersion, TrimmedDefPaths, WasiExecModel,
32313359
};
32323360
use crate::lint;
32333361
use crate::utils::NativeLib;
@@ -3319,6 +3447,7 @@ pub(crate) mod dep_tracking {
33193447
SplitDwarfKind,
33203448
StackProtector,
33213449
SwitchWithOptPath,
3450+
SymbolManglingDigest,
33223451
SymbolManglingVersion,
33233452
RemapPathScopeComponents,
33243453
SourceFileHashAlgorithm,

compiler/rustc_session/src/options.rs

+13
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ mod desc {
405405
pub const parse_switch_with_opt_path: &str =
406406
"an optional path to the profiling data output directory";
407407
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
408+
pub const parse_symbol_mangling_digest: &str = "configuing parameters for shortening symbol names: `<crate>[*],...[,excluded=true|false][,salt=value][,level=1|2]`";
408409
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
409410
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
410411
pub const parse_relocation_model: &str =
@@ -1206,6 +1207,16 @@ mod parse {
12061207
true
12071208
}
12081209

1210+
pub(crate) fn parse_symbol_mangling_digest(
1211+
slot: &mut SymbolManglingDigest,
1212+
v: Option<&str>,
1213+
) -> bool {
1214+
if let Some(v) = v {
1215+
return slot.hasher_enable(v);
1216+
}
1217+
true
1218+
}
1219+
12091220
pub(crate) fn parse_src_file_hash(
12101221
slot: &mut Option<SourceFileHashAlgorithm>,
12111222
v: Option<&str>,
@@ -1905,6 +1916,8 @@ written to standard error output)"),
19051916
"prefer dynamic linking to static linking for staticlibs (default: no)"),
19061917
strict_init_checks: bool = (false, parse_bool, [TRACKED],
19071918
"control if mem::uninitialized and mem::zeroed panic on more UB"),
1919+
symbol_mangling_digest: SymbolManglingDigest = (SymbolManglingDigest::new(false), parse_symbol_mangling_digest, [TRACKED],
1920+
"configuring parameters for shortening symbol names(default: disable)"),
19081921
#[rustc_lint_opt_deny_field_access("use `Session::teach` instead of this field")]
19091922
teach: bool = (false, parse_bool, [TRACKED],
19101923
"show extended diagnostic help (default: no)"),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
2+
use rustc_hir::def_id::DefId;
3+
use rustc_middle::ty::TyCtxt;
4+
5+
pub(super) fn generate<'tcx>(tcx: TyCtxt<'tcx>, symbol: String, def_id: DefId) -> String {
6+
let crate_name = tcx.crate_name(def_id.krate);
7+
let crate_name = crate_name.as_str();
8+
let symbol_mangling_digest = &tcx.sess.opts.unstable_opts.symbol_mangling_digest;
9+
if !symbol_mangling_digest.hasher_contains(crate_name) {
10+
return symbol;
11+
}
12+
13+
let (salt, level) = symbol_mangling_digest.hasher_args();
14+
15+
let hash = tcx.with_stable_hashing_context(|mut hcx| {
16+
let mut hasher = StableHasher::new();
17+
symbol.hash_stable(&mut hcx, &mut hasher);
18+
salt.hash_stable(&mut hcx, &mut hasher);
19+
hasher.finish::<Hash64>().as_u64()
20+
});
21+
22+
match level {
23+
1 => encode_1(tcx, crate_name, hash, def_id),
24+
_ => encode_2(tcx, crate_name, hash, def_id),
25+
}
26+
}
27+
28+
fn encode_1<'tcx>(tcx: TyCtxt<'tcx>, crate_name: &str, hash: u64, def_id: DefId) -> String {
29+
if let Some(item_name) = tcx.opt_item_name(def_id) {
30+
let item_name = item_name.as_str();
31+
format!(
32+
"_ZN{}{crate_name}.{item_name}.{:08x}E",
33+
crate_name.len() + item_name.len() + 10,
34+
hash & 0xffffffff
35+
)
36+
} else {
37+
encode_2(tcx, crate_name, hash, def_id)
38+
}
39+
}
40+
41+
fn encode_2<'tcx>(_tcx: TyCtxt<'tcx>, crate_name: &str, hash: u64, _def_id: DefId) -> String {
42+
format!("_ZN{}{crate_name}.{hash:016x}E", crate_name.len() + 17)
43+
}

compiler/rustc_symbol_mangling/src/lib.rs

+7
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ use rustc_middle::query::Providers;
111111
use rustc_middle::ty::{self, Instance, TyCtxt};
112112
use rustc_session::config::SymbolManglingVersion;
113113

114+
mod digest;
114115
mod legacy;
115116
mod v0;
116117

@@ -267,6 +268,12 @@ fn compute_symbol_name<'tcx>(
267268
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
268269
};
269270

271+
let symbol = if tcx.sess.opts.unstable_opts.symbol_mangling_digest.enabled() {
272+
digest::generate(tcx, symbol, def_id)
273+
} else {
274+
symbol
275+
};
276+
270277
debug_assert!(
271278
rustc_demangle::try_demangle(&symbol).is_ok(),
272279
"compute_symbol_name: `{symbol}` cannot be demangled"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# `symbol_mangling_digest`
2+
3+
This optimization option allows users to generate shorter symbol names for dylibs. At the expense of debugging conveniences such as the readability of symbol names, it eliminates the binary-size bottleneck encountered when using Rust to replace existing C/C++ functional modules in resource-constrained scenarios.
4+
5+
The new option is defined as follows: `-Z symbol_mangling_digest=<crate_name>[*],...[,excluded=<true|false>][,salt=<value>][,level=<1|2>]`.
6+
7+
- `crate_name[*],...`: Name of a crate. Multiple crate names are allowed. If a name carries the suffix `*`, it is treated as a prefix of the crate name. Together with `excluded`, these names determine the range of symbols to be optimized. Users must be very clear about the optimization range; regular-expression matching of crate names is not supported, because it would make the optimization range difficult to determine and could cause confusion. Defaults to an empty list.
8+
- `excluded=<true|false>`: If the value is `false`, only the names of symbols whose crate names are successfully matched are optimized. If the value is `true`, only the names of symbols whose crate names are not matched are optimized. The default value is `false`.
9+
- `salt=<value>`: User-specified salt value used in the hash calculation. The default value is the empty string.
10+
- `level=<1|2>`: Specifies the combination policy of the final symbol name. If the value is `1`, the final combination format is `{crate}.{item}.{hash32}`. If the value is `2`, the final combination format is `{crate}.{hash64}`. The default value is `2`.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
include ../tools.mk
2+
3+
all:
4+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo foo.rs
5+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo --extern foo=$(TMPDIR)/libfoo.so bar.rs
6+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo,salt=bar foo.rs
7+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo,salt=bar --extern foo=$(TMPDIR)/libfoo.so bar.rs
8+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo,salt=bar,level=1 foo.rs
9+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=foo,salt=bar,level=1 --extern foo=$(TMPDIR)/libfoo.so bar.rs
10+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=std,alloc,core,excluded=true,salt=bar,level=1 foo.rs
11+
$(RUSTC) -C prefer-dynamic -Z symbol_mangling_digest=std,alloc,core,excluded=true,salt=bar,level=1 --extern foo=$(TMPDIR)/libfoo.so bar.rs
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#![crate_type = "bin"]
2+
3+
extern crate foo;
4+
5+
fn main() {
6+
foo::foo();
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#![crate_type = "dylib"]
2+
pub fn foo() {
3+
println!("hello foo");
4+
}

0 commit comments

Comments
 (0)