Skip to content

Commit e5a01b9

Browse files
authored
Rollup merge of #104439 - ferrocene:pa-generate-copyright, r=pnkfelix
Add prototype to generate `COPYRIGHT` from REUSE metadata This PR adds a prototype to generate the `COPYRIGHT` file from the metadata gathered with REUSE. There are two new tools: * `src/tools/collect-license-metadata` invokes REUSE, parses its output and stores a concise JSON representation of the metadata in `src/etc/license-metadata.json`. * `src/tools/generate-copyright` parses the metadata generated above, (in the future will) gather crate dependencies metadata, and renders the `COPYRIGHT.md` file. Note that since the contents of those files are currently incorrect, rather than outputting in the paths above, the files will be stored in `build/` and not committed. This will be changed once we're confident about the metadata. Eventually, `src/etc/license-metadata.json` will be committed into the repository and verified to be up to date by CI (similar to our GitHub Actions configuration), to avoid having people install REUSE on their local machine in most cases. You can see the (incorrect) generated files in https://gist.github.com./pietroalbini/3f3f22b6f9cc8533abf7494b6a50cf97. r? `@pnkfelix`
2 parents 9db224f + f8a7123 commit e5a01b9

File tree

15 files changed

+719
-0
lines changed

15 files changed

+719
-0
lines changed

Cargo.lock

+77
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,7 @@ dependencies = [
581581
"libc",
582582
"num-integer",
583583
"num-traits",
584+
"serde",
584585
"time",
585586
"winapi",
586587
]
@@ -730,6 +731,16 @@ dependencies = [
730731
"rustc-semver",
731732
]
732733

734+
[[package]]
735+
name = "collect-license-metadata"
736+
version = "0.1.0"
737+
dependencies = [
738+
"anyhow",
739+
"serde",
740+
"serde_json",
741+
"spdx-rs",
742+
]
743+
733744
[[package]]
734745
name = "color-eyre"
735746
version = "0.6.2"
@@ -1552,6 +1563,15 @@ dependencies = [
15521563
"termcolor",
15531564
]
15541565

1566+
[[package]]
1567+
name = "generate-copyright"
1568+
version = "0.1.0"
1569+
dependencies = [
1570+
"anyhow",
1571+
"serde",
1572+
"serde_json",
1573+
]
1574+
15551575
[[package]]
15561576
name = "generic-array"
15571577
version = "0.14.4"
@@ -4864,6 +4884,35 @@ dependencies = [
48644884
"winapi",
48654885
]
48664886

4887+
[[package]]
4888+
name = "spdx-expression"
4889+
version = "0.5.2"
4890+
source = "registry+https://github.com./rust-lang/crates.io-index"
4891+
checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77"
4892+
dependencies = [
4893+
"nom",
4894+
"serde",
4895+
"thiserror",
4896+
"tracing",
4897+
]
4898+
4899+
[[package]]
4900+
name = "spdx-rs"
4901+
version = "0.5.1"
4902+
source = "registry+https://github.com./rust-lang/crates.io-index"
4903+
checksum = "b3c02f6eb7e7b4100c272f685a9ccaccaab302324e8c7ec3e2ee72340fb29ff3"
4904+
dependencies = [
4905+
"chrono",
4906+
"log",
4907+
"nom",
4908+
"serde",
4909+
"spdx-expression",
4910+
"strum",
4911+
"strum_macros",
4912+
"thiserror",
4913+
"uuid",
4914+
]
4915+
48674916
[[package]]
48684917
name = "stable_deref_trait"
48694918
version = "1.2.0"
@@ -4967,6 +5016,25 @@ version = "0.10.0"
49675016
source = "registry+https://github.com./rust-lang/crates.io-index"
49685017
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
49695018

5019+
[[package]]
5020+
name = "strum"
5021+
version = "0.24.1"
5022+
source = "registry+https://github.com./rust-lang/crates.io-index"
5023+
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
5024+
5025+
[[package]]
5026+
name = "strum_macros"
5027+
version = "0.24.3"
5028+
source = "registry+https://github.com./rust-lang/crates.io-index"
5029+
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
5030+
dependencies = [
5031+
"heck",
5032+
"proc-macro2",
5033+
"quote",
5034+
"rustversion",
5035+
"syn",
5036+
]
5037+
49705038
[[package]]
49715039
name = "syn"
49725040
version = "1.0.102"
@@ -5596,6 +5664,15 @@ version = "0.1.1"
55965664
source = "registry+https://github.com./rust-lang/crates.io-index"
55975665
checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d"
55985666

5667+
[[package]]
5668+
name = "uuid"
5669+
version = "0.8.2"
5670+
source = "registry+https://github.com./rust-lang/crates.io-index"
5671+
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
5672+
dependencies = [
5673+
"getrandom 0.2.0",
5674+
]
5675+
55995676
[[package]]
56005677
name = "valuable"
56015678
version = "0.1.0"

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ members = [
3939
"src/tools/bump-stage0",
4040
"src/tools/replace-version-placeholder",
4141
"src/tools/lld-wrapper",
42+
"src/tools/collect-license-metadata",
43+
"src/tools/generate-copyright",
4244
]
4345

4446
exclude = [

config.toml.example

+10
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,16 @@ changelog-seen = 2
255255
# Defaults to the Python interpreter used to execute x.py
256256
#python = "python"
257257

258+
# The path to the REUSE executable to use. Note that REUSE is not required in
259+
# most cases, as our tooling relies on a cached (and shrunk) copy of the
260+
# REUSE output present in the git repository and in our source tarballs.
261+
#
262+
# REUSE is only needed if your changes caused the overall licensing of the
263+
# repository to change, and the cached copy has to be regenerated.
264+
#
265+
# Defaults to the "reuse" command in the system path.
266+
#reuse = "reuse"
267+
258268
# Force Cargo to check that Cargo.lock describes the precise dependency
259269
# set that all the Cargo.toml files create, instead of updating it.
260270
#locked-deps = false

src/bootstrap/builder.rs

+2
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,8 @@ impl<'a> Builder<'a> {
754754
run::BumpStage0,
755755
run::ReplaceVersionPlaceholder,
756756
run::Miri,
757+
run::CollectLicenseMetadata,
758+
run::GenerateCopyright,
757759
),
758760
// These commands either don't use paths, or they're special-cased in Build::build()
759761
Kind::Clean | Kind::Format | Kind::Setup => vec![],

src/bootstrap/config.rs

+3
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ pub struct Config {
213213
pub npm: Option<PathBuf>,
214214
pub gdb: Option<PathBuf>,
215215
pub python: Option<PathBuf>,
216+
pub reuse: Option<PathBuf>,
216217
pub cargo_native_static: bool,
217218
pub configure_args: Vec<String>,
218219

@@ -611,6 +612,7 @@ define_config! {
611612
nodejs: Option<String> = "nodejs",
612613
npm: Option<String> = "npm",
613614
python: Option<String> = "python",
615+
reuse: Option<String> = "reuse",
614616
locked_deps: Option<bool> = "locked-deps",
615617
vendor: Option<bool> = "vendor",
616618
full_bootstrap: Option<bool> = "full-bootstrap",
@@ -1004,6 +1006,7 @@ impl Config {
10041006
config.npm = build.npm.map(PathBuf::from);
10051007
config.gdb = build.gdb.map(PathBuf::from);
10061008
config.python = build.python.map(PathBuf::from);
1009+
config.reuse = build.reuse.map(PathBuf::from);
10071010
config.submodules = build.submodules;
10081011
set(&mut config.low_priority, build.low_priority);
10091012
set(&mut config.compiler_docs, build.compiler_docs);

src/bootstrap/run.rs

+63
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::path::PathBuf;
12
use std::process::Command;
23

34
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
@@ -189,3 +190,65 @@ impl Step for Miri {
189190
builder.run(&mut miri);
190191
}
191192
}
193+
194+
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
195+
pub struct CollectLicenseMetadata;
196+
197+
impl Step for CollectLicenseMetadata {
198+
type Output = PathBuf;
199+
const ONLY_HOSTS: bool = true;
200+
201+
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
202+
run.path("src/tools/collect-license-metadata")
203+
}
204+
205+
fn make_run(run: RunConfig<'_>) {
206+
run.builder.ensure(CollectLicenseMetadata);
207+
}
208+
209+
fn run(self, builder: &Builder<'_>) -> Self::Output {
210+
let Some(reuse) = &builder.config.reuse else {
211+
panic!("REUSE is required to collect the license metadata");
212+
};
213+
214+
// Temporary location, it will be moved to src/etc once it's accurate.
215+
let dest = builder.out.join("license-metadata.json");
216+
217+
let mut cmd = builder.tool_cmd(Tool::CollectLicenseMetadata);
218+
cmd.env("REUSE_EXE", reuse);
219+
cmd.env("DEST", &dest);
220+
builder.run(&mut cmd);
221+
222+
dest
223+
}
224+
}
225+
226+
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
227+
pub struct GenerateCopyright;
228+
229+
impl Step for GenerateCopyright {
230+
type Output = PathBuf;
231+
const ONLY_HOSTS: bool = true;
232+
233+
fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
234+
run.path("src/tools/generate-copyright")
235+
}
236+
237+
fn make_run(run: RunConfig<'_>) {
238+
run.builder.ensure(GenerateCopyright);
239+
}
240+
241+
fn run(self, builder: &Builder<'_>) -> Self::Output {
242+
let license_metadata = builder.ensure(CollectLicenseMetadata);
243+
244+
// Temporary location, it will be moved to the proper one once it's accurate.
245+
let dest = builder.out.join("COPYRIGHT.md");
246+
247+
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
248+
cmd.env("LICENSE_METADATA", &license_metadata);
249+
cmd.env("DEST", &dest);
250+
builder.run(&mut cmd);
251+
252+
dest
253+
}
254+
}

src/bootstrap/sanity.rs

+7
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ than building it.
140140
.map(|p| cmd_finder.must_have(p))
141141
.or_else(|| cmd_finder.maybe_have("gdb"));
142142

143+
build.config.reuse = build
144+
.config
145+
.reuse
146+
.take()
147+
.map(|p| cmd_finder.must_have(p))
148+
.or_else(|| cmd_finder.maybe_have("reuse"));
149+
143150
// We're gonna build some custom C code here and there, host triples
144151
// also build some C++ shims for LLVM so we need a C++ compiler.
145152
for target in &build.targets {

src/bootstrap/tool.rs

+2
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ bootstrap_tool!(
380380
HtmlChecker, "src/tools/html-checker", "html-checker";
381381
BumpStage0, "src/tools/bump-stage0", "bump-stage0";
382382
ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder";
383+
CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata";
384+
GenerateCopyright, "src/tools/generate-copyright", "generate-copyright";
383385
);
384386

385387
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[package]
2+
name = "collect-license-metadata"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
anyhow = "1.0.65"
8+
serde = { version = "1.0.147", features = ["derive"] }
9+
serde_json = "1.0.85"
10+
spdx-rs = "0.5.1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
use std::collections::HashMap;
2+
3+
/// Leading markers that are removed from the start of every copyright statement before
/// licenses are compared, so that only the text following them is kept.
const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"];
4+
5+
pub(crate) struct LicensesInterner {
6+
by_id: Vec<License>,
7+
by_struct: HashMap<License, usize>,
8+
}
9+
10+
impl LicensesInterner {
11+
pub(crate) fn new() -> Self {
12+
LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
13+
}
14+
15+
pub(crate) fn intern(&mut self, mut license: License) -> LicenseId {
16+
license.simplify();
17+
if let Some(id) = self.by_struct.get(&license) {
18+
LicenseId(*id)
19+
} else {
20+
let id = self.by_id.len();
21+
self.by_id.push(license.clone());
22+
self.by_struct.insert(license, id);
23+
LicenseId(id)
24+
}
25+
}
26+
27+
pub(crate) fn resolve(&self, id: LicenseId) -> &License {
28+
&self.by_id[id.0]
29+
}
30+
}
31+
32+
/// Handle to a license stored in a `LicensesInterner`.
///
/// Wraps the index of the license inside the interner; because of `serde(transparent)` it is
/// serialized as the bare wrapped integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)]
33+
#[serde(transparent)]
34+
pub(crate) struct LicenseId(usize);
35+
36+
#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)]
37+
pub(crate) struct License {
38+
pub(crate) spdx: String,
39+
pub(crate) copyright: Vec<String>,
40+
}
41+
42+
impl License {
43+
fn simplify(&mut self) {
44+
self.remove_copyright_prefixes();
45+
self.copyright.sort();
46+
self.copyright.dedup();
47+
}
48+
49+
fn remove_copyright_prefixes(&mut self) {
50+
for copyright in &mut self.copyright {
51+
let mut stripped = copyright.trim();
52+
let mut previous_stripped;
53+
loop {
54+
previous_stripped = stripped;
55+
for pattern in COPYRIGHT_PREFIXES {
56+
stripped = stripped.trim_start_matches(pattern).trim_start();
57+
}
58+
if stripped == previous_stripped {
59+
break;
60+
}
61+
}
62+
*copyright = stripped.into();
63+
}
64+
}
65+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
mod licenses;
2+
mod path_tree;
3+
mod reuse;
4+
5+
use crate::licenses::LicensesInterner;
6+
use anyhow::Error;
7+
use std::path::PathBuf;
8+
9+
fn main() -> Result<(), Error> {
10+
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
11+
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
12+
13+
let mut interner = LicensesInterner::new();
14+
let paths = crate::reuse::collect(&reuse_exe, &mut interner)?;
15+
16+
let mut tree = crate::path_tree::build(paths);
17+
tree.simplify();
18+
19+
if let Some(parent) = dest.parent() {
20+
std::fs::create_dir_all(parent)?;
21+
}
22+
std::fs::write(
23+
&dest,
24+
&serde_json::to_vec_pretty(&serde_json::json!({
25+
"files": crate::path_tree::expand_interned_licenses(tree, &interner),
26+
}))?,
27+
)?;
28+
29+
Ok(())
30+
}

0 commit comments

Comments
 (0)