xtask_lib/tasks/ci/
clippy_report.rs

1//! `cargo xtask ci clippy-report` — deduplicate clippy JSON artifacts and
2//! report via reviewdog.
3//!
4//! ## How it fits into CI
5//!
6//! The `clippy` matrix job runs `cargo xtask clippy --save-json <file>` for
7//! every feature label on `ubuntu-latest`, uploading each file as a GitHub
8//! Actions artifact.  After the matrix completes, the `clippy-report` job
9//! downloads all artifacts into a single directory and calls:
10//!
11//! ```text
12//! cargo xtask ci clippy-report --artifacts-dir <dir>
13//! ```
14//!
15//! This command reads every `.json` file in the directory, deduplicates
16//! diagnostics across feature labels, and pipes the unique set through
17//! `reviewdog` once — so each warning appears as exactly one PR review
18//! comment regardless of how many feature combinations triggered it.
19//!
20//! ## Deduplication key
21//!
22//! Two diagnostics are considered identical when they share the same
23//! `(file_name, line_start, message)` triple taken from the primary span of the
24//! clippy JSON message.  Only diagnostic messages (compiler-message with spans)
25//! are included; non-diagnostic JSON (build artifacts, build-finished, etc.)
26//! is filtered out.
27//!
28//! ## Reviewdog
29//!
30//! Uses the `github-pr-review` reporter.  Requires:
31//! - `reviewdog` on `PATH`
32//! - `REVIEWDOG_GITHUB_API_TOKEN` set to a token with `pull-requests: write`
33
34use std::collections::HashSet;
35use std::fs;
36use std::io::{BufRead, BufReader, Write};
37use std::path::{Path, PathBuf};
38use std::process::{Command, Stdio};
39
40use anyhow::{Context, Result, bail};
41use clap::Args;
42use serde_json::Value;
43
/// Arguments for `cargo xtask ci clippy-report`.
#[derive(Debug, Args)]
pub struct ClippyReportArgs {
    /// Directory containing `.json` artifact files produced by
    /// `cargo xtask clippy --save-json`.
    //
    // NOTE: clap's derive turns the `///` comment on a field into that flag's
    // help text, so rewording the lines above changes user-facing output.
    // `#[arg(long)]` exposes this field as the `--artifacts-dir` flag. Only
    // entries directly inside the directory are considered (no recursion).
    #[arg(long)]
    pub artifacts_dir: PathBuf,
}
52
53/// Reads all `.json` files in `artifacts_dir`, deduplicates diagnostics, and
54/// pipes the unique set through `reviewdog` using the `github-pr-review`
55/// reporter.
56///
57/// # Errors
58///
59/// Returns an error if any artifact file cannot be read, if `reviewdog`
60/// cannot be spawned, or if `reviewdog` exits non-zero.
61pub fn run(args: ClippyReportArgs) -> Result<()> {
62    let lines = collect_unique_lines(&args.artifacts_dir)?;
63
64    println!(
65        "clippy-report: {} unique diagnostics across all feature labels",
66        lines.len()
67    );
68
69    pipe_to_reviewdog(&lines)
70}
71
72/// Collects all JSON lines from every `.json` file in `dir`, returning only
73/// the unique ones (deduplicated by primary span + message text).
74///
75/// Only diagnostic messages (compiler-message with spans) are included.
76/// Non-diagnostic JSON (build artifacts, build-finished, etc.) is filtered out.
77///
78/// # Errors
79///
80/// Returns an error if the directory cannot be read or any file cannot be
81/// opened.
82fn collect_unique_lines(dir: &Path) -> Result<Vec<String>> {
83    let mut seen: HashSet<DiagnosticKey> = HashSet::new();
84    let mut unique: Vec<String> = Vec::new();
85
86    for path in json_files(dir)? {
87        let file =
88            fs::File::open(&path).with_context(|| format!("failed to open {}", path.display()))?;
89
90        for line in BufReader::new(file).lines() {
91            let line = line.with_context(|| format!("failed to read {}", path.display()))?;
92
93            if line.trim().is_empty() {
94                continue;
95            }
96
97            let key = diagnostic_key(&line);
98
99            if let DiagnosticKey::Spanned { .. } = key
100                && seen.insert(key)
101            {
102                unique.push(line);
103            }
104        }
105    }
106
107    Ok(unique)
108}
109
110/// Returns sorted paths of every `.json` file directly inside `dir`.
111///
112/// Sorting ensures deterministic ordering across runs.
113///
114/// # Errors
115///
116/// Returns an error if the directory cannot be read.
117fn json_files(dir: &Path) -> Result<Vec<PathBuf>> {
118    let mut paths: Vec<PathBuf> = fs::read_dir(dir)
119        .with_context(|| format!("failed to read directory {}", dir.display()))?
120        .filter_map(|entry| {
121            let entry = entry.ok()?;
122            let path = entry.path();
123            if path.extension().and_then(|e| e.to_str()) == Some("json") {
124                Some(path)
125            } else {
126                None
127            }
128        })
129        .collect();
130
131    paths.sort();
132
133    Ok(paths)
134}
135
/// A key that uniquely identifies a clippy diagnostic for deduplication.
///
/// Two diagnostics with the same file, line, and message text are considered
/// identical even if they were produced under different feature combinations.
/// Non-diagnostic JSON (e.g., build artifacts) returns a Raw key but is filtered
/// out during collection since only Spanned keys are forwarded to reviewdog.
#[derive(Debug, PartialEq, Eq, Hash)]
enum DiagnosticKey {
    /// A diagnostic whose primary span and message text were all present.
    Spanned {
        /// `file_name` of the primary span.
        file: String,
        /// `line_start` of the primary span.
        line: u64,
        /// Text found at `/message/message` in the JSON line.
        message: String,
    },
    /// Fallback for a line that is not valid JSON, has no span, or lacks one of
    /// the `Spanned` fields; carries the original line verbatim.
    Raw(String),
}
151
152/// Extracts a primary span from clippy JSON diagnostic message.
153///
154/// The spans array can have multiple entries. The primary span is identified
155/// by having `is_primary: true`. If no span has this flag, returns the first span.
156fn find_primary_span(message: &Value) -> Option<&Value> {
157    let spans = message.pointer("/message/spans")?.as_array()?;
158
159    for span in spans {
160        if span.get("is_primary").and_then(Value::as_bool) == Some(true) {
161            return Some(span);
162        }
163    }
164
165    spans.first()
166}
167
168/// Extracts a [`DiagnosticKey`] from a raw clippy JSON line.
169fn diagnostic_key(line: &str) -> DiagnosticKey {
170    let Ok(value) = serde_json::from_str::<Value>(line) else {
171        return DiagnosticKey::Raw(line.to_owned());
172    };
173
174    let Some(span) = find_primary_span(&value) else {
175        return DiagnosticKey::Raw(line.to_owned());
176    };
177
178    let file = span
179        .get("file_name")
180        .and_then(Value::as_str)
181        .map(str::to_owned);
182    let line_start = span.get("line_start").and_then(Value::as_u64);
183    let message = value
184        .pointer("/message/message")
185        .and_then(Value::as_str)
186        .map(str::to_owned);
187
188    match (file, line_start, message) {
189        (Some(file), Some(line), Some(message)) => DiagnosticKey::Spanned {
190            file,
191            line,
192            message,
193        },
194        _ => DiagnosticKey::Raw(line.to_owned()),
195    }
196}
197
198/// Converts a clippy JSON line to short format for reviewdog.
199///
200/// # Panics
201///
202/// Panics if the JSON line does not have the expected diagnostic structure
203/// (i.e., missing primary span or `/message/message`).
204/// Non-diagnostic JSON lines (like `build-finished`) should be filtered out
205/// before calling this function.
206fn json_to_short(line: &str) -> String {
207    let value = serde_json::from_str::<Value>(line).expect("failed to parse JSON line");
208
209    let span = find_primary_span(&value).expect("clippy JSON should have a primary span");
210
211    let file = span
212        .get("file_name")
213        .and_then(Value::as_str)
214        .expect("primary span should have file_name");
215
216    let line_start = span
217        .get("line_start")
218        .and_then(Value::as_u64)
219        .expect("primary span should have line_start");
220
221    let column_start = span
222        .get("column_start")
223        .and_then(Value::as_u64)
224        .unwrap_or(1);
225
226    let level = value
227        .pointer("/message/level")
228        .and_then(Value::as_str)
229        .unwrap_or("warning");
230
231    let message = value
232        .pointer("/message/message")
233        .and_then(Value::as_str)
234        .expect("clippy JSON should have /message/message");
235
236    let code = value.pointer("/message/code/code").and_then(Value::as_str);
237
238    if let Some(code) = code {
239        format!("{file}:{line_start}:{column_start}: {level}: {message} [{code}]")
240    } else {
241        format!("{file}:{line_start}:{column_start}: {level}: {message}")
242    }
243}
244
245/// Spawns `reviewdog` with the `github-pr-review` reporter and writes `lines`
246/// to its stdin.
247///
248/// JSON lines are converted to short format for compatibility with reviewdog's
249/// clippy parser.
250///
251/// # Errors
252///
253/// Returns an error if `reviewdog` cannot be spawned or exits non-zero.
254fn pipe_to_reviewdog(lines: &[String]) -> Result<()> {
255    let reviewdog_args = [
256        "-f=clippy",
257        "-filter-mode=added",
258        "-fail-on-error=false",
259        "-reporter=github-pr-review",
260    ];
261
262    println!("$ reviewdog {}", reviewdog_args.join(" "));
263
264    let mut reviewdog = Command::new("reviewdog")
265        .args(reviewdog_args)
266        .stdin(Stdio::piped())
267        .stdout(Stdio::inherit())
268        .stderr(Stdio::inherit())
269        .spawn()
270        .context("failed to spawn `reviewdog` — is it installed and on PATH?")?;
271
272    let mut stdin = reviewdog
273        .stdin
274        .take()
275        .context("reviewdog stdin not captured")?;
276
277    for line in lines {
278        let short_line = json_to_short(line);
279        writeln!(stdin, "{short_line}").context("failed to write to reviewdog stdin")?;
280    }
281
282    drop(stdin);
283
284    let status = reviewdog.wait().context("failed to wait for `reviewdog`")?;
285
286    if !status.success() {
287        bail!("`reviewdog` exited with status {status}");
288    }
289
290    Ok(())
291}
292
// Unit tests for artifact collection/deduplication and for the short-format
// conversion. None of them spawn `reviewdog`.
#[cfg(test)]
mod tests {
    use std::io::Write;

    use tempfile::tempdir;

    use super::*;

    /// Creates `dir/name` and writes each element of `lines` followed by a
    /// newline. Panics on any I/O failure.
    fn write_artifact(dir: &Path, name: &str, lines: &[&str]) {
        let path = dir.join(name);
        let mut f = fs::File::create(path).unwrap();
        for line in lines {
            writeln!(f, "{line}").unwrap();
        }
    }

    /// Builds a minimal compiler-message JSON line with a single span.
    ///
    /// The span has no `is_primary` flag, so key extraction relies on the
    /// "first span" fallback of `find_primary_span`.
    fn spanned_line(file: &str, line: u64, message: &str) -> String {
        serde_json::json!({
            "reason": "compiler-message",
            "message": {
                "message": message,
                "spans": [{ "file_name": file, "line_start": line }]
            }
        })
        .to_string()
    }

    // The same diagnostic appearing in two artifact files is reported once.
    #[test]
    fn deduplicates_identical_diagnostics_across_files() {
        let dir = tempdir().unwrap();
        let warning = spanned_line("src/lib.rs", 10, "unused variable");

        write_artifact(dir.path(), "default.json", &[&warning]);
        write_artifact(dir.path(), "test.json", &[&warning]);

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 1);
    }

    // Same file and line but different message text: both are kept.
    #[test]
    fn keeps_diagnostics_with_different_messages() {
        let dir = tempdir().unwrap();

        write_artifact(
            dir.path(),
            "a.json",
            &[&spanned_line("src/lib.rs", 10, "unused variable")],
        );
        write_artifact(
            dir.path(),
            "b.json",
            &[&spanned_line("src/lib.rs", 10, "dead code")],
        );

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 2);
    }

    // Same file and message but different line numbers: both are kept.
    #[test]
    fn keeps_diagnostics_with_different_lines() {
        let dir = tempdir().unwrap();

        write_artifact(
            dir.path(),
            "a.json",
            &[&spanned_line("src/lib.rs", 10, "unused variable")],
        );
        write_artifact(
            dir.path(),
            "b.json",
            &[&spanned_line("src/lib.rs", 20, "unused variable")],
        );

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 2);
    }

    // Same line and message but different source files: both are kept.
    #[test]
    fn keeps_diagnostics_with_different_files() {
        let dir = tempdir().unwrap();

        write_artifact(
            dir.path(),
            "a.json",
            &[&spanned_line("src/lib.rs", 10, "unused variable")],
        );
        write_artifact(
            dir.path(),
            "b.json",
            &[&spanned_line("src/main.rs", 10, "unused variable")],
        );

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 0 + 2);
    }

    // Lines that fail to parse as JSON get a `Raw` key and are dropped.
    #[test]
    fn non_json_lines_are_filtered_out() {
        let dir = tempdir().unwrap();

        write_artifact(dir.path(), "a.json", &["not json"]);
        write_artifact(dir.path(), "b.json", &["not json"]);

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 0);
    }

    // Valid JSON without any spans (e.g. `build-finished`) is dropped as well.
    #[test]
    fn spanless_json_is_filtered_out() {
        let dir = tempdir().unwrap();
        let spanless = serde_json::json!({ "reason": "build-finished" }).to_string();

        write_artifact(dir.path(), "a.json", &[&spanless]);
        write_artifact(dir.path(), "b.json", &[&spanless]);

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 0);
    }

    // Empty and whitespace-only lines are skipped before any JSON parsing.
    #[test]
    fn empty_lines_are_skipped() {
        let dir = tempdir().unwrap();

        write_artifact(dir.path(), "a.json", &["", "  ", ""]);

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert!(lines.is_empty());
    }

    // Files whose extension is not `json` are never opened.
    #[test]
    fn ignores_non_json_files_in_directory() {
        let dir = tempdir().unwrap();
        let warning = spanned_line("src/lib.rs", 1, "unused");

        write_artifact(dir.path(), "default.json", &[&warning]);

        fs::write(dir.path().join("notes.txt"), "ignore me").unwrap();

        let lines = collect_unique_lines(dir.path()).unwrap();

        assert_eq!(lines.len(), 1);
    }

    // Full diagnostic with a code: expects `file:line:col: level: message [code]`.
    #[test]
    fn json_to_short_converts_clippy_json_to_short_format() {
        let json_line = serde_json::json!({
            "reason": "compiler-message",
            "message": {
                "message": "deref which would be done by auto-deref",
                "level": "warning",
                "spans": [
                    {
                        "file_name": "crates/core/src/library/db/mod.rs",
                        "line_start": 895,
                        "column_start": 32,
                        "line_end": 895,
                        "column_end": 36,
                        "text": "    let y: &str = &x;"
                    }
                ],
                "code": {
                    "code": "clippy::ptr_arg",
                    "explanation": "..."
                }
            }
        })
        .to_string();

        let result = json_to_short(&json_line);

        assert_eq!(
            result,
            "crates/core/src/library/db/mod.rs:895:32: warning: deref which would be done by auto-deref [clippy::ptr_arg]"
        );
    }

    // Without a `code` object the trailing `[code]` suffix is omitted.
    #[test]
    fn json_to_short_handles_missing_code_field() {
        let json_line = serde_json::json!({
            "reason": "compiler-message",
            "message": {
                "message": "unused variable: `x`",
                "level": "warning",
                "spans": [
                    {
                        "file_name": "src/lib.rs",
                        "line_start": 10,
                        "column_start": 5,
                        "line_end": 10,
                        "column_end": 6,
                        "text": "let x = 1;"
                    }
                ]
            }
        })
        .to_string();

        let result = json_to_short(&json_line);

        assert_eq!(result, "src/lib.rs:10:5: warning: unused variable: `x`");
    }

    // The `level` field is passed through verbatim (here: `error`).
    #[test]
    fn json_to_short_handles_error_level() {
        let json_line = serde_json::json!({
            "reason": "compiler-message",
            "message": {
                "message": "expected `,`, found `{`",
                "level": "error",
                "spans": [
                    {
                        "file_name": "src/main.rs",
                        "line_start": 1,
                        "column_start": 1,
                        "line_end": 1,
                        "column_end": 1,
                        "text": "fn main() {"
                    }
                ],
                "code": {
                    "code": "E0789"
                }
            }
        })
        .to_string();

        let result = json_to_short(&json_line);

        assert_eq!(
            result,
            "src/main.rs:1:1: error: expected `,`, found `{` [E0789]"
        );
    }
}
533}