xtask_lib/tasks/util/
thirdparty.rs

1//! Thirdparty library download and build helpers.
2//!
3//! Library source URLs are defined as constants so Renovate can track them.
4//!
5//! ## Download
6//!
7//! [`download_libraries`] fetches each library's source.  Most libraries are
8//! downloaded as tarballs and extracted with the top-level directory stripped.
9//! Libraries that use git submodules (currently freetype2) are cloned with
10//! `--recurse-submodules` so submodule contents are always present.  If the
11//! cloned source ships an `autogen.sh` script, it is run immediately after
12//! cloning to generate the `configure` script that `build-kobo.sh` expects.
13//!
14//! ## Build
15//!
16//! [`build_libraries`] iterates over the packages in dependency order, applies
17//! `kobo.patch` if present, then invokes each library's own `build-kobo.sh`
18//! script.
19
20use std::path::Path;
21
22use anyhow::{Context, Result, bail};
23
24use super::{cmd, fs, http};
25
/// Unversioned file names of every thirdparty shared library.
///
/// Each entry is the bare `lib<name>.so` name.  The actual SONAME is resolved
/// by [`soname`], which reads it from the binary with `readelf -d`, because
/// upstream libraries do not follow a consistent ABI versioning scheme.
pub const SONAMES: &[&str] = &[
    "libz.so",
    "libbz2.so",
    "libpng16.so",
    "libjpeg.so",
    "libopenjp2.so",
    "libjbig2dec.so",
    "libfreetype.so",
    "libharfbuzz.so",
    "libgumbo.so",
    "libdjvulibre.so",
    "libmupdf.so",
];
43
44/// Returns the SONAME of `lib` in `libs_dir`.
45///
46/// When the library file exists, `readelf -d` is used to extract the SONAME
47/// from the binary. When only a versioned file exists (e.g. `libz.so.1.2.13`
48/// without `libz.so`), the versioned filename is returned directly.
49///
50/// # Errors
51///
52/// Returns an error if `readelf` fails or the SONAME cannot be determined.
53pub fn soname(libs_dir: &Path, lib: &str) -> Result<String> {
54    let so_path = libs_dir.join(lib);
55    if so_path.exists() {
56        let so_path_str = so_path
57            .to_str()
58            .with_context(|| format!("shared library path is not valid UTF-8: {so_path:?}"))?;
59        let output = cmd::output("readelf", &["-d", so_path_str], libs_dir, &[])?;
60        let soname = output
61            .lines()
62            .find(|line| line.contains("SONAME"))
63            .and_then(|line| line.split_whitespace().last())
64            .map(|token| {
65                token
66                    .trim_start_matches('[')
67                    .trim_end_matches(']')
68                    .to_string()
69            })
70            .with_context(|| format!("failed to find SONAME in readelf output for {lib}"))?;
71        Ok(soname)
72    } else {
73        let prefix = format!("{}.", lib);
74        let matching: Vec<_> = std::fs::read_dir(libs_dir)?
75            .filter_map(|e| e.ok())
76            .filter(|e| e.file_name().to_string_lossy().starts_with(&prefix))
77            .collect();
78
79        match matching.len() {
80            1 => Ok(matching[0].file_name().to_string_lossy().into_owned()),
81            0 => bail!(
82                "no versioned file found for {} in {}",
83                lib,
84                libs_dir.display()
85            ),
86            _ => bail!(
87                "multiple versioned files found for {} in {}",
88                lib,
89                libs_dir.display()
90            ),
91        }
92    }
93}
94
/// zlib release version.
///
/// The `*_VERSION` constants in this file are the single source of truth for
/// their library: [`library_source`] derives the download URL (or git tag)
/// from them at call time.  A Renovate regex manager in `renovate.json`
/// matches these constants and opens PRs when new releases are available.
///
/// # TODO
///
/// bzip2, jbig2dec and mupdf are still pinned through full `*_URL` constants.
/// Add Renovate regex managers for them so that all thirdparty dependency
/// updates are tracked automatically.  openjpeg, harfbuzz and gumbo are
/// already tracked via their `*_VERSION` constants.
pub const ZLIB_VERSION: &str = "1.3.2";
/// libpng release version.
pub const LIBPNG_VERSION: &str = "1.6.53";
/// DjVuLibre release version.
pub const DJVULIBRE_VERSION: &str = "3.5.29";
/// IJG libjpeg version tracked via the libjpeg-turbo `jpeg-<version>` tag mirror.
pub const LIBJPEG_VERSION: &str = "10";

/// Full archive URL for bzip2 (not yet split into a version constant).
pub const BZIP2_URL: &str = "https://sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz";
/// OpenJPEG version; [`library_source`] interpolates it into the archive URL.
pub const OPENJPEG_VERSION: &str = "2.5.4";
/// Full archive URL for jbig2dec (not yet split into a version constant).
pub const JBIG2DEC_URL: &str =
    "https://github.com/ArtifexSoftware/jbig2dec/releases/download/0.20/jbig2dec-0.20.tar.gz";
/// FreeType version, cloned from `freetype/freetype` at tag `VER-X-Y-Z`.
///
/// Tracked by Renovate via the `github-tags` datasource with
/// `extractVersionTemplate: "^VER-(?<version>.+)$"`.  freetype2 is cloned
/// rather than downloaded as a tarball because its build system requires the
/// `nyorain/dlg` git submodule, which is absent from archive tarballs.
pub const FREETYPE2_VERSION: &str = "2.14.1";
/// HarfBuzz version; [`library_source`] interpolates it into the archive URL.
pub const HARFBUZZ_VERSION: &str = "12.3.2";
/// Gumbo version; [`library_source`] interpolates it into the archive URL.
pub const GUMBO_VERSION: &str = "0.10.1";

/// Full archive URL for MuPDF (not yet split into a version constant).
pub const MUPDF_URL: &str = "https://casper.mupdf.com/downloads/archive/mupdf-1.27.0-source.tar.gz";
133
/// Every thirdparty library, listed in the order it must be built so that
/// dependencies come first.
///
/// This is the default target list for [`download_libraries`] and
/// [`build_libraries`] when they are called with an empty `names` slice.
const LIBRARY_NAMES: &[&str] = &[
    "zlib",
    "bzip2",
    "libpng",
    "libjpeg",
    "openjpeg",
    "jbig2dec",
    "freetype2",
    "harfbuzz",
    "gumbo",
    "djvulibre",
    "mupdf",
];
148
/// Describes how a thirdparty library's source is obtained.
///
/// Produced by [`library_source`] and consumed by [`download_libraries`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LibrarySource {
    /// Download a tarball from the contained URL and extract it with the
    /// top-level directory stripped.
    Tarball(String),
    /// Clone a git repository at a specific tag, recursing into submodules.
    Git {
        /// URL of the repository to clone.
        repo: String,
        /// Tag to check out.
        tag: String,
    },
}
156
157/// Returns the source descriptor for a named library.
158///
159/// # Errors
160///
161/// Returns an error if `name` is not a known library.
162pub fn library_source(name: &str) -> Result<LibrarySource> {
163    match name {
164        "zlib" => Ok(LibrarySource::Tarball(format!(
165            "https://github.com/madler/zlib/releases/download/v{v}/zlib-{v}.tar.gz",
166            v = ZLIB_VERSION
167        ))),
168        "bzip2" => Ok(LibrarySource::Tarball(BZIP2_URL.to_owned())),
169        "libpng" => Ok(LibrarySource::Tarball(format!(
170            "https://github.com/pnggroup/libpng/archive/refs/tags/v{v}.tar.gz",
171            v = LIBPNG_VERSION
172        ))),
173        "libjpeg" => Ok(LibrarySource::Tarball(format!(
174            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/refs/tags/jpeg-{v}.tar.gz",
175            v = LIBJPEG_VERSION
176        ))),
177        "openjpeg" => Ok(LibrarySource::Tarball(format!(
178            "https://github.com/uclouvain/openjpeg/archive/v{v}.tar.gz",
179            v = OPENJPEG_VERSION
180        ))),
181        "jbig2dec" => Ok(LibrarySource::Tarball(JBIG2DEC_URL.to_owned())),
182        "freetype2" => Ok(LibrarySource::Git {
183            repo: "https://github.com/freetype/freetype".to_owned(),
184            tag: format!("VER-{}", FREETYPE2_VERSION.replace('.', "-")),
185        }),
186        "harfbuzz" => Ok(LibrarySource::Tarball(format!(
187            "https://github.com/harfbuzz/harfbuzz/archive/{v}.tar.gz",
188            v = HARFBUZZ_VERSION
189        ))),
190        "gumbo" => Ok(LibrarySource::Tarball(format!(
191            "https://github.com/google/gumbo-parser/archive/v{v}.tar.gz",
192            v = GUMBO_VERSION
193        ))),
194        "djvulibre" => Ok(LibrarySource::Tarball(format!(
195            "https://github.com/barak/djvulibre/archive/refs/tags/release.{v}.tar.gz",
196            v = DJVULIBRE_VERSION
197        ))),
198        "mupdf" => Ok(LibrarySource::Tarball(MUPDF_URL.to_owned())),
199        _ => bail!("unknown thirdparty library: {name}"),
200    }
201}
202
203/// Downloads source for the given libraries into `thirdparty/`.
204///
205/// When `names` is empty all libraries are downloaded.  Tarballs are extracted
206/// with the top-level directory stripped.  Libraries with a [`LibrarySource::Git`]
207/// source are cloned with `--recurse-submodules` so submodule contents are
208/// always present.
209///
210/// Skips libraries with persisted marker files:
211/// - source-ready marker ([`SOURCE_READY_MARKER`])
212/// - built marker ([`BUILT_MARKER`])
213///
214/// This avoids fragile file-heuristic detection across heterogeneous upstream
215/// source trees.
216///
217/// # Errors
218///
219/// Returns an error if any download, extraction, or clone fails.
220pub fn download_libraries(thirdparty_dir: &Path, names: &[&str]) -> Result<()> {
221    let targets: Vec<&str> = if names.is_empty() {
222        LIBRARY_NAMES.to_vec()
223    } else {
224        names.to_vec()
225    };
226
227    for name in targets {
228        let dest_dir = thirdparty_dir.join(name);
229
230        if is_source_ready(&dest_dir) || is_built(&dest_dir) {
231            println!("Skipping {name} (source ready)…");
232            continue;
233        }
234
235        println!("Downloading {name}…");
236
237        match library_source(name)? {
238            LibrarySource::Tarball(url) => {
239                let tarball = thirdparty_dir.join(format!("{name}.tgz"));
240
241                if dest_dir.exists() {
242                    clean_untracked(&dest_dir)?;
243                } else {
244                    std::fs::create_dir_all(&dest_dir)
245                        .with_context(|| format!("failed to create {}", dest_dir.display()))?;
246                }
247
248                http::download(&url, &tarball)
249                    .with_context(|| format!("failed to download {name}"))?;
250
251                fs::extract_tarball_strip_one(&tarball, &dest_dir)
252                    .with_context(|| format!("failed to extract {name}"))?;
253
254                std::fs::remove_file(&tarball).ok();
255
256                write_marker(&dest_dir, SOURCE_READY_MARKER, name, "source")?;
257            }
258            LibrarySource::Git { repo, tag } => {
259                if !dest_dir.exists() {
260                    std::fs::create_dir_all(&dest_dir)
261                        .with_context(|| format!("failed to create {}", dest_dir.display()))?;
262                }
263
264                git_clone_tag(&repo, &tag, &dest_dir)
265                    .with_context(|| format!("failed to clone {name}"))?;
266
267                let autogen = dest_dir.join("autogen.sh");
268                if autogen.exists() {
269                    cmd::run("./autogen.sh", &[], &dest_dir, &[])
270                        .with_context(|| format!("failed to run autogen.sh for {name}"))?;
271                }
272
273                write_marker(&dest_dir, SOURCE_READY_MARKER, name, "source")?;
274            }
275        }
276    }
277
278    Ok(())
279}
280
281/// Clones `repo` at `tag` into `dest`, recursing into submodules.
282///
283/// Clones into a temporary sibling directory first, then moves the contents
284/// into `dest`.  This preserves any files already in `dest` that are tracked
285/// in the cadmus repository (e.g. `build-kobo.sh`), matching the behaviour of
286/// tarball extraction with `strip_one`.
287fn git_clone_tag(repo: &str, tag: &str, dest: &Path) -> Result<()> {
288    let tmp = dest.with_extension("_clone_tmp");
289
290    if tmp.exists() {
291        std::fs::remove_dir_all(&tmp)
292            .with_context(|| format!("failed to remove {}", tmp.display()))?;
293    }
294
295    cmd::run(
296        "git",
297        &[
298            "clone",
299            "--depth=1",
300            "--recurse-submodules",
301            "--branch",
302            tag,
303            repo,
304            tmp.to_str().context("tmp path is not valid UTF-8")?,
305        ],
306        std::path::Path::new("."),
307        &[],
308    )?;
309
310    for entry in
311        std::fs::read_dir(&tmp).with_context(|| format!("failed to read {}", tmp.display()))?
312    {
313        let entry = entry.with_context(|| format!("failed to read entry in {}", tmp.display()))?;
314        let target = dest.join(entry.file_name());
315        if target.exists() {
316            if target.is_dir() {
317                std::fs::remove_dir_all(&target).ok();
318            } else {
319                std::fs::remove_file(&target).ok();
320            }
321        }
322        std::fs::rename(entry.path(), &target).with_context(|| {
323            format!(
324                "failed to move {} to {}",
325                entry.path().display(),
326                target.display()
327            )
328        })?;
329    }
330
331    std::fs::remove_dir_all(&tmp).with_context(|| format!("failed to remove {}", tmp.display()))?;
332
333    Ok(())
334}
335
/// Name of the sentinel file created in a library directory once its source
/// has been fetched and unpacked successfully.
///
/// [`download_libraries`] skips any library whose directory contains it.
pub const SOURCE_READY_MARKER: &str = ".source-ready";

/// Name of the sentinel file created in a library directory after a
/// successful build.
///
/// When it is present the library is treated as already compiled and cached,
/// so both the patch and the build step are skipped on the next run.
pub const BUILT_MARKER: &str = ".built-kobo";
346
347/// Returns `true` if `dir` already has a completed source download marker.
348fn is_source_ready(dir: &Path) -> bool {
349    dir.join(SOURCE_READY_MARKER).exists()
350}
351
352/// Returns `true` if the library in `dir` was already built and the sentinel
353/// file is present.
354fn is_built(dir: &Path) -> bool {
355    dir.join(BUILT_MARKER).exists()
356}
357
358/// Writes a marker file inside `dir` to persist task completion state.
359fn write_marker(dir: &Path, marker: &str, name: &str, state: &str) -> Result<()> {
360    std::fs::write(dir.join(marker), "")
361        .with_context(|| format!("failed to write {state} marker for {name}"))
362}
363
364/// Builds the given libraries for the Kobo ARM target.
365///
366/// When `names` is empty all libraries are built in dependency order.  For
367/// each library, `kobo.patch` is applied if present, then `./build-kobo.sh`
368/// is invoked.  A sentinel file ([`BUILT_MARKER`]) is written on success so
369/// that a warm CI cache can skip already-built libraries without re-applying
370/// the patch or re-running the build script.
371///
372/// # Errors
373///
374/// Returns an error if patching or building any library fails.
375pub fn build_libraries(thirdparty_dir: &Path, names: &[&str]) -> Result<()> {
376    let targets: Vec<&str> = if names.is_empty() {
377        LIBRARY_NAMES.to_vec()
378    } else {
379        names.to_vec()
380    };
381
382    for name in targets {
383        let lib_dir = thirdparty_dir.join(name);
384
385        if !lib_dir.exists() {
386            bail!(
387                "thirdparty/{name} not found — run `cargo xtask build-kobo --download-only` first"
388            );
389        }
390
391        if is_built(&lib_dir) {
392            println!("Skipping {name} (already built)…");
393            continue;
394        }
395
396        println!("Building {name}…");
397
398        let patch = lib_dir.join("kobo.patch");
399        if patch.exists() {
400            cmd::run("patch", &["-p", "1", "-i", "kobo.patch"], &lib_dir, &[])
401                .with_context(|| format!("failed to apply kobo.patch for {name}"))?;
402        }
403
404        cmd::run("./build-kobo.sh", &[], &lib_dir, &[])
405            .with_context(|| format!("failed to build {name}"))?;
406
407        write_marker(&lib_dir, BUILT_MARKER, name, "build")?;
408        write_marker(&lib_dir, SOURCE_READY_MARKER, name, "source")?;
409    }
410
411    Ok(())
412}
413
414/// Removes untracked files from a directory using `git ls-files`, falling back
415/// to removing and recreating the directory when git is unavailable.
416fn clean_untracked(dir: &Path) -> Result<()> {
417    let result = std::process::Command::new("git")
418        .args(["ls-files", "-o", "--directory", "-z"])
419        .arg(dir.file_name().unwrap_or(dir.as_os_str()))
420        .current_dir(dir.parent().unwrap_or(dir))
421        .output();
422
423    match result {
424        Ok(output) if output.status.success() => {
425            for entry in output.stdout.split(|&b| b == 0) {
426                if entry.is_empty() {
427                    continue;
428                }
429
430                let path = dir
431                    .parent()
432                    .unwrap_or(dir)
433                    .join(std::str::from_utf8(entry).unwrap_or(""));
434
435                if path.is_dir() {
436                    std::fs::remove_dir_all(&path).ok();
437                } else {
438                    std::fs::remove_file(&path).ok();
439                }
440            }
441        }
442        _ => {
443            std::fs::remove_dir_all(dir)
444                .with_context(|| format!("failed to remove {}", dir.display()))?;
445            std::fs::create_dir_all(dir)
446                .with_context(|| format!("failed to recreate {}", dir.display()))?;
447        }
448    }
449
450    Ok(())
451}
452
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;

    /// Every entry of `LIBRARY_NAMES` must resolve to a plausible source.
    #[test]
    fn library_source_is_defined_for_all_known_libraries() {
        for name in LIBRARY_NAMES {
            match library_source(name).unwrap() {
                LibrarySource::Tarball(url) => {
                    let is_http = url.starts_with("http");
                    assert!(is_http, "tarball URL for {name} should start with http");

                    let is_tar_gz = url.contains(".tar.gz");
                    assert!(is_tar_gz, "tarball URL for {name} should contain .tar.gz");
                }
                LibrarySource::Git { repo, tag } => {
                    let is_https = repo.starts_with("https://");
                    assert!(is_https, "git repo for {name} should use https");

                    assert!(!tag.is_empty(), "git tag for {name} should not be empty");
                }
            }
        }
    }

    /// Names outside `LIBRARY_NAMES` are rejected.
    #[test]
    fn library_source_errors_on_unknown_library() {
        let result = library_source("nonexistent");
        assert!(result.is_err());
    }

    /// A duplicated name would make the library be downloaded and built twice.
    #[test]
    fn library_names_has_no_duplicates() {
        let unique: HashSet<&str> = LIBRARY_NAMES.iter().copied().collect();
        assert_eq!(
            unique.len(),
            LIBRARY_NAMES.len(),
            "duplicate library names found"
        );
    }
}