Skip to main content

xtask_lib/tasks/util/
thirdparty.rs

1//! Thirdparty library download and build helpers.
2//!
3//! Library source URLs are defined as constants so Renovate can track them.
4//!
5//! ## Download
6//!
7//! [`download_libraries`] fetches each library's source.  Most libraries are
8//! downloaded as tarballs and extracted with the top-level directory stripped.
9//! Libraries that use git submodules (currently freetype2) are cloned with
10//! `--recurse-submodules` so submodule contents are always present.  If the
11//! cloned source ships an `autogen.sh` script, it is run immediately after
12//! cloning to generate the `configure` script that `build-kobo.sh` expects.
13//!
14//! ## Build
15//!
16//! [`build_libraries`] iterates over the packages in dependency order, applies
17//! `kobo.patch` if present, then invokes each library's own `build-kobo.sh`
18//! script.
19
20use std::path::Path;
21
22use anyhow::{Context, Result, bail};
23
24use super::{cmd, fs, http};
25
/// Unversioned file names (`lib<name>.so`) of every thirdparty shared library.
///
/// These are base names only.  The actual SONAME of each library is looked up
/// at runtime with `arm-linux-gnueabihf-readelf -d` (see [`soname`]), because
/// the upstream projects do not follow a consistent ABI versioning scheme.
pub const SONAMES: &[&str] = &[
    "libz.so",
    "libbz2.so",
    "libpng16.so",
    "libjpeg.so",
    "libopenjp2.so",
    "libjbig2dec.so",
    "libfreetype.so",
    "libharfbuzz.so",
    "libgumbo.so",
    "libwebp.so",
    "libwebpdemux.so",
    "libdjvulibre.so",
    "libmupdf.so",
];
45
46/// Returns the SONAME of `lib` in `libs_dir`.
47///
48/// When the library file exists, `arm-linux-gnueabihf-readelf -d` is used to
49/// extract the SONAME from the binary. When only a versioned file exists
50/// (e.g. `libz.so.1.2.13` without `libz.so`), the versioned filename is
51/// returned directly.
52///
53/// # Errors
54///
55/// Returns an error if `arm-linux-gnueabihf-readelf` fails or the SONAME
56/// cannot be determined.
57pub fn soname(libs_dir: &Path, lib: &str) -> Result<String> {
58    let so_path = libs_dir.join(lib);
59    if so_path.exists() {
60        let so_path_str = so_path
61            .to_str()
62            .with_context(|| format!("shared library path is not valid UTF-8: {so_path:?}"))?;
63        let output = cmd::output(
64            "arm-linux-gnueabihf-readelf",
65            &["-d", so_path_str],
66            libs_dir,
67            &[],
68        )?;
69        let soname = output
70            .lines()
71            .find(|line| line.contains("SONAME"))
72            .and_then(|line| line.split_whitespace().last())
73            .map(|token| {
74                token
75                    .trim_start_matches('[')
76                    .trim_end_matches(']')
77                    .to_string()
78            })
79            .with_context(|| format!("failed to find SONAME in readelf output for {lib}"))?;
80        Ok(soname)
81    } else {
82        let prefix = format!("{}.", lib);
83        let matching: Vec<_> = std::fs::read_dir(libs_dir)?
84            .filter_map(|e| e.ok())
85            .filter(|e| e.file_name().to_string_lossy().starts_with(&prefix))
86            .collect();
87
88        match matching.len() {
89            1 => Ok(matching[0].file_name().to_string_lossy().into_owned()),
90            0 => bail!(
91                "no versioned file found for {} in {}",
92                lib,
93                libs_dir.display()
94            ),
95            _ => bail!(
96                "multiple versioned files found for {} in {}",
97                lib,
98                libs_dir.display()
99            ),
100        }
101    }
102}
103
// Version strings for thirdparty libraries tracked by Renovate.
//
// Every thirdparty library must have a `*_VERSION` constant in this section.
// The constant is the single source of truth: `library_source` builds the
// download URL (or git tag) from it at call time.  A corresponding Renovate
// regex custom manager in `renovate.json` matches each constant and opens PRs
// when new upstream releases are available.
//
// When adding a new thirdparty library, add a `*_VERSION` constant here and a
// matching Renovate regex manager entry in `renovate.json`.

/// zlib version, substituted into the GitHub Releases URL on `madler/zlib`.
pub const ZLIB_VERSION: &str = "1.3.2";
/// libpng version, substituted into the `v<version>` tag archive URL on
/// `pnggroup/libpng`.
pub const LIBPNG_VERSION: &str = "1.6.53";
/// DjVuLibre version, substituted into the `release.<version>` tag archive URL
/// on `barak/djvulibre`.
pub const DJVULIBRE_VERSION: &str = "3.5.30";
/// IJG libjpeg version tracked via the libjpeg-turbo `jpeg-<version>` tag mirror.
pub const LIBJPEG_VERSION: &str = "10";

/// bzip2 version, tracked and downloaded via GitLab `bzip2/bzip2`.
pub const BZIP2_VERSION: &str = "1.0.8";
/// OpenJPEG version, substituted into the `uclouvain/openjpeg` archive URL.
pub const OPENJPEG_VERSION: &str = "2.5.4";
/// jbig2dec version, tracked via GitHub Releases on `ArtifexSoftware/jbig2dec`.
pub const JBIG2DEC_VERSION: &str = "0.20";
/// FreeType version, cloned from `freetype/freetype` at tag `VER-X-Y-Z`.
///
/// Tracked by Renovate via the `github-tags` datasource with
/// `extractVersionTemplate: "^VER-(?<version>.+)$"`.  freetype2 is cloned
/// rather than downloaded as a tarball because its build system requires the
/// `nyorain/dlg` git submodule, which is absent from archive tarballs.
pub const FREETYPE2_VERSION: &str = "2.14.1";
/// HarfBuzz version, substituted into the `harfbuzz/harfbuzz` archive URL.
pub const HARFBUZZ_VERSION: &str = "14.2.0";
/// Gumbo version, substituted into the `google/gumbo-parser` archive URL.
pub const GUMBO_VERSION: &str = "0.10.1";
/// libwebp version, substituted into the `webmproject/libwebp` tag archive URL.
pub const LIBWEBP_VERSION: &str = "1.2.3";

/// MuPDF version, tracked via GitHub Releases on `ArtifexSoftware/mupdf-downloads`.
pub const MUPDF_VERSION: &str = "1.27.0";
142
/// Patch files applied to the MuPDF source tree, in this order, by
/// [`apply_mupdf_webp_patches_if_needed`].
const MUPDF_WEBP_PATCHES: &[&str] = &[
    // Verbatim KOReader upstream.
    "webp-upstream-697749-kobo.patch",
    // `image.h` declarations (our wrapper needs these).
    "webp-image-h-kobo.patch",
    // Cadmus deviations: demux cleanup, animation, epsilon, yres, ICC warning.
    "webp-load-webp-deviations-kobo.patch",
];
148
/// Name of the sentinel file created in the MuPDF directory once every patch
/// in the WebP patch series has been applied successfully.
const WEBP_PATCHED_MARKER: &str = ".webp-patched";
151
/// Names of every thirdparty library, listed in dependency order for building.
///
/// This is also the default target list used by [`download_libraries`] and
/// [`build_libraries`] when they are called with an empty `names` slice.
const LIBRARY_NAMES: &[&str] = &[
    "zlib",
    "bzip2",
    "libpng",
    "libjpeg",
    "openjpeg",
    "jbig2dec",
    "libwebp",
    "freetype2",
    "harfbuzz",
    "gumbo",
    "djvulibre",
    "mupdf",
];
167
/// Describes how a thirdparty library's source is obtained.
///
/// Values are produced by [`library_source`] and consumed by
/// [`download_libraries`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LibrarySource {
    /// Download a tarball from the contained URL and extract it with the
    /// top-level directory stripped.
    Tarball(String),
    /// Clone a git repository at a specific tag, recursing into submodules.
    Git {
        /// URL of the repository to clone.
        repo: String,
        /// Tag to check out.
        tag: String,
    },
}
175
176/// Returns the source descriptor for a named library.
177///
178/// # Errors
179///
180/// Returns an error if `name` is not a known library.
181pub fn library_source(name: &str) -> Result<LibrarySource> {
182    match name {
183        "zlib" => Ok(LibrarySource::Tarball(format!(
184            "https://github.com/madler/zlib/releases/download/v{v}/zlib-{v}.tar.gz",
185            v = ZLIB_VERSION
186        ))),
187        "bzip2" => Ok(LibrarySource::Tarball(format!(
188            "https://gitlab.com/bzip2/bzip2/-/archive/bzip2-{v}/bzip2-bzip2-{v}.tar.gz",
189            v = BZIP2_VERSION
190        ))),
191        "libpng" => Ok(LibrarySource::Tarball(format!(
192            "https://github.com/pnggroup/libpng/archive/refs/tags/v{v}.tar.gz",
193            v = LIBPNG_VERSION
194        ))),
195        "libjpeg" => Ok(LibrarySource::Tarball(format!(
196            "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/refs/tags/jpeg-{v}.tar.gz",
197            v = LIBJPEG_VERSION
198        ))),
199        "openjpeg" => Ok(LibrarySource::Tarball(format!(
200            "https://github.com/uclouvain/openjpeg/archive/v{v}.tar.gz",
201            v = OPENJPEG_VERSION
202        ))),
203        "jbig2dec" => Ok(LibrarySource::Tarball(format!(
204            "https://github.com/ArtifexSoftware/jbig2dec/releases/download/{v}/jbig2dec-{v}.tar.gz",
205            v = JBIG2DEC_VERSION
206        ))),
207        "freetype2" => Ok(LibrarySource::Git {
208            repo: "https://github.com/freetype/freetype".to_owned(),
209            tag: format!("VER-{}", FREETYPE2_VERSION.replace('.', "-")),
210        }),
211        "harfbuzz" => Ok(LibrarySource::Tarball(format!(
212            "https://github.com/harfbuzz/harfbuzz/archive/{v}.tar.gz",
213            v = HARFBUZZ_VERSION
214        ))),
215        "gumbo" => Ok(LibrarySource::Tarball(format!(
216            "https://github.com/google/gumbo-parser/archive/v{v}.tar.gz",
217            v = GUMBO_VERSION
218        ))),
219        "libwebp" => Ok(LibrarySource::Tarball(format!(
220            "https://github.com/webmproject/libwebp/archive/refs/tags/v{v}.tar.gz",
221            v = LIBWEBP_VERSION
222        ))),
223        "djvulibre" => Ok(LibrarySource::Tarball(format!(
224            "https://github.com/barak/djvulibre/archive/refs/tags/release.{v}.tar.gz",
225            v = DJVULIBRE_VERSION
226        ))),
227        "mupdf" => Ok(LibrarySource::Tarball(format!(
228            "https://github.com/ArtifexSoftware/mupdf-downloads/releases/download/{v}/mupdf-{v}-source.tar.gz",
229            v = MUPDF_VERSION
230        ))),
231        _ => bail!("unknown thirdparty library: {name}"),
232    }
233}
234
235/// Downloads source for the given libraries into `thirdparty/`.
236///
237/// When `names` is empty all libraries are downloaded.  Tarballs are extracted
238/// with the top-level directory stripped.  Libraries with a [`LibrarySource::Git`]
239/// source are cloned with `--recurse-submodules` so submodule contents are
240/// always present.
241///
242/// Skips libraries with persisted marker files:
243/// - source-ready marker ([`SOURCE_READY_MARKER`])
244/// - built marker ([`BUILT_MARKER`])
245///
246/// This avoids fragile file-heuristic detection across heterogeneous upstream
247/// source trees.
248///
249/// # Errors
250///
251/// Returns an error if any download, extraction, or clone fails.
252pub fn download_libraries(thirdparty_dir: &Path, names: &[&str]) -> Result<()> {
253    let targets: Vec<&str> = if names.is_empty() {
254        LIBRARY_NAMES.to_vec()
255    } else {
256        names.to_vec()
257    };
258
259    for name in targets {
260        let dest_dir = thirdparty_dir.join(name);
261
262        if is_source_ready(&dest_dir) || is_built(&dest_dir) {
263            println!("Skipping {name} (source ready)…");
264            continue;
265        }
266
267        println!("Downloading {name}…");
268
269        match library_source(name)? {
270            LibrarySource::Tarball(url) => {
271                let tarball = thirdparty_dir.join(format!("{name}.tgz"));
272
273                if dest_dir.exists() {
274                    clean_untracked(&dest_dir)?;
275                } else {
276                    std::fs::create_dir_all(&dest_dir)
277                        .with_context(|| format!("failed to create {}", dest_dir.display()))?;
278                }
279
280                http::download(&url, &tarball)
281                    .with_context(|| format!("failed to download {name}"))?;
282
283                fs::extract_tarball_strip_one(&tarball, &dest_dir)
284                    .with_context(|| format!("failed to extract {name}"))?;
285
286                std::fs::remove_file(&tarball).ok();
287
288                write_marker(&dest_dir, SOURCE_READY_MARKER, name, "source")?;
289            }
290            LibrarySource::Git { repo, tag } => {
291                if !dest_dir.exists() {
292                    std::fs::create_dir_all(&dest_dir)
293                        .with_context(|| format!("failed to create {}", dest_dir.display()))?;
294                }
295
296                git_clone_tag(&repo, &tag, &dest_dir)
297                    .with_context(|| format!("failed to clone {name}"))?;
298
299                let autogen = dest_dir.join("autogen.sh");
300                if autogen.exists() {
301                    cmd::run("./autogen.sh", &[], &dest_dir, &[])
302                        .with_context(|| format!("failed to run autogen.sh for {name}"))?;
303                }
304
305                write_marker(&dest_dir, SOURCE_READY_MARKER, name, "source")?;
306            }
307        }
308    }
309
310    Ok(())
311}
312
313/// Clones `repo` at `tag` into `dest`, recursing into submodules.
314///
315/// Clones into a temporary sibling directory first, then moves the contents
316/// into `dest`.  This preserves any files already in `dest` that are tracked
317/// in the cadmus repository (e.g. `build-kobo.sh`), matching the behaviour of
318/// tarball extraction with `strip_one`.
319fn git_clone_tag(repo: &str, tag: &str, dest: &Path) -> Result<()> {
320    let tmp = dest.with_extension("_clone_tmp");
321
322    if tmp.exists() {
323        std::fs::remove_dir_all(&tmp)
324            .with_context(|| format!("failed to remove {}", tmp.display()))?;
325    }
326
327    cmd::run(
328        "git",
329        &[
330            "clone",
331            "--depth=1",
332            "--recurse-submodules",
333            "--branch",
334            tag,
335            repo,
336            tmp.to_str().context("tmp path is not valid UTF-8")?,
337        ],
338        std::path::Path::new("."),
339        &[],
340    )?;
341
342    for entry in
343        std::fs::read_dir(&tmp).with_context(|| format!("failed to read {}", tmp.display()))?
344    {
345        let entry = entry.with_context(|| format!("failed to read entry in {}", tmp.display()))?;
346
347        if entry.file_name() == ".git" {
348            continue;
349        }
350
351        let target = dest.join(entry.file_name());
352        if target.exists() {
353            if target.is_dir() {
354                std::fs::remove_dir_all(&target).ok();
355            } else {
356                std::fs::remove_file(&target).ok();
357            }
358        }
359        std::fs::rename(entry.path(), &target).with_context(|| {
360            format!(
361                "failed to move {} to {}",
362                entry.path().display(),
363                target.display()
364            )
365        })?;
366    }
367
368    std::fs::remove_dir_all(&tmp).with_context(|| format!("failed to remove {}", tmp.display()))?;
369
370    Ok(())
371}
372
/// Name of the sentinel file created inside a library directory once its
/// source has been fetched.
///
/// [`download_libraries`] skips any library whose directory contains it.
pub const SOURCE_READY_MARKER: &str = ".source-ready";
377
/// Name of the sentinel file created inside a library directory after a
/// successful build.
///
/// When it is present the library counts as already compiled and cached, so
/// [`build_libraries`] skips both the patch step and the build step for it.
pub const BUILT_MARKER: &str = ".built-kobo";
383
384/// Returns `true` if `dir` already has a completed source download marker.
385fn is_source_ready(dir: &Path) -> bool {
386    dir.join(SOURCE_READY_MARKER).exists()
387}
388
389/// Returns `true` if the library in `dir` was already built and the sentinel
390/// file is present.
391fn is_built(dir: &Path) -> bool {
392    dir.join(BUILT_MARKER).exists()
393}
394
395/// Writes a marker file inside `dir` to persist task completion state.
396fn write_marker(dir: &Path, marker: &str, name: &str, state: &str) -> Result<()> {
397    std::fs::write(dir.join(marker), "")
398        .with_context(|| format!("failed to write {state} marker for {name}"))
399}
400
401/// Builds the given libraries for the Kobo ARM target.
402///
403/// When `names` is empty all libraries are built in dependency order.  For
404/// each library, `kobo.patch` is applied if present, then `./build-kobo.sh`
405/// is invoked.  A sentinel file ([`BUILT_MARKER`]) is written on success so
406/// that a warm CI cache can skip already-built libraries without re-applying
407/// the patch or re-running the build script.
408///
409/// # Errors
410///
411/// Returns an error if patching or building any library fails.
412pub fn build_libraries(thirdparty_dir: &Path, names: &[&str]) -> Result<()> {
413    let targets: Vec<&str> = if names.is_empty() {
414        LIBRARY_NAMES.to_vec()
415    } else {
416        names.to_vec()
417    };
418
419    for name in targets {
420        let lib_dir = thirdparty_dir.join(name);
421
422        if !lib_dir.exists() {
423            bail!(
424                "thirdparty/{name} not found — run `cargo xtask build-kobo --download-only` first"
425            );
426        }
427
428        if is_built(&lib_dir) {
429            println!("Skipping {name} (already built)…");
430            continue;
431        }
432
433        println!("Building {name}…");
434
435        let patch = lib_dir.join("kobo.patch");
436        if patch.exists() {
437            cmd::run("patch", &["-p", "1", "-i", "kobo.patch"], &lib_dir, &[])
438                .with_context(|| format!("failed to apply kobo.patch for {name}"))?;
439        }
440
441        if name == "mupdf" {
442            apply_mupdf_webp_patches_if_needed(&lib_dir)?;
443        }
444
445        let envs = [
446            ("AR", "arm-linux-gnueabihf-ar"),
447            ("AS", "arm-linux-gnueabihf-as"),
448            ("STRIP", "arm-linux-gnueabihf-strip"),
449            ("RANLIB", "arm-linux-gnueabihf-ranlib"),
450            ("LD", "arm-linux-gnueabihf-ld"),
451            ("CC_FOR_BUILD", "cc"),
452            ("CXX_FOR_BUILD", "c++"),
453            ("CC_BUILD", "cc"),
454        ];
455        cmd::run("./build-kobo.sh", &[], &lib_dir, &envs)
456            .with_context(|| format!("failed to build {name}"))?;
457
458        write_marker(&lib_dir, BUILT_MARKER, name, "build")?;
459        write_marker(&lib_dir, SOURCE_READY_MARKER, name, "source")?;
460    }
461
462    Ok(())
463}
464
465/// Applies Cadmus' MuPDF WebP patch series unless it was already applied.
466///
467/// Returns `true` when patches were applied during this call.
468pub fn apply_mupdf_webp_patches_if_needed(mupdf_dir: &Path) -> Result<bool> {
469    if mupdf_webp_patches_applied(mupdf_dir) {
470        println!("MuPDF WebP patches already applied.");
471        Ok(false)
472    } else {
473        println!("Applying MuPDF WebP patches…");
474        for patch in MUPDF_WEBP_PATCHES {
475            cmd::run("patch", &["-p", "1", "-i", patch], mupdf_dir, &[])
476                .with_context(|| format!("failed to apply {patch}"))?;
477        }
478
479        write_marker(mupdf_dir, WEBP_PATCHED_MARKER, "mupdf", "WebP patch")?;
480        Ok(true)
481    }
482}
483
484fn mupdf_webp_patches_applied(mupdf_dir: &Path) -> bool {
485    mupdf_dir.join(WEBP_PATCHED_MARKER).exists()
486}
487
488/// Removes untracked files from a directory using `git ls-files`, falling back
489/// to removing and recreating the directory when git is unavailable.
490pub fn clean_untracked(dir: &Path) -> Result<()> {
491    let result = std::process::Command::new("git")
492        .args(["ls-files", "-o", "--directory", "-z"])
493        .arg(dir.file_name().unwrap_or(dir.as_os_str()))
494        .current_dir(dir.parent().unwrap_or(dir))
495        .output();
496
497    match result {
498        Ok(output) if output.status.success() => {
499            for entry in output.stdout.split(|&b| b == 0) {
500                if entry.is_empty() {
501                    continue;
502                }
503
504                let path = dir
505                    .parent()
506                    .unwrap_or(dir)
507                    .join(std::str::from_utf8(entry).unwrap_or(""));
508
509                if path.is_dir() {
510                    std::fs::remove_dir_all(&path).ok();
511                } else {
512                    std::fs::remove_file(&path).ok();
513                }
514            }
515        }
516        _ => {
517            std::fs::remove_dir_all(dir)
518                .with_context(|| format!("failed to remove {}", dir.display()))?;
519            std::fs::create_dir_all(dir)
520                .with_context(|| format!("failed to recreate {}", dir.display()))?;
521        }
522    }
523
524    Ok(())
525}
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Every name in `LIBRARY_NAMES` must resolve to a plausible source.
    #[test]
    fn library_source_is_defined_for_all_known_libraries() {
        for name in LIBRARY_NAMES.iter().copied() {
            match library_source(name).unwrap() {
                LibrarySource::Git { repo, tag } => {
                    assert!(
                        repo.starts_with("https://"),
                        "git repo for {name} should use https"
                    );
                    assert!(!tag.is_empty(), "git tag for {name} should not be empty");
                }
                LibrarySource::Tarball(url) => {
                    assert!(
                        url.starts_with("http"),
                        "tarball URL for {name} should start with http"
                    );
                    assert!(
                        url.contains(".tar.gz"),
                        "tarball URL for {name} should contain .tar.gz"
                    );
                }
            }
        }
    }

    /// Names outside `LIBRARY_NAMES` are rejected.
    #[test]
    fn library_source_errors_on_unknown_library() {
        let lookup = library_source("nonexistent");
        assert!(lookup.is_err());
    }

    /// The build order must list each library exactly once.
    #[test]
    fn library_names_has_no_duplicates() {
        let unique: std::collections::BTreeSet<&str> = LIBRARY_NAMES.iter().copied().collect();
        assert_eq!(
            unique.len(),
            LIBRARY_NAMES.len(),
            "duplicate library names found"
        );
    }
}