Skip to main content

cadmus_core/document/html/
mod.rs

1//! HTML document rendering for Cadmus.
2//!
3//! This module provides two concrete document types that share a common
4//! rendering pipeline through [`HtmlBase`]:
5//!
6//! - [`HtmlDocument`] — backed by the hand-rolled [`XmlParser`]. Node offsets
7//!   are exact byte positions in the source string, which is required when
8//!   reading positions, bookmarks, and annotations are persisted to disk.
9//!   Used for standalone HTML files and EPUB spine chapters.
10//!
11//! - [`Html5Document`] — backed by html5ever. Node offsets are synthetic.
12//!   Used for ephemeral rendering (e.g. the dictionary view) where HTML5
13//!   conformance matters more than offset precision.
14//!
15//! The shared [`HtmlBase`] struct holds the parsed [`XmlTree`], the layout
16//! [`Engine`], the page cache, and stylesheet paths. Both document types
17//! compose it and delegate all rendering operations to it.
18
19pub mod css;
20pub mod dom;
21pub mod engine;
22pub mod html5;
23pub mod layout;
24pub mod parse;
25pub mod style;
26pub mod xml;
27
28pub use html5::Html5Document;
29
30use self::css::CssParser;
31use self::dom::{NodeRef, XmlTree};
32use self::engine::{Engine, Page, ResourceFetcher};
33use self::layout::{DrawCommand, ImageCommand, TextAlign, TextCommand};
34use self::layout::{DrawState, LoopContext, RootData, StyleData};
35use self::style::StyleSheet;
36use self::xml::XmlParser;
37use crate::document::{BoundedText, Document, Location, TextLocation, TocEntry};
38use crate::framebuffer::Pixmap;
39use crate::geom::{Boundary, CycleDir, Edge};
40use crate::helpers::{decode_entities, Normalize};
41use crate::unit::pt_to_px;
42use anyhow::Error;
43use fxhash::FxHashMap;
44use std::fs::{self, File};
45use std::io::{Read, Write};
46use std::path::{Path, PathBuf};
47
/// Path to the viewer stylesheet applied to all HTML documents.
///
/// The path is relative, so it is resolved against the process's current
/// working directory whenever it is read.
const VIEWER_STYLESHEET: &str = "css/html.css";
/// Path to the user-editable stylesheet overlaid on top of the viewer styles.
///
/// This is the default user stylesheet of documents created by
/// `HtmlDocument::new` and `HtmlDocument::new_from_memory`.
const USER_STYLESHEET: &str = "css/html-user.css";

/// Map from URI fragment strings (e.g. `"chapter.html#section-2"`) to the
/// document offset of the element with the matching `id` attribute.
type UriCache = FxHashMap<String, usize>;
56
/// Parser-independent rendering state shared by [`HtmlDocument`] and
/// [`Html5Document`].
///
/// Owns the parsed [`XmlTree`], the layout [`Engine`], a lazily-built page
/// cache, and the stylesheet configuration. Neither parser-specific logic nor
/// the raw source text belongs here — those live in the concrete document types
/// that compose this struct.
pub(crate) struct HtmlBase {
    /// The parsed document tree.
    pub(crate) content: XmlTree,
    /// Layout engine responsible for building draw commands and rendering pages.
    pub(crate) engine: Engine,
    /// Lazily built list of pages. An empty list means "not built yet":
    /// `page_index` rebuilds it on demand, and owners clear it whenever a
    /// layout parameter changes.
    pub(crate) pages: Vec<Page>,
    /// Directory used to resolve relative resource paths (images, stylesheets).
    pub(crate) parent: PathBuf,
    /// Byte size of the source content, used as a proxy for `pages_count`.
    pub(crate) size: usize,
    /// Path to the viewer stylesheet (typically `css/html.css`).
    pub(crate) viewer_stylesheet: PathBuf,
    /// Path to the user stylesheet (typically `css/html-user.css`).
    pub(crate) user_stylesheet: PathBuf,
    /// When `true`, `<style>` and `<link rel=stylesheet>` tags in the document
    /// are ignored during page building. Initialised to `false` by `new`.
    pub(crate) ignore_document_css: bool,
}
83
84impl HtmlBase {
85    /// Creates a new `HtmlBase` from an already-parsed tree and configuration.
86    pub(crate) fn new(
87        content: XmlTree,
88        size: usize,
89        parent: PathBuf,
90        viewer_stylesheet: PathBuf,
91        user_stylesheet: PathBuf,
92    ) -> Self {
93        HtmlBase {
94            content,
95            engine: Engine::new(),
96            pages: Vec::new(),
97            parent,
98            size,
99            viewer_stylesheet,
100            user_stylesheet,
101            ignore_document_css: false,
102        }
103    }
104
105    /// Returns the zero-based index of the page that contains `offset`, or
106    /// `None` if no page contains it.
107    ///
108    /// Triggers a full `build_pages` pass the first time it is called after
109    /// the page cache has been cleared.
110    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(offset)))]
111    pub(crate) fn page_index(&mut self, offset: usize) -> Option<usize> {
112        if self.pages.is_empty() {
113            self.pages = self.build_pages();
114        }
115        if self.pages.len() < 2
116            || self.pages[1].first().map(|dc| offset < dc.offset()) == Some(true)
117        {
118            return Some(0);
119        } else if self.pages[self.pages.len() - 1]
120            .first()
121            .map(|dc| offset >= dc.offset())
122            == Some(true)
123        {
124            return Some(self.pages.len() - 1);
125        } else {
126            for i in 1..self.pages.len() - 1 {
127                if self.pages[i].first().map(|dc| offset >= dc.offset()) == Some(true)
128                    && self.pages[i + 1].first().map(|dc| offset < dc.offset()) == Some(true)
129                {
130                    return Some(i);
131                }
132            }
133        }
134        None
135    }
136
137    /// Resolves a URI containing a fragment (e.g. `chapter.html#intro`) to the
138    /// document offset of the element with the matching `id` attribute.
139    ///
140    /// Returns `None` if the URI has no `#` or no element with the given `id`
141    /// is found. Results are written into `cache` so repeated lookups for the
142    /// same URI are free.
143    #[cfg_attr(
144        feature = "tracing",
145        tracing::instrument(skip(self, cache), fields(uri))
146    )]
147    fn resolve_link(&mut self, uri: &str, cache: &mut UriCache) -> Option<usize> {
148        let frag_index = uri.find('#')?;
149        let name = &uri[..frag_index];
150        let content = self.content.clone();
151        self.cache_uris(content.root(), name, cache);
152        cache.get(uri).cloned()
153    }
154
155    /// Recursively walks the tree rooted at `node` and inserts an entry into
156    /// `cache` for every element that carries an `id` attribute, mapping
157    /// `"name#id"` to the element's offset.
158    fn cache_uris(&mut self, node: NodeRef, name: &str, cache: &mut UriCache) {
159        if let Some(id) = node.attribute("id") {
160            cache.insert(format!("{}#{}", name, id), node.offset());
161        }
162        for child in node.children() {
163            self.cache_uris(child, name, cache);
164        }
165    }
166
167    /// Builds the complete list of pages from the current document tree and
168    /// engine settings.
169    ///
170    /// Stylesheets are loaded in priority order:
171    /// 1. The default viewer stylesheet (`css/html.css`).
172    /// 2. A custom viewer stylesheet if one has been set and differs from the
173    ///    default.
174    /// 3. The user stylesheet.
175    /// 4. Inline `<style>` elements and `<link rel=stylesheet>` references
176    ///    found in the document `<head>`, unless `ignore_document_css` is set.
177    ///
178    /// Returns a non-empty list; if the engine produces no draw commands a
179    /// single fallback page anchored at offset 0 is returned.
180    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
181    pub(crate) fn build_pages(&mut self) -> Vec<Page> {
182        let mut stylesheet = StyleSheet::new();
183        let spine_dir = PathBuf::default();
184
185        if let Ok(text) = fs::read_to_string(VIEWER_STYLESHEET) {
186            let mut css = CssParser::new(&text).parse();
187            stylesheet.append(&mut css, true);
188        }
189
190        if self.viewer_stylesheet != Path::new(VIEWER_STYLESHEET) {
191            if let Ok(text) = fs::read_to_string(&self.viewer_stylesheet) {
192                let mut css = CssParser::new(&text).parse();
193                stylesheet.append(&mut css, true);
194            }
195        }
196
197        if let Ok(text) = fs::read_to_string(&self.user_stylesheet) {
198            let mut css = CssParser::new(&text).parse();
199            stylesheet.append(&mut css, true);
200        }
201
202        if !self.ignore_document_css {
203            let mut inner_css = StyleSheet::new();
204
205            if let Some(head) = self.content.root().find("head") {
206                for child in head.children() {
207                    if child.tag_name() == Some("link")
208                        && child.attribute("rel") == Some("stylesheet")
209                    {
210                        if let Some(href) = child.attribute("href") {
211                            if let Some(name) = spine_dir.join(href).normalize().to_str() {
212                                if let Ok(buf) = self.parent.fetch(name) {
213                                    if let Ok(text) = String::from_utf8(buf) {
214                                        let mut css = CssParser::new(&text).parse();
215                                        inner_css.append(&mut css, false);
216                                    }
217                                }
218                            }
219                        }
220                    } else if child.tag_name() == Some("style")
221                        && child.attribute("type") == Some("text/css")
222                    {
223                        let mut css = CssParser::new(&child.text()).parse();
224                        inner_css.append(&mut css, false);
225                    }
226                }
227            }
228
229            stylesheet.append(&mut inner_css, true);
230        }
231
232        let mut pages = Vec::new();
233
234        let mut rect = self.engine.rect();
235        rect.shrink(&self.engine.margin);
236
237        let language = self
238            .content
239            .root()
240            .find("html")
241            .and_then(|html| html.attribute("xml:lang"))
242            .map(String::from);
243
244        let style = StyleData {
245            language,
246            font_size: self.engine.font_size,
247            line_height: pt_to_px(
248                self.engine.line_height * self.engine.font_size,
249                self.engine.dpi,
250            )
251            .round() as i32,
252            text_align: self.engine.text_align,
253            start_x: rect.min.x,
254            end_x: rect.max.x,
255            width: rect.max.x - rect.min.x,
256            ..Default::default()
257        };
258
259        let loop_context = LoopContext::default();
260        let mut draw_state = DrawState {
261            position: rect.min,
262            ..Default::default()
263        };
264
265        let root_data = RootData {
266            start_offset: 0,
267            spine_dir,
268            rect,
269        };
270
271        pages.push(Vec::new());
272
273        self.engine.build_display_list(
274            self.content.root(),
275            &style,
276            &loop_context,
277            &stylesheet,
278            &root_data,
279            &mut self.parent,
280            &mut draw_state,
281            &mut pages,
282        );
283
284        pages.retain(|page| !page.is_empty());
285
286        if pages.is_empty() {
287            pages.push(vec![DrawCommand::Marker(self.content.root().offset())]);
288        }
289
290        pages
291    }
292
293    /// Resolves a [`Location`] to a concrete document offset, triggering font
294    /// loading and page building as needed.
295    ///
296    /// - `Exact(offset)` — snaps to the first draw command on the page
297    ///   containing `offset`.
298    /// - `Previous(offset)` / `Next(offset)` — steps one page back or forward.
299    /// - `LocalUri` / `Uri` — resolves a URI fragment anchor.
300    ///
301    /// Returns `None` when the location cannot be resolved (e.g. already on
302    /// the first page for `Previous`, or no element with the given `id`).
303    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
304    pub(crate) fn resolve_location(&mut self, loc: Location) -> Option<usize> {
305        self.engine.load_fonts();
306
307        match loc {
308            Location::Exact(offset) => {
309                let page_index = self.page_index(offset)?;
310                self.pages[page_index].first().map(DrawCommand::offset)
311            }
312            Location::Previous(offset) => {
313                let page_index = self.page_index(offset)?;
314                if page_index > 0 {
315                    self.pages[page_index - 1].first().map(DrawCommand::offset)
316                } else {
317                    None
318                }
319            }
320            Location::Next(offset) => {
321                let page_index = self.page_index(offset)?;
322                if page_index < self.pages.len() - 1 {
323                    self.pages[page_index + 1].first().map(DrawCommand::offset)
324                } else {
325                    None
326                }
327            }
328            Location::LocalUri(_, ref uri) | Location::Uri(ref uri) => {
329                let mut cache = FxHashMap::default();
330                self.resolve_link(uri, &mut cache)
331            }
332        }
333    }
334
335    /// Returns all text spans on the page identified by `loc`, together with
336    /// the resolved page offset.
337    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
338    pub(crate) fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
339        let offset = self.resolve_location(loc)?;
340        let page_index = self.page_index(offset)?;
341
342        Some((
343            self.pages[page_index]
344                .iter()
345                .filter_map(|dc| match dc {
346                    DrawCommand::Text(TextCommand {
347                        text, rect, offset, ..
348                    }) => Some(BoundedText {
349                        text: text.clone(),
350                        rect: (*rect).into(),
351                        location: TextLocation::Dynamic(*offset),
352                    }),
353                    _ => None,
354                })
355                .collect(),
356            offset,
357        ))
358    }
359
360    /// Returns all image bounding rectangles on the page identified by `loc`,
361    /// together with the resolved page offset.
362    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
363    pub(crate) fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
364        let offset = self.resolve_location(loc)?;
365        let page_index = self.page_index(offset)?;
366
367        Some((
368            self.pages[page_index]
369                .iter()
370                .filter_map(|dc| match dc {
371                    DrawCommand::Image(ImageCommand { rect, .. }) => Some((*rect).into()),
372                    _ => None,
373                })
374                .collect(),
375            offset,
376        ))
377    }
378
379    /// Returns all tappable link spans on the page identified by `loc`,
380    /// together with the resolved page offset.
381    ///
382    /// Both text and image draw commands are included when they carry a URI.
383    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
384    pub(crate) fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
385        let offset = self.resolve_location(loc)?;
386        let page_index = self.page_index(offset)?;
387
388        Some((
389            self.pages[page_index]
390                .iter()
391                .filter_map(|dc| match dc {
392                    DrawCommand::Text(TextCommand {
393                        uri, rect, offset, ..
394                    })
395                    | DrawCommand::Image(ImageCommand {
396                        uri, rect, offset, ..
397                    }) if uri.is_some() => Some(BoundedText {
398                        text: uri.clone().unwrap(),
399                        rect: (*rect).into(),
400                        location: TextLocation::Dynamic(*offset),
401                    }),
402                    _ => None,
403                })
404                .collect(),
405            offset,
406        ))
407    }
408
409    /// Renders the page identified by `loc` to a [`Pixmap`] at the given
410    /// `scale` factor and returns it together with the resolved page offset.
411    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc, scale, samples)))]
412    pub(crate) fn pixmap(
413        &mut self,
414        loc: Location,
415        scale: f32,
416        samples: usize,
417    ) -> Option<(Pixmap, usize)> {
418        let offset = self.resolve_location(loc)?;
419        let page_index = self.page_index(offset)?;
420        let page = self.pages[page_index].clone();
421        let pixmap = self
422            .engine
423            .render_page(&page, scale, samples, &mut self.parent)?;
424
425        Some((pixmap, offset))
426    }
427
428    /// Reads the `content` attribute of the first `<meta name="key">` element
429    /// found in the document `<head>`, decoding any HTML entities.
430    pub(crate) fn metadata(&self, key: &str) -> Option<String> {
431        self.content
432            .root()
433            .find("head")
434            .and_then(|head| {
435                head.children().find(|child| {
436                    child.tag_name() == Some("meta") && child.attribute("name") == Some(key)
437                })
438            })
439            .and_then(|child| {
440                child
441                    .attribute("content")
442                    .map(|s| decode_entities(s).into_owned())
443            })
444    }
445}
446
447/// Resolves relative resource paths against the directory that contains the
448/// HTML file being rendered.
449impl ResourceFetcher for PathBuf {
450    fn fetch(&mut self, name: &str) -> Result<Vec<u8>, Error> {
451        let mut file = File::open(self.join(name))?;
452        let mut buf = Vec::new();
453        file.read_to_end(&mut buf)?;
454        Ok(buf)
455    }
456}
457
/// HTML document backed by the hand-rolled [`XmlParser`].
///
/// Node offsets are exact byte positions in the source string, making this
/// suitable for EPUB spine chapters and standalone HTML files where reading
/// positions are persisted to disk as absolute byte offsets.
pub struct HtmlDocument {
    /// The raw source text, retained so that [`Document::save`] can write it
    /// back to disk unchanged. Replaced wholesale by `update`.
    text: String,
    /// Shared rendering state: parsed tree, layout engine, page cache and
    /// stylesheet configuration.
    base: HtmlBase,
}
470
// NOTE(review): these manual impls assert that an `HtmlDocument` may be moved
// to and shared between threads. Nothing visible in this file establishes why
// that is sound — presumably some field reachable through `HtmlBase` is not
// automatically `Send`/`Sync`. Confirm the invariant and record it in a
// `// SAFETY:` comment.
unsafe impl Send for HtmlDocument {}
unsafe impl Sync for HtmlDocument {}
473
474impl HtmlDocument {
475    /// Opens the file at `path`, parses it with [`XmlParser`], and returns a
476    /// ready-to-render document.
477    #[cfg_attr(feature = "tracing", tracing::instrument(skip(path), fields(path = %path.as_ref().display())))]
478    pub fn new<P: AsRef<Path>>(path: P) -> Result<HtmlDocument, Error> {
479        let mut file = File::open(&path)?;
480        let size = file.metadata()?.len() as usize;
481        let mut text = String::new();
482        file.read_to_string(&mut text)?;
483        let mut content = XmlParser::new(&text).parse();
484        content.wrap_lost_inlines();
485        let parent = path.as_ref().parent().unwrap_or_else(|| Path::new(""));
486
487        Ok(HtmlDocument {
488            text,
489            base: HtmlBase::new(
490                content,
491                size,
492                parent.to_path_buf(),
493                PathBuf::from(VIEWER_STYLESHEET),
494                PathBuf::from(USER_STYLESHEET),
495            ),
496        })
497    }
498
499    /// Parses an in-memory HTML string and returns a ready-to-render document.
500    ///
501    /// The document has no parent directory, so relative resource references
502    /// (images, linked stylesheets) will not be resolved.
503    #[cfg_attr(feature = "tracing", tracing::instrument(skip(text), fields(len = text.len())))]
504    pub fn new_from_memory(text: &str) -> HtmlDocument {
505        let size = text.len();
506        let mut content = XmlParser::new(text).parse();
507        content.wrap_lost_inlines();
508
509        HtmlDocument {
510            text: text.to_string(),
511            base: HtmlBase::new(
512                content,
513                size,
514                PathBuf::default(),
515                PathBuf::from(VIEWER_STYLESHEET),
516                PathBuf::from(USER_STYLESHEET),
517            ),
518        }
519    }
520
521    /// Replaces the document content with a freshly parsed version of `text`
522    /// and clears the page cache.
523    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, text), fields(len = text.len())))]
524    pub fn update(&mut self, text: &str) {
525        self.base.size = text.len();
526        self.base.content = XmlParser::new(text).parse();
527        self.base.content.wrap_lost_inlines();
528        self.text = text.to_string();
529        self.base.pages.clear();
530    }
531
532    /// Overrides the page margin. Clears the page cache.
533    pub fn set_margin(&mut self, margin: &Edge) {
534        self.base.engine.set_margin(margin);
535        self.base.pages.clear();
536    }
537
538    /// Overrides the base font size in points. Clears the page cache.
539    pub fn set_font_size(&mut self, font_size: f32) {
540        self.base.engine.set_font_size(font_size);
541        self.base.pages.clear();
542    }
543
544    /// Overrides the viewer stylesheet path. Clears the page cache.
545    pub fn set_viewer_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
546        self.base.viewer_stylesheet = path.as_ref().to_path_buf();
547        self.base.pages.clear();
548    }
549
550    /// Overrides the user stylesheet path. Clears the page cache.
551    pub fn set_user_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
552        self.base.user_stylesheet = path.as_ref().to_path_buf();
553        self.base.pages.clear();
554    }
555
556    /// Always returns `None`; category metadata is not embedded in HTML files.
557    pub fn categories(&self) -> Option<String> {
558        None
559    }
560
561    /// Returns the `content` of `<meta name="description">` if present.
562    pub fn description(&self) -> Option<String> {
563        self.base.metadata("description")
564    }
565
566    /// Returns the `xml:lang` attribute of the root `<html>` element if
567    /// present.
568    pub fn language(&self) -> Option<String> {
569        self.base
570            .content
571            .root()
572            .find("html")
573            .and_then(|html| html.attribute("xml:lang"))
574            .map(String::from)
575    }
576
577    /// Returns the first four characters of the `content` of
578    /// `<meta name="date">` (i.e. the year) if present.
579    pub fn year(&self) -> Option<String> {
580        self.base
581            .metadata("date")
582            .map(|s| s.chars().take(4).collect())
583    }
584}
585
586impl Document for HtmlDocument {
587    #[inline]
588    fn dims(&self, _index: usize) -> Option<(f32, f32)> {
589        Some((
590            self.base.engine.dims.0 as f32,
591            self.base.engine.dims.1 as f32,
592        ))
593    }
594
595    fn pages_count(&self) -> usize {
596        self.base.size
597    }
598
599    fn toc(&mut self) -> Option<Vec<TocEntry>> {
600        None
601    }
602
603    fn chapter<'a>(&mut self, _offset: usize, _toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
604        None
605    }
606
607    fn chapter_relative<'a>(
608        &mut self,
609        _offset: usize,
610        _dir: CycleDir,
611        _toc: &'a [TocEntry],
612    ) -> Option<&'a TocEntry> {
613        None
614    }
615
616    fn resolve_location(&mut self, loc: Location) -> Option<usize> {
617        self.base.resolve_location(loc)
618    }
619
620    fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
621        self.base.words(loc)
622    }
623
624    fn lines(&mut self, _loc: Location) -> Option<(Vec<BoundedText>, usize)> {
625        None
626    }
627
628    fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
629        self.base.images(loc)
630    }
631
632    fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
633        self.base.links(loc)
634    }
635
636    fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
637        self.base.pixmap(loc, scale, samples)
638    }
639
640    fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
641        self.base.engine.layout(width, height, font_size, dpi);
642        self.base.pages.clear();
643    }
644
645    fn set_text_align(&mut self, text_align: TextAlign) {
646        self.base.engine.set_text_align(text_align);
647        self.base.pages.clear();
648    }
649
650    fn set_font_family(&mut self, family_name: &str, search_path: &str) {
651        self.base.engine.set_font_family(family_name, search_path);
652        self.base.pages.clear();
653    }
654
655    fn set_margin_width(&mut self, width: i32) {
656        self.base.engine.set_margin_width(width);
657        self.base.pages.clear();
658    }
659
660    fn set_line_height(&mut self, line_height: f32) {
661        self.base.engine.set_line_height(line_height);
662        self.base.pages.clear();
663    }
664
665    fn set_hyphen_penalty(&mut self, hyphen_penalty: i32) {
666        self.base.engine.set_hyphen_penalty(hyphen_penalty);
667        self.base.pages.clear();
668    }
669
670    fn set_stretch_tolerance(&mut self, stretch_tolerance: f32) {
671        self.base.engine.set_stretch_tolerance(stretch_tolerance);
672        self.base.pages.clear();
673    }
674
675    fn set_ignore_document_css(&mut self, ignore: bool) {
676        self.base.ignore_document_css = ignore;
677        self.base.pages.clear();
678    }
679
680    fn title(&self) -> Option<String> {
681        self.base
682            .content
683            .root()
684            .find("head")
685            .and_then(|head| {
686                head.children()
687                    .find(|child| child.tag_name() == Some("title"))
688            })
689            .map(|child| decode_entities(&child.text()).into_owned())
690    }
691
692    fn author(&self) -> Option<String> {
693        self.base.metadata("author")
694    }
695
696    fn metadata(&self, key: &str) -> Option<String> {
697        self.base.metadata(key)
698    }
699
700    fn save(&self, path: &str) -> Result<(), Error> {
701        let mut file = File::create(path)?;
702        file.write_all(self.text.as_bytes()).map_err(Into::into)
703    }
704
705    fn is_reflowable(&self) -> bool {
706        true
707    }
708
709    fn has_synthetic_page_numbers(&self) -> bool {
710        true
711    }
712}
713
#[cfg(test)]
mod tests {
    use super::*;
    use crate::document::html::layout::DrawCommand;
    use std::path::PathBuf;

    /// Builds an in-memory document laid out on a 600×800 viewport at 12 pt
    /// and 265 dpi, with a margin width of 3 and fonts loaded from the
    /// directory named by the `TEST_ROOT_DIR` environment variable.
    fn setup_doc(html: &str) -> HtmlDocument {
        let root_dir = std::env::var("TEST_ROOT_DIR")
            .map(PathBuf::from)
            .expect("TEST_ROOT_DIR must be set for html tests");

        let mut doc = HtmlDocument::new_from_memory(html);
        let engine = &mut doc.base.engine;
        engine.layout(600, 800, 12.0, 265);
        engine.set_margin_width(3);
        engine.load_fonts_from(root_dir);
        doc
    }

    /// A list nested inside another list must place at least one text run
    /// further to the right than the leftmost text run.
    #[test]
    fn nested_list_items_are_indented_further_than_outer_items() {
        let html = r#"<ol><li>Outer item</li><ol style="list-style-type:lower-alpha"><li>Inner item</li></ol></ol>"#;
        let mut doc = setup_doc(html);

        let pages = doc.base.build_pages();

        // Horizontal start of every text run, across all pages.
        let text_x_positions: Vec<i32> = pages
            .iter()
            .flatten()
            .filter_map(|cmd| match cmd {
                DrawCommand::Text(tc) => Some(tc.position.x),
                DrawCommand::ExtraText(tc) => Some(tc.position.x),
                _ => None,
            })
            .collect();

        assert!(
            text_x_positions.len() >= 2,
            "expected at least two text items, got {}",
            text_x_positions.len()
        );

        let min_x = *text_x_positions.iter().min().unwrap();
        let max_x = *text_x_positions.iter().max().unwrap();

        assert!(
            max_x > min_x,
            "inner list item (x={}) should be indented further than outer item (x={})",
            max_x,
            min_x
        );
    }
}
764}