Skip to main content

cadmus_core/document/html/
html5.rs

1//! [`Html5Document`] — an HTML document backed by the html5ever parser.
2//!
3//! This module exposes a single public type, [`Html5Document`], which wraps
4//! the shared [`HtmlBase`] rendering pipeline and uses [`parse_html5`] to
5//! build the document tree.
6//!
7//! Use [`Html5Document`] when HTML5 conformance matters more than offset
8//! precision — for example, the dictionary view, which renders content from
9//! third-party dictionaries that may contain entities, void elements, and
10//! implicitly-closed tags. Because node offsets are synthetic (not byte
11//! positions), reading positions must **not** be persisted when using this
12//! type.
13
14use super::layout::TextAlign;
15use super::xml::parse_html5;
16use super::HtmlBase;
17use crate::document::{BoundedText, Document, Location, TocEntry};
18use crate::framebuffer::Pixmap;
19use crate::geom::{Boundary, CycleDir};
20use anyhow::Error;
21use std::path::{Path, PathBuf};
22
/// Default viewer stylesheet for dictionary rendering.
///
/// Passed to `HtmlBase::new` as the viewer stylesheet path by
/// `Html5Document::new_from_memory`. The path is relative.
// NOTE(review): presumably resolved against the application's working
// directory — confirm where `HtmlBase` opens it.
const VIEWER_STYLESHEET: &str = "css/dictionary.css";
/// Default user-editable stylesheet for dictionary rendering.
///
/// Passed to `HtmlBase::new` as the user stylesheet path by
/// `Html5Document::new_from_memory`. The path is relative.
const USER_STYLESHEET: &str = "css/dictionary-user.css";
27
/// HTML document backed by the html5ever spec-compliant parser.
///
/// Handles HTML entities, void elements (`<br>`, `<img>`), and implicitly-
/// closed block tags correctly per the HTML5 spec. Node offsets are **synthetic**
/// (not byte positions in the source), so this type is **not** suitable for
/// persisting reading positions to disk. Use it for ephemeral rendering such
/// as the dictionary view.
///
/// For documents where offset accuracy matters (EPUB spine chapters, standalone
/// HTML files) use [`HtmlDocument`](super::HtmlDocument) instead.
///
/// The type is a thin wrapper: all rendering state lives in the wrapped
/// [`HtmlBase`], and most [`Document`] methods forward to it.
pub struct Html5Document {
    /// Shared rendering state (tree, engine, page cache, stylesheets).
    ///
    /// Visible to the parent module (`pub(super)`) only.
    pub(super) base: HtmlBase,
}
42
// SAFETY: NOTE(review): the invariant that makes these impls sound is not
// visible in this file. They assert that `HtmlBase` and everything it owns may
// be moved to, and shared between, threads. Presumably some field of
// `HtmlBase` is not automatically `Send`/`Sync`; confirm that no
// unsynchronized shared state is reachable through `&Html5Document` and
// record the actual justification here.
unsafe impl Send for Html5Document {}
unsafe impl Sync for Html5Document {}
45
46impl Html5Document {
47    /// Parses an in-memory HTML string using html5ever and returns a
48    /// ready-to-render document.
49    ///
50    /// Defaults to the dictionary viewer and user stylesheets; call
51    /// [`set_viewer_stylesheet`](Self::set_viewer_stylesheet) and
52    /// [`set_user_stylesheet`](Self::set_user_stylesheet) to override them.
53    #[cfg_attr(feature = "tracing", tracing::instrument(skip(text), fields(len = text.len())))]
54    pub fn new_from_memory(text: &str) -> Html5Document {
55        let content = parse_html5(text);
56        Html5Document {
57            base: HtmlBase::new(
58                content,
59                text.len(),
60                PathBuf::default(),
61                PathBuf::from(VIEWER_STYLESHEET),
62                PathBuf::from(USER_STYLESHEET),
63            ),
64        }
65    }
66
67    /// Replaces the document content with a freshly parsed version of `text`
68    /// and clears the page cache.
69    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, text), fields(len = text.len())))]
70    pub fn update(&mut self, text: &str) {
71        self.base.size = text.len();
72        self.base.content = parse_html5(text);
73        self.base.pages.clear();
74    }
75
76    /// Overrides the viewer stylesheet path. Clears the page cache.
77    pub fn set_viewer_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
78        self.base.viewer_stylesheet = path.as_ref().to_path_buf();
79        self.base.pages.clear();
80    }
81
82    /// Overrides the user stylesheet path. Clears the page cache.
83    pub fn set_user_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
84        self.base.user_stylesheet = path.as_ref().to_path_buf();
85        self.base.pages.clear();
86    }
87}
88
89impl Document for Html5Document {
90    /// Returns the current page dimensions in pixels as `(width, height)`.
91    #[inline]
92    fn dims(&self, _index: usize) -> Option<(f32, f32)> {
93        Some((
94            self.base.engine.dims.0 as f32,
95            self.base.engine.dims.1 as f32,
96        ))
97    }
98
99    /// Returns the byte length of the source content as a proxy for page count.
100    fn pages_count(&self) -> usize {
101        self.base.size
102    }
103
104    /// Always returns `None`; the dictionary has no table of contents.
105    fn toc(&mut self) -> Option<Vec<TocEntry>> {
106        None
107    }
108
109    /// Always returns `None`; chapter metadata is not applicable.
110    fn chapter<'a>(&mut self, _offset: usize, _toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
111        None
112    }
113
114    /// Always returns `None`; chapter-relative navigation is not applicable.
115    fn chapter_relative<'a>(
116        &mut self,
117        _offset: usize,
118        _dir: CycleDir,
119        _toc: &'a [TocEntry],
120    ) -> Option<&'a TocEntry> {
121        None
122    }
123
124    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
125    fn resolve_location(&mut self, loc: Location) -> Option<usize> {
126        self.base.resolve_location(loc)
127    }
128
129    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
130    fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
131        self.base.words(loc)
132    }
133
134    /// Always returns `None`; line-level layout is not exposed.
135    fn lines(&mut self, _loc: Location) -> Option<(Vec<BoundedText>, usize)> {
136        None
137    }
138
139    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
140    fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
141        self.base.images(loc)
142    }
143
144    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc)))]
145    fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
146        self.base.links(loc)
147    }
148
149    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(loc = ?loc, scale, samples)))]
150    fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
151        self.base.pixmap(loc, scale, samples)
152    }
153
154    #[cfg_attr(
155        feature = "tracing",
156        tracing::instrument(skip(self), fields(width, height, font_size, dpi))
157    )]
158    fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
159        self.base.engine.layout(width, height, font_size, dpi);
160        self.base.pages.clear();
161    }
162
163    fn set_text_align(&mut self, text_align: TextAlign) {
164        self.base.engine.set_text_align(text_align);
165        self.base.pages.clear();
166    }
167
168    fn set_font_family(&mut self, family_name: &str, search_path: &str) {
169        self.base.engine.set_font_family(family_name, search_path);
170        self.base.pages.clear();
171    }
172
173    fn set_margin_width(&mut self, width: i32) {
174        self.base.engine.set_margin_width(width);
175        self.base.pages.clear();
176    }
177
178    fn set_line_height(&mut self, line_height: f32) {
179        self.base.engine.set_line_height(line_height);
180        self.base.pages.clear();
181    }
182
183    fn set_hyphen_penalty(&mut self, hyphen_penalty: i32) {
184        self.base.engine.set_hyphen_penalty(hyphen_penalty);
185        self.base.pages.clear();
186    }
187
188    fn set_stretch_tolerance(&mut self, stretch_tolerance: f32) {
189        self.base.engine.set_stretch_tolerance(stretch_tolerance);
190        self.base.pages.clear();
191    }
192
193    fn set_ignore_document_css(&mut self, ignore: bool) {
194        self.base.ignore_document_css = ignore;
195        self.base.pages.clear();
196    }
197
198    /// Always returns `None`; the dictionary does not expose a title.
199    fn title(&self) -> Option<String> {
200        None
201    }
202
203    /// Always returns `None`; the dictionary does not expose an author.
204    fn author(&self) -> Option<String> {
205        None
206    }
207
208    fn metadata(&self, key: &str) -> Option<String> {
209        self.base.metadata(key)
210    }
211
212    /// No-op; `Html5Document` is always constructed from memory and has no
213    /// file path to save to.
214    fn save(&self, _path: &str) -> Result<(), Error> {
215        Ok(())
216    }
217
218    fn is_reflowable(&self) -> bool {
219        true
220    }
221
222    fn has_synthetic_page_numbers(&self) -> bool {
223        true
224    }
225}