cadmus_core/document/html/
mod.rs

1pub mod css;
2pub mod dom;
3pub mod engine;
4pub mod layout;
5pub mod parse;
6pub mod style;
7pub mod xml;
8
9use self::css::CssParser;
10use self::dom::{NodeRef, XmlTree};
11use self::engine::{Engine, Page, ResourceFetcher};
12use self::layout::{DrawCommand, ImageCommand, TextAlign, TextCommand};
13use self::layout::{DrawState, LoopContext, RootData, StyleData};
14use self::style::StyleSheet;
15use self::xml::XmlParser;
16use crate::document::{BoundedText, Document, Location, TextLocation, TocEntry};
17use crate::framebuffer::Pixmap;
18use crate::geom::{Boundary, CycleDir, Edge};
19use crate::helpers::{decode_entities, Normalize};
20use crate::unit::pt_to_px;
21use anyhow::Error;
22use fxhash::FxHashMap;
23use std::fs::{self, File};
24use std::io::{Read, Write};
25use std::path::{Path, PathBuf};
26
/// Default path of the viewer stylesheet. `build_pages` reads it with `fs::read_to_string`,
/// so this relative path is resolved against the process's working directory.
const VIEWER_STYLESHEET: &str = "css/html.css";
/// Default path of the user stylesheet, read right after the viewer stylesheet.
const USER_STYLESHEET: &str = "css/html-user.css";

/// Maps a link target of the form `name#id` to the offset of the node carrying that `id`.
type UriCache = FxHashMap<String, usize>;
31
/// An HTML document: its source text, the parsed tree and the lazily built pages.
pub struct HtmlDocument {
    /// Source markup, kept verbatim so that `save` can write it back out.
    text: String,
    /// Tree produced by `XmlParser` from `text`.
    content: XmlTree,
    /// Engine used to lay out and render the pages.
    engine: Engine,
    /// Cached pages; cleared by every setter and rebuilt on demand by `page_index`.
    pages: Vec<Page>,
    /// Directory containing the source file (empty for in-memory documents); it is the
    /// `ResourceFetcher` used to load linked stylesheets and other resources.
    parent: PathBuf,
    /// Size of the document in bytes, reported by `pages_count`.
    size: usize,
    /// Path of the viewer stylesheet read by `build_pages`.
    viewer_stylesheet: PathBuf,
    /// Path of the user stylesheet read by `build_pages`.
    user_stylesheet: PathBuf,
    /// When `true`, `build_pages` skips the CSS linked or embedded in the document.
    ignore_document_css: bool,
}
43
44impl ResourceFetcher for PathBuf {
45    fn fetch(&mut self, name: &str) -> Result<Vec<u8>, Error> {
46        let mut file = File::open(self.join(name))?;
47        let mut buf = Vec::new();
48        file.read_to_end(&mut buf)?;
49        Ok(buf)
50    }
51}
52
// SAFETY: NOTE(review): nothing visible in this file justifies these impls. Presumably one of
// the fields (the engine or the parsed tree) is not automatically `Send`/`Sync` and callers
// guarantee exclusive access — confirm and record the actual invariant here.
unsafe impl Send for HtmlDocument {}
unsafe impl Sync for HtmlDocument {}
55
56impl HtmlDocument {
57    pub fn new<P: AsRef<Path>>(path: P) -> Result<HtmlDocument, Error> {
58        let mut file = File::open(&path)?;
59        let size = file.metadata()?.len() as usize;
60        let mut text = String::new();
61        file.read_to_string(&mut text)?;
62        let mut content = XmlParser::new(&text).parse();
63        content.wrap_lost_inlines();
64        let parent = path.as_ref().parent().unwrap_or_else(|| Path::new(""));
65
66        Ok(HtmlDocument {
67            text,
68            content,
69            engine: Engine::new(),
70            pages: Vec::new(),
71            parent: parent.to_path_buf(),
72            size,
73            viewer_stylesheet: PathBuf::from(VIEWER_STYLESHEET),
74            user_stylesheet: PathBuf::from(USER_STYLESHEET),
75            ignore_document_css: false,
76        })
77    }
78
79    pub fn new_from_memory(text: &str) -> HtmlDocument {
80        let size = text.len();
81        let mut content = XmlParser::new(text).parse();
82        content.wrap_lost_inlines();
83
84        HtmlDocument {
85            text: text.to_string(),
86            content,
87            engine: Engine::new(),
88            pages: Vec::new(),
89            parent: PathBuf::default(),
90            size,
91            viewer_stylesheet: PathBuf::from(VIEWER_STYLESHEET),
92            user_stylesheet: PathBuf::from(USER_STYLESHEET),
93            ignore_document_css: false,
94        }
95    }
96
97    pub fn update(&mut self, text: &str) {
98        self.size = text.len();
99        self.content = XmlParser::new(text).parse();
100        self.content.wrap_lost_inlines();
101        self.text = text.to_string();
102        self.pages.clear();
103    }
104
105    pub fn set_margin(&mut self, margin: &Edge) {
106        self.engine.set_margin(margin);
107        self.pages.clear();
108    }
109
110    pub fn set_font_size(&mut self, font_size: f32) {
111        self.engine.set_font_size(font_size);
112        self.pages.clear();
113    }
114
115    pub fn set_viewer_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
116        self.viewer_stylesheet = path.as_ref().to_path_buf();
117        self.pages.clear();
118    }
119
120    pub fn set_user_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
121        self.user_stylesheet = path.as_ref().to_path_buf();
122        self.pages.clear();
123    }
124
125    #[inline]
126    fn page_index(&mut self, offset: usize) -> Option<usize> {
127        if self.pages.is_empty() {
128            self.pages = self.build_pages();
129        }
130        if self.pages.len() < 2
131            || self.pages[1].first().map(|dc| offset < dc.offset()) == Some(true)
132        {
133            return Some(0);
134        } else if self.pages[self.pages.len() - 1]
135            .first()
136            .map(|dc| offset >= dc.offset())
137            == Some(true)
138        {
139            return Some(self.pages.len() - 1);
140        } else {
141            for i in 1..self.pages.len() - 1 {
142                if self.pages[i].first().map(|dc| offset >= dc.offset()) == Some(true)
143                    && self.pages[i + 1].first().map(|dc| offset < dc.offset()) == Some(true)
144                {
145                    return Some(i);
146                }
147            }
148        }
149        None
150    }
151
152    fn resolve_link(&mut self, uri: &str, cache: &mut UriCache) -> Option<usize> {
153        let frag_index = uri.find('#')?;
154        let name = &uri[..frag_index];
155        let content = self.content.clone();
156        self.cache_uris(content.root(), name, cache);
157        cache.get(uri).cloned()
158    }
159
160    fn cache_uris(&mut self, node: NodeRef, name: &str, cache: &mut UriCache) {
161        if let Some(id) = node.attribute("id") {
162            cache.insert(format!("{}#{}", name, id), node.offset());
163        }
164        for child in node.children() {
165            self.cache_uris(child, name, cache);
166        }
167    }
168
169    fn build_pages(&mut self) -> Vec<Page> {
170        let mut stylesheet = StyleSheet::new();
171        let spine_dir = PathBuf::default();
172
173        if let Ok(text) = fs::read_to_string(&self.viewer_stylesheet) {
174            let mut css = CssParser::new(&text).parse();
175            stylesheet.append(&mut css, true);
176        }
177
178        if let Ok(text) = fs::read_to_string(&self.user_stylesheet) {
179            let mut css = CssParser::new(&text).parse();
180            stylesheet.append(&mut css, true);
181        }
182
183        if !self.ignore_document_css {
184            let mut inner_css = StyleSheet::new();
185
186            if let Some(head) = self.content.root().find("head") {
187                for child in head.children() {
188                    if child.tag_name() == Some("link")
189                        && child.attribute("rel") == Some("stylesheet")
190                    {
191                        if let Some(href) = child.attribute("href") {
192                            if let Some(name) = spine_dir.join(href).normalize().to_str() {
193                                if let Ok(buf) = self.parent.fetch(name) {
194                                    if let Ok(text) = String::from_utf8(buf) {
195                                        let mut css = CssParser::new(&text).parse();
196                                        inner_css.append(&mut css, false);
197                                    }
198                                }
199                            }
200                        }
201                    } else if child.tag_name() == Some("style")
202                        && child.attribute("type") == Some("text/css")
203                    {
204                        let mut css = CssParser::new(&child.text()).parse();
205                        inner_css.append(&mut css, false);
206                    }
207                }
208            }
209
210            stylesheet.append(&mut inner_css, true);
211        }
212
213        let mut pages = Vec::new();
214
215        let mut rect = self.engine.rect();
216        rect.shrink(&self.engine.margin);
217
218        let language = self
219            .content
220            .root()
221            .find("html")
222            .and_then(|html| html.attribute("xml:lang"))
223            .map(String::from);
224
225        let style = StyleData {
226            language,
227            font_size: self.engine.font_size,
228            line_height: pt_to_px(
229                self.engine.line_height * self.engine.font_size,
230                self.engine.dpi,
231            )
232            .round() as i32,
233            text_align: self.engine.text_align,
234            start_x: rect.min.x,
235            end_x: rect.max.x,
236            width: rect.max.x - rect.min.x,
237            ..Default::default()
238        };
239
240        let loop_context = LoopContext::default();
241        let mut draw_state = DrawState {
242            position: rect.min,
243            ..Default::default()
244        };
245
246        let root_data = RootData {
247            start_offset: 0,
248            spine_dir,
249            rect,
250        };
251
252        pages.push(Vec::new());
253
254        self.engine.build_display_list(
255            self.content.root(),
256            &style,
257            &loop_context,
258            &stylesheet,
259            &root_data,
260            &mut self.parent,
261            &mut draw_state,
262            &mut pages,
263        );
264
265        pages.retain(|page| !page.is_empty());
266
267        if pages.is_empty() {
268            pages.push(vec![DrawCommand::Marker(self.content.root().offset())]);
269        }
270
271        pages
272    }
273
    /// Always returns `None`: no categories are extracted from HTML documents.
    pub fn categories(&self) -> Option<String> {
        None
    }
277
    /// Returns the `description` metadata entry, as looked up by `metadata`.
    pub fn description(&self) -> Option<String> {
        self.metadata("description")
    }
281
282    pub fn language(&self) -> Option<String> {
283        self.content
284            .root()
285            .find("html")
286            .and_then(|html| html.attribute("xml:lang"))
287            .map(String::from)
288    }
289
290    pub fn year(&self) -> Option<String> {
291        self.metadata("date").map(|s| s.chars().take(4).collect())
292    }
293}
294
295impl Document for HtmlDocument {
296    #[inline]
297    fn dims(&self, _index: usize) -> Option<(f32, f32)> {
298        Some((self.engine.dims.0 as f32, self.engine.dims.1 as f32))
299    }
300
    /// Returns the size of the document in bytes rather than a number of laid-out pages
    /// (see `has_synthetic_page_numbers`).
    fn pages_count(&self) -> usize {
        self.size
    }
304
    /// Always returns `None`: no table of contents is produced for HTML documents.
    fn toc(&mut self) -> Option<Vec<TocEntry>> {
        None
    }
308
    /// Always returns `None`; both arguments are ignored.
    fn chapter<'a>(&mut self, _offset: usize, _toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
        None
    }
312
    /// Always returns `None`; all arguments are ignored.
    fn chapter_relative<'a>(
        &mut self,
        _offset: usize,
        _dir: CycleDir,
        _toc: &'a [TocEntry],
    ) -> Option<&'a TocEntry> {
        None
    }
321
322    fn resolve_location(&mut self, loc: Location) -> Option<usize> {
323        self.engine.load_fonts();
324
325        match loc {
326            Location::Exact(offset) => {
327                let page_index = self.page_index(offset)?;
328                self.pages[page_index].first().map(DrawCommand::offset)
329            }
330            Location::Previous(offset) => {
331                let page_index = self.page_index(offset)?;
332                if page_index > 0 {
333                    self.pages[page_index - 1].first().map(DrawCommand::offset)
334                } else {
335                    None
336                }
337            }
338            Location::Next(offset) => {
339                let page_index = self.page_index(offset)?;
340                if page_index < self.pages.len() - 1 {
341                    self.pages[page_index + 1].first().map(DrawCommand::offset)
342                } else {
343                    None
344                }
345            }
346            Location::LocalUri(_, ref uri) | Location::Uri(ref uri) => {
347                let mut cache = FxHashMap::default();
348                self.resolve_link(uri, &mut cache)
349            }
350        }
351    }
352
353    fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
354        let offset = self.resolve_location(loc)?;
355        let page_index = self.page_index(offset)?;
356
357        Some((
358            self.pages[page_index]
359                .iter()
360                .filter_map(|dc| match dc {
361                    DrawCommand::Text(TextCommand {
362                        text, rect, offset, ..
363                    }) => Some(BoundedText {
364                        text: text.clone(),
365                        rect: (*rect).into(),
366                        location: TextLocation::Dynamic(*offset),
367                    }),
368                    _ => None,
369                })
370                .collect(),
371            offset,
372        ))
373    }
374
    /// Always returns `None`; the location is ignored.
    fn lines(&mut self, _loc: Location) -> Option<(Vec<BoundedText>, usize)> {
        None
    }
378
379    fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
380        let offset = self.resolve_location(loc)?;
381        let page_index = self.page_index(offset)?;
382
383        Some((
384            self.pages[page_index]
385                .iter()
386                .filter_map(|dc| match dc {
387                    DrawCommand::Image(ImageCommand { rect, .. }) => Some((*rect).into()),
388                    _ => None,
389                })
390                .collect(),
391            offset,
392        ))
393    }
394
395    fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
396        let offset = self.resolve_location(loc)?;
397        let page_index = self.page_index(offset)?;
398
399        Some((
400            self.pages[page_index]
401                .iter()
402                .filter_map(|dc| match dc {
403                    DrawCommand::Text(TextCommand {
404                        uri, rect, offset, ..
405                    })
406                    | DrawCommand::Image(ImageCommand {
407                        uri, rect, offset, ..
408                    }) if uri.is_some() => Some(BoundedText {
409                        text: uri.clone().unwrap(),
410                        rect: (*rect).into(),
411                        location: TextLocation::Dynamic(*offset),
412                    }),
413                    _ => None,
414                })
415                .collect(),
416            offset,
417        ))
418    }
419
420    fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
421        let offset = self.resolve_location(loc)?;
422        let page_index = self.page_index(offset)?;
423        let page = self.pages[page_index].clone();
424        let pixmap = self
425            .engine
426            .render_page(&page, scale, samples, &mut self.parent)?;
427
428        Some((pixmap, offset))
429    }
430
431    fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
432        self.engine.layout(width, height, font_size, dpi);
433        self.pages.clear();
434    }
435
436    fn set_text_align(&mut self, text_align: TextAlign) {
437        self.engine.set_text_align(text_align);
438        self.pages.clear();
439    }
440
441    fn set_font_family(&mut self, family_name: &str, search_path: &str) {
442        self.engine.set_font_family(family_name, search_path);
443        self.pages.clear();
444    }
445
446    fn set_margin_width(&mut self, width: i32) {
447        self.engine.set_margin_width(width);
448        self.pages.clear();
449    }
450
451    fn set_line_height(&mut self, line_height: f32) {
452        self.engine.set_line_height(line_height);
453        self.pages.clear();
454    }
455
456    fn set_hyphen_penalty(&mut self, hyphen_penalty: i32) {
457        self.engine.set_hyphen_penalty(hyphen_penalty);
458        self.pages.clear();
459    }
460
461    fn set_stretch_tolerance(&mut self, stretch_tolerance: f32) {
462        self.engine.set_stretch_tolerance(stretch_tolerance);
463        self.pages.clear();
464    }
465
466    fn set_ignore_document_css(&mut self, ignore: bool) {
467        self.ignore_document_css = ignore;
468        self.pages.clear();
469    }
470
471    fn title(&self) -> Option<String> {
472        self.content
473            .root()
474            .find("head")
475            .and_then(|head| {
476                head.children()
477                    .find(|child| child.tag_name() == Some("title"))
478            })
479            .map(|child| decode_entities(&child.text()).into_owned())
480    }
481
    /// Returns the `author` metadata entry, as looked up by `metadata`.
    fn author(&self) -> Option<String> {
        self.metadata("author")
    }
485
486    fn metadata(&self, key: &str) -> Option<String> {
487        self.content
488            .root()
489            .find("head")
490            .and_then(|head| {
491                head.children().find(|child| {
492                    child.tag_name() == Some("meta") && child.attribute("name") == Some(key)
493                })
494            })
495            .and_then(|child| {
496                child
497                    .attribute("content")
498                    .map(|s| decode_entities(s).into_owned())
499            })
500    }
501
502    fn save(&self, path: &str) -> Result<(), Error> {
503        let mut file = File::create(path)?;
504        file.write_all(self.text.as_bytes()).map_err(Into::into)
505    }
506
    /// Always `true` for HTML documents.
    fn is_reflowable(&self) -> bool {
        true
    }
510
    /// Always `true` for HTML documents; note that `pages_count` reports the document's
    /// byte size, not a count of laid-out pages.
    fn has_synthetic_page_numbers(&self) -> bool {
        true
    }
514}