1pub mod css;
2pub mod dom;
3pub mod engine;
4pub mod layout;
5pub mod parse;
6pub mod style;
7pub mod xml;
8
9use self::css::CssParser;
10use self::dom::{NodeRef, XmlTree};
11use self::engine::{Engine, Page, ResourceFetcher};
12use self::layout::{DrawCommand, ImageCommand, TextAlign, TextCommand};
13use self::layout::{DrawState, LoopContext, RootData, StyleData};
14use self::style::StyleSheet;
15use self::xml::XmlParser;
16use crate::document::{BoundedText, Document, Location, TextLocation, TocEntry};
17use crate::framebuffer::Pixmap;
18use crate::geom::{Boundary, CycleDir, Edge};
19use crate::helpers::{decode_entities, Normalize};
20use crate::unit::pt_to_px;
21use anyhow::Error;
22use fxhash::FxHashMap;
23use std::fs::{self, File};
24use std::io::{Read, Write};
25use std::path::{Path, PathBuf};
26
27const VIEWER_STYLESHEET: &str = "css/html.css";
28const USER_STYLESHEET: &str = "css/html-user.css";
29
30type UriCache = FxHashMap<String, usize>;
31
32pub struct HtmlDocument {
33 text: String,
34 content: XmlTree,
35 engine: Engine,
36 pages: Vec<Page>,
37 parent: PathBuf,
38 size: usize,
39 viewer_stylesheet: PathBuf,
40 user_stylesheet: PathBuf,
41 ignore_document_css: bool,
42}
43
44impl ResourceFetcher for PathBuf {
45 fn fetch(&mut self, name: &str) -> Result<Vec<u8>, Error> {
46 let mut file = File::open(self.join(name))?;
47 let mut buf = Vec::new();
48 file.read_to_end(&mut buf)?;
49 Ok(buf)
50 }
51}
52
53unsafe impl Send for HtmlDocument {}
54unsafe impl Sync for HtmlDocument {}
55
56impl HtmlDocument {
57 pub fn new<P: AsRef<Path>>(path: P) -> Result<HtmlDocument, Error> {
58 let mut file = File::open(&path)?;
59 let size = file.metadata()?.len() as usize;
60 let mut text = String::new();
61 file.read_to_string(&mut text)?;
62 let mut content = XmlParser::new(&text).parse();
63 content.wrap_lost_inlines();
64 let parent = path.as_ref().parent().unwrap_or_else(|| Path::new(""));
65
66 Ok(HtmlDocument {
67 text,
68 content,
69 engine: Engine::new(),
70 pages: Vec::new(),
71 parent: parent.to_path_buf(),
72 size,
73 viewer_stylesheet: PathBuf::from(VIEWER_STYLESHEET),
74 user_stylesheet: PathBuf::from(USER_STYLESHEET),
75 ignore_document_css: false,
76 })
77 }
78
79 pub fn new_from_memory(text: &str) -> HtmlDocument {
80 let size = text.len();
81 let mut content = XmlParser::new(text).parse();
82 content.wrap_lost_inlines();
83
84 HtmlDocument {
85 text: text.to_string(),
86 content,
87 engine: Engine::new(),
88 pages: Vec::new(),
89 parent: PathBuf::default(),
90 size,
91 viewer_stylesheet: PathBuf::from(VIEWER_STYLESHEET),
92 user_stylesheet: PathBuf::from(USER_STYLESHEET),
93 ignore_document_css: false,
94 }
95 }
96
97 pub fn update(&mut self, text: &str) {
98 self.size = text.len();
99 self.content = XmlParser::new(text).parse();
100 self.content.wrap_lost_inlines();
101 self.text = text.to_string();
102 self.pages.clear();
103 }
104
105 pub fn set_margin(&mut self, margin: &Edge) {
106 self.engine.set_margin(margin);
107 self.pages.clear();
108 }
109
110 pub fn set_font_size(&mut self, font_size: f32) {
111 self.engine.set_font_size(font_size);
112 self.pages.clear();
113 }
114
115 pub fn set_viewer_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
116 self.viewer_stylesheet = path.as_ref().to_path_buf();
117 self.pages.clear();
118 }
119
120 pub fn set_user_stylesheet<P: AsRef<Path>>(&mut self, path: P) {
121 self.user_stylesheet = path.as_ref().to_path_buf();
122 self.pages.clear();
123 }
124
125 #[inline]
126 fn page_index(&mut self, offset: usize) -> Option<usize> {
127 if self.pages.is_empty() {
128 self.pages = self.build_pages();
129 }
130 if self.pages.len() < 2
131 || self.pages[1].first().map(|dc| offset < dc.offset()) == Some(true)
132 {
133 return Some(0);
134 } else if self.pages[self.pages.len() - 1]
135 .first()
136 .map(|dc| offset >= dc.offset())
137 == Some(true)
138 {
139 return Some(self.pages.len() - 1);
140 } else {
141 for i in 1..self.pages.len() - 1 {
142 if self.pages[i].first().map(|dc| offset >= dc.offset()) == Some(true)
143 && self.pages[i + 1].first().map(|dc| offset < dc.offset()) == Some(true)
144 {
145 return Some(i);
146 }
147 }
148 }
149 None
150 }
151
152 fn resolve_link(&mut self, uri: &str, cache: &mut UriCache) -> Option<usize> {
153 let frag_index = uri.find('#')?;
154 let name = &uri[..frag_index];
155 let content = self.content.clone();
156 self.cache_uris(content.root(), name, cache);
157 cache.get(uri).cloned()
158 }
159
160 fn cache_uris(&mut self, node: NodeRef, name: &str, cache: &mut UriCache) {
161 if let Some(id) = node.attribute("id") {
162 cache.insert(format!("{}#{}", name, id), node.offset());
163 }
164 for child in node.children() {
165 self.cache_uris(child, name, cache);
166 }
167 }
168
169 fn build_pages(&mut self) -> Vec<Page> {
170 let mut stylesheet = StyleSheet::new();
171 let spine_dir = PathBuf::default();
172
173 if let Ok(text) = fs::read_to_string(&self.viewer_stylesheet) {
174 let mut css = CssParser::new(&text).parse();
175 stylesheet.append(&mut css, true);
176 }
177
178 if let Ok(text) = fs::read_to_string(&self.user_stylesheet) {
179 let mut css = CssParser::new(&text).parse();
180 stylesheet.append(&mut css, true);
181 }
182
183 if !self.ignore_document_css {
184 let mut inner_css = StyleSheet::new();
185
186 if let Some(head) = self.content.root().find("head") {
187 for child in head.children() {
188 if child.tag_name() == Some("link")
189 && child.attribute("rel") == Some("stylesheet")
190 {
191 if let Some(href) = child.attribute("href") {
192 if let Some(name) = spine_dir.join(href).normalize().to_str() {
193 if let Ok(buf) = self.parent.fetch(name) {
194 if let Ok(text) = String::from_utf8(buf) {
195 let mut css = CssParser::new(&text).parse();
196 inner_css.append(&mut css, false);
197 }
198 }
199 }
200 }
201 } else if child.tag_name() == Some("style")
202 && child.attribute("type") == Some("text/css")
203 {
204 let mut css = CssParser::new(&child.text()).parse();
205 inner_css.append(&mut css, false);
206 }
207 }
208 }
209
210 stylesheet.append(&mut inner_css, true);
211 }
212
213 let mut pages = Vec::new();
214
215 let mut rect = self.engine.rect();
216 rect.shrink(&self.engine.margin);
217
218 let language = self
219 .content
220 .root()
221 .find("html")
222 .and_then(|html| html.attribute("xml:lang"))
223 .map(String::from);
224
225 let style = StyleData {
226 language,
227 font_size: self.engine.font_size,
228 line_height: pt_to_px(
229 self.engine.line_height * self.engine.font_size,
230 self.engine.dpi,
231 )
232 .round() as i32,
233 text_align: self.engine.text_align,
234 start_x: rect.min.x,
235 end_x: rect.max.x,
236 width: rect.max.x - rect.min.x,
237 ..Default::default()
238 };
239
240 let loop_context = LoopContext::default();
241 let mut draw_state = DrawState {
242 position: rect.min,
243 ..Default::default()
244 };
245
246 let root_data = RootData {
247 start_offset: 0,
248 spine_dir,
249 rect,
250 };
251
252 pages.push(Vec::new());
253
254 self.engine.build_display_list(
255 self.content.root(),
256 &style,
257 &loop_context,
258 &stylesheet,
259 &root_data,
260 &mut self.parent,
261 &mut draw_state,
262 &mut pages,
263 );
264
265 pages.retain(|page| !page.is_empty());
266
267 if pages.is_empty() {
268 pages.push(vec![DrawCommand::Marker(self.content.root().offset())]);
269 }
270
271 pages
272 }
273
274 pub fn categories(&self) -> Option<String> {
275 None
276 }
277
278 pub fn description(&self) -> Option<String> {
279 self.metadata("description")
280 }
281
282 pub fn language(&self) -> Option<String> {
283 self.content
284 .root()
285 .find("html")
286 .and_then(|html| html.attribute("xml:lang"))
287 .map(String::from)
288 }
289
290 pub fn year(&self) -> Option<String> {
291 self.metadata("date").map(|s| s.chars().take(4).collect())
292 }
293}
294
295impl Document for HtmlDocument {
296 #[inline]
297 fn dims(&self, _index: usize) -> Option<(f32, f32)> {
298 Some((self.engine.dims.0 as f32, self.engine.dims.1 as f32))
299 }
300
301 fn pages_count(&self) -> usize {
302 self.size
303 }
304
305 fn toc(&mut self) -> Option<Vec<TocEntry>> {
306 None
307 }
308
309 fn chapter<'a>(&mut self, _offset: usize, _toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
310 None
311 }
312
313 fn chapter_relative<'a>(
314 &mut self,
315 _offset: usize,
316 _dir: CycleDir,
317 _toc: &'a [TocEntry],
318 ) -> Option<&'a TocEntry> {
319 None
320 }
321
322 fn resolve_location(&mut self, loc: Location) -> Option<usize> {
323 self.engine.load_fonts();
324
325 match loc {
326 Location::Exact(offset) => {
327 let page_index = self.page_index(offset)?;
328 self.pages[page_index].first().map(DrawCommand::offset)
329 }
330 Location::Previous(offset) => {
331 let page_index = self.page_index(offset)?;
332 if page_index > 0 {
333 self.pages[page_index - 1].first().map(DrawCommand::offset)
334 } else {
335 None
336 }
337 }
338 Location::Next(offset) => {
339 let page_index = self.page_index(offset)?;
340 if page_index < self.pages.len() - 1 {
341 self.pages[page_index + 1].first().map(DrawCommand::offset)
342 } else {
343 None
344 }
345 }
346 Location::LocalUri(_, ref uri) | Location::Uri(ref uri) => {
347 let mut cache = FxHashMap::default();
348 self.resolve_link(uri, &mut cache)
349 }
350 }
351 }
352
353 fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
354 let offset = self.resolve_location(loc)?;
355 let page_index = self.page_index(offset)?;
356
357 Some((
358 self.pages[page_index]
359 .iter()
360 .filter_map(|dc| match dc {
361 DrawCommand::Text(TextCommand {
362 text, rect, offset, ..
363 }) => Some(BoundedText {
364 text: text.clone(),
365 rect: (*rect).into(),
366 location: TextLocation::Dynamic(*offset),
367 }),
368 _ => None,
369 })
370 .collect(),
371 offset,
372 ))
373 }
374
375 fn lines(&mut self, _loc: Location) -> Option<(Vec<BoundedText>, usize)> {
376 None
377 }
378
379 fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
380 let offset = self.resolve_location(loc)?;
381 let page_index = self.page_index(offset)?;
382
383 Some((
384 self.pages[page_index]
385 .iter()
386 .filter_map(|dc| match dc {
387 DrawCommand::Image(ImageCommand { rect, .. }) => Some((*rect).into()),
388 _ => None,
389 })
390 .collect(),
391 offset,
392 ))
393 }
394
395 fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
396 let offset = self.resolve_location(loc)?;
397 let page_index = self.page_index(offset)?;
398
399 Some((
400 self.pages[page_index]
401 .iter()
402 .filter_map(|dc| match dc {
403 DrawCommand::Text(TextCommand {
404 uri, rect, offset, ..
405 })
406 | DrawCommand::Image(ImageCommand {
407 uri, rect, offset, ..
408 }) if uri.is_some() => Some(BoundedText {
409 text: uri.clone().unwrap(),
410 rect: (*rect).into(),
411 location: TextLocation::Dynamic(*offset),
412 }),
413 _ => None,
414 })
415 .collect(),
416 offset,
417 ))
418 }
419
420 fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
421 let offset = self.resolve_location(loc)?;
422 let page_index = self.page_index(offset)?;
423 let page = self.pages[page_index].clone();
424 let pixmap = self
425 .engine
426 .render_page(&page, scale, samples, &mut self.parent)?;
427
428 Some((pixmap, offset))
429 }
430
431 fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
432 self.engine.layout(width, height, font_size, dpi);
433 self.pages.clear();
434 }
435
436 fn set_text_align(&mut self, text_align: TextAlign) {
437 self.engine.set_text_align(text_align);
438 self.pages.clear();
439 }
440
441 fn set_font_family(&mut self, family_name: &str, search_path: &str) {
442 self.engine.set_font_family(family_name, search_path);
443 self.pages.clear();
444 }
445
446 fn set_margin_width(&mut self, width: i32) {
447 self.engine.set_margin_width(width);
448 self.pages.clear();
449 }
450
451 fn set_line_height(&mut self, line_height: f32) {
452 self.engine.set_line_height(line_height);
453 self.pages.clear();
454 }
455
456 fn set_hyphen_penalty(&mut self, hyphen_penalty: i32) {
457 self.engine.set_hyphen_penalty(hyphen_penalty);
458 self.pages.clear();
459 }
460
461 fn set_stretch_tolerance(&mut self, stretch_tolerance: f32) {
462 self.engine.set_stretch_tolerance(stretch_tolerance);
463 self.pages.clear();
464 }
465
466 fn set_ignore_document_css(&mut self, ignore: bool) {
467 self.ignore_document_css = ignore;
468 self.pages.clear();
469 }
470
471 fn title(&self) -> Option<String> {
472 self.content
473 .root()
474 .find("head")
475 .and_then(|head| {
476 head.children()
477 .find(|child| child.tag_name() == Some("title"))
478 })
479 .map(|child| decode_entities(&child.text()).into_owned())
480 }
481
482 fn author(&self) -> Option<String> {
483 self.metadata("author")
484 }
485
486 fn metadata(&self, key: &str) -> Option<String> {
487 self.content
488 .root()
489 .find("head")
490 .and_then(|head| {
491 head.children().find(|child| {
492 child.tag_name() == Some("meta") && child.attribute("name") == Some(key)
493 })
494 })
495 .and_then(|child| {
496 child
497 .attribute("content")
498 .map(|s| decode_entities(s).into_owned())
499 })
500 }
501
502 fn save(&self, path: &str) -> Result<(), Error> {
503 let mut file = File::create(path)?;
504 file.write_all(self.text.as_bytes()).map_err(Into::into)
505 }
506
507 fn is_reflowable(&self) -> bool {
508 true
509 }
510
511 fn has_synthetic_page_numbers(&self) -> bool {
512 true
513 }
514}