Skip to main content

cadmus_core/document/epub/
mod.rs

1use super::html::css::CssParser;
2use super::html::dom::{NodeRef, XmlTree};
3use super::html::engine::{Engine, Page, ResourceFetcher};
4use super::html::layout::TextAlign;
5use super::html::layout::{DrawCommand, DrawState, ImageCommand, RootData, TextCommand};
6use super::html::layout::{LoopContext, StyleData};
7use super::html::style::StyleSheet;
8use super::html::xml::XmlParser;
9use super::pdf::PdfOpener;
10use crate::document::{chapter_from_uri, BoundedText, Document, Location, TextLocation, TocEntry};
11use crate::framebuffer::Pixmap;
12use crate::geom::{Boundary, CycleDir};
13use crate::helpers::{decode_entities, Normalize};
14use crate::unit::pt_to_px;
15use anyhow::{format_err, Error};
16use fxhash::FxHashMap;
17use percent_encoding::percent_decode_str;
18use std::collections::BTreeSet;
19use std::fs::{self, File};
20use std::io::{Cursor, Read, Seek};
21use std::path::{Path, PathBuf};
22use zip::ZipArchive;
23
24const VIEWER_STYLESHEET: &str = "css/epub.css";
25const USER_STYLESHEET: &str = "css/epub-user.css";
26
27type UriCache = FxHashMap<String, usize>;
28
29impl<R: Read + Seek> ResourceFetcher for ZipArchive<R> {
30    fn fetch(&mut self, name: &str) -> Result<Vec<u8>, Error> {
31        let mut file = self.by_name(name)?;
32        let mut buf = Vec::new();
33        file.read_to_end(&mut buf)?;
34        Ok(buf)
35    }
36}
37
38/// Generic EPUB document that works with any Read + Seek source.
39pub struct EpubDocument<R: Read + Seek> {
40    archive: ZipArchive<R>,
41    info: XmlTree,
42    parent: PathBuf,
43    engine: Engine,
44    spine: Vec<Chunk>,
45    cache: FxHashMap<usize, Vec<Page>>,
46    ignore_document_css: bool,
47}
48
49/// Type alias for file-based EPUB documents (backward compatibility).
50pub type EpubDocumentFile = EpubDocument<File>;
51
52/// Type alias for static EPUB documents (zero-copy for embedded assets).
53pub type EpubDocumentStatic = EpubDocument<Cursor<&'static [u8]>>;
54
/// One spine item of the book.
#[derive(Debug)]
struct Chunk {
    /// Name of the item's entry inside the archive.
    path: String,
    /// Size reported by the archive for that entry; offsets in the document
    /// are cumulative sums of these sizes.
    size: usize,
}
60
61unsafe impl<R: Read + Seek> Send for EpubDocument<R> {}
62unsafe impl<R: Read + Seek> Sync for EpubDocument<R> {}
63
64impl<R: Read + Seek> EpubDocument<R> {
65    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
66    fn from_archive(mut archive: ZipArchive<R>) -> Result<Self, Error> {
67        let opf_path = {
68            let mut zf = archive.by_name("META-INF/container.xml")?;
69            let mut text = String::new();
70            zf.read_to_string(&mut text)?;
71            let root = XmlParser::new(&text).parse();
72            root.root()
73                .find("rootfile")
74                .and_then(|e| e.attribute("full-path"))
75                .map(String::from)
76        }
77        .ok_or_else(|| format_err!("can't get the OPF path"))?;
78
79        let parent = Path::new(&opf_path)
80            .parent()
81            .unwrap_or_else(|| Path::new(""));
82
83        let text = {
84            let mut zf = archive.by_name(&opf_path)?;
85            let mut text = String::new();
86            zf.read_to_string(&mut text)?;
87            text
88        };
89
90        let info = XmlParser::new(&text).parse();
91        let mut spine = Vec::new();
92
93        {
94            let manifest = info
95                .root()
96                .find("manifest")
97                .ok_or_else(|| format_err!("the manifest is missing"))?;
98
99            let spn = info
100                .root()
101                .find("spine")
102                .ok_or_else(|| format_err!("the spine is missing"))?;
103
104            for child in spn.children() {
105                let vertebra_opt = child
106                    .attribute("idref")
107                    .and_then(|idref| manifest.find_by_id(idref))
108                    .and_then(|entry| entry.attribute("href"))
109                    .and_then(|href| {
110                        let href = decode_entities(href);
111                        let href = percent_decode_str(&href).decode_utf8_lossy();
112                        let href_path = parent.join::<&str>(href.as_ref());
113                        href_path.to_str().and_then(|path| {
114                            archive
115                                .by_name(path)
116                                .map_err(|e| {
117                                    tracing::error!(
118                                        "Can't retrieve '{}' from the archive: {:#}.",
119                                        path,
120                                        e
121                                    )
122                                    // We're assuming that the size of the spine is less than 4 GiB.
123                                })
124                                .map(|zf| (zf.size() as usize, path.to_string()))
125                                .ok()
126                        })
127                    });
128
129                if let Some((size, path)) = vertebra_opt {
130                    spine.push(Chunk { path, size });
131                }
132            }
133        }
134
135        if spine.is_empty() {
136            return Err(format_err!("the spine is empty"));
137        }
138
139        Ok(EpubDocument {
140            archive,
141            info,
142            parent: parent.to_path_buf(),
143            engine: Engine::new(),
144            spine,
145            cache: FxHashMap::default(),
146            ignore_document_css: false,
147        })
148    }
149
150    fn offset(&self, index: usize) -> usize {
151        self.spine.iter().take(index).map(|c| c.size).sum()
152    }
153
154    fn size(&self) -> usize {
155        self.offset(self.spine.len())
156    }
157
158    fn vertebra_coordinates_with<F>(&self, test: F) -> Option<(usize, usize)>
159    where
160        F: Fn(usize, usize) -> bool,
161    {
162        let mut start_offset = 0;
163        let mut end_offset = start_offset;
164        let mut index = 0;
165
166        while index < self.spine.len() {
167            end_offset += self.spine[index].size;
168            if test(index, end_offset) {
169                return Some((index, start_offset));
170            }
171            start_offset = end_offset;
172            index += 1;
173        }
174
175        None
176    }
177
178    fn vertebra_coordinates(&self, offset: usize) -> Option<(usize, usize)> {
179        self.vertebra_coordinates_with(|_, end_offset| offset < end_offset)
180    }
181
182    fn vertebra_coordinates_from_name(&self, name: &str) -> Option<(usize, usize)> {
183        self.vertebra_coordinates_with(|index, _| self.spine[index].path == name)
184    }
185
186    fn walk_toc_ncx(
187        &mut self,
188        node: NodeRef,
189        toc_dir: &Path,
190        index: &mut usize,
191        cache: &mut UriCache,
192    ) -> Vec<TocEntry> {
193        let mut entries = Vec::new();
194        // TODO: Take `playOrder` into account?
195
196        for child in node.children() {
197            if child.tag_name() == Some("navPoint") {
198                let title = child
199                    .find("navLabel")
200                    .and_then(|label| label.find("text"))
201                    .map(|text| decode_entities(&text.text()).into_owned())
202                    .unwrap_or_default();
203
204                // Example URI: pr03.html#codecomma_and_what_to_do_with_it
205                let rel_uri = child
206                    .find("content")
207                    .and_then(|content| {
208                        content.attribute("src").map(|src| {
209                            percent_decode_str(&decode_entities(src))
210                                .decode_utf8_lossy()
211                                .into_owned()
212                        })
213                    })
214                    .unwrap_or_default();
215
216                let loc = toc_dir
217                    .join(&rel_uri)
218                    .normalize()
219                    .to_str()
220                    .map(|uri| Location::Uri(uri.to_string()));
221
222                let current_index = *index;
223                *index += 1;
224
225                let sub_entries = if child.children().count() > 2 {
226                    self.walk_toc_ncx(child, toc_dir, index, cache)
227                } else {
228                    Vec::new()
229                };
230
231                if let Some(location) = loc {
232                    entries.push(TocEntry {
233                        title,
234                        location,
235                        index: current_index,
236                        children: sub_entries,
237                    });
238                }
239            }
240        }
241
242        entries
243    }
244
245    fn walk_toc_nav(
246        &mut self,
247        node: NodeRef,
248        toc_dir: &Path,
249        index: &mut usize,
250        cache: &mut UriCache,
251    ) -> Vec<TocEntry> {
252        let mut entries = Vec::new();
253
254        for child in node.children() {
255            if child.tag_name() == Some("li") {
256                let link = child.children().find(|child| child.tag_name() == Some("a"));
257                let title = link
258                    .map(|link| decode_entities(&link.text()).into_owned())
259                    .unwrap_or_default();
260                let rel_uri = link
261                    .and_then(|link| {
262                        link.attribute("href").map(|href| {
263                            percent_decode_str(&decode_entities(href))
264                                .decode_utf8_lossy()
265                                .into_owned()
266                        })
267                    })
268                    .unwrap_or_default();
269
270                let loc = toc_dir
271                    .join(&rel_uri)
272                    .normalize()
273                    .to_str()
274                    .map(|uri| Location::Uri(uri.to_string()));
275
276                let current_index = *index;
277                *index += 1;
278
279                let sub_entries = if let Some(sub_list) = child.find("ol") {
280                    self.walk_toc_nav(sub_list, toc_dir, index, cache)
281                } else {
282                    Vec::new()
283                };
284
285                if let Some(location) = loc {
286                    entries.push(TocEntry {
287                        title,
288                        location,
289                        index: current_index,
290                        children: sub_entries,
291                    });
292                }
293            }
294        }
295
296        entries
297    }
298
299    #[inline]
300    fn page_index(&mut self, offset: usize, index: usize, start_offset: usize) -> Option<usize> {
301        if !self.cache.contains_key(&index) {
302            let display_list = self.build_display_list(index, start_offset);
303            self.cache.insert(index, display_list);
304        }
305        self.cache.get(&index).map(|display_list| {
306            if display_list.len() < 2
307                || display_list[1].first().map(|dc| offset < dc.offset()) == Some(true)
308            {
309                return 0;
310            } else if display_list[display_list.len() - 1]
311                .first()
312                .map(|dc| offset >= dc.offset())
313                == Some(true)
314            {
315                return display_list.len() - 1;
316            } else {
317                for i in 1..display_list.len() - 1 {
318                    if display_list[i].first().map(|dc| offset >= dc.offset()) == Some(true)
319                        && display_list[i + 1].first().map(|dc| offset < dc.offset()) == Some(true)
320                    {
321                        return i;
322                    }
323                }
324            }
325            0
326        })
327    }
328
329    fn resolve_link(&mut self, uri: &str, cache: &mut UriCache) -> Option<usize> {
330        let frag_index_opt = uri.find('#');
331        let name = &uri[..frag_index_opt.unwrap_or_else(|| uri.len())];
332
333        let (index, start_offset) = self.vertebra_coordinates_from_name(name)?;
334
335        if frag_index_opt.is_some() {
336            let mut text = String::new();
337            {
338                let mut zf = self.archive.by_name(name).ok()?;
339                zf.read_to_string(&mut text).ok()?;
340            }
341            let root = XmlParser::new(&text).parse();
342            self.cache_uris(root.root(), name, start_offset, cache);
343            cache.get(uri).cloned()
344        } else {
345            let page_index = self.page_index(start_offset, index, start_offset)?;
346            let offset = self
347                .cache
348                .get(&index)
349                .and_then(|display_list| display_list[page_index].first())
350                .map(DrawCommand::offset)?;
351            cache.insert(uri.to_string(), offset);
352            Some(offset)
353        }
354    }
355
356    fn cache_uris(&mut self, node: NodeRef, name: &str, start_offset: usize, cache: &mut UriCache) {
357        if let Some(id) = node.attribute("id") {
358            let location = start_offset + node.offset();
359            cache.insert(format!("{}#{}", name, id), location);
360        }
361        for child in node.children() {
362            self.cache_uris(child, name, start_offset, cache);
363        }
364    }
365
366    fn build_display_list(&mut self, index: usize, start_offset: usize) -> Vec<Page> {
367        let mut text = String::new();
368        let mut spine_dir = PathBuf::default();
369
370        {
371            let path = &self.spine[index].path;
372            if let Some(parent) = Path::new(path).parent() {
373                spine_dir = parent.to_path_buf();
374            }
375
376            if let Ok(mut zf) = self.archive.by_name(path) {
377                zf.read_to_string(&mut text).ok();
378            }
379        }
380
381        let mut root = XmlParser::new(&text).parse();
382        root.wrap_lost_inlines();
383
384        let mut stylesheet = StyleSheet::new();
385
386        if let Ok(text) = fs::read_to_string(VIEWER_STYLESHEET) {
387            let mut css = CssParser::new(&text).parse();
388            stylesheet.append(&mut css, true);
389        }
390
391        if let Ok(text) = fs::read_to_string(USER_STYLESHEET) {
392            let mut css = CssParser::new(&text).parse();
393            stylesheet.append(&mut css, true);
394        }
395
396        if !self.ignore_document_css {
397            let mut inner_css = StyleSheet::new();
398            if let Some(head) = root.root().find("head") {
399                for child in head.children() {
400                    if child.tag_name() == Some("link")
401                        && child.attribute("rel") == Some("stylesheet")
402                    {
403                        if let Some(href) = child.attribute("href") {
404                            if let Some(name) = spine_dir.join(href).normalize().to_str() {
405                                let mut text = String::new();
406                                if let Ok(mut zf) = self.archive.by_name(name) {
407                                    zf.read_to_string(&mut text).ok();
408                                    let mut css = CssParser::new(&text).parse();
409                                    inner_css.append(&mut css, false);
410                                }
411                            }
412                        }
413                    } else if child.tag_name() == Some("style")
414                        && child.attribute("type") == Some("text/css")
415                    {
416                        let mut css = CssParser::new(&child.text()).parse();
417                        inner_css.append(&mut css, false);
418                    }
419                }
420            }
421
422            stylesheet.append(&mut inner_css, true);
423        }
424
425        let mut display_list = Vec::new();
426
427        if let Some(body) = root.root().find("body") {
428            let mut rect = self.engine.rect();
429            rect.shrink(&self.engine.margin);
430
431            let language = self.language().or_else(|| {
432                root.root()
433                    .find("html")
434                    .and_then(|html| html.attribute("xml:lang"))
435                    .map(String::from)
436            });
437
438            let style = StyleData {
439                language,
440                font_size: self.engine.font_size,
441                line_height: pt_to_px(
442                    self.engine.line_height * self.engine.font_size,
443                    self.engine.dpi,
444                )
445                .round() as i32,
446                text_align: self.engine.text_align,
447                start_x: rect.min.x,
448                end_x: rect.max.x,
449                width: rect.max.x - rect.min.x,
450                ..Default::default()
451            };
452
453            let loop_context = LoopContext::default();
454            let mut draw_state = DrawState {
455                position: rect.min,
456                ..Default::default()
457            };
458
459            let root_data = RootData {
460                start_offset,
461                spine_dir,
462                rect,
463            };
464
465            display_list.push(Vec::new());
466
467            self.engine.build_display_list(
468                body,
469                &style,
470                &loop_context,
471                &stylesheet,
472                &root_data,
473                &mut self.archive,
474                &mut draw_state,
475                &mut display_list,
476            );
477
478            display_list.retain(|page| !page.is_empty());
479
480            if display_list.is_empty() {
481                display_list.push(vec![DrawCommand::Marker(start_offset + body.offset())]);
482            }
483        } else {
484            display_list.push(vec![DrawCommand::Marker(start_offset)]);
485        }
486
487        display_list
488    }
489
490    pub fn categories(&self) -> BTreeSet<String> {
491        let mut result = BTreeSet::new();
492
493        if let Some(md) = self.info.root().find("metadata") {
494            for child in md.children() {
495                if child.tag_qualified_name() == Some("dc:subject") {
496                    let text = child.text();
497                    let subject = decode_entities(&text);
498                    // Pipe separated list of BISAC categories
499                    if subject.contains(" / ") {
500                        for categ in subject.split('|') {
501                            let start_index = if let Some(index) = categ.find(" - ") {
502                                index + 3
503                            } else {
504                                0
505                            };
506                            result.insert(categ[start_index..].trim().replace(" / ", "."));
507                        }
508                    } else {
509                        result.insert(subject.into_owned());
510                    }
511                }
512            }
513        }
514
515        result
516    }
517
518    fn chapter_aux<'a>(
519        &mut self,
520        toc: &'a [TocEntry],
521        offset: usize,
522        next_offset: usize,
523        path: &str,
524        end_offset: &mut usize,
525        chap_before: &mut Option<&'a TocEntry>,
526        offset_before: &mut usize,
527        chap_after: &mut Option<&'a TocEntry>,
528        offset_after: &mut usize,
529    ) {
530        for entry in toc {
531            if let Location::Uri(ref uri) = entry.location {
532                if uri.starts_with(path) {
533                    if let Some(entry_offset) = self.resolve_location(entry.location.clone()) {
534                        if entry_offset < offset
535                            && (chap_before.is_none() || entry_offset > *offset_before)
536                        {
537                            *chap_before = Some(entry);
538                            *offset_before = entry_offset;
539                        }
540                        if entry_offset >= offset
541                            && entry_offset < next_offset
542                            && (chap_after.is_none() || entry_offset < *offset_after)
543                        {
544                            *chap_after = Some(entry);
545                            *offset_after = entry_offset;
546                        }
547                        if entry_offset >= next_offset && entry_offset < *end_offset {
548                            *end_offset = entry_offset;
549                        }
550                    }
551                }
552            }
553            self.chapter_aux(
554                &entry.children,
555                offset,
556                next_offset,
557                path,
558                end_offset,
559                chap_before,
560                offset_before,
561                chap_after,
562                offset_after,
563            );
564        }
565    }
566
567    fn previous_chapter<'a>(
568        &mut self,
569        chap: Option<&TocEntry>,
570        start_offset: usize,
571        end_offset: usize,
572        toc: &'a [TocEntry],
573    ) -> Option<&'a TocEntry> {
574        for entry in toc.iter().rev() {
575            let result = self.previous_chapter(chap, start_offset, end_offset, &entry.children);
576            if result.is_some() {
577                return result;
578            }
579
580            if let Some(chap) = chap {
581                if entry.index < chap.index {
582                    let entry_offset = self.resolve_location(entry.location.clone())?;
583                    if entry_offset < start_offset || entry_offset >= end_offset {
584                        return Some(entry);
585                    }
586                }
587            } else {
588                let entry_offset = self.resolve_location(entry.location.clone())?;
589                if entry_offset < start_offset {
590                    return Some(entry);
591                }
592            }
593        }
594        None
595    }
596
597    fn next_chapter<'a>(
598        &mut self,
599        chap: Option<&TocEntry>,
600        start_offset: usize,
601        end_offset: usize,
602        toc: &'a [TocEntry],
603    ) -> Option<&'a TocEntry> {
604        for entry in toc {
605            if let Some(chap) = chap {
606                if entry.index > chap.index {
607                    let entry_offset = self.resolve_location(entry.location.clone())?;
608                    if entry_offset < start_offset || entry_offset >= end_offset {
609                        return Some(entry);
610                    }
611                }
612            } else {
613                let entry_offset = self.resolve_location(entry.location.clone())?;
614                if entry_offset >= end_offset {
615                    return Some(entry);
616                }
617            }
618
619            let result = self.next_chapter(chap, start_offset, end_offset, &entry.children);
620            if result.is_some() {
621                return result;
622            }
623        }
624        None
625    }
626
627    pub fn series(&self) -> Option<(String, String)> {
628        self.info.root().find("metadata").and_then(|md| {
629            let mut title = None;
630            let mut index = None;
631
632            for child in md.children() {
633                if child.tag_name() == Some("meta") {
634                    if child.attribute("name") == Some("calibre:series") {
635                        title = child
636                            .attribute("content")
637                            .map(|s| decode_entities(s).into_owned());
638                    } else if child.attribute("name") == Some("calibre:series_index") {
639                        index = child
640                            .attribute("content")
641                            .map(|s| decode_entities(s).into_owned());
642                    } else if child.attribute("property") == Some("belongs-to-collection") {
643                        title = Some(decode_entities(&child.text()).into_owned());
644                    } else if child.attribute("property") == Some("group-position") {
645                        index = Some(decode_entities(&child.text()).into_owned());
646                    }
647                }
648
649                if title.is_some() && index.is_some() {
650                    break;
651                }
652            }
653
654            title.into_iter().zip(index).next()
655        })
656    }
657
658    pub fn cover_image(&self) -> Option<&str> {
659        self.info
660            .root()
661            .find("metadata")
662            .and_then(|md| {
663                md.children().find(|child| {
664                    child.tag_name() == Some("meta") && child.attribute("name") == Some("cover")
665                })
666            })
667            .and_then(|entry| entry.attribute("content"))
668            .and_then(|cover_id| {
669                self.info
670                    .root()
671                    .find("manifest")
672                    .and_then(|entry| entry.find_by_id(cover_id))
673                    .and_then(|entry| entry.attribute("href"))
674            })
675            .or_else(|| {
676                self.info
677                    .root()
678                    .find("manifest")
679                    .and_then(|mf| {
680                        mf.children().find(|child| {
681                            (child
682                                .attribute("href")
683                                .map_or(false, |hr| hr.contains("cover") || hr.contains("Cover"))
684                                || child.id().map_or(false, |id| id.contains("cover")))
685                                && child
686                                    .attribute("media-type")
687                                    .map_or(false, |mt| mt.starts_with("image/"))
688                        })
689                    })
690                    .and_then(|entry| entry.attribute("href"))
691            })
692    }
693
694    pub fn description(&self) -> Option<String> {
695        self.metadata("dc:description")
696    }
697
698    pub fn publisher(&self) -> Option<String> {
699        self.metadata("dc:publisher")
700    }
701
702    pub fn language(&self) -> Option<String> {
703        self.metadata("dc:language")
704    }
705
706    pub fn year(&self) -> Option<String> {
707        self.metadata("dc:date")
708            .map(|s| s.chars().take(4).collect())
709    }
710}
711
712impl EpubDocumentFile {
713    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
714        let file = File::open(path)?;
715        let archive = ZipArchive::new(file)?;
716        Self::from_archive(archive)
717    }
718}
719
720impl EpubDocumentStatic {
721    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
722    pub fn new_from_static(bytes: &'static [u8]) -> Result<Self, Error> {
723        let cursor = Cursor::new(bytes);
724        let archive = ZipArchive::new(cursor)?;
725        Self::from_archive(archive)
726    }
727}
728
729impl<R: Read + Seek> Document for EpubDocument<R> {
730    fn preview_pixmap(&mut self, width: f32, height: f32, samples: usize) -> Option<Pixmap> {
731        let opener = PdfOpener::new()?;
732        self.cover_image()
733            .map(|path| self.parent.join(path).to_string_lossy().into_owned())
734            .and_then(|path| {
735                self.archive
736                    .fetch(&path)
737                    .ok()
738                    .and_then(|buf| opener.open_memory(&path, &buf))
739                    .and_then(|mut doc| {
740                        doc.dims(0).and_then(|dims| {
741                            let scale = (width / dims.0).min(height / dims.1);
742                            doc.pixmap(Location::Exact(0), scale, samples)
743                        })
744                    })
745            })
746            .or_else(|| {
747                self.dims(0).and_then(|dims| {
748                    let scale = (width / dims.0).min(height / dims.1);
749                    self.pixmap(Location::Exact(0), scale, samples)
750                })
751            })
752            .map(|(pixmap, _)| pixmap)
753    }
754
755    #[inline]
756    fn dims(&self, _index: usize) -> Option<(f32, f32)> {
757        Some((self.engine.dims.0 as f32, self.engine.dims.1 as f32))
758    }
759
760    fn pages_count(&self) -> usize {
761        self.spine.iter().map(|c| c.size).sum()
762    }
763
764    fn toc(&mut self) -> Option<Vec<TocEntry>> {
765        let name = self
766            .info
767            .root()
768            .find("spine")
769            .and_then(|spine| spine.attribute("toc"))
770            .and_then(|toc_id| {
771                self.info
772                    .root()
773                    .find("manifest")
774                    .and_then(|manifest| manifest.find_by_id(toc_id))
775                    .and_then(|entry| entry.attribute("href"))
776            })
777            .or_else(|| {
778                self.info
779                    .root()
780                    .find("manifest")
781                    .and_then(|manifest| {
782                        manifest.children().find(|child| {
783                            child
784                                .attribute("properties")
785                                .iter()
786                                .any(|props| props.split_whitespace().any(|prop| prop == "nav"))
787                        })
788                    })
789                    .and_then(|entry| entry.attribute("href"))
790            })
791            .map(|href| {
792                self.parent
793                    .join(href)
794                    .normalize()
795                    .to_string_lossy()
796                    .into_owned()
797            })?;
798
799        let toc_dir = Path::new(&name).parent().unwrap_or_else(|| Path::new(""));
800
801        let mut text = String::new();
802        if let Ok(mut zf) = self.archive.by_name(&name) {
803            zf.read_to_string(&mut text).ok()?;
804        } else {
805            return None;
806        }
807
808        let root = XmlParser::new(&text).parse();
809
810        if name.ends_with(".ncx") {
811            root.root()
812                .find("navMap")
813                .map(|map| self.walk_toc_ncx(map, toc_dir, &mut 0, &mut FxHashMap::default()))
814        } else {
815            root.root()
816                .descendants()
817                .find(|desc| {
818                    desc.tag_name() == Some("nav") && desc.attribute("epub:type") == Some("toc")
819                })
820                .and_then(|map| map.find("ol"))
821                .map(|map| self.walk_toc_nav(map, toc_dir, &mut 0, &mut FxHashMap::default()))
822        }
823    }
824
825    fn chapter<'a>(&mut self, offset: usize, toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
826        let next_offset = self
827            .resolve_location(Location::Next(offset))
828            .unwrap_or(usize::MAX);
829        let (index, start_offset) = self.vertebra_coordinates(offset)?;
830        let path = self.spine[index].path.clone();
831        let mut end_offset = start_offset + self.spine[index].size;
832        let mut chap_before = None;
833        let mut chap_after = None;
834        let mut offset_before = 0;
835        let mut offset_after = usize::MAX;
836
837        self.chapter_aux(
838            toc,
839            offset,
840            next_offset,
841            &path,
842            &mut end_offset,
843            &mut chap_before,
844            &mut offset_before,
845            &mut chap_after,
846            &mut offset_after,
847        );
848
849        if chap_after.is_none() && chap_before.is_none() {
850            for i in (0..index).rev() {
851                let chap = chapter_from_uri(&self.spine[i].path, toc);
852                if chap.is_some() {
853                    end_offset = if let Some(j) = (index + 1..self.spine.len())
854                        .find(|&j| chapter_from_uri(&self.spine[j].path, toc).is_some())
855                    {
856                        self.offset(j)
857                    } else {
858                        self.size()
859                    };
860                    let chap_offset = self.offset(i);
861                    let progress =
862                        (offset - chap_offset) as f32 / (end_offset - chap_offset) as f32;
863                    return chap.zip(Some(progress));
864                }
865            }
866            None
867        } else {
868            match (chap_after, chap_before) {
869                (Some(..), _) => chap_after.zip(Some(0.0)),
870                (None, Some(..)) => chap_before.zip(Some(
871                    (offset - offset_before) as f32 / (end_offset - offset_before) as f32,
872                )),
873                _ => None,
874            }
875        }
876    }
877
878    fn chapter_relative<'a>(
879        &mut self,
880        offset: usize,
881        dir: CycleDir,
882        toc: &'a [TocEntry],
883    ) -> Option<&'a TocEntry> {
884        let next_offset = self
885            .resolve_location(Location::Next(offset))
886            .unwrap_or(usize::MAX);
887        let chap = self.chapter(offset, toc).map(|(c, _)| c);
888
889        match dir {
890            CycleDir::Previous => self.previous_chapter(chap, offset, next_offset, toc),
891            CycleDir::Next => self.next_chapter(chap, offset, next_offset, toc),
892        }
893    }
894
895    fn resolve_location(&mut self, loc: Location) -> Option<usize> {
896        self.engine.load_fonts();
897
898        match loc {
899            Location::Exact(offset) => {
900                let (index, start_offset) = self.vertebra_coordinates(offset)?;
901                let page_index = self.page_index(offset, index, start_offset)?;
902                self.cache
903                    .get(&index)
904                    .and_then(|display_list| display_list[page_index].first())
905                    .map(DrawCommand::offset)
906            }
907            Location::Previous(offset) => {
908                let (index, start_offset) = self.vertebra_coordinates(offset)?;
909                let page_index = self.page_index(offset, index, start_offset)?;
910                if page_index > 0 {
911                    self.cache.get(&index).and_then(|display_list| {
912                        display_list[page_index - 1]
913                            .first()
914                            .map(DrawCommand::offset)
915                    })
916                } else {
917                    if index == 0 {
918                        return None;
919                    }
920                    let (index, start_offset) =
921                        (index - 1, start_offset - self.spine[index - 1].size);
922                    if !self.cache.contains_key(&index) {
923                        let display_list = self.build_display_list(index, start_offset);
924                        self.cache.insert(index, display_list);
925                    }
926                    self.cache.get(&index).and_then(|display_list| {
927                        display_list
928                            .last()
929                            .and_then(|page| page.first())
930                            .map(DrawCommand::offset)
931                    })
932                }
933            }
934            Location::Next(offset) => {
935                let (index, start_offset) = self.vertebra_coordinates(offset)?;
936                let page_index = self.page_index(offset, index, start_offset)?;
937                if page_index < self.cache.get(&index).map(Vec::len)? - 1 {
938                    self.cache.get(&index).and_then(|display_list| {
939                        display_list[page_index + 1]
940                            .first()
941                            .map(DrawCommand::offset)
942                    })
943                } else {
944                    if index == self.spine.len() - 1 {
945                        return None;
946                    }
947                    let (index, start_offset) = (index + 1, start_offset + self.spine[index].size);
948                    if !self.cache.contains_key(&index) {
949                        let display_list = self.build_display_list(index, start_offset);
950                        self.cache.insert(index, display_list);
951                    }
952                    self.cache.get(&index).and_then(|display_list| {
953                        display_list
954                            .first()
955                            .and_then(|page| page.first())
956                            .map(|dc| dc.offset())
957                    })
958                }
959            }
960            Location::LocalUri(offset, ref uri) => {
961                let mut cache = FxHashMap::default();
962                let normalized_uri: String = {
963                    let (index, _) = self.vertebra_coordinates(offset)?;
964                    let path = &self.spine[index].path;
965                    if uri.starts_with('#') {
966                        format!("{}{}", path, uri)
967                    } else {
968                        let parent = Path::new(path).parent().unwrap_or_else(|| Path::new(""));
969                        parent.join(uri).normalize().to_string_lossy().into_owned()
970                    }
971                };
972                self.resolve_link(&normalized_uri, &mut cache)
973            }
974            Location::Uri(ref uri) => {
975                let mut cache = FxHashMap::default();
976                self.resolve_link(uri, &mut cache)
977            }
978        }
979    }
980
981    fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
982        if self.spine.is_empty() {
983            return None;
984        }
985
986        let offset = self.resolve_location(loc)?;
987        let (index, start_offset) = self.vertebra_coordinates(offset)?;
988        let page_index = self.page_index(offset, index, start_offset)?;
989
990        self.cache.get(&index).map(|display_list| {
991            (
992                display_list[page_index]
993                    .iter()
994                    .filter_map(|dc| match dc {
995                        DrawCommand::Text(TextCommand {
996                            text, rect, offset, ..
997                        }) => Some(BoundedText {
998                            text: text.clone(),
999                            rect: (*rect).into(),
1000                            location: TextLocation::Dynamic(*offset),
1001                        }),
1002                        _ => None,
1003                    })
1004                    .collect(),
1005                offset,
1006            )
1007        })
1008    }
1009
    /// Line extraction is not implemented for EPUB documents: the location is
    /// ignored and `None` is always returned.
    fn lines(&mut self, _loc: Location) -> Option<(Vec<BoundedText>, usize)> {
        None
    }
1013
1014    fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
1015        if self.spine.is_empty() {
1016            return None;
1017        }
1018
1019        let offset = self.resolve_location(loc)?;
1020        let (index, start_offset) = self.vertebra_coordinates(offset)?;
1021        let page_index = self.page_index(offset, index, start_offset)?;
1022
1023        self.cache.get(&index).map(|display_list| {
1024            (
1025                display_list[page_index]
1026                    .iter()
1027                    .filter_map(|dc| match dc {
1028                        DrawCommand::Text(TextCommand {
1029                            uri, rect, offset, ..
1030                        })
1031                        | DrawCommand::Image(ImageCommand {
1032                            uri, rect, offset, ..
1033                        }) if uri.is_some() => Some(BoundedText {
1034                            text: uri.clone().unwrap(),
1035                            rect: (*rect).into(),
1036                            location: TextLocation::Dynamic(*offset),
1037                        }),
1038                        _ => None,
1039                    })
1040                    .collect(),
1041                offset,
1042            )
1043        })
1044    }
1045
1046    fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
1047        if self.spine.is_empty() {
1048            return None;
1049        }
1050
1051        let offset = self.resolve_location(loc)?;
1052        let (index, start_offset) = self.vertebra_coordinates(offset)?;
1053        let page_index = self.page_index(offset, index, start_offset)?;
1054
1055        self.cache.get(&index).map(|display_list| {
1056            (
1057                display_list[page_index]
1058                    .iter()
1059                    .filter_map(|dc| match dc {
1060                        DrawCommand::Image(ImageCommand { rect, .. }) => Some((*rect).into()),
1061                        _ => None,
1062                    })
1063                    .collect(),
1064                offset,
1065            )
1066        })
1067    }
1068
1069    fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
1070        if self.spine.is_empty() {
1071            return None;
1072        }
1073
1074        let offset = self.resolve_location(loc)?;
1075        let (index, start_offset) = self.vertebra_coordinates(offset)?;
1076
1077        let page_index = self.page_index(offset, index, start_offset)?;
1078        let page = self.cache.get(&index)?.get(page_index)?.clone();
1079
1080        let pixmap = self
1081            .engine
1082            .render_page(&page, scale, samples, &mut self.archive)?;
1083
1084        Some((pixmap, offset))
1085    }
1086
    /// Forwards the new page dimensions, font size and DPI to the layout
    /// engine and discards every cached display list, since pages laid out
    /// with the previous settings are no longer valid.
    fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
        self.engine.layout(width, height, font_size, dpi);
        self.cache.clear();
    }
1091
    /// Forwards the text alignment to the layout engine and discards every
    /// cached display list.
    fn set_text_align(&mut self, text_align: TextAlign) {
        self.engine.set_text_align(text_align);
        self.cache.clear();
    }
1096
    /// Forwards the font family name and its search path to the layout engine
    /// and discards every cached display list.
    fn set_font_family(&mut self, family_name: &str, search_path: &str) {
        self.engine.set_font_family(family_name, search_path);
        self.cache.clear();
    }
1101
    /// Forwards the margin width to the layout engine and discards every
    /// cached display list.
    fn set_margin_width(&mut self, width: i32) {
        self.engine.set_margin_width(width);
        self.cache.clear();
    }
1106
    /// Forwards the line height to the layout engine and discards every
    /// cached display list.
    fn set_line_height(&mut self, line_height: f32) {
        self.engine.set_line_height(line_height);
        self.cache.clear();
    }
1111
    /// Forwards the hyphenation penalty to the layout engine and discards
    /// every cached display list.
    fn set_hyphen_penalty(&mut self, hyphen_penalty: i32) {
        self.engine.set_hyphen_penalty(hyphen_penalty);
        self.cache.clear();
    }
1116
    /// Forwards the stretch tolerance to the layout engine and discards every
    /// cached display list.
    fn set_stretch_tolerance(&mut self, stretch_tolerance: f32) {
        self.engine.set_stretch_tolerance(stretch_tolerance);
        self.cache.clear();
    }
1121
    /// Records whether the document's own CSS should be ignored and discards
    /// every cached display list. Unlike the other setters, the flag is
    /// stored on the document itself rather than forwarded to the engine.
    fn set_ignore_document_css(&mut self, ignore: bool) {
        self.ignore_document_css = ignore;
        self.cache.clear();
    }
1126
    /// Returns the document title, read from the `dc:title` metadata entry.
    fn title(&self) -> Option<String> {
        self.metadata("dc:title")
    }
1130
    /// Returns the document author, read from the `dc:creator` metadata
    /// entry.
    fn author(&self) -> Option<String> {
        // TODO: Consider the opf:file-as attribute?
        self.metadata("dc:creator")
    }
1135
1136    fn metadata(&self, key: &str) -> Option<String> {
1137        self.info
1138            .root()
1139            .find("metadata")
1140            .and_then(|md| {
1141                md.children()
1142                    .find(|child| child.tag_qualified_name() == Some(key))
1143            })
1144            .map(|child| decode_entities(&child.text()).into_owned())
1145    }
1146
    /// Always `true` for EPUB documents.
    fn is_reflowable(&self) -> bool {
        true
    }
1150
    /// Always `true` for EPUB documents.
    fn has_synthetic_page_numbers(&self) -> bool {
        true
    }
1154}
1155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::document::html::layout::DrawCommand;
    use std::path::PathBuf;

    /// Opens the bundled documentation EPUB and applies a fixed layout so
    /// that pagination is the same for every test.
    ///
    /// Requires the `TEST_ROOT_DIR` environment variable to point at the
    /// repository root; the EPUB and the fonts are looked up relative to it.
    fn setup_epub() -> EpubDocumentFile {
        let root_dir = PathBuf::from(
            std::env::var("TEST_ROOT_DIR").expect("TEST_ROOT_DIR must be set for epub tests"),
        );
        let epub_path = root_dir.join("docs/book/epub/Cadmus Documentation.epub");
        let mut doc = EpubDocumentFile::new(&epub_path).expect("failed to open test epub");
        doc.engine.layout(600, 800, 12.0, 265);
        doc.engine.set_margin_width(3);
        doc.engine.load_fonts_from(root_dir);
        doc
    }

    /// Going to the next page from the last page of the first spine item must
    /// land in the second spine item.
    #[test]
    fn next_location_advances_to_next_spine_chapter() {
        let mut doc = setup_epub();

        // Resolving the first page is expected to populate the cache entry
        // for spine[0], which is inspected right below.
        doc.resolve_location(Location::Exact(0))
            .expect("should resolve offset 0");

        let last_page_offset = doc
            .cache
            .get(&0)
            .and_then(|dl| dl.last())
            .and_then(|page| page.first())
            .map(|dc| dc.offset())
            .expect("spine[0] last page has offset");

        let next_offset = doc
            .resolve_location(Location::Next(last_page_offset))
            .expect("navigating past last page of spine[0] should return Some");

        let (next_index, _) = doc
            .vertebra_coordinates(next_offset)
            .expect("next offset maps to spine");

        // The first placeholder describes the last page of spine[0], so it
        // must report `last_page_offset` rather than the offset of page one.
        assert_eq!(
            next_index, 1,
            "navigating next from last page of spine[0] (offset={}) should land on spine[1], got spine[{}] offset={}",
            last_page_offset, next_index, next_offset
        );
    }

    /// The first spine item must paginate into several pages, at least one of
    /// which contains text.
    #[test]
    fn first_spine_chapter_produces_pages_with_text() {
        let mut doc = setup_epub();

        let display_list = doc.build_display_list(0, 0);

        assert!(
            display_list.len() > 1,
            "expected multiple pages, got {}",
            display_list.len()
        );

        let has_text = display_list.iter().any(|page| {
            page.iter()
                .any(|cmd| matches!(cmd, DrawCommand::Text(_) | DrawCommand::ExtraText(_)))
        });
        assert!(has_text, "no text draw commands found across all pages");
    }

    /// A cached display list for the first spine item must hold more than one
    /// page, otherwise there is nothing to navigate to.
    #[test]
    fn next_page_exists_from_start() {
        let mut doc = setup_epub();

        let display_list = doc.build_display_list(0, 0);
        doc.cache.insert(0, display_list);

        let page_count = doc.cache.get(&0).map(|dl| dl.len()).unwrap_or(0);

        assert!(
            page_count > 1,
            "expected more than one page so next-page navigation works"
        );
    }

    /// Every spine item must produce at least one non-empty page.
    #[test]
    fn all_spine_chapters_produce_content() {
        let mut doc = setup_epub();

        let spine_len = doc.spine.len();
        assert!(spine_len > 0, "spine is empty");

        // Running start offset of the spine item being laid out.
        let mut start_offset = 0;
        for i in 0..spine_len {
            let display_list = doc.build_display_list(i, start_offset);
            assert!(
                !display_list.is_empty(),
                "spine chapter {} produced zero pages",
                i
            );
            let has_content = display_list.iter().any(|page| !page.is_empty());
            assert!(has_content, "spine chapter {} has only empty pages", i);
            start_offset += doc.spine[i].size;
        }
    }
}
1258}