cadmus_core/document/
pdf.rs

1use super::mupdf_sys::*;
2
3use super::{chapter, chapter_relative};
4use super::{BoundedText, Document, Location, TextLocation, TocEntry};
5use crate::framebuffer::Pixmap;
6use crate::geom::{Boundary, CycleDir};
7use crate::metadata::TextAlign;
8use crate::unit::pt_to_px;
9use std::char;
10use std::ffi::{CStr, CString};
11use std::fs;
12use std::io::ErrorKind;
13use std::os::unix::ffi::OsStrExt;
14use std::path::Path;
15use std::ptr;
16use std::rc::Rc;
17use std::slice;
18use thiserror::Error;
19use tracing::error;
20
/// Path of the optional user stylesheet loaded by `PdfOpener::load_user_stylesheet`.
/// The path is relative, so it is resolved against the process's current directory.
const USER_STYLESHEET: &str = "css/html-user.css";
22
/// Error returned when MuPDF fails to open a document.
#[derive(Debug, Error)]
pub enum PdfOpenError {
    /// Opening failed; the payload is the human-readable message describing why.
    #[error("MuPDF error: {0}")]
    MuPdf(String),
    /// Opening failed and no message was available to describe the failure.
    #[error("MuPDF returned no error message")]
    Unknown,
}
31
32impl Into<Boundary> for FzRect {
33    fn into(self) -> Boundary {
34        Boundary {
35            min: vec2!(self.x0, self.y0),
36            max: vec2!(self.x1, self.y1),
37        }
38    }
39}
40
/// Owning wrapper around a raw MuPDF context pointer.
/// The context is released with `fz_drop_context` when the wrapper is dropped.
struct PdfContext(*mut FzContext);
42
/// Entry point for opening documents.
/// Holds the reference-counted MuPDF context that is shared with every `PdfDocument` it opens.
pub struct PdfOpener(Rc<PdfContext>);
44
/// An opened MuPDF document together with the context it was opened in.
pub struct PdfDocument {
    /// Shared context; holding a reference here keeps the context alive while the document exists.
    ctx: Rc<PdfContext>,
    /// Raw MuPDF document handle, released with `fz_drop_document` on drop.
    doc: *mut FzDocument,
}
49
/// A single loaded page of a `PdfDocument`.
pub struct PdfPage<'a> {
    /// Shared context used for every MuPDF call made on this page.
    ctx: Rc<PdfContext>,
    /// Raw MuPDF page handle, released with `fz_drop_page` on drop.
    page: *mut FzPage,
    /// Index the page was loaded with; recorded in the `TextLocation` of extracted items.
    index: usize,
    /// Borrow of the owning document, so the page cannot outlive it.
    _doc: &'a PdfDocument,
}
56
57impl PdfOpener {
58    #[cfg_attr(feature = "otel", tracing::instrument)]
59    pub fn new() -> Option<PdfOpener> {
60        unsafe {
61            let version = CString::new(FZ_VERSION).unwrap();
62            let ctx = fz_new_context_imp(ptr::null(), ptr::null(), CACHE_SIZE, version.as_ptr());
63
64            if ctx.is_null() {
65                None
66            } else {
67                fz_register_document_handlers(ctx);
68                Some(PdfOpener(Rc::new(PdfContext(ctx))))
69            }
70        }
71    }
72
73    #[cfg_attr(feature = "otel", tracing::instrument(skip(self, path), fields(path = %path.as_ref().display())))]
74    pub fn open<P: AsRef<Path>>(&self, path: P) -> Result<PdfDocument, PdfOpenError> {
75        unsafe {
76            let c_path = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap();
77            let mut err_buf: [libc::c_char; 256] = [0; 256];
78            let doc = mp_open_document_with_error(
79                (self.0).0,
80                c_path.as_ptr(),
81                err_buf.as_mut_ptr(),
82                err_buf.len() as libc::c_int,
83            );
84            if doc.is_null() {
85                let msg = CStr::from_ptr(err_buf.as_ptr())
86                    .to_string_lossy()
87                    .into_owned();
88                Err(if msg.is_empty() {
89                    PdfOpenError::Unknown
90                } else {
91                    PdfOpenError::MuPdf(msg)
92                })
93            } else {
94                Ok(PdfDocument {
95                    ctx: self.0.clone(),
96                    doc,
97                })
98            }
99        }
100    }
101
102    // *magic* is a filename or a MIME type.
103    pub fn open_memory(&self, magic: &str, buf: &[u8]) -> Option<PdfDocument> {
104        unsafe {
105            let stream = fz_open_memory(
106                (self.0).0,
107                buf.as_ptr() as *const libc::c_uchar,
108                buf.len() as libc::size_t,
109            );
110            let c_magic = CString::new(magic).unwrap();
111            let doc = mp_open_document_with_stream((self.0).0, c_magic.as_ptr(), stream);
112            fz_drop_stream((self.0).0, stream);
113            if doc.is_null() {
114                None
115            } else {
116                Some(PdfDocument {
117                    ctx: self.0.clone(),
118                    doc,
119                })
120            }
121        }
122    }
123
124    pub fn load_user_stylesheet(&mut self) {
125        if let Ok(content) = fs::read_to_string(USER_STYLESHEET)
126            .and_then(|s| CString::new(s).map_err(Into::into))
127            .map_err(|e| {
128                if e.kind() != ErrorKind::NotFound {
129                    error!("{:#}", e)
130                }
131            })
132        {
133            unsafe { fz_set_user_css((self.0).0, content.as_ptr()) }
134        }
135    }
136}
137
// NOTE(review): `PdfDocument` stores an `Rc<PdfContext>`, and `PdfDocument::page` clones that
// `Rc` through `&self`. `Rc`'s reference count is not atomic, so these impls are only sound if
// callers never touch a document (or pages created from it) from two threads at once —
// confirm against the call sites, or consider an atomically counted handle.
unsafe impl Send for PdfDocument {}
unsafe impl Sync for PdfDocument {}
140
141impl PdfDocument {
142    pub fn page(&self, index: usize) -> Option<PdfPage<'_>> {
143        unsafe {
144            let page = mp_load_page(self.ctx.0, self.doc, index as libc::c_int);
145            if page.is_null() {
146                None
147            } else {
148                Some(PdfPage {
149                    ctx: self.ctx.clone(),
150                    page,
151                    index,
152                    _doc: self,
153                })
154            }
155        }
156    }
157
158    fn walk_toc(&self, outline: *mut FzOutline, index: &mut usize) -> Vec<TocEntry> {
159        unsafe {
160            let mut vec = Vec::new();
161            let mut cur = outline;
162            while !cur.is_null() {
163                let num = mp_page_number_from_location(self.ctx.0, self.doc, (*cur).page);
164                let location = if num > -1 {
165                    Location::Exact(num as usize)
166                } else if !(*cur).uri.is_null() {
167                    let uri = CStr::from_ptr((*cur).uri).to_string_lossy().into_owned();
168                    Location::Uri(uri)
169                } else {
170                    Location::Exact(0)
171                };
172                let title = if !(*cur).title.is_null() {
173                    CStr::from_ptr((*cur).title).to_string_lossy().into_owned()
174                } else {
175                    "Untitled".to_string()
176                };
177                let current_index = *index;
178                *index += 1;
179                let children = if !(*cur).down.is_null() {
180                    self.walk_toc((*cur).down, index)
181                } else {
182                    Vec::new()
183                };
184                vec.push(TocEntry {
185                    title,
186                    location,
187                    index: current_index,
188                    children,
189                });
190                cur = (*cur).next;
191            }
192            vec
193        }
194    }
195
196    pub fn is_protected(&self) -> bool {
197        unsafe { fz_needs_password(self.ctx.0, self.doc) == 1 }
198    }
199}
200
201impl Document for PdfDocument {
202    fn dims(&self, index: usize) -> Option<(f32, f32)> {
203        self.page(index).map(|page| page.dims())
204    }
205
206    fn pages_count(&self) -> usize {
207        unsafe { mp_count_pages(self.ctx.0, self.doc) as usize }
208    }
209
210    fn resolve_location(&mut self, loc: Location) -> Option<usize> {
211        if self.pages_count() == 0 {
212            return None;
213        }
214
215        match loc {
216            Location::Exact(index) => {
217                if index >= self.pages_count() {
218                    None
219                } else {
220                    Some(index)
221                }
222            }
223            Location::Previous(index) => {
224                if index > 0 {
225                    Some(index - 1)
226                } else {
227                    None
228                }
229            }
230            Location::Next(index) => {
231                if index < self.pages_count() - 1 {
232                    Some(index + 1)
233                } else {
234                    None
235                }
236            }
237            Location::LocalUri(_index, uri) => {
238                let c_uri = CString::new(uri).unwrap();
239                let dest = unsafe { fz_resolve_link_dest(self.ctx.0, self.doc, c_uri.as_ptr()) };
240                if dest.loc.page.is_positive() {
241                    Some(dest.loc.page as usize)
242                } else {
243                    None
244                }
245            }
246            _ => None,
247        }
248    }
249
250    fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
251        let index = self.resolve_location(loc)?;
252        self.page(index)
253            .and_then(|page| page.pixmap(scale, samples))
254            .map(|pixmap| (pixmap, index))
255    }
256
257    fn toc(&mut self) -> Option<Vec<TocEntry>> {
258        unsafe {
259            let outline = mp_load_outline(self.ctx.0, self.doc);
260            if outline.is_null() {
261                None
262            } else {
263                let mut index = 0;
264                let toc = self.walk_toc(outline, &mut index);
265                fz_drop_outline(self.ctx.0, outline);
266                Some(toc)
267            }
268        }
269    }
270
271    fn chapter<'a>(&mut self, offset: usize, toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
272        chapter(offset, self.pages_count(), toc)
273    }
274
    /// Delegates to the shared `chapter_relative` helper; no document state is consulted.
    fn chapter_relative<'a>(
        &mut self,
        offset: usize,
        dir: CycleDir,
        toc: &'a [TocEntry],
    ) -> Option<&'a TocEntry> {
        chapter_relative(offset, dir, toc)
    }
283
284    fn metadata(&self, key: &str) -> Option<String> {
285        unsafe {
286            let key = CString::new(key).unwrap();
287            let mut buf: [libc::c_char; 256] = [0; 256];
288            let len = fz_lookup_metadata(
289                self.ctx.0,
290                self.doc,
291                key.as_ptr(),
292                buf.as_mut_ptr(),
293                buf.len() as libc::c_int,
294            );
295            if len == -1 {
296                None
297            } else {
298                Some(CStr::from_ptr(buf.as_ptr()).to_string_lossy().into_owned())
299            }
300        }
301    }
302
303    fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
304        let index = self.resolve_location(loc)?;
305        self.page(index)
306            .and_then(|page| page.words())
307            .map(|words| (words, index))
308    }
309
310    fn lines(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
311        let index = self.resolve_location(loc)?;
312        self.page(index)
313            .and_then(|page| page.lines())
314            .map(|lines| (lines, index))
315    }
316
317    fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
318        let index = self.resolve_location(loc)?;
319        self.page(index)
320            .and_then(|page| page.images())
321            .map(|images| (images, index))
322    }
323
324    fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
325        let index = self.resolve_location(loc)?;
326        self.page(index)
327            .and_then(|page| page.links())
328            .map(|links| (links, index))
329    }
330
    /// Returns the metadata value stored under `FZ_META_INFO_TITLE`, if any.
    fn title(&self) -> Option<String> {
        self.metadata(FZ_META_INFO_TITLE)
    }
334
    /// Returns the metadata value stored under `FZ_META_INFO_AUTHOR`, if any.
    fn author(&self) -> Option<String> {
        self.metadata(FZ_META_INFO_AUTHOR)
    }
338
339    fn is_reflowable(&self) -> bool {
340        unsafe { fz_is_document_reflowable(self.ctx.0, self.doc) == 1 }
341    }
342
343    fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
344        let em = pt_to_px(font_size, dpi);
345        unsafe {
346            fz_layout_document(
347                self.ctx.0,
348                self.doc,
349                width as libc::c_float,
350                height as libc::c_float,
351                em as libc::c_float,
352            );
353        }
354    }
355
    /// No-op: this implementation ignores the requested text alignment.
    fn set_text_align(&mut self, _text_align: TextAlign) {}
357
    /// No-op: this implementation ignores the requested font family and search path.
    fn set_font_family(&mut self, _family_name: &str, _search_path: &str) {}
359
    /// No-op: this implementation ignores the requested margin width.
    fn set_margin_width(&mut self, _width: i32) {}
361
    /// No-op: this implementation ignores the requested line height.
    fn set_line_height(&mut self, _line_height: f32) {}
363
    /// No-op: this implementation ignores the requested hyphen penalty.
    fn set_hyphen_penalty(&mut self, _hyphen_penalty: i32) {}
365
    /// No-op: this implementation ignores the requested stretch tolerance.
    fn set_stretch_tolerance(&mut self, _stretch_tolerance: f32) {}
367
368    fn set_ignore_document_css(&mut self, ignore: bool) {
369        unsafe {
370            fz_set_use_document_css(self.ctx.0, !ignore as libc::c_int);
371        }
372    }
373}
374
375impl<'a> PdfPage<'a> {
376    pub fn images(&self) -> Option<Vec<Boundary>> {
377        unsafe {
378            let mut images: Vec<Boundary> = Vec::new();
379            let opts = FzTextOptions {
380                flags: FZ_TEXT_PRESERVE_IMAGES,
381                scale: 1.0,
382                clip: FzRect::default(),
383            };
384            let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, &opts);
385            if tp.is_null() {
386                return None;
387            }
388
389            let mut block = (*tp).first_block;
390
391            while !block.is_null() {
392                if (*block).kind == FZ_PAGE_BLOCK_IMAGE {
393                    let bnd: Boundary = (*block).bbox.into();
394                    images.retain(|img| !img.overlaps(&bnd));
395                    images.push(bnd);
396                }
397
398                block = (*block).next;
399            }
400
401            fz_drop_stext_page(self.ctx.0, tp);
402            Some(images)
403        }
404    }
405
406    pub fn lines(&self) -> Option<Vec<BoundedText>> {
407        unsafe {
408            let mut lines = Vec::new();
409            let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, ptr::null());
410            if tp.is_null() {
411                return None;
412            }
413            let mut offset = 0;
414            let mut block = (*tp).first_block;
415
416            while !block.is_null() {
417                if (*block).kind == FZ_PAGE_BLOCK_TEXT {
418                    let text_block = (*block).u.text;
419                    let mut line = text_block.first_line;
420
421                    while !line.is_null() {
422                        let rect = (*line).bbox.into();
423                        lines.push(BoundedText {
424                            rect,
425                            text: String::default(),
426                            location: TextLocation::Static(self.index, offset),
427                        });
428                        offset += 1;
429                        line = (*line).next;
430                    }
431                }
432
433                block = (*block).next;
434            }
435
436            fz_drop_stext_page(self.ctx.0, tp);
437            Some(lines)
438        }
439    }
440
441    pub fn words(&self) -> Option<Vec<BoundedText>> {
442        unsafe {
443            let mut words = Vec::new();
444            let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, ptr::null());
445            if tp.is_null() {
446                return None;
447            }
448            let mut block = (*tp).first_block;
449            let mut offset = 0;
450
451            while !block.is_null() {
452                if (*block).kind == FZ_PAGE_BLOCK_TEXT {
453                    let text_block = (*block).u.text;
454                    let mut line = text_block.first_line;
455
456                    while !line.is_null() {
457                        let mut chr = (*line).first_char;
458                        let mut text = String::default();
459                        let mut rect = FzRect::default();
460
461                        while !chr.is_null() {
462                            while !chr.is_null() {
463                                if let Some(c) = char::from_u32((*chr).c as u32) {
464                                    if c.is_whitespace() {
465                                        chr = (*chr).next;
466                                        break;
467                                    } else {
468                                        let chr_rect = fz_rect_from_quad((*chr).quad);
469                                        rect = fz_union_rect(rect, chr_rect);
470                                        text.push(c);
471                                    }
472                                }
473                                chr = (*chr).next;
474                            }
475
476                            if !text.is_empty() {
477                                words.push(BoundedText {
478                                    text: text.clone(),
479                                    rect: rect.into(),
480                                    location: TextLocation::Static(self.index, offset),
481                                });
482                                text.clear();
483                                rect = FzRect::default();
484                                offset += 1;
485                            }
486                        }
487
488                        line = (*line).next;
489                    }
490                }
491
492                block = (*block).next;
493            }
494
495            fz_drop_stext_page(self.ctx.0, tp);
496            Some(words)
497        }
498    }
499
500    pub fn links(&self) -> Option<Vec<BoundedText>> {
501        unsafe {
502            let links = mp_load_links(self.ctx.0, self.page);
503
504            if links.is_null() {
505                return None;
506            }
507
508            let mut link = links;
509            let mut result = Vec::new();
510            let mut offset = 0;
511
512            while !link.is_null() {
513                let text = CStr::from_ptr((*link).uri).to_string_lossy().into_owned();
514                let rect = (*link).rect.into();
515                result.push(BoundedText {
516                    text,
517                    rect,
518                    location: TextLocation::Static(self.index, offset),
519                });
520                link = (*link).next;
521                offset += 1;
522            }
523
524            fz_drop_link(self.ctx.0, links);
525
526            Some(result)
527        }
528    }
529
530    pub fn pixmap(&self, scale: f32, color_samples: usize) -> Option<Pixmap> {
531        unsafe {
532            let mat = fz_scale(scale as libc::c_float, scale as libc::c_float);
533            let color_space = if color_samples == 1 {
534                fz_device_gray(self.ctx.0)
535            } else {
536                fz_device_rgb(self.ctx.0)
537            };
538            let pixmap = mp_new_pixmap_from_page(self.ctx.0, self.page, mat, color_space, 0);
539            if pixmap.is_null() {
540                return None;
541            }
542
543            let width = (*pixmap).w as u32;
544            let height = (*pixmap).h as u32;
545            let len = color_samples * (width * height) as usize;
546            let pixmap_data = slice::from_raw_parts((*pixmap).samples, len);
547            let mut data = Vec::new();
548            if data.try_reserve(len).is_err() {
549                fz_drop_pixmap(self.ctx.0, pixmap);
550                return None;
551            }
552            data.extend(pixmap_data);
553
554            fz_drop_pixmap(self.ctx.0, pixmap);
555
556            Some(Pixmap {
557                width,
558                height,
559                samples: color_samples,
560                data,
561            })
562        }
563    }
564
565    pub fn boundary_box(&self) -> Option<Boundary> {
566        unsafe {
567            let mut rect = FzRect::default();
568            let dev = fz_new_bbox_device(self.ctx.0, &mut rect);
569            if dev.is_null() {
570                None
571            } else {
572                fz_run_page(self.ctx.0, self.page, dev, fz_identity, ptr::null_mut());
573                fz_close_device(self.ctx.0, dev);
574                fz_drop_device(self.ctx.0, dev);
575                Some(rect.into())
576            }
577        }
578    }
579
580    pub fn dims(&self) -> (f32, f32) {
581        unsafe {
582            let bounds = fz_bound_page(self.ctx.0, self.page);
583            (
584                (bounds.x1 - bounds.x0) as f32,
585                (bounds.y1 - bounds.y0) as f32,
586            )
587        }
588    }
589
590    pub fn width(&self) -> f32 {
591        let (width, _) = self.dims();
592        width
593    }
594
595    pub fn height(&self) -> f32 {
596        let (_, height) = self.dims();
597        height
598    }
599}
600
/// Releases the wrapped MuPDF context when the wrapper is dropped.
impl Drop for PdfContext {
    fn drop(&mut self) {
        unsafe {
            fz_drop_context(self.0);
        }
    }
}
608
/// Releases the MuPDF document handle, using the context held by the document.
impl Drop for PdfDocument {
    fn drop(&mut self) {
        unsafe {
            // `self.ctx` is still alive here; fields are dropped only after this body runs.
            fz_drop_document(self.ctx.0, self.doc);
        }
    }
}
616
/// Releases the MuPDF page handle, using the context held by the page.
impl<'a> Drop for PdfPage<'a> {
    fn drop(&mut self) {
        unsafe {
            // `self.ctx` is still alive here; fields are dropped only after this body runs.
            fz_drop_page(self.ctx.0, self.page);
        }
    }
}
623}