cadmus_core/document/
pdf.rs

1use super::mupdf_sys::*;
2
3use super::{chapter, chapter_relative};
4use super::{BoundedText, Document, Location, TextLocation, TocEntry};
5use crate::framebuffer::Pixmap;
6use crate::geom::{Boundary, CycleDir};
7use crate::metadata::TextAlign;
8use crate::unit::pt_to_px;
9use std::char;
10use std::ffi::{CStr, CString};
11use std::fs;
12use std::io::ErrorKind;
13use std::os::unix::ffi::OsStrExt;
14use std::path::Path;
15use std::ptr;
16use std::rc::Rc;
17use std::slice;
18use tracing::error;
19
// Path of the optional user stylesheet read by `PdfOpener::load_user_stylesheet`.
// It is a relative path handed straight to `fs::read_to_string`, so it is
// resolved against the process's current working directory.
20const USER_STYLESHEET: &str = "css/html-user.css";
21
22impl Into<Boundary> for FzRect {
23    fn into(self) -> Boundary {
24        Boundary {
25            min: vec2!(self.x0, self.y0),
26            max: vec2!(self.x1, self.y1),
27        }
28    }
29}
30
// Owning wrapper around a raw MuPDF context pointer; the `Drop` impl for this
// type passes the pointer to `fz_drop_context`.
31struct PdfContext(*mut FzContext);
32
/// Factory for `PdfDocument`s. Holds a reference-counted handle to the MuPDF
/// context, which is cloned into every document it opens.
33pub struct PdfOpener(Rc<PdfContext>);
34
/// A document opened through MuPDF.
35pub struct PdfDocument {
    // Shared handle to the MuPDF context the document was opened with.
36    ctx: Rc<PdfContext>,
    // Raw MuPDF document pointer; handed to `fz_drop_document` on drop.
37    doc: *mut FzDocument,
38}
39
/// A single loaded page of a `PdfDocument`.
40pub struct PdfPage<'a> {
    // Shared handle to the MuPDF context, cloned from the owning document.
41    ctx: Rc<PdfContext>,
    // Raw MuPDF page pointer; handed to `fz_drop_page` on drop.
42    page: *mut FzPage,
    // Index this page was loaded with (see `PdfDocument::page`).
43    index: usize,
    // Borrow of the owning document: the page cannot outlive it.
44    _doc: &'a PdfDocument,
45}
46
47impl PdfOpener {
48    pub fn new() -> Option<PdfOpener> {
49        unsafe {
50            let version = CString::new(FZ_VERSION).unwrap();
51            let ctx = fz_new_context_imp(ptr::null(), ptr::null(), CACHE_SIZE, version.as_ptr());
52
53            if ctx.is_null() {
54                None
55            } else {
56                fz_register_document_handlers(ctx);
57                Some(PdfOpener(Rc::new(PdfContext(ctx))))
58            }
59        }
60    }
61
62    pub fn open<P: AsRef<Path>>(&self, path: P) -> Option<PdfDocument> {
63        unsafe {
64            let c_path = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap();
65            let doc = mp_open_document((self.0).0, c_path.as_ptr());
66            if doc.is_null() {
67                None
68            } else {
69                Some(PdfDocument {
70                    ctx: self.0.clone(),
71                    doc,
72                })
73            }
74        }
75    }
76
77    // *magic* is a filename or a MIME type.
78    pub fn open_memory(&self, magic: &str, buf: &[u8]) -> Option<PdfDocument> {
79        unsafe {
80            let stream = fz_open_memory(
81                (self.0).0,
82                buf.as_ptr() as *const libc::c_uchar,
83                buf.len() as libc::size_t,
84            );
85            let c_magic = CString::new(magic).unwrap();
86            let doc = mp_open_document_with_stream((self.0).0, c_magic.as_ptr(), stream);
87            fz_drop_stream((self.0).0, stream);
88            if doc.is_null() {
89                None
90            } else {
91                Some(PdfDocument {
92                    ctx: self.0.clone(),
93                    doc,
94                })
95            }
96        }
97    }
98
99    pub fn load_user_stylesheet(&mut self) {
100        if let Ok(content) = fs::read_to_string(USER_STYLESHEET)
101            .and_then(|s| CString::new(s).map_err(Into::into))
102            .map_err(|e| {
103                if e.kind() != ErrorKind::NotFound {
104                    error!("{:#}", e)
105                }
106            })
107        {
108            unsafe { fz_set_user_css((self.0).0, content.as_ptr()) }
109        }
110    }
111}
112
// Manual opt-in to cross-thread use: the raw `*mut FzDocument` field keeps
// the compiler from deriving these automatically.
//
// NOTE(review): `PdfDocument` also stores an `Rc<PdfContext>`, and `page()`
// clones that `Rc` through `&self`. `Rc`'s reference count is not atomic, so
// these impls look sound only if a document (and the opener/pages sharing its
// context) is never used from two threads at the same time. Confirm how
// documents are shared, or consider `Arc`.
113unsafe impl Send for PdfDocument {}
114unsafe impl Sync for PdfDocument {}
115
116impl PdfDocument {
117    pub fn page(&self, index: usize) -> Option<PdfPage<'_>> {
118        unsafe {
119            let page = mp_load_page(self.ctx.0, self.doc, index as libc::c_int);
120            if page.is_null() {
121                None
122            } else {
123                Some(PdfPage {
124                    ctx: self.ctx.clone(),
125                    page,
126                    index,
127                    _doc: self,
128                })
129            }
130        }
131    }
132
133    fn walk_toc(&self, outline: *mut FzOutline, index: &mut usize) -> Vec<TocEntry> {
134        unsafe {
135            let mut vec = Vec::new();
136            let mut cur = outline;
137            while !cur.is_null() {
138                let num = mp_page_number_from_location(self.ctx.0, self.doc, (*cur).page);
139                let location = if num > -1 {
140                    Location::Exact(num as usize)
141                } else if !(*cur).uri.is_null() {
142                    let uri = CStr::from_ptr((*cur).uri).to_string_lossy().into_owned();
143                    Location::Uri(uri)
144                } else {
145                    Location::Exact(0)
146                };
147                let title = if !(*cur).title.is_null() {
148                    CStr::from_ptr((*cur).title).to_string_lossy().into_owned()
149                } else {
150                    "Untitled".to_string()
151                };
152                let current_index = *index;
153                *index += 1;
154                let children = if !(*cur).down.is_null() {
155                    self.walk_toc((*cur).down, index)
156                } else {
157                    Vec::new()
158                };
159                vec.push(TocEntry {
160                    title,
161                    location,
162                    index: current_index,
163                    children,
164                });
165                cur = (*cur).next;
166            }
167            vec
168        }
169    }
170
171    pub fn is_protected(&self) -> bool {
172        unsafe { fz_needs_password(self.ctx.0, self.doc) == 1 }
173    }
174}
175
176impl Document for PdfDocument {
177    fn dims(&self, index: usize) -> Option<(f32, f32)> {
178        self.page(index).map(|page| page.dims())
179    }
180
181    fn pages_count(&self) -> usize {
182        unsafe { mp_count_pages(self.ctx.0, self.doc) as usize }
183    }
184
185    fn resolve_location(&mut self, loc: Location) -> Option<usize> {
186        if self.pages_count() == 0 {
187            return None;
188        }
189
190        match loc {
191            Location::Exact(index) => {
192                if index >= self.pages_count() {
193                    None
194                } else {
195                    Some(index)
196                }
197            }
198            Location::Previous(index) => {
199                if index > 0 {
200                    Some(index - 1)
201                } else {
202                    None
203                }
204            }
205            Location::Next(index) => {
206                if index < self.pages_count() - 1 {
207                    Some(index + 1)
208                } else {
209                    None
210                }
211            }
212            Location::LocalUri(_index, uri) => {
213                let c_uri = CString::new(uri).unwrap();
214                let dest = unsafe { fz_resolve_link_dest(self.ctx.0, self.doc, c_uri.as_ptr()) };
215                if dest.loc.page.is_positive() {
216                    Some(dest.loc.page as usize)
217                } else {
218                    None
219                }
220            }
221            _ => None,
222        }
223    }
224
225    fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
226        let index = self.resolve_location(loc)?;
227        self.page(index)
228            .and_then(|page| page.pixmap(scale, samples))
229            .map(|pixmap| (pixmap, index))
230    }
231
232    fn toc(&mut self) -> Option<Vec<TocEntry>> {
233        unsafe {
234            let outline = mp_load_outline(self.ctx.0, self.doc);
235            if outline.is_null() {
236                None
237            } else {
238                let mut index = 0;
239                let toc = self.walk_toc(outline, &mut index);
240                fz_drop_outline(self.ctx.0, outline);
241                Some(toc)
242            }
243        }
244    }
245
246    fn chapter<'a>(&mut self, offset: usize, toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
247        chapter(offset, self.pages_count(), toc)
248    }
249
250    fn chapter_relative<'a>(
251        &mut self,
252        offset: usize,
253        dir: CycleDir,
254        toc: &'a [TocEntry],
255    ) -> Option<&'a TocEntry> {
256        chapter_relative(offset, dir, toc)
257    }
258
259    fn metadata(&self, key: &str) -> Option<String> {
260        unsafe {
261            let key = CString::new(key).unwrap();
262            let mut buf: [libc::c_char; 256] = [0; 256];
263            let len = fz_lookup_metadata(
264                self.ctx.0,
265                self.doc,
266                key.as_ptr(),
267                buf.as_mut_ptr(),
268                buf.len() as libc::c_int,
269            );
270            if len == -1 {
271                None
272            } else {
273                Some(CStr::from_ptr(buf.as_ptr()).to_string_lossy().into_owned())
274            }
275        }
276    }
277
278    fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
279        let index = self.resolve_location(loc)?;
280        self.page(index)
281            .and_then(|page| page.words())
282            .map(|words| (words, index))
283    }
284
285    fn lines(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
286        let index = self.resolve_location(loc)?;
287        self.page(index)
288            .and_then(|page| page.lines())
289            .map(|lines| (lines, index))
290    }
291
292    fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
293        let index = self.resolve_location(loc)?;
294        self.page(index)
295            .and_then(|page| page.images())
296            .map(|images| (images, index))
297    }
298
299    fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
300        let index = self.resolve_location(loc)?;
301        self.page(index)
302            .and_then(|page| page.links())
303            .map(|links| (links, index))
304    }
305
306    fn title(&self) -> Option<String> {
307        self.metadata(FZ_META_INFO_TITLE)
308    }
309
310    fn author(&self) -> Option<String> {
311        self.metadata(FZ_META_INFO_AUTHOR)
312    }
313
314    fn is_reflowable(&self) -> bool {
315        unsafe { fz_is_document_reflowable(self.ctx.0, self.doc) == 1 }
316    }
317
318    fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
319        let em = pt_to_px(font_size, dpi);
320        unsafe {
321            fz_layout_document(
322                self.ctx.0,
323                self.doc,
324                width as libc::c_float,
325                height as libc::c_float,
326                em as libc::c_float,
327            );
328        }
329    }
330
331    fn set_text_align(&mut self, _text_align: TextAlign) {}
332
333    fn set_font_family(&mut self, _family_name: &str, _search_path: &str) {}
334
335    fn set_margin_width(&mut self, _width: i32) {}
336
337    fn set_line_height(&mut self, _line_height: f32) {}
338
339    fn set_hyphen_penalty(&mut self, _hyphen_penalty: i32) {}
340
341    fn set_stretch_tolerance(&mut self, _stretch_tolerance: f32) {}
342
343    fn set_ignore_document_css(&mut self, ignore: bool) {
344        unsafe {
345            fz_set_use_document_css(self.ctx.0, !ignore as libc::c_int);
346        }
347    }
348}
349
350impl<'a> PdfPage<'a> {
351    pub fn images(&self) -> Option<Vec<Boundary>> {
352        unsafe {
353            let mut images: Vec<Boundary> = Vec::new();
354            let opts = FzTextOptions {
355                flags: FZ_TEXT_PRESERVE_IMAGES,
356                scale: 1.0,
357                clip: FzRect::default(),
358            };
359            let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, &opts);
360            if tp.is_null() {
361                return None;
362            }
363
364            let mut block = (*tp).first_block;
365
366            while !block.is_null() {
367                if (*block).kind == FZ_PAGE_BLOCK_IMAGE {
368                    let bnd: Boundary = (*block).bbox.into();
369                    images.retain(|img| !img.overlaps(&bnd));
370                    images.push(bnd);
371                }
372
373                block = (*block).next;
374            }
375
376            fz_drop_stext_page(self.ctx.0, tp);
377            Some(images)
378        }
379    }
380
381    pub fn lines(&self) -> Option<Vec<BoundedText>> {
382        unsafe {
383            let mut lines = Vec::new();
384            let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, ptr::null());
385            if tp.is_null() {
386                return None;
387            }
388            let mut offset = 0;
389            let mut block = (*tp).first_block;
390
391            while !block.is_null() {
392                if (*block).kind == FZ_PAGE_BLOCK_TEXT {
393                    let text_block = (*block).u.text;
394                    let mut line = text_block.first_line;
395
396                    while !line.is_null() {
397                        let rect = (*line).bbox.into();
398                        lines.push(BoundedText {
399                            rect,
400                            text: String::default(),
401                            location: TextLocation::Static(self.index, offset),
402                        });
403                        offset += 1;
404                        line = (*line).next;
405                    }
406                }
407
408                block = (*block).next;
409            }
410
411            fz_drop_stext_page(self.ctx.0, tp);
412            Some(lines)
413        }
414    }
415
416    pub fn words(&self) -> Option<Vec<BoundedText>> {
417        unsafe {
418            let mut words = Vec::new();
419            let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, ptr::null());
420            if tp.is_null() {
421                return None;
422            }
423            let mut block = (*tp).first_block;
424            let mut offset = 0;
425
426            while !block.is_null() {
427                if (*block).kind == FZ_PAGE_BLOCK_TEXT {
428                    let text_block = (*block).u.text;
429                    let mut line = text_block.first_line;
430
431                    while !line.is_null() {
432                        let mut chr = (*line).first_char;
433                        let mut text = String::default();
434                        let mut rect = FzRect::default();
435
436                        while !chr.is_null() {
437                            while !chr.is_null() {
438                                if let Some(c) = char::from_u32((*chr).c as u32) {
439                                    if c.is_whitespace() {
440                                        chr = (*chr).next;
441                                        break;
442                                    } else {
443                                        let chr_rect = fz_rect_from_quad((*chr).quad);
444                                        rect = fz_union_rect(rect, chr_rect);
445                                        text.push(c);
446                                    }
447                                }
448                                chr = (*chr).next;
449                            }
450
451                            if !text.is_empty() {
452                                words.push(BoundedText {
453                                    text: text.clone(),
454                                    rect: rect.into(),
455                                    location: TextLocation::Static(self.index, offset),
456                                });
457                                text.clear();
458                                rect = FzRect::default();
459                                offset += 1;
460                            }
461                        }
462
463                        line = (*line).next;
464                    }
465                }
466
467                block = (*block).next;
468            }
469
470            fz_drop_stext_page(self.ctx.0, tp);
471            Some(words)
472        }
473    }
474
475    pub fn links(&self) -> Option<Vec<BoundedText>> {
476        unsafe {
477            let links = mp_load_links(self.ctx.0, self.page);
478
479            if links.is_null() {
480                return None;
481            }
482
483            let mut link = links;
484            let mut result = Vec::new();
485            let mut offset = 0;
486
487            while !link.is_null() {
488                let text = CStr::from_ptr((*link).uri).to_string_lossy().into_owned();
489                let rect = (*link).rect.into();
490                result.push(BoundedText {
491                    text,
492                    rect,
493                    location: TextLocation::Static(self.index, offset),
494                });
495                link = (*link).next;
496                offset += 1;
497            }
498
499            fz_drop_link(self.ctx.0, links);
500
501            Some(result)
502        }
503    }
504
505    pub fn pixmap(&self, scale: f32, color_samples: usize) -> Option<Pixmap> {
506        unsafe {
507            let mat = fz_scale(scale as libc::c_float, scale as libc::c_float);
508            let color_space = if color_samples == 1 {
509                fz_device_gray(self.ctx.0)
510            } else {
511                fz_device_rgb(self.ctx.0)
512            };
513            let pixmap = mp_new_pixmap_from_page(self.ctx.0, self.page, mat, color_space, 0);
514            if pixmap.is_null() {
515                return None;
516            }
517
518            let width = (*pixmap).w as u32;
519            let height = (*pixmap).h as u32;
520            let len = color_samples * (width * height) as usize;
521            let pixmap_data = slice::from_raw_parts((*pixmap).samples, len);
522            let mut data = Vec::new();
523            if data.try_reserve(len).is_err() {
524                fz_drop_pixmap(self.ctx.0, pixmap);
525                return None;
526            }
527            data.extend(pixmap_data);
528
529            fz_drop_pixmap(self.ctx.0, pixmap);
530
531            Some(Pixmap {
532                width,
533                height,
534                samples: color_samples,
535                data,
536            })
537        }
538    }
539
540    pub fn boundary_box(&self) -> Option<Boundary> {
541        unsafe {
542            let mut rect = FzRect::default();
543            let dev = fz_new_bbox_device(self.ctx.0, &mut rect);
544            if dev.is_null() {
545                None
546            } else {
547                fz_run_page(self.ctx.0, self.page, dev, fz_identity, ptr::null_mut());
548                fz_close_device(self.ctx.0, dev);
549                fz_drop_device(self.ctx.0, dev);
550                Some(rect.into())
551            }
552        }
553    }
554
555    pub fn dims(&self) -> (f32, f32) {
556        unsafe {
557            let bounds = fz_bound_page(self.ctx.0, self.page);
558            (
559                (bounds.x1 - bounds.x0) as f32,
560                (bounds.y1 - bounds.y0) as f32,
561            )
562        }
563    }
564
565    pub fn width(&self) -> f32 {
566        let (width, _) = self.dims();
567        width
568    }
569
570    pub fn height(&self) -> f32 {
571        let (_, height) = self.dims();
572        height
573    }
574}
575
576impl Drop for PdfContext {
577    fn drop(&mut self) {
578        unsafe {
579            fz_drop_context(self.0);
580        }
581    }
582}
583
584impl Drop for PdfDocument {
585    fn drop(&mut self) {
586        unsafe {
587            fz_drop_document(self.ctx.0, self.doc);
588        }
589    }
590}
591
592impl<'a> Drop for PdfPage<'a> {
593    fn drop(&mut self) {
594        unsafe {
595            fz_drop_page(self.ctx.0, self.page);
596        }
597    }
598}