1use super::mupdf_sys::*;
2
3use super::{chapter, chapter_relative};
4use super::{BoundedText, Document, Location, TextLocation, TocEntry};
5use crate::framebuffer::Pixmap;
6use crate::geom::{Boundary, CycleDir};
7use crate::metadata::TextAlign;
8use crate::unit::pt_to_px;
9use std::char;
10use std::ffi::{CStr, CString};
11use std::fs;
12use std::io::ErrorKind;
13use std::os::unix::ffi::OsStrExt;
14use std::path::Path;
15use std::ptr;
16use std::rc::Rc;
17use std::slice;
18use thiserror::Error;
19use tracing::error;
20
/// Location of the optional user stylesheet read by
/// `PdfOpener::load_user_stylesheet`.
///
/// The path is relative, so `fs::read_to_string` resolves it against the
/// current working directory of the process.
const USER_STYLESHEET: &str = "css/html-user.css";
22
23#[derive(Debug, Error)]
25pub enum PdfOpenError {
26 #[error("MuPDF error: {0}")]
27 MuPdf(String),
28 #[error("MuPDF returned no error message")]
29 Unknown,
30}
31
32impl Into<Boundary> for FzRect {
33 fn into(self) -> Boundary {
34 Boundary {
35 min: vec2!(self.x0, self.y0),
36 max: vec2!(self.x1, self.y1),
37 }
38 }
39}
40
41struct PdfContext(*mut FzContext);
42
43pub struct PdfOpener(Rc<PdfContext>);
44
45pub struct PdfDocument {
46 ctx: Rc<PdfContext>,
47 doc: *mut FzDocument,
48}
49
50pub struct PdfPage<'a> {
51 ctx: Rc<PdfContext>,
52 page: *mut FzPage,
53 index: usize,
54 _doc: &'a PdfDocument,
55}
56
57impl PdfOpener {
58 #[cfg_attr(feature = "otel", tracing::instrument)]
59 pub fn new() -> Option<PdfOpener> {
60 unsafe {
61 let version = CString::new(FZ_VERSION).unwrap();
62 let ctx = fz_new_context_imp(ptr::null(), ptr::null(), CACHE_SIZE, version.as_ptr());
63
64 if ctx.is_null() {
65 None
66 } else {
67 fz_register_document_handlers(ctx);
68 Some(PdfOpener(Rc::new(PdfContext(ctx))))
69 }
70 }
71 }
72
73 #[cfg_attr(feature = "otel", tracing::instrument(skip(self, path), fields(path = %path.as_ref().display())))]
74 pub fn open<P: AsRef<Path>>(&self, path: P) -> Result<PdfDocument, PdfOpenError> {
75 unsafe {
76 let c_path = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap();
77 let mut err_buf: [libc::c_char; 256] = [0; 256];
78 let doc = mp_open_document_with_error(
79 (self.0).0,
80 c_path.as_ptr(),
81 err_buf.as_mut_ptr(),
82 err_buf.len() as libc::c_int,
83 );
84 if doc.is_null() {
85 let msg = CStr::from_ptr(err_buf.as_ptr())
86 .to_string_lossy()
87 .into_owned();
88 Err(if msg.is_empty() {
89 PdfOpenError::Unknown
90 } else {
91 PdfOpenError::MuPdf(msg)
92 })
93 } else {
94 Ok(PdfDocument {
95 ctx: self.0.clone(),
96 doc,
97 })
98 }
99 }
100 }
101
102 pub fn open_memory(&self, magic: &str, buf: &[u8]) -> Option<PdfDocument> {
104 unsafe {
105 let stream = fz_open_memory(
106 (self.0).0,
107 buf.as_ptr() as *const libc::c_uchar,
108 buf.len() as libc::size_t,
109 );
110 let c_magic = CString::new(magic).unwrap();
111 let doc = mp_open_document_with_stream((self.0).0, c_magic.as_ptr(), stream);
112 fz_drop_stream((self.0).0, stream);
113 if doc.is_null() {
114 None
115 } else {
116 Some(PdfDocument {
117 ctx: self.0.clone(),
118 doc,
119 })
120 }
121 }
122 }
123
124 pub fn load_user_stylesheet(&mut self) {
125 if let Ok(content) = fs::read_to_string(USER_STYLESHEET)
126 .and_then(|s| CString::new(s).map_err(Into::into))
127 .map_err(|e| {
128 if e.kind() != ErrorKind::NotFound {
129 error!("{:#}", e)
130 }
131 })
132 {
133 unsafe { fz_set_user_css((self.0).0, content.as_ptr()) }
134 }
135 }
136}
137
138unsafe impl Send for PdfDocument {}
139unsafe impl Sync for PdfDocument {}
140
141impl PdfDocument {
142 pub fn page(&self, index: usize) -> Option<PdfPage<'_>> {
143 unsafe {
144 let page = mp_load_page(self.ctx.0, self.doc, index as libc::c_int);
145 if page.is_null() {
146 None
147 } else {
148 Some(PdfPage {
149 ctx: self.ctx.clone(),
150 page,
151 index,
152 _doc: self,
153 })
154 }
155 }
156 }
157
158 fn walk_toc(&self, outline: *mut FzOutline, index: &mut usize) -> Vec<TocEntry> {
159 unsafe {
160 let mut vec = Vec::new();
161 let mut cur = outline;
162 while !cur.is_null() {
163 let num = mp_page_number_from_location(self.ctx.0, self.doc, (*cur).page);
164 let location = if num > -1 {
165 Location::Exact(num as usize)
166 } else if !(*cur).uri.is_null() {
167 let uri = CStr::from_ptr((*cur).uri).to_string_lossy().into_owned();
168 Location::Uri(uri)
169 } else {
170 Location::Exact(0)
171 };
172 let title = if !(*cur).title.is_null() {
173 CStr::from_ptr((*cur).title).to_string_lossy().into_owned()
174 } else {
175 "Untitled".to_string()
176 };
177 let current_index = *index;
178 *index += 1;
179 let children = if !(*cur).down.is_null() {
180 self.walk_toc((*cur).down, index)
181 } else {
182 Vec::new()
183 };
184 vec.push(TocEntry {
185 title,
186 location,
187 index: current_index,
188 children,
189 });
190 cur = (*cur).next;
191 }
192 vec
193 }
194 }
195
196 pub fn is_protected(&self) -> bool {
197 unsafe { fz_needs_password(self.ctx.0, self.doc) == 1 }
198 }
199}
200
201impl Document for PdfDocument {
202 fn dims(&self, index: usize) -> Option<(f32, f32)> {
203 self.page(index).map(|page| page.dims())
204 }
205
206 fn pages_count(&self) -> usize {
207 unsafe { mp_count_pages(self.ctx.0, self.doc) as usize }
208 }
209
210 fn resolve_location(&mut self, loc: Location) -> Option<usize> {
211 if self.pages_count() == 0 {
212 return None;
213 }
214
215 match loc {
216 Location::Exact(index) => {
217 if index >= self.pages_count() {
218 None
219 } else {
220 Some(index)
221 }
222 }
223 Location::Previous(index) => {
224 if index > 0 {
225 Some(index - 1)
226 } else {
227 None
228 }
229 }
230 Location::Next(index) => {
231 if index < self.pages_count() - 1 {
232 Some(index + 1)
233 } else {
234 None
235 }
236 }
237 Location::LocalUri(_index, uri) => {
238 let c_uri = CString::new(uri).unwrap();
239 let dest = unsafe { fz_resolve_link_dest(self.ctx.0, self.doc, c_uri.as_ptr()) };
240 if dest.loc.page.is_positive() {
241 Some(dest.loc.page as usize)
242 } else {
243 None
244 }
245 }
246 _ => None,
247 }
248 }
249
250 fn pixmap(&mut self, loc: Location, scale: f32, samples: usize) -> Option<(Pixmap, usize)> {
251 let index = self.resolve_location(loc)?;
252 self.page(index)
253 .and_then(|page| page.pixmap(scale, samples))
254 .map(|pixmap| (pixmap, index))
255 }
256
257 fn toc(&mut self) -> Option<Vec<TocEntry>> {
258 unsafe {
259 let outline = mp_load_outline(self.ctx.0, self.doc);
260 if outline.is_null() {
261 None
262 } else {
263 let mut index = 0;
264 let toc = self.walk_toc(outline, &mut index);
265 fz_drop_outline(self.ctx.0, outline);
266 Some(toc)
267 }
268 }
269 }
270
271 fn chapter<'a>(&mut self, offset: usize, toc: &'a [TocEntry]) -> Option<(&'a TocEntry, f32)> {
272 chapter(offset, self.pages_count(), toc)
273 }
274
275 fn chapter_relative<'a>(
276 &mut self,
277 offset: usize,
278 dir: CycleDir,
279 toc: &'a [TocEntry],
280 ) -> Option<&'a TocEntry> {
281 chapter_relative(offset, dir, toc)
282 }
283
284 fn metadata(&self, key: &str) -> Option<String> {
285 unsafe {
286 let key = CString::new(key).unwrap();
287 let mut buf: [libc::c_char; 256] = [0; 256];
288 let len = fz_lookup_metadata(
289 self.ctx.0,
290 self.doc,
291 key.as_ptr(),
292 buf.as_mut_ptr(),
293 buf.len() as libc::c_int,
294 );
295 if len == -1 {
296 None
297 } else {
298 Some(CStr::from_ptr(buf.as_ptr()).to_string_lossy().into_owned())
299 }
300 }
301 }
302
303 fn words(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
304 let index = self.resolve_location(loc)?;
305 self.page(index)
306 .and_then(|page| page.words())
307 .map(|words| (words, index))
308 }
309
310 fn lines(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
311 let index = self.resolve_location(loc)?;
312 self.page(index)
313 .and_then(|page| page.lines())
314 .map(|lines| (lines, index))
315 }
316
317 fn images(&mut self, loc: Location) -> Option<(Vec<Boundary>, usize)> {
318 let index = self.resolve_location(loc)?;
319 self.page(index)
320 .and_then(|page| page.images())
321 .map(|images| (images, index))
322 }
323
324 fn links(&mut self, loc: Location) -> Option<(Vec<BoundedText>, usize)> {
325 let index = self.resolve_location(loc)?;
326 self.page(index)
327 .and_then(|page| page.links())
328 .map(|links| (links, index))
329 }
330
331 fn title(&self) -> Option<String> {
332 self.metadata(FZ_META_INFO_TITLE)
333 }
334
335 fn author(&self) -> Option<String> {
336 self.metadata(FZ_META_INFO_AUTHOR)
337 }
338
339 fn is_reflowable(&self) -> bool {
340 unsafe { fz_is_document_reflowable(self.ctx.0, self.doc) == 1 }
341 }
342
343 fn layout(&mut self, width: u32, height: u32, font_size: f32, dpi: u16) {
344 let em = pt_to_px(font_size, dpi);
345 unsafe {
346 fz_layout_document(
347 self.ctx.0,
348 self.doc,
349 width as libc::c_float,
350 height as libc::c_float,
351 em as libc::c_float,
352 );
353 }
354 }
355
356 fn set_text_align(&mut self, _text_align: TextAlign) {}
357
358 fn set_font_family(&mut self, _family_name: &str, _search_path: &str) {}
359
360 fn set_margin_width(&mut self, _width: i32) {}
361
362 fn set_line_height(&mut self, _line_height: f32) {}
363
364 fn set_hyphen_penalty(&mut self, _hyphen_penalty: i32) {}
365
366 fn set_stretch_tolerance(&mut self, _stretch_tolerance: f32) {}
367
368 fn set_ignore_document_css(&mut self, ignore: bool) {
369 unsafe {
370 fz_set_use_document_css(self.ctx.0, !ignore as libc::c_int);
371 }
372 }
373}
374
375impl<'a> PdfPage<'a> {
376 pub fn images(&self) -> Option<Vec<Boundary>> {
377 unsafe {
378 let mut images: Vec<Boundary> = Vec::new();
379 let opts = FzTextOptions {
380 flags: FZ_TEXT_PRESERVE_IMAGES,
381 scale: 1.0,
382 clip: FzRect::default(),
383 };
384 let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, &opts);
385 if tp.is_null() {
386 return None;
387 }
388
389 let mut block = (*tp).first_block;
390
391 while !block.is_null() {
392 if (*block).kind == FZ_PAGE_BLOCK_IMAGE {
393 let bnd: Boundary = (*block).bbox.into();
394 images.retain(|img| !img.overlaps(&bnd));
395 images.push(bnd);
396 }
397
398 block = (*block).next;
399 }
400
401 fz_drop_stext_page(self.ctx.0, tp);
402 Some(images)
403 }
404 }
405
406 pub fn lines(&self) -> Option<Vec<BoundedText>> {
407 unsafe {
408 let mut lines = Vec::new();
409 let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, ptr::null());
410 if tp.is_null() {
411 return None;
412 }
413 let mut offset = 0;
414 let mut block = (*tp).first_block;
415
416 while !block.is_null() {
417 if (*block).kind == FZ_PAGE_BLOCK_TEXT {
418 let text_block = (*block).u.text;
419 let mut line = text_block.first_line;
420
421 while !line.is_null() {
422 let rect = (*line).bbox.into();
423 lines.push(BoundedText {
424 rect,
425 text: String::default(),
426 location: TextLocation::Static(self.index, offset),
427 });
428 offset += 1;
429 line = (*line).next;
430 }
431 }
432
433 block = (*block).next;
434 }
435
436 fz_drop_stext_page(self.ctx.0, tp);
437 Some(lines)
438 }
439 }
440
441 pub fn words(&self) -> Option<Vec<BoundedText>> {
442 unsafe {
443 let mut words = Vec::new();
444 let tp = mp_new_stext_page_from_page(self.ctx.0, self.page, ptr::null());
445 if tp.is_null() {
446 return None;
447 }
448 let mut block = (*tp).first_block;
449 let mut offset = 0;
450
451 while !block.is_null() {
452 if (*block).kind == FZ_PAGE_BLOCK_TEXT {
453 let text_block = (*block).u.text;
454 let mut line = text_block.first_line;
455
456 while !line.is_null() {
457 let mut chr = (*line).first_char;
458 let mut text = String::default();
459 let mut rect = FzRect::default();
460
461 while !chr.is_null() {
462 while !chr.is_null() {
463 if let Some(c) = char::from_u32((*chr).c as u32) {
464 if c.is_whitespace() {
465 chr = (*chr).next;
466 break;
467 } else {
468 let chr_rect = fz_rect_from_quad((*chr).quad);
469 rect = fz_union_rect(rect, chr_rect);
470 text.push(c);
471 }
472 }
473 chr = (*chr).next;
474 }
475
476 if !text.is_empty() {
477 words.push(BoundedText {
478 text: text.clone(),
479 rect: rect.into(),
480 location: TextLocation::Static(self.index, offset),
481 });
482 text.clear();
483 rect = FzRect::default();
484 offset += 1;
485 }
486 }
487
488 line = (*line).next;
489 }
490 }
491
492 block = (*block).next;
493 }
494
495 fz_drop_stext_page(self.ctx.0, tp);
496 Some(words)
497 }
498 }
499
500 pub fn links(&self) -> Option<Vec<BoundedText>> {
501 unsafe {
502 let links = mp_load_links(self.ctx.0, self.page);
503
504 if links.is_null() {
505 return None;
506 }
507
508 let mut link = links;
509 let mut result = Vec::new();
510 let mut offset = 0;
511
512 while !link.is_null() {
513 let text = CStr::from_ptr((*link).uri).to_string_lossy().into_owned();
514 let rect = (*link).rect.into();
515 result.push(BoundedText {
516 text,
517 rect,
518 location: TextLocation::Static(self.index, offset),
519 });
520 link = (*link).next;
521 offset += 1;
522 }
523
524 fz_drop_link(self.ctx.0, links);
525
526 Some(result)
527 }
528 }
529
530 pub fn pixmap(&self, scale: f32, color_samples: usize) -> Option<Pixmap> {
531 unsafe {
532 let mat = fz_scale(scale as libc::c_float, scale as libc::c_float);
533 let color_space = if color_samples == 1 {
534 fz_device_gray(self.ctx.0)
535 } else {
536 fz_device_rgb(self.ctx.0)
537 };
538 let pixmap = mp_new_pixmap_from_page(self.ctx.0, self.page, mat, color_space, 0);
539 if pixmap.is_null() {
540 return None;
541 }
542
543 let width = (*pixmap).w as u32;
544 let height = (*pixmap).h as u32;
545 let len = color_samples * (width * height) as usize;
546 let pixmap_data = slice::from_raw_parts((*pixmap).samples, len);
547 let mut data = Vec::new();
548 if data.try_reserve(len).is_err() {
549 fz_drop_pixmap(self.ctx.0, pixmap);
550 return None;
551 }
552 data.extend(pixmap_data);
553
554 fz_drop_pixmap(self.ctx.0, pixmap);
555
556 Some(Pixmap {
557 width,
558 height,
559 samples: color_samples,
560 data,
561 })
562 }
563 }
564
565 pub fn boundary_box(&self) -> Option<Boundary> {
566 unsafe {
567 let mut rect = FzRect::default();
568 let dev = fz_new_bbox_device(self.ctx.0, &mut rect);
569 if dev.is_null() {
570 None
571 } else {
572 fz_run_page(self.ctx.0, self.page, dev, fz_identity, ptr::null_mut());
573 fz_close_device(self.ctx.0, dev);
574 fz_drop_device(self.ctx.0, dev);
575 Some(rect.into())
576 }
577 }
578 }
579
580 pub fn dims(&self) -> (f32, f32) {
581 unsafe {
582 let bounds = fz_bound_page(self.ctx.0, self.page);
583 (
584 (bounds.x1 - bounds.x0) as f32,
585 (bounds.y1 - bounds.y0) as f32,
586 )
587 }
588 }
589
590 pub fn width(&self) -> f32 {
591 let (width, _) = self.dims();
592 width
593 }
594
595 pub fn height(&self) -> f32 {
596 let (_, height) = self.dims();
597 height
598 }
599}
600
601impl Drop for PdfContext {
602 fn drop(&mut self) {
603 unsafe {
604 fz_drop_context(self.0);
605 }
606 }
607}
608
609impl Drop for PdfDocument {
610 fn drop(&mut self) {
611 unsafe {
612 fz_drop_document(self.ctx.0, self.doc);
613 }
614 }
615}
616
617impl<'a> Drop for PdfPage<'a> {
618 fn drop(&mut self) {
619 unsafe {
620 fz_drop_page(self.ctx.0, self.page);
621 }
622 }
623}