Skip to main content

cadmus_core/library/
importer.rs

1use crate::document::file_kind;
2use crate::fl;
3use crate::helpers::{Fingerprint, Fp, IsHidden};
4use crate::library::db::Db as LibraryDb;
5use crate::metadata::{extract_metadata_from_document, FileInfo, Info};
6use crate::settings::ImportSettings;
7use crate::task::ShutdownSignal;
8use crate::view::{Event, NotificationEvent, ViewId};
9use fxhash::FxHashMap;
10use std::path::{Path, PathBuf};
11use std::sync::mpsc::Sender;
12use tracing::{debug, error, info};
13use walkdir::{DirEntry, WalkDir};
14
15enum PendingRelocation {
16    FingerprintChanged {
17        new_fp: Fp,
18        old_fp: Fp,
19        file_size: u64,
20    },
21}
22
23impl PendingRelocation {
24    fn old_fp(&self) -> Fp {
25        match self {
26            PendingRelocation::FingerprintChanged { old_fp, .. } => *old_fp,
27        }
28    }
29}
30
31struct ScanContext<'a> {
32    hub: &'a Sender<Event>,
33    notif_id: ViewId,
34    shutdown: &'a ShutdownSignal,
35}
36
37struct ScanResult {
38    books_to_insert: Vec<(Fp, Info)>,
39    path_updates: Vec<(Fp, PathBuf, PathBuf)>,
40    books_to_delete: Vec<Fp>,
41    pending_relocations: Vec<PendingRelocation>,
42    thumbnails_to_delete: Vec<Fp>,
43}
44
/// Directory names that the file walk skips when they sit directly under the
/// library root. Only compiled into emulator builds.
#[cfg(feature = "emulator")]
const IGNORED_TOP_LEVEL_DIRS: &[&str] = &["target", "node_modules", "thirdparty"];
47
48#[cfg_attr(feature = "tracing", tracing::instrument(skip(home)))]
49fn walk_files(home: &Path) -> Vec<DirEntry> {
50    WalkDir::new(home)
51        .min_depth(1)
52        .into_iter()
53        .filter_entry(|e| {
54            if e.is_hidden() {
55                return false;
56            }
57            #[cfg(feature = "emulator")]
58            if e.depth() == 1 && e.file_type().is_dir() {
59                if let Some(name) = e.file_name().to_str() {
60                    if IGNORED_TOP_LEVEL_DIRS.contains(&name) {
61                        return false;
62                    }
63                }
64            }
65            true
66        })
67        .filter_map(|e| e.ok())
68        .filter(|e| !e.file_type().is_dir())
69        .collect()
70}
71
72#[cfg_attr(
73    feature = "tracing",
74    tracing::instrument(
75        skip(home, settings, ctx, handles_by_fp, handles_by_path),
76        fields(total)
77    )
78)]
79fn scan_entries(
80    home: &Path,
81    entries: &[DirEntry],
82    settings: &ImportSettings,
83    ctx: &ScanContext<'_>,
84    handles_by_fp: &mut FxHashMap<Fp, PathBuf>,
85    handles_by_path: &mut FxHashMap<PathBuf, Fp>,
86) -> Option<ScanResult> {
87    let total = entries.len();
88    tracing::Span::current().record("total", total);
89
90    let mut books_to_insert: Vec<(Fp, Info)> = Vec::new();
91    let mut path_updates: Vec<(Fp, PathBuf, PathBuf)> = Vec::new();
92    let mut books_to_delete: Vec<Fp> = Vec::new();
93    let mut pending_relocations: Vec<PendingRelocation> = Vec::new();
94    let mut thumbnails_to_delete: Vec<Fp> = Vec::new();
95
96    for (idx, entry) in entries.iter().enumerate() {
97        if ctx.shutdown.should_stop() {
98            tracing::info!("import scan interrupted by shutdown");
99            return None;
100        }
101
102        let path = entry.path();
103        let relat = path.strip_prefix(home).unwrap_or(path);
104
105        let kind = file_kind(path);
106        let is_known_to_db = handles_by_path.contains_key(relat);
107        let allowed_kind = kind.filter(|k| settings.is_kind_allowed(*k));
108
109        if !is_known_to_db && allowed_kind.is_none() {
110            send_progress(ctx.hub, ctx.notif_id, idx, total);
111            continue;
112        }
113
114        let fp = match path.fingerprint() {
115            Ok(fp) => fp,
116            Err(e) => {
117                error!(path = ?path, error = %e, "failed to compute fingerprint, skipping");
118                send_progress(ctx.hub, ctx.notif_id, idx, total);
119                continue;
120            }
121        };
122
123        if handles_by_fp.contains_key(&fp) {
124            if relat != handles_by_fp[&fp] {
125                debug!(
126                    fp = %fp,
127                    old_path = %handles_by_fp[&fp].display(),
128                    new_path = %relat.display(),
129                    "updated book path"
130                );
131                let old_path = handles_by_fp.remove(&fp).unwrap();
132                handles_by_path.remove(&old_path);
133                handles_by_fp.insert(fp, relat.to_path_buf());
134                handles_by_path.insert(relat.to_path_buf(), fp);
135                path_updates.push((fp, relat.to_path_buf(), path.to_path_buf()));
136            }
137            send_progress(ctx.hub, ctx.notif_id, idx, total);
138            continue;
139        }
140
141        if let Some(old_fp) = handles_by_path.get(relat).cloned() {
142            debug!(
143                path = %relat.display(),
144                old_fp = %old_fp,
145                new_fp = %fp,
146                "updated book fingerprint"
147            );
148
149            handles_by_fp.remove(&old_fp);
150            handles_by_path.remove(relat);
151            handles_by_fp.insert(fp, relat.to_path_buf());
152            handles_by_path.insert(relat.to_path_buf(), fp);
153            books_to_delete.push(old_fp);
154
155            pending_relocations.push(PendingRelocation::FingerprintChanged {
156                new_fp: fp,
157                old_fp,
158                file_size: entry.metadata().map(|m| m.len()).unwrap_or(0),
159            });
160
161            thumbnails_to_delete.push(old_fp);
162            send_progress(ctx.hub, ctx.notif_id, idx, total);
163            continue;
164        }
165
166        if let Some(kind) = allowed_kind {
167            info!(fp = %fp, path = %relat.display(), "added new entry");
168            let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
169            let mut info = Info {
170                file: FileInfo {
171                    path: relat.to_path_buf(),
172                    absolute_path: path.to_path_buf(),
173                    kind: kind.as_str().to_owned(),
174                    size,
175                },
176                ..Default::default()
177            };
178            if settings.metadata_kinds.contains(&info.file.kind) {
179                extract_metadata_from_document(home, &mut info);
180            }
181            handles_by_fp.insert(fp, relat.to_path_buf());
182            handles_by_path.insert(relat.to_path_buf(), fp);
183            books_to_insert.push((fp, info));
184        }
185
186        send_progress(ctx.hub, ctx.notif_id, idx, total);
187    }
188
189    Some(ScanResult {
190        books_to_insert,
191        path_updates,
192        books_to_delete,
193        pending_relocations,
194        thumbnails_to_delete,
195    })
196}
197
198fn send_progress(hub: &Sender<Event>, notif_id: ViewId, idx: usize, total: usize) {
199    let Some(percent) = ((idx + 1) * 100).checked_div(total) else {
200        return;
201    };
202    let percent = percent as u8;
203    debug!(percent, "import progress");
204    hub.send(Event::Notification(NotificationEvent::UpdateProgress(
205        notif_id, percent,
206    )))
207    .ok();
208}
209
210#[cfg_attr(
211    feature = "tracing",
212    tracing::instrument(skip(db, home, settings, pending_relocations, books_to_insert))
213)]
214fn resolve_relocations(
215    db: &LibraryDb,
216    library_id: i64,
217    home: &Path,
218    settings: &ImportSettings,
219    pending_relocations: Vec<PendingRelocation>,
220    books_to_insert: &mut Vec<(Fp, Info)>,
221) {
222    let old_fps: Vec<Fp> = pending_relocations
223        .iter()
224        .map(PendingRelocation::old_fp)
225        .collect();
226
227    let mut fetched = db
228        .batch_get_books_by_fingerprints(library_id, &old_fps)
229        .unwrap_or_default();
230
231    for relocation in pending_relocations {
232        match relocation {
233            PendingRelocation::FingerprintChanged {
234                new_fp,
235                old_fp,
236                file_size,
237            } => {
238                if let Some(mut info) = fetched.remove(&old_fp) {
239                    if settings.sync_metadata && settings.metadata_kinds.contains(&info.file.kind) {
240                        extract_metadata_from_document(home, &mut info);
241                    }
242                    info.file.size = file_size;
243                    books_to_insert.push((new_fp, info));
244                }
245            }
246        }
247    }
248}
249
250#[cfg_attr(feature = "tracing", tracing::instrument(skip(handles_by_fp, home)))]
251fn find_deleted_books(handles_by_fp: &FxHashMap<Fp, PathBuf>, home: &Path) -> Vec<Fp> {
252    handles_by_fp
253        .iter()
254        .filter(|(_, relat)| relat.as_os_str().is_empty() || !home.join(relat).exists())
255        .map(|(fp, relat)| {
256            info!(fp = %fp, path = %relat.display(), "removing deleted entry");
257            *fp
258        })
259        .collect()
260}
261
262#[cfg_attr(
263    feature = "tracing",
264    tracing::instrument(skip(
265        db,
266        books_to_insert,
267        path_updates,
268        books_to_delete,
269        thumbnails_to_delete
270    ))
271)]
272fn flush_to_db(
273    db: &LibraryDb,
274    library_id: i64,
275    books_to_insert: Vec<(Fp, Info)>,
276    path_updates: Vec<(Fp, PathBuf, PathBuf)>,
277    books_to_delete: Vec<Fp>,
278    thumbnails_to_delete: Vec<Fp>,
279) {
280    if let Err(e) = db.batch_delete_thumbnails(&thumbnails_to_delete) {
281        error!(
282            error = %e,
283            count = thumbnails_to_delete.len(),
284            "batch delete thumbnails failed"
285        );
286    }
287
288    if !books_to_insert.is_empty() {
289        let book_refs: Vec<(Fp, &Info)> = books_to_insert
290            .iter()
291            .map(|(fp, info)| (*fp, info))
292            .collect();
293        if let Err(e) = db.batch_insert_books(library_id, &book_refs) {
294            error!(error = %e, count = book_refs.len(), "batch insert failed");
295        }
296    }
297
298    if let Err(e) = db.batch_update_book_paths(library_id, &path_updates) {
299        error!(
300            error = %e,
301            count = path_updates.len(),
302            "batch update book paths failed"
303        );
304    }
305
306    if !books_to_delete.is_empty() {
307        if let Err(e) = db.batch_delete_books(library_id, &books_to_delete) {
308            error!(error = %e, count = books_to_delete.len(), "batch delete failed");
309        }
310    }
311
312    if let Err(e) = db.compute_sort_keys(library_id) {
313        error!(error = %e, library_id, "failed to compute sort keys");
314    }
315}
316
317/// Runs a full directory scan and syncs the database for one library.
318///
319/// Sends pinned progress notifications to `hub` via `notif_id` while running.
320/// Checks `shutdown` between entries and exits early if shutdown is requested.
321/// On completion or early exit, closes the notification and returns.
322#[cfg_attr(
323    feature = "tracing",
324    tracing::instrument(skip(db, settings, hub, notif_id, shutdown))
325)]
326pub fn run(
327    db: &LibraryDb,
328    library_id: i64,
329    home: &Path,
330    settings: &ImportSettings,
331    hub: &Sender<Event>,
332    notif_id: ViewId,
333    shutdown: &ShutdownSignal,
334) {
335    hub.send(Event::Notification(NotificationEvent::ShowPinned(
336        notif_id,
337        fl!("importer-importing-library"),
338    )))
339    .ok();
340
341    let handles = match db.list_book_handles(library_id) {
342        Ok(h) => h,
343        Err(e) => {
344            error!(error = %e, "failed to load book handles for import");
345            hub.send(Event::Close(notif_id)).ok();
346            return;
347        }
348    };
349
350    let mut handles_by_fp: FxHashMap<Fp, PathBuf> = handles.iter().cloned().collect();
351    let mut handles_by_path: FxHashMap<PathBuf, Fp> =
352        handles.into_iter().map(|(fp, p)| (p, fp)).collect();
353
354    let purged_fps = db
355        .delete_books_with_disallowed_kinds(library_id, &settings.allowed_kinds)
356        .unwrap_or_else(|e| {
357            error!(error = %e, "failed to purge disallowed books");
358            Vec::new()
359        });
360
361    for fp in &purged_fps {
362        if let Some(path) = handles_by_fp.remove(fp) {
363            handles_by_path.remove(&path);
364        }
365    }
366
367    if !purged_fps.is_empty() {
368        if let Err(e) = db.batch_delete_thumbnails(&purged_fps) {
369            error!(error = %e, count = purged_fps.len(), "failed to delete thumbnails for purged books");
370        }
371    }
372
373    let entries = walk_files(home);
374
375    let ctx = ScanContext {
376        hub,
377        notif_id,
378        shutdown,
379    };
380
381    let Some(mut result) = scan_entries(
382        home,
383        &entries,
384        settings,
385        &ctx,
386        &mut handles_by_fp,
387        &mut handles_by_path,
388    ) else {
389        hub.send(Event::Close(notif_id)).ok();
390        return;
391    };
392
393    let mut deleted = find_deleted_books(&handles_by_fp, home);
394    result.books_to_delete.append(&mut deleted);
395
396    if !result.pending_relocations.is_empty() {
397        resolve_relocations(
398            db,
399            library_id,
400            home,
401            settings,
402            result.pending_relocations,
403            &mut result.books_to_insert,
404        );
405    }
406
407    flush_to_db(
408        db,
409        library_id,
410        result.books_to_insert,
411        result.path_updates,
412        result.books_to_delete,
413        result.thumbnails_to_delete,
414    );
415
416    hub.send(Event::Close(notif_id)).ok();
417}
418
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::Database;
    use crate::library::Library;
    use crate::metadata::{FileInfo, Info};
    use crate::settings::{FileExtension, ImportSettings};
    use crate::task::ShutdownSignal;
    use crate::view::ViewId;
    use fxhash::FxHashSet;
    use std::sync::mpsc;

    /// Creates an in-memory database with all migrations applied.
    fn create_migrated_db() -> Database {
        let db = Database::new(":memory:").expect("in-memory db");
        db.migrate().expect("migrations");
        db
    }

    /// Runs one import against an existing library and returns every event it sent.
    ///
    /// Shared by all tests so the channel setup and teardown live in one place.
    fn run_collecting_events(
        library_db: &LibraryDb,
        library_id: i64,
        dir: &Path,
        settings: &ImportSettings,
        shutdown: &ShutdownSignal,
    ) -> Vec<Event> {
        let (tx, rx) = mpsc::channel();
        run(
            library_db,
            library_id,
            dir,
            settings,
            &tx,
            ViewId::MessageNotif(0),
            shutdown,
        );
        drop(tx);
        rx.try_iter().collect()
    }

    /// Creates a library over `dir` and imports it with default settings.
    fn run_import(dir: &Path, db: &Database, shutdown: &ShutdownSignal) -> Vec<Event> {
        let lib = Library::new(dir, db, "test").expect("failed to create library");
        run_collecting_events(
            &lib.db,
            lib.library_id,
            dir,
            &ImportSettings::default(),
            shutdown,
        )
    }

    /// Default import settings restricted to EPUB files only.
    fn epub_only_settings() -> ImportSettings {
        let mut allowed: FxHashSet<FileExtension> = FxHashSet::default();
        allowed.insert(FileExtension::Epub);
        ImportSettings {
            allowed_kinds: allowed,
            ..ImportSettings::default()
        }
    }

    #[test]
    fn imports_files_when_not_shutdown() {
        let dir = tempfile::tempdir().expect("tempdir");
        let db = create_migrated_db();
        std::fs::write(dir.path().join("book.epub"), b"epub content").expect("write");

        let shutdown = ShutdownSignal::never();
        let events = run_import(dir.path(), &db, &shutdown);

        assert!(
            events.iter().any(|e| matches!(e, Event::Close(_))),
            "expected Close event on normal completion"
        );
        assert!(
            !events.iter().any(|e| matches!(
                e,
                Event::Notification(crate::view::NotificationEvent::UpdateProgress(_, 0))
            )),
            "progress should advance past 0"
        );
        // The negative check above also passes when no progress is reported at
        // all, so additionally require that the scan reported completion.
        assert!(
            events.iter().any(|e| matches!(
                e,
                Event::Notification(crate::view::NotificationEvent::UpdateProgress(_, 100))
            )),
            "a completed scan should report 100% progress"
        );
    }

    #[test]
    fn stops_early_when_shutdown_requested() {
        let dir = tempfile::tempdir().expect("tempdir");
        let db = create_migrated_db();

        for i in 0..20 {
            std::fs::write(dir.path().join(format!("book{i}.epub")), b"epub content")
                .expect("write");
        }

        let (shutdown_tx, shutdown_rx) = mpsc::channel();
        let shutdown = ShutdownSignal::new_for_test(shutdown_rx);

        // Signal shutdown before the import starts so scan_entries exits immediately.
        shutdown_tx.send(()).expect("send shutdown");

        let lib = Library::new(dir.path(), &db, "test").expect("library");
        let events = run_collecting_events(
            &lib.db,
            lib.library_id,
            dir.path(),
            &ImportSettings::default(),
            &shutdown,
        );

        assert!(
            events.iter().any(|e| matches!(e, Event::Close(_))),
            "notif must be closed even on early exit"
        );

        let progress_events = events
            .iter()
            .filter(|e| {
                matches!(
                    e,
                    Event::Notification(crate::view::NotificationEvent::UpdateProgress(_, _))
                )
            })
            .count();
        assert!(
            progress_events < 20,
            "shutdown should have cut the scan short (got {} progress events)",
            progress_events
        );
    }

    #[test]
    fn finds_deleted_books_when_file_path_is_empty() {
        let dir = tempfile::tempdir().expect("tempdir");
        let db = create_migrated_db();
        let lib = Library::new(dir.path(), &db, "test").expect("library");
        let fp = Fp::from_u64(1);
        let info = Info {
            title: "test".to_string(),
            file: FileInfo {
                path: PathBuf::new(),
                absolute_path: dir.path().join("missing.epub"),
                kind: "epub".to_string(),
                size: 1,
            },
            ..Default::default()
        };

        lib.db
            .batch_insert_books(lib.library_id, &[(fp, &info)])
            .expect("insert library book");

        let handles = lib.db.list_book_handles(lib.library_id).expect("handles");
        let handles_by_fp: FxHashMap<Fp, PathBuf> = handles.into_iter().collect();

        assert_eq!(find_deleted_books(&handles_by_fp, dir.path()), vec![fp]);
    }

    #[test]
    fn skips_fingerprinting_disallowed_new_files() {
        let dir = tempfile::tempdir().expect("tempdir");
        let db = create_migrated_db();

        std::fs::write(dir.path().join("book.epub"), b"epub content").expect("write epub");
        std::fs::write(dir.path().join("ignore.xyz"), b"unsupported content").expect("write xyz");

        let settings = epub_only_settings();
        let lib = Library::new(dir.path(), &db, "test").expect("library");
        let shutdown = ShutdownSignal::never();

        run_collecting_events(&lib.db, lib.library_id, dir.path(), &settings, &shutdown);

        let handles = lib.db.list_book_handles(lib.library_id).expect("handles");
        let paths: Vec<_> = handles.iter().map(|(_, p)| p.clone()).collect();

        assert!(
            paths.iter().any(|p| p.ends_with("book.epub")),
            "epub should be imported"
        );
        assert!(
            !paths.iter().any(|p| p.ends_with("ignore.xyz")),
            "unsupported kind should not be imported"
        );
    }

    #[test]
    fn purges_disallowed_books_on_import() {
        let dir = tempfile::tempdir().expect("tempdir");
        let db = create_migrated_db();

        std::fs::write(dir.path().join("book.epub"), b"epub content").expect("write epub");
        std::fs::write(dir.path().join("doc.pdf"), b"pdf content").expect("write pdf");

        let lib = Library::new(dir.path(), &db, "test").expect("library");
        let shutdown = ShutdownSignal::never();

        // First pass with default settings imports both files.
        run_collecting_events(
            &lib.db,
            lib.library_id,
            dir.path(),
            &ImportSettings::default(),
            &shutdown,
        );

        let handles = lib.db.list_book_handles(lib.library_id).expect("handles");
        assert_eq!(handles.len(), 2, "both files should be imported initially");

        // Second pass with EPUB-only settings must purge the PDF record.
        let settings = epub_only_settings();
        run_collecting_events(&lib.db, lib.library_id, dir.path(), &settings, &shutdown);

        let handles = lib
            .db
            .list_book_handles(lib.library_id)
            .expect("handles after purge");
        let paths: Vec<_> = handles.iter().map(|(_, p)| p.clone()).collect();

        assert_eq!(handles.len(), 1, "only epub should remain after purge");
        assert!(
            paths.iter().any(|p| p.ends_with("book.epub")),
            "epub should still be present"
        );
    }
}
667}