Skip to main content

cadmus_core/dictionary/monolingual/
service.rs

1//! High-level service for monolingual dictionary management.
2//!
3//! [`MonolingualDictionaryService`] is the single public entry point for all
4//! monolingual dictionary operations: querying the remote catalogue, listing
5//! installed dictionaries, and installing a new one.
6
7use super::client::MonolingualClient;
8use super::db::Db;
9use super::errors::MonolingualError;
10use super::metadata::{download_url, download_url_no_etym, DictionariesResponse, DictionaryEntry};
11use crate::db::Database;
12use std::collections::HashSet;
13use std::fs;
14use std::io::{self};
15use std::path::{Path, PathBuf};
16use std::sync::{Arc, Mutex};
17use zip::ZipArchive;
18
/// Name of the directory, directly below the dictionaries root, that holds
/// every reader-dict download (one sub-directory per language).
const READER_DICT_SUBDIR: &str = "reader-dict";
21
22/// Provides monolingual dictionary management: querying available dictionaries,
23/// listing installed ones, and downloading + extracting new ones.
24///
25/// All network metadata is cached in the application SQLite database.
26/// Downloaded dictionaries are extracted to
27/// `<dict_dir>/reader-dict/<lang>/`.
28///
29/// The service is cheaply cloneable (`Arc`-backed). All clones share the same
30/// `pending_installs` set, so concurrent-download guards work correctly across
31/// the UI thread (which holds the original) and background threads (which hold
32/// clones).
33#[derive(Clone, Debug)]
34pub struct MonolingualDictionaryService {
35    client: MonolingualClient,
36    db: Db,
37    dict_dir: PathBuf,
38    pending_installs: Arc<Mutex<HashSet<String>>>,
39}
40
41impl MonolingualDictionaryService {
42    /// Creates a new service.
43    ///
44    /// # Arguments
45    ///
46    /// * `database` - Application SQLite database used for metadata caching.
47    /// * `dict_dir` - Root directory where dictionaries are stored. Downloads
48    ///   are placed in `<dict_dir>/reader-dict/<lang>/`.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error if the HTTP client cannot be built.
53    #[cfg_attr(feature = "tracing", tracing::instrument(skip(database), fields(dict_dir = %dict_dir.display())))]
54    pub fn new(database: &Database, dict_dir: &Path) -> Result<Self, MonolingualError> {
55        let client = MonolingualClient::new()?;
56        let db = Db::new(database);
57        Ok(Self {
58            client,
59            db,
60            dict_dir: dict_dir.to_path_buf(),
61            pending_installs: Arc::new(Mutex::new(HashSet::new())),
62        })
63    }
64
65    /// Returns all dictionaries available for download from the remote API.
66    ///
67    /// Metadata is served from the SQLite cache when available; otherwise a
68    /// network request is made and the result is cached.
69    ///
70    /// # Errors
71    ///
72    /// Returns an error if the metadata cannot be loaded from cache or network.
73    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
74    pub fn get_available_dictionaries(
75        &self,
76    ) -> Result<Vec<(String, DictionaryEntry)>, MonolingualError> {
77        let metadata = self.load_metadata()?;
78
79        let monolingual = metadata
80            .into_iter()
81            .filter_map(|(lang, mut targets)| targets.remove(&lang).map(|entry| (lang, entry)))
82            .collect();
83
84        Ok(monolingual)
85    }
86
87    /// Returns the cached metadata entry for a single language.
88    ///
89    /// This does not make any network requests. Returns `None` if no entry for
90    /// `lang` has been cached yet.
91    ///
92    /// # Errors
93    ///
94    /// Returns an error if the database read fails.
95    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(lang = %lang)))]
96    pub fn get_entry_for_lang(
97        &self,
98        lang: &str,
99    ) -> Result<Option<DictionaryEntry>, MonolingualError> {
100        Ok(self.db.get_entry(lang)?)
101    }
102
103    /// Returns the language codes of all locally installed dictionaries.
104    ///
105    /// A dictionary is considered installed when its language directory exists
106    /// inside `<dict_dir>/reader-dict/` and contains at least one `.index`
107    /// file paired with a `.dict` or `.dict.dz` file.
108    ///
109    /// # Errors
110    ///
111    /// Returns an error if the directory cannot be read.
112    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
113    pub fn get_installed_dictionaries(&self) -> Result<Vec<String>, MonolingualError> {
114        let root = self.reader_dict_dir();
115
116        if !root.exists() {
117            return Ok(Vec::new());
118        }
119
120        let mut installed = Vec::new();
121
122        for entry in fs::read_dir(&root)? {
123            let entry = entry?;
124            let path = entry.path();
125
126            if !path.is_dir() {
127                continue;
128            }
129
130            if has_dict_pair(&path) {
131                if let Some(lang) = path.file_name().and_then(|n| n.to_str()) {
132                    installed.push(lang.to_string());
133                }
134            }
135        }
136
137        Ok(installed)
138    }
139
140    /// Returns `true` if a download is already in progress for `lang`.
141    ///
142    /// This can be used by callers to suppress duplicate install requests before
143    /// spawning a background thread.
144    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), ret(level=tracing::Level::TRACE)))]
145    pub fn is_installing(&self, lang: &str) -> bool {
146        #[cfg(feature = "tracing")]
147        let _span = tracing::info_span!("lock").entered();
148        self.pending_installs.lock().unwrap().contains(lang)
149    }
150
151    /// Downloads and installs a dictionary for the given language.
152    ///
153    /// The archive is downloaded to a temporary file, then extracted into
154    /// `<dict_dir>/reader-dict/<lang>/` and the files are renamed to
155    /// `Reader-Dict-<lang>.index` and `Reader-Dict-<lang>.dict[.dz]`. Any
156    /// existing files in that directory are overwritten.
157    ///
158    /// Returns [`MonolingualError::InstallationInProgress`] immediately if a
159    /// download for the same language is already running. The caller should
160    /// check [`Self::is_installing`] on the UI thread before spawning a thread
161    /// to get a user-visible early exit; this check inside `install_dictionary`
162    /// provides a safety net against races.
163    ///
164    /// # Arguments
165    ///
166    /// * `entry` - Metadata entry for the dictionary to install. The language
167    ///   code and version are derived from this entry.
168    /// * `include_etymologies` - When `true`, the full archive (with
169    ///   etymologies) is downloaded; when `false`, the smaller no-etymology
170    ///   variant is used.
171    /// * `progress_callback` - Called after each downloaded chunk with
172    ///   `(bytes_downloaded_so_far, total_bytes)`.
173    ///
174    /// # Errors
175    ///
176    /// Returns an error if a download for the language is already in progress,
177    /// if the download fails, if the archive cannot be parsed, or if files
178    /// cannot be written to disk.
179    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, entry, progress_callback), fields(lang = %lang, include_etymologies)))]
180    pub fn install_dictionary<F>(
181        &self,
182        lang: &str,
183        entry: &DictionaryEntry,
184        include_etymologies: bool,
185        progress_callback: &mut F,
186    ) -> Result<(), MonolingualError>
187    where
188        F: FnMut(u64, u64),
189    {
190        {
191            #[cfg(feature = "tracing")]
192            let _span = tracing::info_span!("lock").entered();
193
194            let mut pending = self.pending_installs.lock().unwrap();
195            if pending.contains(lang) {
196                return Err(MonolingualError::InstallationInProgress(lang.to_string()));
197            }
198            pending.insert(lang.to_string());
199        }
200
201        let result = self.do_install(lang, entry, include_etymologies, progress_callback);
202
203        {
204            #[cfg(feature = "tracing")]
205            let _span = tracing::info_span!("lock").entered();
206            self.pending_installs.lock().unwrap().remove(lang);
207        }
208
209        result
210    }
211
212    #[cfg_attr(
213        feature = "tracing",
214        tracing::instrument(skip(self, entry, progress_callback))
215    )]
216    fn do_install<F>(
217        &self,
218        lang: &str,
219        entry: &DictionaryEntry,
220        include_etymologies: bool,
221        progress_callback: &mut F,
222    ) -> Result<(), MonolingualError>
223    where
224        F: FnMut(u64, u64),
225    {
226        let url = if include_etymologies {
227            download_url(lang)
228        } else {
229            download_url_no_etym(lang)
230        };
231
232        tracing::info!(lang, url = %url, "Downloading dictionary");
233
234        let dest = self.lang_dir(lang);
235        fs::create_dir_all(&dest)?;
236
237        let temp_path = dest.join(".download.tmp");
238
239        self.client.download(&url, &temp_path, progress_callback)?;
240
241        tracing::debug!(lang, dest = %dest.display(), "Extracting dictionary archive");
242
243        let file = fs::File::open(&temp_path)?;
244        extract_zip_renamed(file, &dest, lang)?;
245
246        fs::remove_file(&temp_path)?;
247
248        if let Err(e) = self.db.record_install(lang, entry.updated.into()) {
249            tracing::warn!(lang, error = %e, "Failed to record dictionary install");
250        }
251
252        tracing::info!(lang, dest = %dest.display(), "Dictionary installed");
253
254        Ok(())
255    }
256
257    /// Removes the installed dictionary record for `lang` from the database.
258    ///
259    /// Logs a warning on failure rather than propagating the error, as this is
260    /// a best-effort cleanup step called from event handlers.
261    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
262    pub fn remove_installed(&self, lang: &str) {
263        if let Err(e) = self.db.remove_installed(lang) {
264            tracing::warn!(lang, error = %e, "Failed to remove installed dictionary record");
265        }
266    }
267
268    /// Returns `true` if a newer version of the dictionary for `lang` is
269    /// available on the server than the currently installed version.
270    ///
271    /// Returns `false` on any error to avoid surfacing spurious update badges.
272    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
273    pub fn is_update_available(&self, lang: &str) -> bool {
274        self.db.is_update_available(lang).unwrap_or(false)
275    }
276
277    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
278    fn load_metadata(&self) -> Result<DictionariesResponse, MonolingualError> {
279        if let Some(cached_at) = self.db.get_most_recent_cached_at()? {
280            match self.client.is_metadata_modified_since(cached_at) {
281                Ok(false) => {
282                    tracing::debug!("Cache is fresh (304), using cached metadata");
283                    if let Some(cached) = self.get_cached_metadata()? {
284                        return Ok(cached);
285                    }
286                }
287                Ok(true) => {
288                    tracing::debug!("API has newer data (200), refreshing cache");
289                }
290                Err(e) => {
291                    tracing::warn!(error = %e, "HEAD check failed, falling back to cache");
292                    if let Some(cached) = self.get_cached_metadata()? {
293                        return Ok(cached);
294                    }
295                }
296            }
297        }
298
299        self.fetch_and_cache_metadata().or_else(|_| {
300            self.get_cached_metadata()?
301                .ok_or_else(|| MonolingualError::NotFound("metadata unavailable".to_string()))
302        })
303    }
304
305    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
306    fn fetch_and_cache_metadata(&self) -> Result<DictionariesResponse, MonolingualError> {
307        let metadata = self.client.fetch_metadata()?;
308
309        for (source_lang, targets) in &metadata {
310            if let Some(entry) = targets.get(source_lang.as_str()) {
311                self.db.upsert_entry(source_lang, entry)?;
312            }
313        }
314
315        tracing::debug!("Cached monolingual metadata to database");
316        Ok(metadata)
317    }
318
319    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
320    fn get_cached_metadata(&self) -> Result<Option<DictionariesResponse>, MonolingualError> {
321        let entries = self.db.get_all_entries()?;
322
323        if entries.is_empty() {
324            tracing::debug!("No cached metadata found in database");
325            return Ok(None);
326        }
327
328        let mut response = DictionariesResponse::new();
329        for (lang, entry) in entries {
330            response
331                .entry(lang.clone())
332                .or_default()
333                .insert(lang, entry);
334        }
335
336        tracing::debug!("Loaded cached metadata from database");
337        Ok(Some(response))
338    }
339
340    fn reader_dict_dir(&self) -> PathBuf {
341        self.dict_dir.join(READER_DICT_SUBDIR)
342    }
343
344    fn lang_dir(&self, lang: &str) -> PathBuf {
345        self.reader_dict_dir().join(lang)
346    }
347}
348
/// Reports whether `dir` holds a usable dictionary: some `<stem>.index` file
/// accompanied by `<stem>.dict` or `<stem>.dict.dz` in the same directory.
///
/// Returns `false` when `dir` cannot be listed. Entries that fail to read or
/// whose names are not valid UTF-8 are ignored.
fn has_dict_pair(dir: &Path) -> bool {
    let Ok(listing) = fs::read_dir(dir) else {
        return false;
    };

    listing
        .flatten()
        .filter_map(|item| item.file_name().into_string().ok())
        .filter_map(|file_name| file_name.strip_suffix(".index").map(str::to_owned))
        .any(|stem| {
            // Plain `.dict` is probed first, then the compressed variant.
            ["dict", "dict.dz"]
                .iter()
                .any(|ext| dir.join(format!("{stem}.{ext}")).exists())
        })
}
378
379/// Extracts all entries from a ZIP archive into `dest`, renaming each
380/// file to `Reader-Dict-<lang><ext>` where `<ext>` is `.index`, `.dict`,
381/// or `.dict.dz`.
382///
383/// Files with unrecognised extensions are skipped. Directories inside the ZIP
384/// are ignored because all output files land flat in `dest`.
385#[cfg_attr(feature = "tracing", tracing::instrument(skip(reader)))]
386fn extract_zip_renamed<R: std::io::Read + std::io::Seek>(
387    reader: R,
388    dest: &Path,
389    lang: &str,
390) -> Result<(), MonolingualError> {
391    let mut archive = ZipArchive::new(reader)
392        .map_err(|e| MonolingualError::Extraction(format!("failed to open zip archive: {e}")))?;
393
394    for i in 0..archive.len() {
395        let mut file = archive.by_index(i).map_err(|e| {
396            MonolingualError::Extraction(format!("failed to read zip entry {i}: {e}"))
397        })?;
398
399        if file.is_dir() {
400            continue;
401        }
402
403        let original_name = match file.enclosed_name() {
404            Some(p) => p
405                .file_name()
406                .and_then(|n| n.to_str())
407                .unwrap_or("")
408                .to_string(),
409            None => {
410                tracing::warn!(index = i, "Skipping zip entry with unsafe path");
411                continue;
412            }
413        };
414
415        let target_name = dict_file_target_name(&original_name, lang);
416        let Some(target_name) = target_name else {
417            tracing::debug!(
418                original_name,
419                "Skipping zip entry with unrecognised extension"
420            );
421            continue;
422        };
423
424        let out_path = dest.join(&target_name);
425        let mut out_file = fs::File::create(&out_path)?;
426        io::copy(&mut file, &mut out_file)?;
427        tracing::debug!(path = %out_path.display(), "Extracted file");
428    }
429
430    Ok(())
431}
432
/// Maps a ZIP entry filename to its renamed output filename
/// `Reader-Dict-<lang><ext>`.
///
/// Recognised extensions (in priority order, so `.dict.dz` wins over a
/// shorter match):
/// - `.dict.dz` → `Reader-Dict-<lang>.dict.dz`
/// - `.dict`    → `Reader-Dict-<lang>.dict`
/// - `.index`   → `Reader-Dict-<lang>.index`
///
/// Matching is case-sensitive and looks only at the end of `original`.
/// Returns `None` for any other extension, including an empty name.
fn dict_file_target_name(original: &str, lang: &str) -> Option<String> {
    const RECOGNISED_EXTENSIONS: [&str; 3] = [".dict.dz", ".dict", ".index"];

    RECOGNISED_EXTENSIONS
        .iter()
        .copied()
        .find(|ext| original.ends_with(ext))
        .map(|ext| format!("Reader-Dict-{lang}{ext}"))
}
449
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::Database;
    use crate::dictionary::monolingual::metadata::DictionaryEntry;
    use chrono::NaiveDate;
    use std::io::Cursor;
    use std::io::Write;
    use tempfile::TempDir;

    /// Builds a service backed by a migrated in-memory database and a fresh
    /// temporary dictionaries root. The `TempDir` and `Database` are returned
    /// so they stay alive for the duration of the test.
    fn create_test_service() -> (MonolingualDictionaryService, TempDir, Database) {
        crate::crypto::init_crypto_provider();
        let dir = TempDir::new().expect("failed to create temp dir");
        let database = Database::new(":memory:").expect("failed to create in-memory database");
        database.migrate().expect("failed to run migrations");
        let service = MonolingualDictionaryService::new(&database, dir.path())
            .expect("failed to create service");
        (service, dir, database)
    }

    /// Builds a metadata entry whose `updated` date is the given day.
    fn make_entry(year: i32, month: u32, day: u32) -> DictionaryEntry {
        DictionaryEntry {
            formats: "df,dic,dictorg,kobo,mobi,stardict".to_string(),
            updated: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
            words: 1_381_375,
        }
    }

    /// Builds an in-memory ZIP archive holding the given `(name, content)`
    /// members.
    fn make_test_zip(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut bytes = Vec::new();
        {
            let mut writer = zip::ZipWriter::new(Cursor::new(&mut bytes));
            let options = zip::write::SimpleFileOptions::default();
            for (name, content) in entries {
                writer.start_file(*name, options).unwrap();
                writer.write_all(content).unwrap();
            }
            writer.finish().unwrap();
        }
        bytes
    }

    /// Creates `<root>/reader-dict/<lang>/` containing one empty file per
    /// name in `files`, and returns the language directory.
    fn seed_lang_dir(root: &Path, lang: &str, files: &[&str]) -> PathBuf {
        let lang_dir = root.join(READER_DICT_SUBDIR).join(lang);
        fs::create_dir_all(&lang_dir).unwrap();
        for name in files {
            fs::File::create(lang_dir.join(name)).unwrap();
        }
        lang_dir
    }

    /// Marks `lang` as currently installing, as a running install would.
    fn mark_pending(service: &MonolingualDictionaryService, lang: &str) {
        service
            .pending_installs
            .lock()
            .unwrap()
            .insert(lang.to_string());
    }

    #[test]
    fn test_get_installed_empty_when_no_dir() {
        let (service, _dir, _db) = create_test_service();
        assert!(service.get_installed_dictionaries().unwrap().is_empty());
    }

    #[test]
    fn test_get_installed_empty_when_dir_exists_but_empty() {
        let (service, dir, _db) = create_test_service();
        fs::create_dir_all(dir.path().join(READER_DICT_SUBDIR)).unwrap();
        assert!(service.get_installed_dictionaries().unwrap().is_empty());
    }

    #[test]
    fn test_get_installed_detects_dict_pair() {
        let (service, dir, _db) = create_test_service();
        seed_lang_dir(dir.path(), "en", &["dict.index", "dict.dict"]);

        let installed = service.get_installed_dictionaries().unwrap();
        assert_eq!(installed, vec!["en".to_string()]);
    }

    #[test]
    fn test_get_installed_detects_dict_dz_pair() {
        let (service, dir, _db) = create_test_service();
        seed_lang_dir(dir.path(), "fr", &["dict.index", "dict.dict.dz"]);

        let installed = service.get_installed_dictionaries().unwrap();
        assert_eq!(installed, vec!["fr".to_string()]);
    }

    #[test]
    fn test_get_installed_ignores_index_without_dict() {
        let (service, dir, _db) = create_test_service();
        seed_lang_dir(dir.path(), "de", &["dict.index"]);

        assert!(service.get_installed_dictionaries().unwrap().is_empty());
    }

    #[test]
    fn test_install_dictionary_extracts_zip_renamed() {
        let (_service, dir, _db) = create_test_service();

        let zip_bytes = make_test_zip(&[
            ("dictorg-en-en.index", b"index content"),
            ("dictorg-en-en.dict", b"dict content"),
        ]);

        let dest = seed_lang_dir(dir.path(), "en", &[]);
        extract_zip_renamed(Cursor::new(&zip_bytes), &dest, "en").unwrap();

        for expected in ["Reader-Dict-en.index", "Reader-Dict-en.dict"] {
            assert!(dest.join(expected).exists());
        }
    }

    #[test]
    fn test_is_installing_false_initially() {
        let (service, _dir, _db) = create_test_service();
        assert!(!service.is_installing("en"));
    }

    #[test]
    fn test_is_installing_true_while_pending() {
        let (service, _dir, _db) = create_test_service();
        mark_pending(&service, "fr");

        assert!(service.is_installing("fr"));
        assert!(!service.is_installing("en"));
    }

    #[test]
    fn test_is_installing_false_after_removal() {
        let (service, _dir, _db) = create_test_service();
        mark_pending(&service, "en");
        service.pending_installs.lock().unwrap().remove("en");

        assert!(!service.is_installing("en"));
    }

    #[test]
    fn test_concurrent_install_same_lang_returns_error() {
        let (service, _dir, _db) = create_test_service();
        mark_pending(&service, "de");

        let entry = make_entry(2026, 4, 1);
        let err = service
            .install_dictionary("de", &entry, false, &mut |_, _| {})
            .expect_err("expected InstallationInProgress error");

        assert!(
            matches!(err, MonolingualError::InstallationInProgress(_)),
            "unexpected error variant: {err}"
        );
    }

    #[test]
    fn test_pending_cleared_after_failed_install() {
        let (service, _dir, _db) = create_test_service();
        let entry = make_entry(2026, 4, 1);

        // The outcome is irrelevant here; only the pending marker is checked.
        let _ = service.install_dictionary("zz", &entry, false, &mut |_, _| {});

        assert!(!service.is_installing("zz"));
    }

    #[test]
    fn test_is_installing_shared_across_clones() {
        let (service, _dir, _db) = create_test_service();
        let clone = service.clone();

        mark_pending(&service, "ja");

        assert!(clone.is_installing("ja"));
    }

    #[test]
    fn test_get_entry_for_lang_returns_none_when_not_cached() {
        let (service, _dir, _db) = create_test_service();
        assert!(service.get_entry_for_lang("en").unwrap().is_none());
    }

    #[test]
    fn test_get_entry_for_lang_returns_entry_after_cache() {
        let (service, _dir, _db) = create_test_service();
        service
            .db
            .upsert_entry("en", &make_entry(2026, 4, 1))
            .unwrap();

        let result = service.get_entry_for_lang("en").unwrap();
        assert!(result.is_some());

        let fetched = result.unwrap();
        assert_eq!(fetched.words, 1_381_375);
        assert_eq!(
            fetched.updated,
            NaiveDate::from_ymd_opt(2026, 4, 1).unwrap()
        );
    }

    /// Installs the English dictionary from the live API and checks that an
    /// `.index` + `.dict`/`.dict.dz` pair ends up on disk and is reported as
    /// installed.
    ///
    /// Run with: `cargo test -- --ignored`
    #[test]
    #[ignore = "requires network access to www.reader-dict.com"]
    fn test_install_dictionary_live() {
        let (service, dir, _db) = create_test_service();

        let available = service.get_available_dictionaries().unwrap();
        let entry = available
            .into_iter()
            .find_map(|(l, e)| if l == "en" { Some(e) } else { None })
            .expect("English dictionary should be available");

        service
            .install_dictionary("en", &entry, false, &mut |_, _| {})
            .expect("install_dictionary failed");

        let lang_dir = dir.path().join(READER_DICT_SUBDIR).join("en");
        assert!(
            lang_dir.exists(),
            "language directory should exist after install"
        );
        assert!(
            has_dict_pair(&lang_dir),
            "expected .index + .dict/.dict.dz pair in {lang_dir:?}"
        );

        let installed = service
            .get_installed_dictionaries()
            .expect("get_installed_dictionaries failed");
        assert!(
            installed.contains(&"en".to_string()),
            "expected 'en' in installed list, got {installed:?}"
        );
    }
}