1mod dictreader;
9mod errors;
10#[cfg(feature = "bench")]
11pub mod indexing;
12#[cfg(not(feature = "bench"))]
13mod indexing;
14
15pub(crate) mod db_index;
16mod monolingual;
17
18pub(crate) use monolingual::MonolingualDictionaryService;
19
20use std::path::Path;
21
22use self::dictreader::DictReader;
23use self::indexing::IndexReader;
24pub(crate) use self::indexing::{apply_transform, normalize, Entry};
25use crate::db::Database;
26use crate::helpers::Fp;
27
/// A loaded dictionary: a definition store paired with a headword index.
///
/// Instances are assembled by [`load_dictionary`], which also derives the
/// [`Metadata`] flags from the index.
pub struct Dictionary {
    /// Reader used to fetch a definition by `(offset, size)`.
    content: Box<dyn DictReader>,
    /// Index used to resolve a query string to its matching entries.
    index: Box<dyn IndexReader>,
    /// Flags detected from the index when the dictionary was loaded.
    metadata: Metadata,
}
39
/// Flags of a dictionary, detected from marker entries in its index.
pub struct Metadata {
    /// `true` when the index contains a `00-database-allchars` entry.
    ///
    /// When `false`, [`Dictionary::metadata`] strips every non-alphanumeric
    /// character from its query before searching the index.
    pub all_chars: bool,
    /// `true` when the index contains a `00-database-case-sensitive` entry
    /// (searched as `00databasecasesensitive` when `all_chars` is `false`).
    pub case_sensitive: bool,
}
47
48impl Dictionary {
49 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(word = %word, fuzzy)))]
57 pub fn lookup(
58 &mut self,
59 word: &str,
60 fuzzy: bool,
61 ) -> Result<Vec<[String; 2]>, errors::DictError> {
62 let query = apply_transform(
63 word,
64 !self.metadata.all_chars,
65 !self.metadata.case_sensitive,
66 );
67 let entries = self.index.load_and_find(&query, fuzzy, &self.metadata);
68 let mut results = Vec::new();
69 for entry in entries.into_iter() {
70 results.push([
71 entry.original.unwrap_or(entry.headword),
72 self.content.fetch_definition(entry.offset, entry.size)?,
73 ]);
74 }
75 Ok(results)
76 }
77
78 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(name = %name)))]
82 pub fn metadata(&mut self, name: &str) -> Result<String, errors::DictError> {
83 let mut query = format!("00-database-{}", name);
84 if !self.metadata.all_chars {
85 query = query.replace(|c: char| !c.is_alphanumeric(), "");
86 }
87 let entries = self.index.find(&query, false);
88 let entry = entries
89 .get(0)
90 .ok_or_else(|| errors::DictError::WordNotFound(name.into()))?;
91 self.content
92 .fetch_definition(entry.offset, entry.size)
93 .map(|def| {
94 let start = def
95 .find('\n')
96 .filter(|pos| *pos < def.len() - 1)
97 .unwrap_or(0);
98 def[start..].trim().to_string()
99 })
100 }
101
102 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
107 pub fn short_name(&mut self) -> Result<String, errors::DictError> {
108 self.metadata("short")
109 }
110
111 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self)))]
116 pub fn url(&mut self) -> Result<String, errors::DictError> {
117 self.metadata("url")
118 }
119}
120
121#[cfg_attr(feature = "tracing", tracing::instrument(skip(database), fields(fingerprint = %fingerprint)))]
125pub fn resolve_dict_id(database: &Database, fingerprint: &Fp) -> Option<i64> {
126 let fp_str = fingerprint.to_string();
127 let pool = database.pool().clone();
128
129 crate::db::runtime::RUNTIME
130 .block_on(async {
131 sqlx::query_scalar!(
132 "SELECT dict_id FROM dictionary_index_meta WHERE fingerprint = ?",
133 fp_str
134 )
135 .fetch_optional(&pool)
136 .await
137 .ok()
138 .flatten()
139 })
140 .flatten()
141}
142
143#[cfg_attr(feature = "tracing", tracing::instrument(skip(database), fields(fingerprint = %fingerprint)))]
151pub fn load_dictionary_from_db<P: AsRef<Path> + std::fmt::Debug>(
152 content_path: P,
153 database: &Database,
154 fingerprint: Fp,
155) -> Result<Dictionary, errors::DictError> {
156 let dict_id = match resolve_dict_id(database, &fingerprint) {
157 Some(id) => id,
158 None => {
159 tracing::warn!(fingerprint = %fingerprint, "dictionary not yet indexed, skipping");
160 return Err(errors::DictError::InvalidFileFormat(
161 "dictionary not yet indexed".into(),
162 None,
163 ));
164 }
165 };
166 let content = dictreader::load_dict(content_path)?;
167 let index = Box::new(db_index::DbIndexReader::new(database, Some(dict_id)));
168 Ok(load_dictionary(content, index))
169}
170
171#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
177pub fn load_dictionary(content: Box<dyn DictReader>, index: Box<dyn IndexReader>) -> Dictionary {
178 let all_chars = !index.find("00-database-allchars", false).is_empty();
179 let word = if all_chars {
180 "00-database-case-sensitive"
181 } else {
182 "00databasecasesensitive"
183 };
184 let case_sensitive = !index.find(word, false).is_empty();
185 Dictionary {
186 content,
187 index,
188 metadata: Metadata {
189 all_chars,
190 case_sensitive,
191 },
192 }
193}
194
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::runtime::RUNTIME;

    const PATH_CASE_SENSITIVE_DICT: &str = "src/dictionary/testdata/case_sensitive_dict.dict";
    const PATH_CASE_INSENSITIVE_DICT: &str = "src/dictionary/testdata/case_insensitive_dict.dict";

    /// `(word, offset, size, explicit original spelling)`.
    type TestEntry = (&'static str, i64, i64, Option<&'static str>);

    const CASE_INSENSITIVE_ENTRIES: &[TestEntry] = &[
        ("00-database-allchars", 1, 1, None),
        ("bar", 443, 30, None),
        ("foo", 428, 15, None),
        ("straße", 516, 44, None),
    ];

    const CASE_SENSITIVE_ENTRIES: &[TestEntry] = &[
        ("00-database-allchars", 1, 1, None),
        ("00-database-case-sensitive", 2, 1, None),
        ("Bar", 459, 30, None),
        ("foo", 444, 15, None),
        ("straße", 532, 44, None),
    ];

    /// Seeds a fresh in-memory database with one dictionary and its index
    /// entries, then loads that dictionary through `load_dictionary_from_db`.
    ///
    /// Each word is stored in its transformed form; the untransformed word is
    /// kept as `original` whenever the two differ and the entry supplies no
    /// explicit original of its own.
    fn load_test_dictionary(
        content_path: &str,
        entries: &[TestEntry],
        case_sensitive: bool,
        all_chars: bool,
    ) -> Result<Dictionary, errors::DictError> {
        let db = Database::new(":memory:").expect("in-memory db");
        db.migrate().expect("migrations");

        let fp = Fp::from_u64(1);
        let fp_str = fp.to_string();

        RUNTIME.block_on(async {
            sqlx::query!(
                r#"INSERT INTO dictionary_index_meta (fingerprint, dict_path, total_lines, indexed_lines, completed)
                   VALUES (?, ?, ?, 0, 0)"#,
                fp_str,
                content_path,
                0_i64,
            )
            .execute(db.pool())
            .await
            .expect("insert meta");

            for &(word, offset, size, original) in entries {
                let normalized = apply_transform(word, !all_chars, !case_sensitive);
                // An explicit original wins; otherwise remember the raw word
                // only when the transform actually changed it.
                let final_original = original.or(if normalized != word {
                    Some(word)
                } else {
                    None
                });

                sqlx::query!(
                    r#"INSERT OR IGNORE INTO dictionary_index_entry (dict_id, word, offset, size, original)
                       VALUES (?, ?, ?, ?, ?)"#,
                    1_i64,
                    normalized,
                    offset,
                    size,
                    final_original,
                )
                .execute(db.pool())
                .await
                .expect("insert entry");
            }
        });

        load_dictionary_from_db(content_path, &db, fp)
    }

    /// Loads the case-insensitive fixture (`all_chars` enabled).
    fn case_insensitive_dict() -> Result<Dictionary, errors::DictError> {
        load_test_dictionary(
            PATH_CASE_INSENSITIVE_DICT,
            CASE_INSENSITIVE_ENTRIES,
            false,
            true,
        )
    }

    /// Loads the case-sensitive fixture (`all_chars` enabled).
    fn case_sensitive_dict() -> Result<Dictionary, errors::DictError> {
        load_test_dictionary(PATH_CASE_SENSITIVE_DICT, CASE_SENSITIVE_ENTRIES, true, true)
    }

    /// Asserts that an exact lookup of `headword` yields exactly one result
    /// whose definition contains `definition`, then hands the dictionary back.
    fn assert_dict_word_exists(
        mut dict: Dictionary,
        headword: &str,
        definition: &str,
    ) -> Dictionary {
        let outcome = dict.lookup(headword, false);
        assert!(outcome.is_ok());
        let found = outcome.unwrap();
        assert_eq!(found.len(), 1);
        assert!(found[0][1].contains(definition));

        dict
    }

    /// Asserts that a fuzzy lookup of `query` yields exactly one result with
    /// the given headword and a definition containing `definition`.
    fn assert_single_fuzzy_hit(
        dict: &mut Dictionary,
        query: &str,
        headword: &str,
        definition: &str,
    ) {
        let outcome = dict.lookup(query, true);
        assert!(outcome.is_ok());
        let found = outcome.unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0][0], headword);
        assert!(found[0][1].contains(definition));
    }

    #[test]
    fn test_load_dictionary_from_db() {
        assert!(case_insensitive_dict().is_ok());
    }

    #[test]
    fn test_dictionary_lookup_case_insensitive() {
        let mut dict = case_insensitive_dict().unwrap();

        let expectations: &[(&str, &str)] = &[
            ("bar", "test for case-sensitivity"),
            ("Bar", "test for case-sensitivity"),
            ("straße", "test for non-latin case-sensitivity"),
        ];
        for &(headword, definition) in expectations {
            dict = assert_dict_word_exists(dict, headword, definition);
        }
    }

    #[test]
    fn test_dictionary_lookup_case_insensitive_fuzzy() {
        let mut dict = case_insensitive_dict().unwrap();

        assert_single_fuzzy_hit(&mut dict, "ba", "bar", "test for case-sensitivity");
    }

    #[test]
    fn test_dictionary_lookup_case_sensitive() {
        let mut dict = case_sensitive_dict().unwrap();

        dict = assert_dict_word_exists(dict, "Bar", "test for case-sensitivity");
        dict = assert_dict_word_exists(dict, "straße", "test for non-latin case-sensitivity");

        // Spellings that differ in case or transliteration must not match.
        for absent in &["bar", "strasse"] {
            assert!(dict.lookup(absent, false).unwrap().is_empty());
        }
    }

    #[test]
    fn test_dictionary_lookup_case_sensitive_fuzzy() {
        let mut dict = case_sensitive_dict().unwrap();

        assert_single_fuzzy_hit(&mut dict, "Ba", "Bar", "test for case-sensitivity");
    }
}