1use levenshtein::levenshtein;
7use sqlx::SqlitePool;
8
9use crate::db::runtime::RUNTIME;
10use crate::db::Database;
11
12use super::indexing::{Entry, IndexReader};
13use super::Metadata;
14
/// Escapes the SQL `LIKE` metacharacters in `prefix` with a backslash.
///
/// Every `\`, `%` and `_` in the input is preceded by a `\` in the output, so
/// the result can be used in a pattern evaluated with `ESCAPE '\'` and will
/// match those characters literally. All other characters are copied as-is.
fn escape_like_prefix(prefix: &str) -> String {
    let mut escaped = String::with_capacity(prefix.len());
    for ch in prefix.chars() {
        // A single pass is enough: each special character gets exactly one
        // escaping backslash, and freshly inserted backslashes are never revisited.
        if matches!(ch, '\\' | '%' | '_') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
23
/// Index reader that answers headword lookups with SQL queries against the
/// `dictionary_index_entry` table.
pub struct DbIndexReader {
    /// Connection pool every query of this reader is executed on.
    pool: SqlitePool,
    /// When `Some(id)`, queries add a `dict_id = id` filter; when `None`, no
    /// `dict_id` filter is applied.
    dict_id: Option<i64>,
}
32
33impl DbIndexReader {
34 pub fn new(database: &Database, dict_id: Option<i64>) -> Self {
36 Self {
37 pool: database.pool().clone(),
38 dict_id,
39 }
40 }
41
42 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword)))]
43 async fn exact_scoped(&self, headword: &str, id: i64) -> Vec<Entry> {
44 match sqlx::query!(
45 r#"SELECT word, offset, size, original
46 FROM dictionary_index_entry
47 WHERE dict_id = ? AND word = ?"#,
48 id,
49 headword,
50 )
51 .fetch_all(&self.pool)
52 .await
53 {
54 Ok(rows) => rows
55 .into_iter()
56 .map(|r| Entry {
57 headword: r.word,
58 offset: r.offset as u64,
59 size: r.size as u64,
60 original: r.original,
61 })
62 .collect(),
63 Err(e) => {
64 tracing::error!(error = %e, "exact scoped dictionary index query failed");
65 Vec::new()
66 }
67 }
68 }
69
70 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword)))]
71 async fn exact_global(&self, headword: &str) -> Vec<Entry> {
72 match sqlx::query!(
73 r#"SELECT word, offset, size, original
74 FROM dictionary_index_entry
75 WHERE word = ?"#,
76 headword,
77 )
78 .fetch_all(&self.pool)
79 .await
80 {
81 Ok(rows) => rows
82 .into_iter()
83 .map(|r| Entry {
84 headword: r.word,
85 offset: r.offset as u64,
86 size: r.size as u64,
87 original: r.original,
88 })
89 .collect(),
90 Err(e) => {
91 tracing::error!(error = %e, "exact global dictionary index query failed");
92 Vec::new()
93 }
94 }
95 }
96
97 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword, prefix = %prefix)))]
98 async fn fuzzy_scoped(&self, headword: &str, prefix: &str, id: i64) -> Vec<Entry> {
99 match sqlx::query!(
100 r#"SELECT word, offset, size, original
101 FROM dictionary_index_entry
102 WHERE dict_id = ? AND word LIKE ? || '%' ESCAPE '\'"#,
103 id,
104 prefix,
105 )
106 .fetch_all(&self.pool)
107 .await
108 {
109 Ok(rows) => rows
110 .into_iter()
111 .filter(|r| levenshtein(headword, &r.word) <= 1)
112 .map(|r| Entry {
113 headword: r.word,
114 offset: r.offset as u64,
115 size: r.size as u64,
116 original: r.original,
117 })
118 .collect(),
119 Err(e) => {
120 tracing::error!(error = %e, "fuzzy scoped dictionary index query failed");
121 Vec::new()
122 }
123 }
124 }
125
126 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword, prefix = %prefix)))]
127 async fn fuzzy_global(&self, headword: &str, prefix: &str) -> Vec<Entry> {
128 match sqlx::query!(
129 r#"SELECT word, offset, size, original
130 FROM dictionary_index_entry
131 WHERE word LIKE ? || '%' ESCAPE '\'"#,
132 prefix,
133 )
134 .fetch_all(&self.pool)
135 .await
136 {
137 Ok(rows) => rows
138 .into_iter()
139 .filter(|r| levenshtein(headword, &r.word) <= 1)
140 .map(|r| Entry {
141 headword: r.word,
142 offset: r.offset as u64,
143 size: r.size as u64,
144 original: r.original,
145 })
146 .collect(),
147 Err(e) => {
148 tracing::error!(error = %e, "fuzzy global dictionary index query failed");
149 Vec::new()
150 }
151 }
152 }
153
154 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword )))]
155 fn query_exact(&self, headword: &str) -> Vec<Entry> {
156 let headword = headword.to_string();
157
158 RUNTIME.block_on(async {
159 if let Some(id) = self.dict_id {
160 self.exact_scoped(&headword, id).await
161 } else {
162 self.exact_global(&headword).await
163 }
164 })
165 }
166
167 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword )))]
168 fn query_fuzzy(&self, headword: &str) -> Vec<Entry> {
169 let prefix_len = headword
170 .char_indices()
171 .nth(3)
172 .map(|(i, _)| i)
173 .unwrap_or(headword.len());
174 let prefix = escape_like_prefix(&headword[..prefix_len]);
175 let headword = headword.to_string();
176
177 RUNTIME.block_on(async {
178 if let Some(id) = self.dict_id {
179 self.fuzzy_scoped(&headword, &prefix, id).await
180 } else {
181 self.fuzzy_global(&headword, &prefix).await
182 }
183 })
184 }
185}
186
187impl IndexReader for DbIndexReader {
188 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self, _metadata), fields(headword = %headword, fuzzy)))]
189 fn load_and_find(&mut self, headword: &str, fuzzy: bool, _metadata: &Metadata) -> Vec<Entry> {
190 self.find(headword, fuzzy)
191 }
192
193 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(headword = %headword, fuzzy)))]
194 fn find(&self, headword: &str, fuzzy: bool) -> Vec<Entry> {
195 if fuzzy {
196 self.query_fuzzy(headword)
197 } else {
198 self.query_exact(headword)
199 }
200 }
201}
202
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db::runtime::RUNTIME;

    const DICT_ID_1: i64 = 1;
    const DICT_ID_2: i64 = 2;

    /// Opens an in-memory database and applies the migrations to it.
    fn setup_db() -> Database {
        let database = Database::new(":memory:").expect("in-memory db");
        database.migrate().expect("migrations");
        database
    }

    /// Inserts the `dictionary_index_meta` row for `dict_id` unless it already
    /// exists; `fp` is stored as both fingerprint and dictionary path.
    fn insert_meta(pool: &SqlitePool, dict_id: i64, fp: &str) {
        let insert = async {
            sqlx::query!(
                "INSERT OR IGNORE INTO dictionary_index_meta (dict_id, fingerprint, dict_path, total_lines, indexed_lines, completed) VALUES (?, ?, ?, 0, 0, 1)",
                dict_id,
                fp,
                fp,
            )
            .execute(pool)
            .await
            .expect("insert meta");
        };
        RUNTIME.block_on(insert);
    }

    /// Inserts one `dictionary_index_entry` row, creating the meta row for
    /// `dict_id` first.
    fn insert_entry(
        pool: &SqlitePool,
        dict_id: i64,
        fp: &str,
        word: &str,
        offset: i64,
        size: i64,
        original: Option<&str>,
    ) {
        insert_meta(pool, dict_id, fp);
        let insert = async {
            sqlx::query!(
                "INSERT INTO dictionary_index_entry (dict_id, word, offset, size, original) VALUES (?, ?, ?, ?, ?)",
                dict_id,
                word,
                offset,
                size,
                original,
            )
            .execute(pool)
            .await
            .expect("insert entry");
        };
        RUNTIME.block_on(insert);
    }

    #[test]
    fn test_exact_lookup_with_dict_id() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);
        insert_entry(db.pool(), DICT_ID_2, "fp2", "world", 10, 5, None);

        let found = DbIndexReader::new(&db, Some(DICT_ID_1)).find("hello", false);

        assert_eq!(found.len(), 1);
        let entry = &found[0];
        assert_eq!(entry.headword, "hello");
        assert_eq!(entry.offset, 0);
        assert_eq!(entry.size, 10);
    }

    #[test]
    fn test_exact_lookup_scoped_dict_id_excludes_other() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);
        insert_entry(db.pool(), DICT_ID_2, "fp2", "hello", 20, 8, None);

        let found = DbIndexReader::new(&db, Some(DICT_ID_1)).find("hello", false);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].offset, 0);
    }

    #[test]
    fn test_exact_lookup_no_dict_id_finds_all() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);
        insert_entry(db.pool(), DICT_ID_2, "fp2", "hello", 20, 8, None);

        let found = DbIndexReader::new(&db, None).find("hello", false);

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_exact_lookup_no_match() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);

        let found = DbIndexReader::new(&db, Some(DICT_ID_1)).find("world", false);

        assert!(found.is_empty());
    }

    #[test]
    fn test_fuzzy_lookup_with_dict_id() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);
        insert_entry(db.pool(), DICT_ID_1, "fp1", "helo", 10, 5, None);
        insert_entry(db.pool(), DICT_ID_1, "fp1", "world", 15, 5, None);

        let found = DbIndexReader::new(&db, Some(DICT_ID_1)).find("hello", true);

        // Exactly two results containing both words is the same as the sorted
        // headwords being this pair.
        let mut words: Vec<&str> = found.iter().map(|e| e.headword.as_str()).collect();
        words.sort_unstable();
        assert_eq!(words, ["hello", "helo"]);
    }

    #[test]
    fn test_fuzzy_lookup_no_dict_id_cross_dict() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);
        insert_entry(db.pool(), DICT_ID_2, "fp2", "helo", 10, 5, None);

        let found = DbIndexReader::new(&db, None).find("hello", true);

        assert_eq!(found.len(), 2);
    }

    #[test]
    fn test_load_and_find_delegates_to_find() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, None);

        let metadata = Metadata {
            all_chars: true,
            case_sensitive: false,
        };
        let mut reader = DbIndexReader::new(&db, Some(DICT_ID_1));
        let found = reader.load_and_find("hello", false, &metadata);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].headword, "hello");
    }

    #[test]
    fn test_original_field_preserved() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "hello", 0, 10, Some("Hello"));

        let found = DbIndexReader::new(&db, Some(DICT_ID_1)).find("hello", false);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].original.as_deref(), Some("Hello"));
    }

    #[test]
    fn test_multiple_definitions_same_word_all_returned() {
        let db = setup_db();
        insert_entry(db.pool(), DICT_ID_1, "fp1", "pain", 100, 20, Some("Pain"));
        insert_entry(db.pool(), DICT_ID_1, "fp1", "pain", 200, 30, Some("PAIN"));
        insert_entry(db.pool(), DICT_ID_1, "fp1", "pain", 300, 40, None);

        let found = DbIndexReader::new(&db, Some(DICT_ID_1)).find("pain", false);

        // Three results containing each offset is the same as the sorted
        // offsets being exactly these three.
        let mut offsets: Vec<u64> = found.iter().map(|e| e.offset).collect();
        offsets.sort_unstable();
        assert_eq!(offsets, [100, 200, 300]);
    }
}
370}