// cadmus_core/dictionary/mod.rs
mod dictreader;
7mod errors;
8mod indexing;
9
10use std::path::Path;
11
12use self::dictreader::DictReader;
13use self::indexing::IndexReader;
14
15pub struct Dictionary {
22 content: Box<dyn DictReader>,
23 index: Box<dyn IndexReader>,
24 metadata: Metadata,
25}
26
/// Flags that control how a query is normalized before it is looked up.
///
/// `load_dictionary` derives both flags from the presence of the
/// `00-database-allchars` and `00-database-case-sensitive` index entries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Metadata {
    /// When `false`, lookups drop every query character that is neither
    /// alphanumeric nor whitespace.
    pub all_chars: bool,
    /// When `false`, lookups lowercase the query.
    pub case_sensitive: bool,
}
34
35impl Dictionary {
36 pub fn lookup(
41 &mut self,
42 word: &str,
43 fuzzy: bool,
44 ) -> Result<Vec<[String; 2]>, errors::DictError> {
45 let mut query = word.to_string();
46 if !self.metadata.case_sensitive {
47 query = query.to_lowercase();
48 }
49 if !self.metadata.all_chars {
50 query = query
51 .chars()
52 .filter(|c| c.is_alphanumeric() || c.is_whitespace())
53 .collect();
54 }
55 let entries = self.index.load_and_find(&query, fuzzy, &self.metadata);
56 let mut results = Vec::new();
57 for entry in entries.into_iter() {
58 results.push([
59 entry.original.unwrap_or(entry.headword),
60 self.content.fetch_definition(entry.offset, entry.size)?,
61 ]);
62 }
63 Ok(results)
64 }
65
66 pub fn metadata(&mut self, name: &str) -> Result<String, errors::DictError> {
70 let mut query = format!("00-database-{}", name);
71 if !self.metadata.all_chars {
72 query = query.replace(|c: char| !c.is_alphanumeric(), "");
73 }
74 let entries = self.index.find(&query, false);
75 let entry = entries
76 .get(0)
77 .ok_or_else(|| errors::DictError::WordNotFound(name.into()))?;
78 self.content
79 .fetch_definition(entry.offset, entry.size)
80 .map(|def| {
81 let start = def
82 .find('\n')
83 .filter(|pos| *pos < def.len() - 1)
84 .unwrap_or(0);
85 def[start..].trim().to_string()
86 })
87 }
88
89 pub fn short_name(&mut self) -> Result<String, errors::DictError> {
94 self.metadata("short")
95 }
96
97 pub fn url(&mut self) -> Result<String, errors::DictError> {
102 self.metadata("url")
103 }
104}
105
106pub fn load_dictionary_from_file<P: AsRef<Path>>(
111 content_path: P,
112 index_path: P,
113) -> Result<Dictionary, errors::DictError> {
114 let content = dictreader::load_dict(content_path)?;
115 let index = Box::new(indexing::parse_index_from_file(index_path, true)?);
116 Ok(load_dictionary(content, index))
117}
118
119pub fn load_dictionary(content: Box<dyn DictReader>, index: Box<dyn IndexReader>) -> Dictionary {
126 let all_chars = !index.find("00-database-allchars", false).is_empty();
127 let word = if all_chars {
128 "00-database-case-sensitive"
129 } else {
130 "00databasecasesensitive"
131 };
132 let case_sensitive = !index.find(word, false).is_empty();
133 Dictionary {
134 content,
135 index,
136 metadata: Metadata {
137 all_chars,
138 case_sensitive,
139 },
140 }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    const PATH_CASE_SENSITIVE_DICT: &str = "src/dictionary/testdata/case_sensitive_dict.dict";
    const PATH_CASE_SENSITIVE_INDEX: &str = "src/dictionary/testdata/case_sensitive_dict.index";
    const PATH_CASE_INSENSITIVE_DICT: &str = "src/dictionary/testdata/case_insensitive_dict.dict";
    const PATH_CASE_INSENSITIVE_INDEX: &str = "src/dictionary/testdata/case_insensitive_dict.index";

    /// Asserts that an exact lookup of `headword` yields exactly one result
    /// whose definition contains `definition`.
    fn assert_dict_word_exists(dict: &mut Dictionary, headword: &str, definition: &str) {
        let search = dict
            .lookup(headword, false)
            .expect("exact lookup should succeed");
        assert_eq!(search.len(), 1);
        assert!(search[0][1].contains(definition));
    }

    #[test]
    fn test_load_dictionary_from_file() {
        let r = load_dictionary_from_file(PATH_CASE_INSENSITIVE_DICT, PATH_CASE_INSENSITIVE_INDEX);
        assert!(r.is_ok());
    }

    #[test]
    fn test_dictionary_lookup_case_insensitive() {
        let mut dict =
            load_dictionary_from_file(PATH_CASE_INSENSITIVE_DICT, PATH_CASE_INSENSITIVE_INDEX)
                .expect("case-insensitive test dictionary should load");

        assert_dict_word_exists(&mut dict, "bar", "test for case-sensitivity");
        assert_dict_word_exists(&mut dict, "Bar", "test for case-sensitivity");
        assert_dict_word_exists(&mut dict, "straße", "test for non-latin case-sensitivity");
    }

    #[test]
    fn test_dictionary_lookup_case_insensitive_fuzzy() {
        let mut dict =
            load_dictionary_from_file(PATH_CASE_INSENSITIVE_DICT, PATH_CASE_INSENSITIVE_INDEX)
                .expect("case-insensitive test dictionary should load");

        let search = dict.lookup("ba", true).expect("fuzzy lookup should succeed");
        assert_eq!(search.len(), 1);
        assert_eq!(search[0][0], "bar");
        assert!(search[0][1].contains("test for case-sensitivity"));
    }

    #[test]
    fn test_dictionary_lookup_case_sensitive() {
        let mut dict =
            load_dictionary_from_file(PATH_CASE_SENSITIVE_DICT, PATH_CASE_SENSITIVE_INDEX)
                .expect("case-sensitive test dictionary should load");

        assert_dict_word_exists(&mut dict, "Bar", "test for case-sensitivity");
        assert_dict_word_exists(&mut dict, "straße", "test for non-latin case-sensitivity");

        // Differently cased or transliterated spellings must not match.
        let r = dict.lookup("bar", false);
        assert!(r.unwrap().is_empty());

        let r = dict.lookup("strasse", false);
        assert!(r.unwrap().is_empty());
    }

    #[test]
    fn test_dictionary_lookup_case_sensitive_fuzzy() {
        let mut dict =
            load_dictionary_from_file(PATH_CASE_SENSITIVE_DICT, PATH_CASE_SENSITIVE_INDEX)
                .expect("case-sensitive test dictionary should load");

        let search = dict.lookup("Ba", true).expect("fuzzy lookup should succeed");
        assert_eq!(search.len(), 1);
        assert_eq!(search[0][0], "Bar");
        assert!(search[0][1].contains("test for case-sensitivity"));
    }
}