cadmus_core/dictionary/
indexing.rs1use super::Metadata;
4
/// A single dictionary index record: a headword plus the location of its data.
#[derive(Debug, Clone)]
pub struct Entry {
    /// Key the index is sorted and compared by. After normalization this holds
    /// the transformed (filtered and/or lowercased) headword.
    pub headword: String,
    /// NOTE(review): presumably the byte offset of this entry's payload in the
    /// dictionary data — confirm against the readers.
    pub offset: u64,
    /// NOTE(review): presumably the payload length in bytes — confirm against
    /// the readers.
    pub size: u64,
    /// The headword as it was spelled before normalization. Normalization sets
    /// this only when the transform actually changed the text, `None` otherwise.
    pub original: Option<String>,
}
12
/// Lookup interface over a dictionary index.
///
/// NOTE(review): no implementor is visible from this file; the method notes
/// below are read off the signatures only and should be confirmed against the
/// implementing types.
pub trait IndexReader {
    /// Looks up `headword` with mutable access to the reader and the
    /// dictionary's `metadata` at hand. Presumably loads the index on demand
    /// before searching — confirm. `fuzzy` presumably enables approximate
    /// matching — confirm.
    fn load_and_find(&mut self, headword: &str, fuzzy: bool, metadata: &Metadata) -> Vec<Entry>;
    /// Looks up `headword` using shared access only, returning every matching
    /// entry. `fuzzy` presumably enables approximate matching — confirm.
    fn find(&self, headword: &str, fuzzy: bool) -> Vec<Entry>;
}
17
/// Produces the normalized form of `headword`.
///
/// * `needs_char_filter` — when `true`, every character that is neither
///   alphanumeric nor whitespace is dropped.
/// * `needs_lowercase` — when `true`, the (possibly filtered) text is
///   lowercased.
///
/// Filtering is always applied before lowercasing. With both flags `false`
/// the result is simply an owned copy of the input.
pub(crate) fn apply_transform(
    headword: &str,
    needs_char_filter: bool,
    needs_lowercase: bool,
) -> String {
    // Characters that survive the filter step.
    let keep = |c: &char| c.is_alphanumeric() || c.is_whitespace();

    match (needs_char_filter, needs_lowercase) {
        (false, false) => headword.to_owned(),
        (false, true) => headword.to_lowercase(),
        (true, false) => headword.chars().filter(keep).collect(),
        (true, true) => headword
            .chars()
            .filter(keep)
            .collect::<String>()
            .to_lowercase(),
    }
}
42
43fn normalize_internal(entries: &[Entry], metadata: &Metadata) -> Vec<Entry> {
44 let needs_char_filter = !metadata.all_chars;
45 let needs_lowercase = !metadata.case_sensitive;
46
47 if !needs_char_filter && !needs_lowercase && is_sorted(entries) {
48 return entries.to_vec();
49 }
50
51 let mut result: Vec<Entry> = entries
52 .iter()
53 .map(|entry| {
54 let transformed = apply_transform(&entry.headword, needs_char_filter, needs_lowercase);
55 let original = if transformed != entry.headword {
56 Some(entry.headword.clone())
57 } else {
58 None
59 };
60 Entry {
61 headword: transformed,
62 offset: entry.offset,
63 size: entry.size,
64 original,
65 }
66 })
67 .collect();
68
69 if is_sorted(&result) {
70 return result;
71 }
72
73 result.sort_by_cached_key(|e| e.headword.clone());
74 result
75}
76
77fn is_sorted(entries: &[Entry]) -> bool {
78 entries.windows(2).all(|w| w[0].headword <= w[1].headword)
79}
80
/// Normalizes and sorts `entries` according to `metadata`.
///
/// This variant is compiled only when the `bench` feature is enabled, where
/// the function is fully `pub`. It forwards directly to `normalize_internal`.
#[cfg(feature = "bench")]
pub fn normalize(entries: &[Entry], metadata: &Metadata) -> Vec<Entry> {
    normalize_internal(entries, metadata)
}
90
/// Normalizes and sorts `entries` according to `metadata`.
///
/// This variant is compiled when the `bench` feature is disabled and is
/// visible only within the crate. It forwards directly to
/// `normalize_internal`.
#[cfg(not(feature = "bench"))]
pub(crate) fn normalize(entries: &[Entry], metadata: &Metadata) -> Vec<Entry> {
    normalize_internal(entries, metadata)
}