cadmus_core/document/html/
dom.rs1use fxhash::{FxHashMap, FxHashSet};
2use std::num::NonZeroUsize;
3
4pub type Attributes = FxHashMap<String, String>;
5pub const WRAPPER_TAG_NAME: &str = "anonymous";
6
7#[derive(Debug, Clone)]
8pub enum NodeData {
9 Root,
10 Wrapper(usize),
11 Element(ElementData),
12 Text(TextData),
13 Whitespace(TextData),
14}
15
16#[derive(Debug, Clone)]
17pub struct ElementData {
18 pub offset: usize,
19 pub name: String,
20 pub qualified_name: Option<String>,
21 pub attributes: Attributes,
22}
23
24impl ElementData {
25 fn is_block(&self) -> bool {
26 matches!(
27 self.name.as_str(),
28 "address"
29 | "article"
30 | "aside"
31 | "blockquote"
32 | "body"
33 | "head"
34 | "details"
35 | "dialog"
36 | "dd"
37 | "div"
38 | "dl"
39 | "dt"
40 | "fieldset"
41 | "figcaption"
42 | "figure"
43 | "footer"
44 | "form"
45 | "h1"
46 | "h2"
47 | "h3"
48 | "h4"
49 | "h5"
50 | "h6"
51 | "header"
52 | "hgroup"
53 | "hr"
54 | "html"
55 | "li"
56 | "main"
57 | "nav"
58 | "ol"
59 | "p"
60 | "pre"
61 | "section"
62 | "table"
63 | "thead"
64 | "colgroup"
65 | "tbody"
66 | "tfoot"
67 | "tr"
68 | "caption"
69 | "td"
70 | "th"
71 | "ul"
72 )
73 }
74}
75
76impl NodeData {
77 fn text(&self) -> Option<&str> {
78 match *self {
79 NodeData::Text(TextData { ref text, .. })
80 | NodeData::Whitespace(TextData { ref text, .. }) => Some(text),
81 _ => None,
82 }
83 }
84
85 fn offset(&self) -> usize {
86 match *self {
87 NodeData::Text(TextData { offset, .. })
88 | NodeData::Whitespace(TextData { offset, .. })
89 | NodeData::Element(ElementData { offset, .. }) => offset,
90 NodeData::Wrapper(offset) => offset,
91 NodeData::Root => 0,
92 }
93 }
94}
95
96#[derive(Debug, Clone)]
97pub struct TextData {
98 pub offset: usize,
99 pub text: String,
100}
101
102pub fn element(name: &str, offset: usize, attributes: Attributes) -> NodeData {
103 let colon = name.find(':');
104 NodeData::Element(ElementData {
105 offset,
106 name: name[colon.map(|index| index + 1).unwrap_or(0)..].to_string(),
107 qualified_name: colon.map(|_| name.to_string()),
108 attributes,
109 })
110}
111
112pub fn text(text: &str, offset: usize) -> NodeData {
113 NodeData::Text(TextData {
114 offset,
115 text: text.to_string(),
116 })
117}
118
119pub fn whitespace(text: &str, offset: usize) -> NodeData {
120 NodeData::Whitespace(TextData {
121 offset,
122 text: text.to_string(),
123 })
124}
125
126#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
127pub struct NodeId(NonZeroUsize);
128
129impl NodeId {
130 pub fn from_index(n: usize) -> Self {
131 NodeId(unsafe { NonZeroUsize::new_unchecked(n + 1) })
132 }
133
134 pub fn to_index(self) -> usize {
135 self.0.get() - 1
136 }
137}
138
139#[derive(Debug, Clone)]
140pub struct XmlTree {
141 nodes: Vec<Node>,
142}
143
144#[derive(Debug, Clone)]
145pub struct Node {
146 data: NodeData,
147 parent: Option<NodeId>,
148 previous_sibling: Option<NodeId>,
149 next_sibling: Option<NodeId>,
150 first_child: Option<NodeId>,
151 last_child: Option<NodeId>,
152}
153
154impl Default for Node {
155 fn default() -> Self {
156 Node {
157 data: NodeData::Root,
158 parent: None,
159 previous_sibling: None,
160 next_sibling: None,
161 first_child: None,
162 last_child: None,
163 }
164 }
165}
166
167#[derive(Debug, Copy, Clone)]
168pub struct NodeRef<'a> {
169 pub id: NodeId,
170 pub node: &'a Node,
171 pub tree: &'a XmlTree,
172}
173
174#[derive(Debug)]
175pub struct NodeMut<'a> {
176 pub id: NodeId,
177 pub tree: &'a mut XmlTree,
178}
179
180impl XmlTree {
181 pub fn new() -> Self {
182 XmlTree {
183 nodes: vec![Node::default()],
184 }
185 }
186
187 fn node(&self, id: NodeId) -> &Node {
188 unsafe { self.nodes.get_unchecked(id.to_index()) }
189 }
190
191 fn node_mut(&mut self, id: NodeId) -> &mut Node {
192 unsafe { self.nodes.get_unchecked_mut(id.to_index()) }
193 }
194
195 pub fn get(&self, id: NodeId) -> NodeRef<'_> {
196 NodeRef {
197 id,
198 node: self.node(id),
199 tree: self,
200 }
201 }
202
203 pub fn get_mut(&mut self, id: NodeId) -> NodeMut<'_> {
204 NodeMut { id, tree: self }
205 }
206
207 pub fn root(&self) -> NodeRef<'_> {
208 self.get(NodeId::from_index(0))
209 }
210
211 pub fn root_mut(&mut self) -> NodeMut<'_> {
212 self.get_mut(NodeId::from_index(0))
213 }
214
215 pub fn wrap_lost_inlines(&mut self) {
216 let mut ids = Vec::new();
217 let mut known_ids = FxHashSet::default();
218
219 for n in self.root().descendants().filter(|n| n.is_inline()) {
220 if known_ids.contains(&n.id) {
221 continue;
222 }
223
224 let mut first_id = None;
225 let mut last_id = None;
226
227 for s in n.previous_siblings() {
228 if s.is_block() {
229 first_id = Some(s.next_sibling().unwrap().id);
230 break;
231 } else {
232 known_ids.insert(s.id);
233 }
234 }
235
236 for s in n.next_siblings() {
237 if s.is_block() {
238 last_id = Some(s.previous_sibling().unwrap().id);
239 break;
240 } else {
241 known_ids.insert(s.id);
242 }
243 }
244
245 if first_id.is_some() || last_id.is_some() {
246 let parent = n.parent().unwrap();
247 ids.push([
248 parent.id,
249 first_id.unwrap_or_else(|| parent.node.first_child.unwrap()),
250 last_id.unwrap_or_else(|| parent.node.last_child.unwrap()),
251 ]);
252 }
253 }
254
255 for [parent_id, first_id, last_id] in ids {
256 let offset = self.node(first_id).data.offset();
257 let mut node = self.get_mut(parent_id);
258 node.wrap_range(first_id, last_id, NodeData::Wrapper(offset));
259 }
260 }
261}
262
263impl<'a> NodeRef<'a> {
264 pub fn parent(&self) -> Option<Self> {
265 self.node.parent.map(|id| self.tree.get(id))
266 }
267
268 pub fn parent_element(&self) -> Option<Self> {
269 self.ancestors().find(|n| n.is_element() && !n.is_wrapper())
270 }
271
272 pub fn previous_sibling(&self) -> Option<Self> {
273 self.node.previous_sibling.map(|id| self.tree.get(id))
274 }
275
276 pub fn previous_sibling_element(&self) -> Option<NodeRef<'a>> {
277 self.previous_sibling_elements().next()
278 }
279
280 pub fn next_sibling_element(&self) -> Option<NodeRef<'a>> {
281 self.next_sibling_elements().next()
282 }
283
284 pub fn next_sibling(&self) -> Option<Self> {
285 self.node.next_sibling.map(|id| self.tree.get(id))
286 }
287
288 pub fn first_child(&self) -> Option<Self> {
289 self.node.first_child.map(|id| self.tree.get(id))
290 }
291
292 pub fn last_child(&self) -> Option<Self> {
293 self.node.last_child.map(|id| self.tree.get(id))
294 }
295
296 pub fn ancestors(&self) -> Ancestors<'a> {
297 Ancestors {
298 next: self.parent(),
299 }
300 }
301
302 pub fn ancestor_elements(&self) -> impl Iterator<Item = NodeRef<'a>> {
303 self.ancestors()
304 .filter(|n| n.is_element() && !n.is_wrapper())
305 }
306
307 pub fn previous_siblings(&self) -> PreviousSiblings<'a> {
308 PreviousSiblings {
309 next: self.previous_sibling(),
310 }
311 }
312
313 pub fn next_siblings(&self) -> NextSiblings<'a> {
314 NextSiblings {
315 next: self.next_sibling(),
316 }
317 }
318
319 pub fn previous_sibling_elements(&self) -> impl Iterator<Item = NodeRef<'a>> {
320 self.previous_siblings().filter(|n| n.is_element())
321 }
322
323 pub fn next_sibling_elements(&self) -> impl Iterator<Item = NodeRef<'a>> {
324 self.next_siblings().filter(|n| n.is_element())
325 }
326
327 pub fn children(&self) -> Children<'a> {
328 Children {
329 next: self.first_child(),
330 }
331 }
332
333 pub fn descendants(&self) -> Descendants<'a> {
334 Descendants {
335 root_id: self.id,
336 next: self.first_child(),
337 }
338 }
339
340 pub fn has_children(&self) -> bool {
341 self.node.first_child.is_some()
342 }
343
344 pub fn is_element(&self) -> bool {
345 matches!(
346 self.node.data,
347 NodeData::Element { .. } | NodeData::Wrapper(..) | NodeData::Root
348 )
349 }
350
351 pub fn is_inline(&self) -> bool {
352 match &self.node.data {
353 NodeData::Element(e) => !e.is_block(),
354 NodeData::Text(..) => true,
355 _ => false,
356 }
357 }
358
359 pub fn is_block(&self) -> bool {
360 match &self.node.data {
361 NodeData::Element(e) => e.is_block(),
362 NodeData::Wrapper(..) | NodeData::Root => true,
363 _ => false,
364 }
365 }
366
367 pub fn is_wrapper(&self) -> bool {
368 matches!(self.node.data, NodeData::Wrapper(..))
369 }
370
371 pub fn data(&self) -> &'a NodeData {
372 &self.node.data
373 }
374
375 pub fn offset(&self) -> usize {
376 self.node.data.offset()
377 }
378
379 pub fn text(&self) -> String {
380 self.node.data.text().map(String::from).unwrap_or_else(|| {
381 self.descendants()
382 .filter_map(|n| n.node.data.text())
383 .fold(String::new(), |mut a, b| {
384 a.push_str(b);
385 a
386 })
387 })
388 }
389
390 pub fn tag_name(&self) -> Option<&'a str> {
391 match self.node.data {
392 NodeData::Element(ElementData { ref name, .. }) => Some(name),
393 NodeData::Wrapper(..) => Some(WRAPPER_TAG_NAME),
394 _ => None,
395 }
396 }
397
398 pub fn tag_qualified_name(&self) -> Option<&'a str> {
399 match self.node.data {
400 NodeData::Element(ElementData {
401 ref qualified_name, ..
402 }) => qualified_name.as_deref(),
403 _ => None,
404 }
405 }
406
407 pub fn attributes(&self) -> Option<&'a Attributes> {
408 match self.node.data {
409 NodeData::Element(ElementData { ref attributes, .. }) => Some(attributes),
410 _ => None,
411 }
412 }
413
414 pub fn attribute(&self, name: &str) -> Option<&'a str> {
415 self.attributes()
416 .and_then(|a| a.get(name).map(String::as_str))
417 }
418
419 pub fn classes(&self) -> impl Iterator<Item = &'a str> {
420 self.attribute("class").unwrap_or("").split_whitespace()
421 }
422
423 pub fn id(&self) -> Option<&str> {
424 self.attribute("id")
425 }
426
427 pub fn find(&self, tag_name: &str) -> Option<Self> {
428 self.descendants().find(|n| n.tag_name() == Some(tag_name))
429 }
430
431 pub fn find_by_id(&self, id: &str) -> Option<Self> {
432 self.descendants().find(|n| n.id() == Some(id))
433 }
434}
435
436impl<'a> NodeMut<'a> {
437 fn node(&mut self) -> &mut Node {
438 self.tree.node_mut(self.id)
439 }
440
441 pub fn append(&mut self, data: NodeData) -> NodeId {
442 let id = NodeId::from_index(self.tree.nodes.len());
443
444 let node = Node {
445 data,
446 parent: Some(self.id),
447 previous_sibling: self.node().last_child,
448 next_sibling: None,
449 first_child: None,
450 last_child: None,
451 };
452
453 self.tree.nodes.push(node);
454
455 if let Some(last_child) = self.node().last_child {
456 self.tree.node_mut(last_child).next_sibling = Some(id);
457 }
458
459 self.node().last_child = Some(id);
460
461 if self.node().first_child.is_none() {
462 self.node().first_child = Some(id);
463 }
464
465 id
466 }
467
468 pub fn wrap_range(&mut self, first_id: NodeId, last_id: NodeId, data: NodeData) {
469 let before = self.tree.node(first_id).previous_sibling;
470 let after = self.tree.node(last_id).next_sibling;
471 let id = NodeId::from_index(self.tree.nodes.len());
472
473 let node = Node {
474 data,
475 parent: Some(self.id),
476 previous_sibling: before,
477 next_sibling: after,
478 first_child: Some(first_id),
479 last_child: Some(last_id),
480 };
481
482 self.tree.nodes.push(node);
483
484 if let Some(before_id) = before {
485 self.tree.node_mut(before_id).next_sibling = Some(id);
486 }
487
488 if let Some(after_id) = after {
489 self.tree.node_mut(after_id).previous_sibling = Some(id);
490 }
491
492 if let Some(first_child_id) = self.node().first_child {
493 if first_child_id == first_id {
494 self.node().first_child = Some(id);
495 }
496 }
497
498 if let Some(last_child_id) = self.node().last_child {
499 if last_child_id == last_id {
500 self.node().last_child = Some(id);
501 }
502 }
503
504 self.tree.node_mut(first_id).previous_sibling = None;
505 self.tree.node_mut(last_id).next_sibling = None;
506 self.tree.node_mut(first_id).parent = Some(id);
507
508 let mut node_id = first_id;
509 while let Some(next_id) = self.tree.node(node_id).next_sibling {
510 self.tree.node_mut(next_id).parent = Some(id);
511 node_id = next_id;
512 }
513 }
514}
515
516pub struct Ancestors<'a> {
517 next: Option<NodeRef<'a>>,
518}
519
520pub struct NextSiblings<'a> {
521 next: Option<NodeRef<'a>>,
522}
523
524pub struct PreviousSiblings<'a> {
525 next: Option<NodeRef<'a>>,
526}
527
528pub struct Children<'a> {
529 next: Option<NodeRef<'a>>,
530}
531
532pub struct Descendants<'a> {
533 root_id: NodeId,
534 next: Option<NodeRef<'a>>,
535}
536
537impl<'a> Iterator for Ancestors<'a> {
538 type Item = NodeRef<'a>;
539
540 fn next(&mut self) -> Option<Self::Item> {
541 let node = self.next.take();
542 self.next = node.as_ref().and_then(|node| node.parent());
543 node
544 }
545}
546
547impl<'a> Iterator for PreviousSiblings<'a> {
548 type Item = NodeRef<'a>;
549
550 fn next(&mut self) -> Option<Self::Item> {
551 let node = self.next.take();
552 self.next = node.as_ref().and_then(|node| node.previous_sibling());
553 node
554 }
555}
556
557impl<'a> Iterator for NextSiblings<'a> {
558 type Item = NodeRef<'a>;
559
560 fn next(&mut self) -> Option<Self::Item> {
561 let node = self.next.take();
562 self.next = node.as_ref().and_then(|node| node.next_sibling());
563 node
564 }
565}
566
567impl<'a> Iterator for Children<'a> {
568 type Item = NodeRef<'a>;
569
570 fn next(&mut self) -> Option<Self::Item> {
571 let node = self.next.take();
572 self.next = node.as_ref().and_then(|node| node.next_sibling());
573 node
574 }
575}
576
577impl<'a> Iterator for Descendants<'a> {
578 type Item = NodeRef<'a>;
579
580 fn next(&mut self) -> Option<Self::Item> {
581 let node = self.next.take();
582 if let Some(node) = node {
583 self.next = node
584 .first_child()
585 .or_else(|| node.next_sibling())
586 .or_else(|| {
587 node.ancestors()
588 .take_while(|n| n.id != self.root_id)
589 .find(|n| n.node.next_sibling.is_some())
590 .and_then(|n| n.next_sibling())
591 });
592 }
593 node
594 }
595}