pest/span.rs
1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use core::fmt;
11use core::hash::{Hash, Hasher};
12use core::ops::{Bound, RangeBounds};
13use core::ptr;
14use core::str;
15
16use crate::position;
17
/// A contiguous region of a `&str`, described by a start and an end byte offset.
///
/// A `Span` is created from either [two `Position`s] or from a [`Pair`].
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Copy, Clone)]
pub struct Span<'i> {
    /// The complete input string that `start` and `end` index into.
    input: &'i str,
    /// Byte offset into `input` at which the span begins.
    start: usize,
    /// Byte offset into `input` at which the span ends (exclusive).
    end: usize,
}
28
29impl<'i> Span<'i> {
30 /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
31 pub(crate) fn new_internal(input: &str, start: usize, end: usize) -> Span<'_> {
32 debug_assert!(input.get(start..end).is_some());
33 Span { input, start, end }
34 }
35
36 /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index
37 /// into `input`.
38 ///
39 /// # Examples
40 ///
41 /// ```
42 /// # use pest::Span;
43 /// let input = "Hello!";
44 /// assert_eq!(None, Span::new(input, 100, 0));
45 /// assert!(Span::new(input, 0, input.len()).is_some());
46 /// ```
47 pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> {
48 if input.get(start..end).is_some() {
49 Some(Span { input, start, end })
50 } else {
51 None
52 }
53 }
54
55 /// Attempts to create a new span based on a sub-range.
56 ///
57 /// ```
58 /// use pest::Span;
59 /// let input = "Hello World!";
60 /// let world = Span::new(input, 6, input.len()).unwrap();
61 /// let orl = world.get(1..=3);
62 /// assert!(orl.is_some());
63 /// assert_eq!(orl.unwrap().as_str(), "orl");
64 /// ```
65 ///
66 /// # Examples
67 pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> {
68 let start = match range.start_bound() {
69 Bound::Included(offset) => *offset,
70 Bound::Excluded(offset) => *offset + 1,
71 Bound::Unbounded => 0,
72 };
73 let end = match range.end_bound() {
74 Bound::Included(offset) => *offset + 1,
75 Bound::Excluded(offset) => *offset,
76 Bound::Unbounded => self.as_str().len(),
77 };
78
79 self.as_str().get(start..end).map(|_| Span {
80 input: self.input,
81 start: self.start + start,
82 end: self.start + end,
83 })
84 }
85
86 /// Returns the `Span`'s start byte position as a `usize`.
87 ///
88 /// # Examples
89 ///
90 /// ```
91 /// # use pest::Position;
92 /// let input = "ab";
93 /// let start = Position::from_start(input);
94 /// let end = start.clone();
95 /// let span = start.span(&end);
96 ///
97 /// assert_eq!(span.start(), 0);
98 /// ```
99 #[inline]
100 pub fn start(&self) -> usize {
101 self.start
102 }
103
104 /// Returns the `Span`'s end byte position as a `usize`.
105 ///
106 /// # Examples
107 ///
108 /// ```
109 /// # use pest::Position;
110 /// let input = "ab";
111 /// let start = Position::from_start(input);
112 /// let end = start.clone();
113 /// let span = start.span(&end);
114 ///
115 /// assert_eq!(span.end(), 0);
116 /// ```
117 #[inline]
118 pub fn end(&self) -> usize {
119 self.end
120 }
121
122 /// Returns the `Span`'s start `Position`.
123 ///
124 /// # Examples
125 ///
126 /// ```
127 /// # use pest::Position;
128 /// let input = "ab";
129 /// let start = Position::from_start(input);
130 /// let end = start.clone();
131 /// let span = start.clone().span(&end);
132 ///
133 /// assert_eq!(span.start_pos(), start);
134 /// ```
135 #[inline]
136 pub fn start_pos(&self) -> position::Position<'i> {
137 position::Position::new_internal(self.input, self.start)
138 }
139
140 /// Returns the `Span`'s end `Position`.
141 ///
142 /// # Examples
143 ///
144 /// ```
145 /// # use pest::Position;
146 /// let input = "ab";
147 /// let start = Position::from_start(input);
148 /// let end = start.clone();
149 /// let span = start.span(&end);
150 ///
151 /// assert_eq!(span.end_pos(), end);
152 /// ```
153 #[inline]
154 pub fn end_pos(&self) -> position::Position<'i> {
155 position::Position::new_internal(self.input, self.end)
156 }
157
158 /// Splits the `Span` into a pair of `Position`s.
159 ///
160 /// # Examples
161 ///
162 /// ```
163 /// # use pest::Position;
164 /// let input = "ab";
165 /// let start = Position::from_start(input);
166 /// let end = start.clone();
167 /// let span = start.clone().span(&end);
168 ///
169 /// assert_eq!(span.split(), (start, end));
170 /// ```
171 #[inline]
172 pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
173 let pos1 = position::Position::new_internal(self.input, self.start);
174 let pos2 = position::Position::new_internal(self.input, self.end);
175
176 (pos1, pos2)
177 }
178
179 /// Captures a slice from the `&str` defined by the `Span`.
180 ///
181 /// # Examples
182 ///
183 /// ```
184 /// # use pest;
185 /// # #[allow(non_camel_case_types)]
186 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
187 /// enum Rule {}
188 ///
189 /// let input = "abc";
190 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap();
191 /// let start_pos = state.position().clone();
192 /// state = state.match_string("b").unwrap();
193 /// let span = start_pos.span(&state.position().clone());
194 /// assert_eq!(span.as_str(), "b");
195 /// ```
196 #[inline]
197 pub fn as_str(&self) -> &'i str {
198 // Span's start and end positions are always a UTF-8 borders.
199 &self.input[self.start..self.end]
200 }
201
202 /// Returns the input string of the `Span`.
203 ///
204 /// This function returns the input string of the `Span` as a `&str`. This is the source string
205 /// from which the `Span` was created. The returned `&str` can be used to examine the contents of
206 /// the `Span` or to perform further processing on the string.
207 ///
208 /// # Examples
209 ///
210 /// ```
211 /// # use pest;
212 /// # use pest::Span;
213 ///
214 /// // Example: Get input string from a span
215 /// let input = "abc\ndef\nghi";
216 /// let span = Span::new(input, 1, 7).unwrap();
217 /// assert_eq!(span.get_input(), input);
218 /// ```
219 pub fn get_input(&self) -> &'i str {
220 self.input
221 }
222
223 /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line.
224 ///
225 /// # Examples
226 ///
227 /// ```
228 /// # use pest;
229 /// # #[allow(non_camel_case_types)]
230 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
231 /// enum Rule {}
232 ///
233 /// let input = "a\nb\nc";
234 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
235 /// let start_pos = state.position().clone();
236 /// state = state.match_string("b\nc").unwrap();
237 /// let span = start_pos.span(&state.position().clone());
238 /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]);
239 /// ```
240 #[inline]
241 pub fn lines(&self) -> Lines<'_> {
242 Lines {
243 inner: self.lines_span(),
244 }
245 }
246
247 /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line.
248 ///
249 /// # Examples
250 ///
251 /// ```
252 /// # use pest;
253 /// # use pest::Span;
254 /// # #[allow(non_camel_case_types)]
255 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
256 /// enum Rule {}
257 ///
258 /// let input = "a\nb\nc";
259 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
260 /// let start_pos = state.position().clone();
261 /// state = state.match_string("b\nc").unwrap();
262 /// let span = start_pos.span(&state.position().clone());
263 /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]);
264 /// ```
265 pub fn lines_span(&self) -> LinesSpan<'_> {
266 LinesSpan {
267 span: self,
268 pos: self.start,
269 }
270 }
271}
272
273impl<'i> fmt::Debug for Span<'i> {
274 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
275 f.debug_struct("Span")
276 .field("str", &self.as_str())
277 .field("start", &self.start)
278 .field("end", &self.end)
279 .finish()
280 }
281}
282
283impl<'i> PartialEq for Span<'i> {
284 fn eq(&self, other: &Span<'i>) -> bool {
285 ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end
286 }
287}
288
289impl<'i> Eq for Span<'i> {}
290
291impl<'i> Hash for Span<'i> {
292 fn hash<H: Hasher>(&self, state: &mut H) {
293 (self.input as *const str).hash(state);
294 self.start.hash(state);
295 self.end.hash(state);
296 }
297}
298
299/// Merges two spans into one.
300///
301/// This function merges two spans that are contiguous or overlapping into a single span
302/// that covers the entire range of the two input spans. This is useful when you want to
303/// aggregate information from multiple spans into a single entity.
304///
305/// The function checks if the input spans are overlapping or contiguous by comparing their
306/// start and end positions. If they are, a new span is created with the minimum start position
307/// and the maximum end position of the two input spans.
308///
309/// If the input spans are neither overlapping nor contiguous, the function returns None,
310/// indicating that a merge operation was not possible.
311///
312/// # Examples
313///
314/// ```
315/// # use pest;
316/// # use pest::Span;
317/// # use pest::merge_spans;
318///
319/// // Example 1: Contiguous spans
320/// let input = "abc\ndef\nghi";
321/// let span1 = Span::new(input, 1, 7).unwrap();
322/// let span2 = Span::new(input, 7, 11).unwrap();
323/// let merged = merge_spans(&span1, &span2).unwrap();
324/// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
325///
326/// // Example 2: Overlapping spans
327/// let input = "abc\ndef\nghi";
328/// let span1 = Span::new(input, 1, 7).unwrap();
329/// let span2 = Span::new(input, 5, 11).unwrap();
330/// let merged = merge_spans(&span1, &span2).unwrap();
331/// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
332///
333/// // Example 3: Non-contiguous spans
334/// let input = "abc\ndef\nghi";
335/// let span1 = Span::new(input, 1, 7).unwrap();
336/// let span2 = Span::new(input, 8, 11).unwrap();
337/// let merged = merge_spans(&span1, &span2);
338/// assert!(merged.is_none());
339/// ```
340pub fn merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>> {
341 if a.end() >= b.start() && a.start() <= b.end() {
342 // The spans overlap or are contiguous, so they can be merged.
343 Span::new(
344 a.get_input(),
345 core::cmp::min(a.start(), b.start()),
346 core::cmp::max(a.end(), b.end()),
347 )
348 } else {
349 // The spans don't overlap and aren't contiguous, so they can't be merged.
350 None
351 }
352}
353
354/// Line iterator for Spans, created by [`Span::lines_span()`].
355///
356/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each.
357///
358/// [`Span::lines_span()`]: struct.Span.html#method.lines_span
359pub struct LinesSpan<'i> {
360 span: &'i Span<'i>,
361 pos: usize,
362}
363
364impl<'i> Iterator for LinesSpan<'i> {
365 type Item = Span<'i>;
366 fn next(&mut self) -> Option<Self::Item> {
367 if self.pos > self.span.end {
368 return None;
369 }
370 let pos = position::Position::new(self.span.input, self.pos)?;
371 if pos.at_end() {
372 return None;
373 }
374
375 let line_start = pos.find_line_start();
376 self.pos = pos.find_line_end();
377
378 Span::new(self.span.input, line_start, self.pos)
379 }
380}
381
382/// Line iterator for Spans, created by [`Span::lines()`].
383///
384/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each.
385///
386/// [`Span::lines()`]: struct.Span.html#method.lines
387pub struct Lines<'i> {
388 inner: LinesSpan<'i>,
389}
390
391impl<'i> Iterator for Lines<'i> {
392 type Item = &'i str;
393 fn next(&mut self) -> Option<Self::Item> {
394 self.inner.next().map(|span| span.as_str())
395 }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// Sub-ranges are resolved relative to the span and keep referring to the original input.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        let span1 = span.get(..=2).unwrap();
        assert_eq!(span1.input, input);
        assert_eq!(span1.as_str(), "123");

        let span2 = span.get(..).unwrap();
        assert_eq!(span2.input, input);
        assert_eq!(span2.as_str(), "123abc");

        let span3 = span.get(3..).unwrap();
        assert_eq!(span3.input, input);
        assert_eq!(span3.as_str(), "abc");

        let span4 = span.get(0..0).unwrap();
        assert_eq!(span4.input, input);
        assert_eq!(span4.as_str(), "");
    }

    /// Ranges that reach outside of the span are rejected.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        assert!(span.get(0..100).is_none());
        assert!(span.get(100..200).is_none());
    }

    /// `Span::new` rejects offsets that split a character as well as reversed ranges.
    #[test]
    fn new_rejects_invalid_ranges() {
        // U+00E9 is encoded as two bytes in UTF-8, so offset 1 is not a character boundary.
        let input = "\u{e9}!";
        assert!(Span::new(input, 0, 1).is_none());
        assert!(Span::new(input, 1, 2).is_none());
        assert!(Span::new(input, 2, 0).is_none());
        assert!(Span::new(input, 0, 2).is_some());
    }

    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 50, 51);
        assert!(span2.is_none());
        let span3 = Span::new(input, 0, 8).unwrap();
        assert!(span != span3);
    }

    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        // `Position` is `Copy`, so `start` stays usable after this assignment.
        let mut end = start;

        assert!(end.skip(1));

        let span = start.span(&end);

        assert_eq!(span.split(), (start, end));
    }

    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "abc\n");
        assert_eq!(lines[1], "def\n");
        // Verify parity with lines_span()
        assert_eq!(lines, lines_span);
    }

    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "def\n");
        assert_eq!(lines[1], "ghi");
        // Verify parity with lines_span()
        assert_eq!(lines, lines_span);
    }

    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines_span: Vec<_> = span.lines_span().collect();
        let lines: Vec<_> = span.lines().collect();

        assert_eq!(lines_span.len(), 2);
        assert_eq!(lines_span[0], Span::new(input, 0, 4).unwrap());
        assert_eq!(lines_span[1], Span::new(input, 4, 8).unwrap());
        assert_eq!(
            lines_span
                .iter()
                .map(|span| span.as_str())
                .collect::<Vec<_>>(),
            lines
        );
    }

    #[test]
    fn get_input_of_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();

        assert_eq!(span.get_input(), input);
    }

    #[test]
    fn merge_contiguous() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 7, 11).unwrap();
        let merged = merge_spans(&span1, &span2).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    #[test]
    fn merge_overlapping() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 5, 11).unwrap();
        let merged = merge_spans(&span1, &span2).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    #[test]
    fn merge_non_contiguous() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 8, 11).unwrap();
        let merged = merge_spans(&span1, &span2);

        assert!(merged.is_none());
    }

    /// Swapping the arguments of `merge_spans` must not change the result.
    #[test]
    fn merge_is_order_independent() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 7, 11).unwrap();

        assert_eq!(merge_spans(&span2, &span1), merge_spans(&span1, &span2));
        assert_eq!(
            merge_spans(&span2, &span1),
            Some(Span::new(input, 1, 11).unwrap())
        );
    }
}