pest/
span.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use core::fmt;
11use core::hash::{Hash, Hasher};
12use core::ops::{Bound, RangeBounds};
13use core::ptr;
14use core::str;
15
16use crate::position;
17
18/// A span over a `&str`. It is created from either [two `Position`s] or from a [`Pair`].
19///
20/// [two `Position`s]: struct.Position.html#method.span
21/// [`Pair`]: ../iterators/struct.Pair.html#method.span
22#[derive(Clone, Copy)]
23pub struct Span<'i> {
24    input: &'i str,
25    start: usize,
26    end: usize,
27}
28
29impl<'i> Span<'i> {
30    /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
31    pub(crate) fn new_internal(input: &str, start: usize, end: usize) -> Span<'_> {
32        debug_assert!(input.get(start..end).is_some());
33        Span { input, start, end }
34    }
35
36    /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index
37    /// into `input`.
38    ///
39    /// # Examples
40    ///
41    /// ```
42    /// # use pest::Span;
43    /// let input = "Hello!";
44    /// assert_eq!(None, Span::new(input, 100, 0));
45    /// assert!(Span::new(input, 0, input.len()).is_some());
46    /// ```
47    pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> {
48        if input.get(start..end).is_some() {
49            Some(Span { input, start, end })
50        } else {
51            None
52        }
53    }
54
55    /// Attempts to create a new span based on a sub-range.
56    ///
57    /// ```
58    /// use pest::Span;
59    /// let input = "Hello World!";
60    /// let world = Span::new(input, 6, input.len()).unwrap();
61    /// let orl = world.get(1..=3);
62    /// assert!(orl.is_some());
63    /// assert_eq!(orl.unwrap().as_str(), "orl");
64    /// ```
65    ///
66    /// # Examples
67    pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> {
68        let start = match range.start_bound() {
69            Bound::Included(offset) => *offset,
70            Bound::Excluded(offset) => *offset + 1,
71            Bound::Unbounded => 0,
72        };
73        let end = match range.end_bound() {
74            Bound::Included(offset) => *offset + 1,
75            Bound::Excluded(offset) => *offset,
76            Bound::Unbounded => self.as_str().len(),
77        };
78
79        self.as_str().get(start..end).map(|_| Span {
80            input: self.input,
81            start: self.start + start,
82            end: self.start + end,
83        })
84    }
85
86    /// Returns the `Span`'s start byte position as a `usize`.
87    ///
88    /// # Examples
89    ///
90    /// ```
91    /// # use pest::Position;
92    /// let input = "ab";
93    /// let start = Position::from_start(input);
94    /// let end = start.clone();
95    /// let span = start.span(&end);
96    ///
97    /// assert_eq!(span.start(), 0);
98    /// ```
99    #[inline]
100    pub fn start(&self) -> usize {
101        self.start
102    }
103
104    /// Returns the `Span`'s end byte position as a `usize`.
105    ///
106    /// # Examples
107    ///
108    /// ```
109    /// # use pest::Position;
110    /// let input = "ab";
111    /// let start = Position::from_start(input);
112    /// let end = start.clone();
113    /// let span = start.span(&end);
114    ///
115    /// assert_eq!(span.end(), 0);
116    /// ```
117    #[inline]
118    pub fn end(&self) -> usize {
119        self.end
120    }
121
122    /// Returns the `Span`'s start `Position`.
123    ///
124    /// # Examples
125    ///
126    /// ```
127    /// # use pest::Position;
128    /// let input = "ab";
129    /// let start = Position::from_start(input);
130    /// let end = start.clone();
131    /// let span = start.clone().span(&end);
132    ///
133    /// assert_eq!(span.start_pos(), start);
134    /// ```
135    #[inline]
136    pub fn start_pos(&self) -> position::Position<'i> {
137        position::Position::new_internal(self.input, self.start)
138    }
139
140    /// Returns the `Span`'s end `Position`.
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// # use pest::Position;
146    /// let input = "ab";
147    /// let start = Position::from_start(input);
148    /// let end = start.clone();
149    /// let span = start.span(&end);
150    ///
151    /// assert_eq!(span.end_pos(), end);
152    /// ```
153    #[inline]
154    pub fn end_pos(&self) -> position::Position<'i> {
155        position::Position::new_internal(self.input, self.end)
156    }
157
158    /// Splits the `Span` into a pair of `Position`s.
159    ///
160    /// # Examples
161    ///
162    /// ```
163    /// # use pest::Position;
164    /// let input = "ab";
165    /// let start = Position::from_start(input);
166    /// let end = start.clone();
167    /// let span = start.clone().span(&end);
168    ///
169    /// assert_eq!(span.split(), (start, end));
170    /// ```
171    #[inline]
172    pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
173        let pos1 = position::Position::new_internal(self.input, self.start);
174        let pos2 = position::Position::new_internal(self.input, self.end);
175
176        (pos1, pos2)
177    }
178
179    /// Captures a slice from the `&str` defined by the `Span`.
180    ///
181    /// # Examples
182    ///
183    /// ```
184    /// # use pest;
185    /// # #[allow(non_camel_case_types)]
186    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
187    /// enum Rule {}
188    ///
189    /// let input = "abc";
190    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap();
191    /// let start_pos = state.position().clone();
192    /// state = state.match_string("b").unwrap();
193    /// let span = start_pos.span(&state.position().clone());
194    /// assert_eq!(span.as_str(), "b");
195    /// ```
196    #[inline]
197    pub fn as_str(&self) -> &'i str {
198        // Span's start and end positions are always a UTF-8 borders.
199        &self.input[self.start..self.end]
200    }
201
202    /// Returns the input string of the `Span`.
203    ///
204    /// This function returns the input string of the `Span` as a `&str`. This is the source string
205    /// from which the `Span` was created. The returned `&str` can be used to examine the contents of
206    /// the `Span` or to perform further processing on the string.
207    ///
208    /// # Examples
209    ///
210    /// ```
211    /// # use pest;
212    /// # use pest::Span;
213    ///
214    /// // Example: Get input string from a span
215    /// let input = "abc\ndef\nghi";
216    /// let span = Span::new(input, 1, 7).unwrap();
217    /// assert_eq!(span.get_input(), input);
218    /// ```
219    pub fn get_input(&self) -> &'i str {
220        self.input
221    }
222
223    /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line.
224    ///
225    /// # Examples
226    ///
227    /// ```
228    /// # use pest;
229    /// # #[allow(non_camel_case_types)]
230    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
231    /// enum Rule {}
232    ///
233    /// let input = "a\nb\nc";
234    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
235    /// let start_pos = state.position().clone();
236    /// state = state.match_string("b\nc").unwrap();
237    /// let span = start_pos.span(&state.position().clone());
238    /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]);
239    /// ```
240    #[inline]
241    pub fn lines(&self) -> Lines<'_> {
242        Lines {
243            inner: self.lines_span(),
244        }
245    }
246
247    /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line.
248    ///
249    /// # Examples
250    ///
251    /// ```
252    /// # use pest;
253    /// # use pest::Span;
254    /// # #[allow(non_camel_case_types)]
255    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
256    /// enum Rule {}
257    ///
258    /// let input = "a\nb\nc";
259    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
260    /// let start_pos = state.position().clone();
261    /// state = state.match_string("b\nc").unwrap();
262    /// let span = start_pos.span(&state.position().clone());
263    /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]);
264    /// ```
265    pub fn lines_span(&self) -> LinesSpan<'_> {
266        LinesSpan {
267            span: self,
268            pos: self.start,
269        }
270    }
271}
272
273impl<'i> fmt::Debug for Span<'i> {
274    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
275        f.debug_struct("Span")
276            .field("str", &self.as_str())
277            .field("start", &self.start)
278            .field("end", &self.end)
279            .finish()
280    }
281}
282
283impl<'i> PartialEq for Span<'i> {
284    fn eq(&self, other: &Span<'i>) -> bool {
285        ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end
286    }
287}
288
289impl<'i> Eq for Span<'i> {}
290
291impl<'i> Hash for Span<'i> {
292    fn hash<H: Hasher>(&self, state: &mut H) {
293        (self.input as *const str).hash(state);
294        self.start.hash(state);
295        self.end.hash(state);
296    }
297}
298
299/// Merges two spans into one.
300///
301/// This function merges two spans that are contiguous or overlapping into a single span
302/// that covers the entire range of the two input spans. This is useful when you want to
303/// aggregate information from multiple spans into a single entity.
304///
305/// The function checks if the input spans are overlapping or contiguous by comparing their
306/// start and end positions. If they are, a new span is created with the minimum start position
307/// and the maximum end position of the two input spans.
308///
309/// If the input spans are neither overlapping nor contiguous, the function returns None,
310/// indicating that a merge operation was not possible.
311///
312/// # Examples
313///
314/// ```
315/// # use pest;
316/// # use pest::Span;
317/// # use pest::merge_spans;
318///
319/// // Example 1: Contiguous spans
320/// let input = "abc\ndef\nghi";
321/// let span1 = Span::new(input, 1, 7).unwrap();
322/// let span2 = Span::new(input, 7, 11).unwrap();
323/// let merged = merge_spans(&span1, &span2).unwrap();
324/// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
325///
326/// // Example 2: Overlapping spans
327/// let input = "abc\ndef\nghi";
328/// let span1 = Span::new(input, 1, 7).unwrap();
329/// let span2 = Span::new(input, 5, 11).unwrap();
330/// let merged = merge_spans(&span1, &span2).unwrap();
331/// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
332///
333/// // Example 3: Non-contiguous spans
334/// let input = "abc\ndef\nghi";
335/// let span1 = Span::new(input, 1, 7).unwrap();
336/// let span2 = Span::new(input, 8, 11).unwrap();
337/// let merged = merge_spans(&span1, &span2);
338/// assert!(merged.is_none());
339/// ```
340pub fn merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>> {
341    if a.end() >= b.start() && a.start() <= b.end() {
342        // The spans overlap or are contiguous, so they can be merged.
343        Span::new(
344            a.get_input(),
345            core::cmp::min(a.start(), b.start()),
346            core::cmp::max(a.end(), b.end()),
347        )
348    } else {
349        // The spans don't overlap and aren't contiguous, so they can't be merged.
350        None
351    }
352}
353
354/// Line iterator for Spans, created by [`Span::lines_span()`].
355///
356/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each.
357///
358/// [`Span::lines_span()`]: struct.Span.html#method.lines_span
359pub struct LinesSpan<'i> {
360    span: &'i Span<'i>,
361    pos: usize,
362}
363
364impl<'i> Iterator for LinesSpan<'i> {
365    type Item = Span<'i>;
366    fn next(&mut self) -> Option<Self::Item> {
367        if self.pos > self.span.end {
368            return None;
369        }
370        let pos = position::Position::new(self.span.input, self.pos)?;
371        if pos.at_end() {
372            return None;
373        }
374
375        let line_start = pos.find_line_start();
376        self.pos = pos.find_line_end();
377
378        Span::new(self.span.input, line_start, self.pos)
379    }
380}
381
382/// Line iterator for Spans, created by [`Span::lines()`].
383///
384/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each.
385///
386/// [`Span::lines()`]: struct.Span.html#method.lines
387pub struct Lines<'i> {
388    inner: LinesSpan<'i>,
389}
390
391impl<'i> Iterator for Lines<'i> {
392    type Item = &'i str;
393    fn next(&mut self) -> Option<Self::Item> {
394        self.inner.next().map(|span| span.as_str())
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    // Note: every test binds its input to a single local, because `Span` equality compares the
    // input by pointer identity rather than by content.

    /// Valid sub-ranges keep pointing at the full input and select the expected text.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        let cases = [
            (span.get(..=2), "123"),
            (span.get(..), "123abc"),
            (span.get(3..), "abc"),
            (span.get(0..0), ""),
        ];
        for &(sub, expected) in cases.iter() {
            assert!(sub.is_some());
            let sub = sub.unwrap();
            assert_eq!(sub.input, input);
            assert_eq!(sub.as_str(), expected);
        }
    }

    /// Out-of-bounds sub-ranges are rejected.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        assert!(span.get(0..100).is_none());
        assert!(span.get(100..200).is_none());
    }

    /// Out-of-bounds construction fails and spans with different offsets are unequal.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        assert!(Span::new(input, 50, 51).is_none());

        let span = Span::new(input, 1, 7).unwrap();
        let span3 = Span::new(input, 0, 8).unwrap();
        assert!(span != span3);
    }

    /// Splitting a span returns the two positions it was built from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.span(&end);

        assert_eq!(span.split(), (start, end));
    }

    /// A span starting and ending mid-line still yields the complete lines it touches.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(lines, ["abc\n", "def\n"]);
        assert_eq!(lines, lines_span); // Verify parity with lines_span()
    }

    /// A span reaching the end of the input yields the final, unterminated line as well.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(lines, ["def\n", "ghi"]);
        assert_eq!(lines, lines_span); // Verify parity with lines_span()
    }

    /// `lines_span()` yields whole-line spans whose text matches `lines()`.
    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines_span: Vec<_> = span.lines_span().collect();
        let lines: Vec<_> = span.lines().collect();

        assert_eq!(
            lines_span,
            [
                Span::new(input, 0, 4).unwrap(),
                Span::new(input, 4, 8).unwrap()
            ]
        );

        let texts: Vec<_> = lines_span.iter().map(|line| line.as_str()).collect();
        assert_eq!(texts, lines);
    }

    /// `get_input()` returns the whole input, not just the covered part.
    #[test]
    fn get_input_of_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();

        assert_eq!(span.get_input(), input);
    }

    /// Spans that touch are merged.
    #[test]
    fn merge_contiguous() {
        let input = "abc\ndef\nghi";
        let first = Span::new(input, 1, 7).unwrap();
        let second = Span::new(input, 7, 11).unwrap();
        let merged = merge_spans(&first, &second).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    /// Spans that overlap are merged.
    #[test]
    fn merge_overlapping() {
        let input = "abc\ndef\nghi";
        let first = Span::new(input, 1, 7).unwrap();
        let second = Span::new(input, 5, 11).unwrap();
        let merged = merge_spans(&first, &second).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    /// Spans separated by a gap cannot be merged.
    #[test]
    fn merge_non_contiguous() {
        let input = "abc\ndef\nghi";
        let first = Span::new(input, 1, 7).unwrap();
        let second = Span::new(input, 8, 11).unwrap();

        assert!(merge_spans(&first, &second).is_none());
    }
}