rune/grammar/
parser.rs

1use core::fmt;
2use core::mem::take;
3
4use crate::alloc::VecDeque;
5
6use crate::ast::{Kind, OptionSpanned, Span, Token};
7use crate::compile::{Error, ErrorKind, Result, WithSpan};
8use crate::grammar::ws;
9use crate::macros::TokenStreamIter;
10use crate::parse::{Advance, IntoExpectation, Lexer};
11use crate::shared::{rune_trace, FixedVec};
12
13use super::{inner_token, Flavor, InternalChildren, Node, Tree};
14
15/// A checkpoint during tree construction.
16#[derive(Clone)]
17pub(super) struct Checkpoint {
18    span: Span,
19    inner: syntree::Checkpoint<syntree::pointer::PointerUsize>,
20}
21
22impl Checkpoint {
23    /// Get the span of the checkpoint.
24    pub(super) fn span(&self) -> Span {
25        self.span
26    }
27}
28
29pub(super) struct Parser<'a> {
30    lexer: Source<'a>,
31    ws: VecDeque<Token>,
32    buf: VecDeque<(Token, usize)>,
33    tree: syntree::Builder<Kind, Flavor>,
34    eof: Token,
35    include_whitespace: bool,
36}
37
38impl<'a> Parser<'a> {
39    pub(super) fn new(source: Source<'a>) -> Self {
40        let span = source.span().unwrap_or_else(Span::empty).tail();
41
42        let eof = Token {
43            span,
44            kind: Kind::Eof,
45        };
46
47        Self {
48            lexer: source,
49            ws: VecDeque::new(),
50            buf: VecDeque::new(),
51            tree: syntree::Builder::new_with(),
52            eof,
53            include_whitespace: false,
54        }
55    }
56
57    /// Configure whether whitespace should be ignored.
58    pub(super) fn include_whitespace(&mut self, include_whitespace: bool) {
59        self.include_whitespace = include_whitespace;
60    }
61
62    /// Generate an error encompassing the current token.
63    pub(super) fn expected_at(
64        &mut self,
65        at: usize,
66        expected: impl IntoExpectation,
67    ) -> Result<Error> {
68        let tok = self.nth_token(at)?;
69
70        Ok(Error::new(
71            tok.span,
72            ErrorKind::ExpectedSyntax {
73                expected: expected.into_expectation(),
74                actual: tok.kind.into_expectation(),
75            },
76        ))
77    }
78
79    /// Generate an error encompassing the from span.
80    #[tracing::instrument(skip_all)]
81    pub(super) fn error(&mut self, from: Span, kind: ErrorKind) -> Result<Error> {
82        let to = self.nth_token(0)?;
83        let span = from.join(to.span);
84        Ok(Error::new(span, kind))
85    }
86
87    /// Test if we are at EOF.
88    #[tracing::instrument(skip_all)]
89    pub(super) fn is_eof(&mut self) -> Result<bool> {
90        Ok(self.peek()? == Kind::Eof)
91    }
92
93    /// Construct the syntax tree.
94    #[tracing::instrument(skip_all)]
95    pub(crate) fn build(self) -> Result<Tree> {
96        let tree = self.tree.build().with_span(self.eof.span)?;
97        Ok(Tree::new(tree))
98    }
99
100    #[tracing::instrument(skip_all)]
101    pub(super) fn checkpoint(&mut self) -> Result<Checkpoint> {
102        let span = self.flush_ws()?;
103
104        Ok(Checkpoint {
105            span,
106            inner: self.tree.checkpoint().with_span(span)?,
107        })
108    }
109
110    #[tracing::instrument(skip_all)]
111    pub(super) fn bump(&mut self) -> Result<Token> {
112        let tok = self.next()?;
113        emit(&mut self.tree, tok)?;
114        Ok(tok)
115    }
116
117    /// Bump while the given token matches.
118    #[tracing::instrument(skip_all)]
119    pub(super) fn bump_while(&mut self, kind: Kind) -> Result<bool> {
120        let mut any = false;
121
122        while self.peek()? == kind {
123            self.bump()?;
124            any = true;
125        }
126
127        Ok(any)
128    }
129
130    /// Bump if the given token matches.
131    #[tracing::instrument(skip_all)]
132    pub(super) fn bump_if_matches(&mut self, m: fn(Kind) -> bool) -> Result<bool> {
133        self._bump_if_matches(m)
134    }
135
136    #[tracing::instrument(skip_all)]
137    pub(super) fn bump_if(&mut self, kind: Kind) -> Result<bool> {
138        self._bump_if_matches(|k| k == kind)
139    }
140
141    #[inline]
142    fn _bump_if_matches(&mut self, m: impl FnOnce(Kind) -> bool) -> Result<bool> {
143        if m(self.peek()?) {
144            self.bump()?;
145            Ok(true)
146        } else {
147            Ok(false)
148        }
149    }
150
151    /// Open a new node.
152    pub(super) fn open(&mut self, kind: Kind) -> Result<()> {
153        self.tree.open(kind).with_span(Span::point(0))?;
154        Ok(())
155    }
156
157    /// Close the last opened node.
158    pub(super) fn close(&mut self) -> Result<()> {
159        self.tree.close().with_span(Span::point(0))?;
160        Ok(())
161    }
162
163    /// Bump and immediately close a token with the specified kind.
164    #[tracing::instrument(skip_all)]
165    pub(super) fn push(&mut self, kind: Kind) -> Result<()> {
166        let tok = self.next()?;
167        self.tree.open(kind).with_span(tok.span)?;
168        let span = syntree::Span::new(tok.span.start.0, tok.span.end.0);
169        self.tree.token_with(tok.kind, span).with_span(tok.span)?;
170        self.tree.close().with_span(tok.span)?;
171        Ok(())
172    }
173
174    /// Bump an empty node.
175    #[tracing::instrument(skip_all)]
176    pub(super) fn empty(&mut self, kind: Kind) -> Result<()> {
177        self.flush_ws()?;
178        let span = self.nth_token(0)?.span;
179        let s = span.head();
180        let s = syntree::Span::new(s.start.0, s.end.0);
181        self.tree.token_with(kind, s).with_span(span)?;
182        Ok(())
183    }
184
185    /// Close a node at the given checkpoint.
186    #[tracing::instrument(skip_all)]
187    pub(super) fn close_at(&mut self, c: &Checkpoint, kind: Kind) -> Result<()> {
188        self.tree.close_at(&c.inner, kind).with_span(c.span)?;
189        Ok(())
190    }
191
192    /// Peek the next token skipping over whitespace.
193    #[tracing::instrument(skip_all)]
194    pub(super) fn peek(&mut self) -> Result<Kind> {
195        self.nth(0)
196    }
197
198    #[tracing::instrument(skip(self))]
199    pub(super) fn glued(&mut self, n: usize) -> Result<Kind> {
200        self.fill(n)?;
201
202        let Some((tok, 0)) = self.buf.get(n) else {
203            return Ok(Kind::Eof);
204        };
205
206        Ok(tok.kind)
207    }
208
209    /// Flush whitespace.
210    pub(super) fn flush_ws(&mut self) -> Result<Span> {
211        self.fill(0)?;
212
213        let Some((tok, ws)) = self.buf.front_mut() else {
214            for tok in self.ws.drain(..) {
215                emit(&mut self.tree, tok)?;
216            }
217
218            return Ok(self.eof.span);
219        };
220
221        let span = tok.span.head();
222
223        for tok in self.ws.drain(..take(ws)) {
224            emit(&mut self.tree, tok)?;
225        }
226
227        Ok(span)
228    }
229
230    #[inline]
231    pub(super) fn nth(&mut self, n: usize) -> Result<Kind> {
232        self.fill(n)?;
233
234        let Some((tok, _)) = self.buf.get(n) else {
235            return Ok(Kind::Eof);
236        };
237
238        Ok(tok.kind)
239    }
240
241    #[inline]
242    pub(super) fn nth_token(&mut self, n: usize) -> Result<Token> {
243        self.fill(n)?;
244
245        let Some((tok, _)) = self.buf.get(n) else {
246            return Ok(self.eof);
247        };
248
249        Ok(*tok)
250    }
251
252    /// Access an array.
253    pub(super) fn array<const N: usize>(&mut self) -> Result<FixedVec<Token, N>> {
254        if N == 0 {
255            return Ok(FixedVec::new());
256        }
257
258        self.fill(N - 1)?;
259
260        let mut vec = FixedVec::new();
261
262        for index in 0.. {
263            if vec.len() == N {
264                break;
265            }
266
267            if let Some((tok, _)) = self.buf.get(index) {
268                vec.try_push(*tok).with_span(tok.span)?;
269            } else {
270                vec.try_push(self.eof).with_span(self.eof.span)?;
271            }
272        }
273
274        Ok(vec)
275    }
276
277    #[tracing::instrument(skip_all)]
278    fn next(&mut self) -> Result<Token> {
279        self.flush_ws()?;
280
281        let Some((tok, _)) = self.buf.pop_front() else {
282            return Ok(self.eof);
283        };
284
285        rune_trace!("grammar.rs", tok);
286        Ok(tok)
287    }
288
289    fn advance(&mut self, n: usize) -> Result<()> {
290        for (tok, ws) in self.buf.drain(..n) {
291            for tok in self.ws.drain(..ws) {
292                emit(&mut self.tree, tok)?;
293            }
294
295            emit(&mut self.tree, tok)?;
296        }
297
298        Ok(())
299    }
300
301    fn fill(&mut self, n: usize) -> Result<()> {
302        let mut ws = 0;
303
304        while self.buf.len() <= n {
305            let Some(tok) = self.lexer.next()? else {
306                break;
307            };
308
309            if !matches!(tok.kind, ws!()) {
310                self.buf
311                    .try_push_back((tok, take(&mut ws)))
312                    .with_span(tok.span)?;
313            } else if self.include_whitespace {
314                ws += 1;
315                self.ws.try_push_back(tok).with_span(tok.span)?;
316            }
317        }
318
319        Ok(())
320    }
321}
322
323#[inline]
324fn emit(tree: &mut syntree::Builder<Kind, Flavor>, tok: Token) -> Result<()> {
325    let span = syntree::Span::new(tok.span.start.0, tok.span.end.0);
326    tree.token_with(tok.kind, span).with_span(tok.span)?;
327    Ok(())
328}
329
330/// A source adapter.
331pub(super) struct Source<'a> {
332    inner: SourceInner<'a>,
333}
334
335impl<'a> Source<'a> {
336    /// Construct a source based on a lexer.
337    pub(super) fn lexer(lexer: Lexer<'a>) -> Self {
338        Self {
339            inner: SourceInner::Lexer(lexer),
340        }
341    }
342
343    /// Construct a source based on a token stream.
344    pub(super) fn token_stream(iter: TokenStreamIter<'a>) -> Self {
345        Self {
346            inner: SourceInner::TokenStream(iter),
347        }
348    }
349
350    /// Construct a source based on a token stream.
351    pub(super) fn node(node: Node<'a>) -> Self {
352        Self {
353            inner: SourceInner::Node(NodeSource {
354                span: node.span(),
355                children: node.internal_children(),
356            }),
357        }
358    }
359
360    /// Get the span of the source.
361    fn span(&self) -> Option<Span> {
362        match &self.inner {
363            SourceInner::Lexer(lexer) => Some(lexer.span()),
364            SourceInner::TokenStream(token_stream) => token_stream.option_span(),
365            SourceInner::Node(source) => Some(source.span),
366        }
367    }
368
369    /// Get the next token in the stream.
370    fn next(&mut self) -> Result<Option<Token>> {
371        match &mut self.inner {
372            SourceInner::Lexer(lexer) => lexer.next(),
373            SourceInner::TokenStream(token_stream) => Ok(token_stream.next()),
374            SourceInner::Node(source) => Ok(source.children.next().map(inner_token)),
375        }
376    }
377}
378
379impl fmt::Debug for Source<'_> {
380    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
381        fmt::Debug::fmt(&self.inner, f)
382    }
383}
384
385#[derive(Debug)]
386enum SourceInner<'a> {
387    Lexer(Lexer<'a>),
388    TokenStream(TokenStreamIter<'a>),
389    Node(NodeSource<'a>),
390}
391
392impl Advance for Parser<'_> {
393    type Error = Error;
394
395    #[inline]
396    fn advance(&mut self, n: usize) -> Result<()> {
397        Parser::advance(self, n)?;
398        Ok(())
399    }
400}
401
402struct NodeSource<'a> {
403    span: Span,
404    children: InternalChildren<'a>,
405}
406
407impl fmt::Debug for NodeSource<'_> {
408    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
409        f.debug_struct("NodeSource")
410            .field("span", &self.span)
411            .finish_non_exhaustive()
412    }
413}