rune/ast/
unescape.rs

1use core::char;
2use core::fmt;
3use core::iter::Peekable;
4use core::ops;
5
/// The ways in which unescaping a character, byte or string literal can fail.
///
/// The type is a plain field-less enum, so it is cheap to copy and can be
/// compared directly (for example in `assert_eq!` on a parse result).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ErrorKind {
    /// The character after the backslash does not start a supported escape,
    /// or the input ended right after the backslash.
    BadEscapeSequence,
    /// A `\u` escape was used while unescaping to bytes.
    BadUnicodeEscapeInByteString,
    /// A `\u{...}` escape was malformed or does not denote a valid `char`.
    BadUnicodeEscape,
    /// A `\x` escape in a character context was above `0x7f`.
    BadHexEscapeChar,
    /// A `\x` escape in a byte context was above `0xff`.
    BadHexEscapeByte,
    /// A `\x` escape was not followed by two hexadecimal digits.
    BadByteEscape,
}

impl fmt::Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Every message is a fixed string, so pick it first and emit it with a
        // single `write_str` instead of going through the formatting machinery.
        let message = match self {
            ErrorKind::BadEscapeSequence => "Bad escape sequence",
            ErrorKind::BadUnicodeEscapeInByteString => {
                "Unicode escapes are not supported as a byte or byte string"
            }
            ErrorKind::BadUnicodeEscape => "Bad unicode escape",
            ErrorKind::BadHexEscapeChar => {
                "This form of character escape may only be used with characters in the range [\\x00-\\x7f]"
            }
            ErrorKind::BadHexEscapeByte => {
                "This form of byte escape may only be used with characters in the range [\\x00-\\xff]"
            }
            ErrorKind::BadByteEscape => "Bad byte escape",
        };

        f.write_str(message)
    }
}

impl core::error::Error for ErrorKind {}
45
46/// Indicates if we are parsing template escapes.
47#[derive(Debug, Clone, Copy)]
48pub(crate) struct WithTemplate(pub(super) bool);
49
50impl ops::Deref for WithTemplate {
51    type Target = bool;
52
53    fn deref(&self) -> &Self::Target {
54        &self.0
55    }
56}
57
58/// Indicates if we are parsing line continuations or not.
59#[derive(Debug, Clone, Copy)]
60pub(super) struct WithLineCont(pub(super) bool);
61
62impl ops::Deref for WithLineCont {
63    type Target = bool;
64
65    fn deref(&self) -> &Self::Target {
66        &self.0
67    }
68}
69
70/// Parse a byte escape sequence.
71pub(super) fn parse_byte_escape(
72    it: &mut Peekable<impl Iterator<Item = (usize, char)>>,
73    with_line_cont: WithLineCont,
74) -> Result<Option<u8>, ErrorKind> {
75    let (_, c) = it.next().ok_or(ErrorKind::BadEscapeSequence)?;
76
77    Ok(Some(match c {
78        '\n' | '\r' if *with_line_cont => {
79            while let Some((_, c)) = it.peek() {
80                if !char::is_whitespace(*c) {
81                    break;
82                }
83
84                it.next();
85            }
86
87            return Ok(None);
88        }
89        '\'' => b'\'',
90        '\"' => b'\"',
91        'n' => b'\n',
92        'r' => b'\r',
93        't' => b'\t',
94        '\\' => b'\\',
95        '0' => b'\0',
96        'x' => {
97            let result = parse_hex_escape(it)?;
98
99            if result > 0xff {
100                return Err(ErrorKind::BadHexEscapeByte);
101            }
102
103            result as u8
104        }
105        'u' => {
106            return Err(ErrorKind::BadUnicodeEscapeInByteString);
107        }
108        _ => {
109            return Err(ErrorKind::BadEscapeSequence);
110        }
111    }))
112}
113
114/// Parse a byte escape sequence.
115pub(super) fn parse_char_escape(
116    it: &mut Peekable<impl Iterator<Item = (usize, char)>>,
117    with_template: WithTemplate,
118    with_line_cont: WithLineCont,
119) -> Result<Option<char>, ErrorKind> {
120    let (_, c) = it.next().ok_or(ErrorKind::BadEscapeSequence)?;
121
122    Ok(Some(match c {
123        '\n' | '\r' if *with_line_cont => {
124            while let Some((_, c)) = it.peek() {
125                if !char::is_whitespace(*c) {
126                    break;
127                }
128
129                it.next();
130            }
131
132            return Ok(None);
133        }
134        '$' if *with_template => '$',
135        '`' if *with_template => '`',
136        '\'' => '\'',
137        '\"' => '\"',
138        'n' => '\n',
139        'r' => '\r',
140        't' => '\t',
141        '\\' => '\\',
142        '0' => '\0',
143        'x' => {
144            let result = parse_hex_escape(it)?;
145
146            if result > 0x7f {
147                return Err(ErrorKind::BadHexEscapeChar);
148            }
149
150            if let Some(c) = char::from_u32(result) {
151                c
152            } else {
153                return Err(ErrorKind::BadByteEscape);
154            }
155        }
156        'u' => parse_unicode_escape(it)?,
157        _ => {
158            return Err(ErrorKind::BadEscapeSequence);
159        }
160    }))
161}
162
163/// Parse a hex escape.
164pub(super) fn parse_hex_escape(
165    it: &mut Peekable<impl Iterator<Item = (usize, char)>>,
166) -> Result<u32, ErrorKind> {
167    let mut result = 0u32;
168
169    for _ in 0..2 {
170        let (_, c) = it.next().ok_or(ErrorKind::BadByteEscape)?;
171
172        result = result.checked_shl(4).ok_or(ErrorKind::BadByteEscape)?;
173
174        result += match c {
175            '0'..='9' => c as u32 - '0' as u32,
176            'a'..='f' => c as u32 - 'a' as u32 + 10,
177            'A'..='F' => c as u32 - 'A' as u32 + 10,
178            _ => return Err(ErrorKind::BadByteEscape),
179        };
180    }
181
182    Ok(result)
183}
184
185/// Parse a unicode escape.
186pub(super) fn parse_unicode_escape(
187    it: &mut Peekable<impl Iterator<Item = (usize, char)>>,
188) -> Result<char, ErrorKind> {
189    match it.next() {
190        Some((_, '{')) => (),
191        _ => return Err(ErrorKind::BadUnicodeEscape),
192    };
193
194    let mut first = true;
195    let mut result = 0u32;
196
197    loop {
198        let (_, c) = it.next().ok_or(ErrorKind::BadUnicodeEscape)?;
199
200        match c {
201            '}' => {
202                if first {
203                    return Err(ErrorKind::BadUnicodeEscape);
204                }
205
206                if let Some(c) = char::from_u32(result) {
207                    return Ok(c);
208                }
209
210                return Err(ErrorKind::BadUnicodeEscape);
211            }
212            c => {
213                first = false;
214
215                result = match result.checked_shl(4) {
216                    Some(result) => result,
217                    None => {
218                        return Err(ErrorKind::BadUnicodeEscape);
219                    }
220                };
221
222                result += match c {
223                    '0'..='9' => c as u32 - '0' as u32,
224                    'a'..='f' => c as u32 - 'a' as u32 + 10,
225                    'A'..='F' => c as u32 - 'A' as u32 + 10,
226                    _ => {
227                        return Err(ErrorKind::BadUnicodeEscape);
228                    }
229                };
230            }
231        }
232    }
233}