rune/modules/char.rs
1//! The character module for Rune.
2
3use core::char::ParseCharError;
4
5use crate::runtime::{Value, VmErrorKind, VmResult};
6use crate::{ContextError, Module};
7
8use crate as rune;
9
10/// The character module for Rune.
11#[rune::module(::std::char)]
12pub fn module() -> Result<Module, ContextError> {
13 let mut module = Module::from_meta(self::module_meta)?;
14
15 module.ty::<ParseCharError>()?;
16 module.function_meta(from_i64)?;
17 module.function_meta(to_i64)?;
18 module.function_meta(is_alphabetic)?;
19 module.function_meta(is_alphanumeric)?;
20 module.function_meta(is_control)?;
21 module.function_meta(is_lowercase)?;
22 module.function_meta(is_numeric)?;
23 module.function_meta(is_uppercase)?;
24 module.function_meta(is_whitespace)?;
25 module.function_meta(to_digit)?;
26 Ok(module)
27}
28
29/// Try to convert a number into a character.
30///
31/// # Examples
32///
33/// ```rune
34/// let c = char::from_i64(80);
35/// assert!(c.is_some());
36/// ```
37#[rune::function]
38fn from_i64(value: i64) -> VmResult<Option<Value>> {
39 if value < 0 {
40 VmResult::err(VmErrorKind::Underflow)
41 } else if value > u32::MAX as i64 {
42 VmResult::err(VmErrorKind::Overflow)
43 } else {
44 let Some(c) = core::char::from_u32(value as u32) else {
45 return VmResult::Ok(None);
46 };
47
48 VmResult::Ok(Some(Value::from(c)))
49 }
50}
51
52/// Convert a character into an integer.
53///
54/// # Examples
55///
56/// ```rune
57/// let c = char::from_i64(80)?;
58/// assert_eq!(c.to_i64(), 80);
59/// ```
60#[rune::function(instance)]
61fn to_i64(value: char) -> VmResult<i64> {
62 VmResult::Ok(value as i64)
63}
64
65/// Returns `true` if this `char` has the `Alphabetic` property.
66///
67/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
68/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
69///
70/// [Unicode Standard]: https://www.unicode.org/versions/latest/
71/// [ucd]: https://www.unicode.org/reports/tr44/
72/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
73///
74/// # Examples
75///
76/// ```rune
77/// assert!('a'.is_alphabetic());
78/// assert!('京'.is_alphabetic());
79///
80/// let c = '💝';
81/// // love is many things, but it is not alphabetic
82/// assert!(!c.is_alphabetic());
83/// ```
84#[rune::function(instance)]
85#[inline]
86fn is_alphabetic(c: char) -> bool {
87 char::is_alphabetic(c)
88}
89
90/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
91///
92/// [`is_alphabetic()`]: #method.is_alphabetic
93/// [`is_numeric()`]: #method.is_numeric
94///
95/// # Examples
96///
97/// Basic usage:
98///
99/// ```rune
100/// assert!('٣'.is_alphanumeric());
101/// assert!('7'.is_alphanumeric());
102/// assert!('৬'.is_alphanumeric());
103/// assert!('¾'.is_alphanumeric());
104/// assert!('①'.is_alphanumeric());
105/// assert!('K'.is_alphanumeric());
106/// assert!('و'.is_alphanumeric());
107/// assert!('藏'.is_alphanumeric());
108/// ```
109#[rune::function(instance)]
110#[inline]
111fn is_alphanumeric(c: char) -> bool {
112 char::is_alphanumeric(c)
113}
114
115/// Returns `true` if this `char` has the general category for control codes.
116///
117/// Control codes (code points with the general category of `Cc`) are described
118/// in Chapter 4 (Character Properties) of the [Unicode Standard] and specified
119/// in the [Unicode Character Database][ucd] [`UnicodeData.txt`].
120///
121/// [Unicode Standard]: https://www.unicode.org/versions/latest/
122/// [ucd]: https://www.unicode.org/reports/tr44/
123/// [`UnicodeData.txt`]:
124/// https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
125///
126/// # Examples
127///
128/// Basic usage:
129///
130/// ```rune
131/// // U+009C, STRING TERMINATOR
132/// assert!('\u{009c}'.is_control());
133/// assert!(!'q'.is_control());
134/// ```
135#[rune::function(instance)]
136#[inline]
137fn is_control(c: char) -> bool {
138 char::is_control(c)
139}
140
141/// Returns `true` if this `char` has the `Lowercase` property.
142///
143/// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode
144/// Standard] and specified in the [Unicode Character Database][ucd]
145/// [`DerivedCoreProperties.txt`].
146///
147/// [Unicode Standard]: https://www.unicode.org/versions/latest/
148/// [ucd]: https://www.unicode.org/reports/tr44/
149/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
150///
151/// # Examples
152///
153/// Basic usage:
154///
155/// ```rune
156/// assert!('a'.is_lowercase());
157/// assert!('δ'.is_lowercase());
158/// assert!(!'A'.is_lowercase());
159/// assert!(!'Δ'.is_lowercase());
160///
161/// // The various Chinese scripts and punctuation do not have case, and so:
162/// assert!(!'中'.is_lowercase());
163/// assert!(!' '.is_lowercase());
164/// ```
165#[rune::function(instance)]
166#[inline]
167fn is_lowercase(c: char) -> bool {
168 char::is_lowercase(c)
169}
170
171/// Returns `true` if this `char` has one of the general categories for numbers.
172///
173/// The general categories for numbers (`Nd` for decimal digits, `Nl` for
174/// letter-like numeric characters, and `No` for other numeric characters) are
175/// specified in the [Unicode Character Database][ucd] [`UnicodeData.txt`].
176///
177/// This method doesn't cover everything that could be considered a number, e.g.
178/// ideographic numbers like '三'. If you want everything including characters
179/// with overlapping purposes then you might want to use a unicode or
180/// language-processing library that exposes the appropriate character
181/// properties instead of looking at the unicode categories.
182///
183/// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
184/// `is_ascii_digit` or `is_digit` instead.
185///
186/// [Unicode Standard]: https://www.unicode.org/versions/latest/
187/// [ucd]: https://www.unicode.org/reports/tr44/
188/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
189///
190/// # Examples
191///
192/// Basic usage:
193///
194/// ```rune
195/// assert!('٣'.is_numeric());
196/// assert!('7'.is_numeric());
197/// assert!('৬'.is_numeric());
198/// assert!('¾'.is_numeric());
199/// assert!('①'.is_numeric());
200/// assert!(!'K'.is_numeric());
201/// assert!(!'و'.is_numeric());
202/// assert!(!'藏'.is_numeric());
203/// assert!(!'三'.is_numeric());
204/// ```
205#[rune::function(instance)]
206#[inline]
207fn is_numeric(c: char) -> bool {
208 char::is_numeric(c)
209}
210
211/// Returns `true` if this `char` has the `Uppercase` property.
212///
213/// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode
214/// Standard] and specified in the [Unicode Character Database][ucd]
215/// [`DerivedCoreProperties.txt`].
216///
217/// [Unicode Standard]: https://www.unicode.org/versions/latest/
218/// [ucd]: https://www.unicode.org/reports/tr44/
219/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
220///
221/// # Examples
222///
223/// Basic usage:
224///
225/// ```rune
226/// assert!(!'a'.is_uppercase());
227/// assert!(!'δ'.is_uppercase());
228/// assert!('A'.is_uppercase());
229/// assert!('Δ'.is_uppercase());
230///
231/// // The various Chinese scripts and punctuation do not have case, and so:
232/// assert!(!'中'.is_uppercase());
233/// assert!(!' '.is_uppercase());
234/// ```
235#[rune::function(instance)]
236#[inline]
237fn is_uppercase(c: char) -> bool {
238 char::is_uppercase(c)
239}
240
241/// Returns `true` if this `char` has the `White_Space` property.
242///
243/// `White_Space` is specified in the [Unicode Character Database][ucd]
244/// [`PropList.txt`].
245///
246/// [ucd]: https://www.unicode.org/reports/tr44/
247/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
248///
249/// # Examples
250///
251/// Basic usage:
252///
253/// ```rune
254/// assert!(' '.is_whitespace());
255///
256/// // line break
257/// assert!('\n'.is_whitespace());
258///
259/// // a non-breaking space
260/// assert!('\u{A0}'.is_whitespace());
261///
262/// assert!(!'越'.is_whitespace());
263/// ```
264#[rune::function(instance)]
265#[inline]
266fn is_whitespace(c: char) -> bool {
267 char::is_whitespace(c)
268}
269
270/// Converts a `char` to a digit in the given radix.
271///
272/// A 'radix' here is sometimes also called a 'base'. A radix of two
273/// indicates a binary number, a radix of ten, decimal, and a radix of
274/// sixteen, hexadecimal, to give some common values. Arbitrary
275/// radices are supported.
276///
277/// 'Digit' is defined to be only the following characters:
278///
279/// * `0-9`
280/// * `a-z`
281/// * `A-Z`
282///
283/// # Errors
284///
285/// Returns `None` if the `char` does not refer to a digit in the given radix.
286///
287/// # Panics
288///
289/// Panics if given a radix larger than 36.
290///
291/// # Examples
292///
293/// Basic usage:
294///
295/// ```rune
296/// assert_eq!('1'.to_digit(10), Some(1));
297/// assert_eq!('f'.to_digit(16), Some(15));
298/// ```
299///
300/// Passing a non-digit results in failure:
301///
302/// ```rune
303/// assert_eq!('f'.to_digit(10), None);
304/// assert_eq!('z'.to_digit(16), None);
305/// ```
306///
307/// Passing a large radix, causing a panic:
308///
309/// ```rune,should_panic
310/// // this panics
311/// let _ = '1'.to_digit(37);
312/// ```
313#[rune::function(instance)]
314#[inline]
315fn to_digit(c: char, radix: u32) -> VmResult<Option<u32>> {
316 if radix > 36 {
317 return VmResult::panic("to_digit: radix is too high (maximum 36)");
318 }
319
320 VmResult::Ok(char::to_digit(c, radix))
321}