rune/modules/
string.rs

1//! Strings.
2
3use core::char;
4use core::cmp::Ordering;
5use core::num::{ParseFloatError, ParseIntError};
6
7use crate as rune;
8use crate::alloc;
9use crate::alloc::fmt::TryWrite;
10use crate::alloc::prelude::*;
11use crate::alloc::string::FromUtf8Error;
12use crate::compile::Named;
13use crate::runtime::{
14    Bytes, Formatter, FromValue, Function, Hasher, Inline, MaybeTypeOf, Range, RangeFrom,
15    RangeFull, RangeInclusive, RangeTo, RangeToInclusive, Ref, Repr, ToValue, TypeOf, Value,
16    VmError, VmErrorKind,
17};
18use crate::{Any, ContextError, Module, TypeHash};
19
20/// Strings.
21///
22/// Strings in Rune are declared with the literal `"string"` syntax, but can also be
23/// interacted with through the fundamental [`String`] type.
24///
25/// ```rune
26/// let string1 = "Hello";
27/// let string2 = String::new();
28/// string2.push_str("Hello");
29///
30/// assert_eq!(string1, string2);
31/// ```
32#[rune::module(::std::string)]
33pub fn module() -> Result<Module, ContextError> {
34    let mut m = Module::from_meta(self::module__meta)?;
35
36    m.ty::<String>()?;
37
38    m.function_meta(string_from)?;
39    m.function_meta(string_from_str)?;
40    m.function_meta(string_new)?;
41    m.function_meta(string_with_capacity)?;
42    m.function_meta(len)?;
43    m.function_meta(starts_with)?;
44    m.function_meta(ends_with)?;
45    m.function_meta(capacity)?;
46    m.function_meta(clear)?;
47    m.function_meta(contains)?;
48    m.function_meta(push)?;
49    m.function_meta(push_str)?;
50    m.function_meta(reserve)?;
51    m.function_meta(reserve_exact)?;
52    m.function_meta(from_utf8)?;
53    m.function_meta(as_bytes)?;
54    m.function_meta(into_bytes)?;
55    m.function_meta(shrink_to_fit)?;
56    m.function_meta(char_at)?;
57    m.function_meta(split)?;
58    m.function_meta(split_once)?;
59    m.associated_function("split_str", __rune_fn__split)?;
60    m.function_meta(trim)?;
61    m.function_meta(trim_end)?;
62    m.function_meta(replace)?;
63    m.function_meta(is_empty)?;
64    m.function_meta(chars)?;
65    m.function_meta(get__meta)?;
66    m.function_meta(parse_int)?;
67    m.function_meta(parse_float)?;
68    m.function_meta(parse_char)?;
69    m.function_meta(to_lowercase)?;
70    m.function_meta(to_uppercase)?;
71
72    m.function_meta(add)?;
73    m.function_meta(add_assign)?;
74    m.function_meta(index_get)?;
75
76    m.function_meta(clone__meta)?;
77    m.implement_trait::<String>(rune::item!(::std::clone::Clone))?;
78
79    m.function_meta(partial_eq__meta)?;
80    m.implement_trait::<String>(rune::item!(::std::cmp::PartialEq))?;
81
82    m.function_meta(eq__meta)?;
83    m.implement_trait::<String>(rune::item!(::std::cmp::Eq))?;
84
85    m.function_meta(partial_cmp__meta)?;
86    m.implement_trait::<String>(rune::item!(::std::cmp::PartialOrd))?;
87
88    m.function_meta(cmp__meta)?;
89    m.implement_trait::<String>(rune::item!(::std::cmp::Ord))?;
90
91    m.function_meta(hash__meta)?;
92
93    m.function_meta(display_fmt__meta)?;
94    m.function_meta(debug_fmt__meta)?;
95
96    m.ty::<Chars>()?;
97    m.function_meta(Chars::next__meta)?;
98    m.function_meta(Chars::next_back__meta)?;
99    m.implement_trait::<Chars>(rune::item!(::std::iter::Iterator))?;
100    m.implement_trait::<Chars>(rune::item!(::std::iter::DoubleEndedIterator))?;
101
102    macro_rules! split {
103        ($ty:ty) => {
104            m.ty::<Split<$ty>>()?;
105            m.function_meta(Split::<$ty>::next__meta)?;
106            m.implement_trait::<Split<$ty>>(rune::item!(::std::iter::Iterator))?;
107        };
108    }
109
110    split!(Function);
111    split!(String);
112    split!(char);
113    Ok(m)
114}
115
116/// Converts a vector of bytes to a `String`.
117///
118/// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes
119/// ([`Vec<u8>`]) is made of bytes, so this function converts between the two.
120/// Not all byte slices are valid `String`s, however: `String` requires that it
121/// is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
122/// UTF-8, and then does the conversion.
123///
124/// If you are sure that the byte slice is valid UTF-8, and you don't want to
125/// incur the overhead of the validity check, there is an unsafe version of this
126/// function, [`from_utf8_unchecked`], which has the same behavior but skips the
127/// check.
128///
129/// The inverse of this method is [`into_bytes`].
130///
131/// # Errors
132///
133/// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
134/// provided bytes are not UTF-8. The vector you moved in is also included.
135///
136/// # Examples
137///
138/// Basic usage:
139///
140/// ```rune
141/// // some bytes, in a vector
142/// let sparkle_heart = Bytes::from_vec([240u8, 159u8, 146u8, 150u8]);
143///
144/// // We know these bytes are valid, so we'll use `unwrap()`.
145/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
146///
147/// assert_eq!("💖", sparkle_heart);
148/// ```
149///
150/// Incorrect bytes:
151///
152/// ```rune
153/// // some invalid bytes, in a vector
154/// let sparkle_heart = Bytes::from_vec([0u8, 159u8, 146u8, 150u8]);
155///
156/// assert!(String::from_utf8(sparkle_heart).is_err());
157/// ```
158///
159/// See the docs for [`FromUtf8Error`] for more details on what you can do with
160/// this error.
161///
162/// [`from_utf8_unchecked`]: String::from_utf8_unchecked
163/// [`Vec<u8>`]: crate::vec::Vec "Vec"
164/// [`&str`]: prim@str "&str"
165/// [`into_bytes`]: String::into_bytes
166#[rune::function(free, path = String::from_utf8)]
167fn from_utf8(bytes: &[u8]) -> Result<Result<String, FromUtf8Error>, VmError> {
168    let vec = Vec::try_from(bytes)?;
169    Ok(String::from_utf8(vec))
170}
171
172/// Returns a byte slice of this `String`'s contents.
173///
174/// The inverse of this method is [`from_utf8`].
175///
176/// [`from_utf8`]: String::from_utf8
177///
178/// # Examples
179///
180/// Basic usage:
181///
182/// ```rune
183/// let s = "hello";
184/// assert_eq!(b"hello", s.as_bytes());
185/// assert!(is_readable(s));
186/// ```
187#[rune::function(instance)]
188fn as_bytes(s: &str) -> Result<Bytes, VmError> {
189    Ok(Bytes::from_vec(Vec::try_from(s.as_bytes())?))
190}
191
192/// Constructs a string from another string.
193///
194/// # Examples
195///
196/// Basic usage:
197///
198/// ```rune
199/// let s = String::from("hello");
200/// assert_eq!(s, "hello");
201/// ```
202#[rune::function(free, path = String::from)]
203fn string_from(value: &str) -> Result<String, VmError> {
204    Ok(String::try_from(value)?)
205}
206
207#[rune::function(free, path = String::from_str, deprecated = "Use String::from instead")]
208fn string_from_str(value: &str) -> Result<String, VmError> {
209    Ok(String::try_from(value)?)
210}
211
212/// Creates a new empty `String`.
213///
214/// Given that the `String` is empty, this will not allocate any initial buffer.
215/// While that means that this initial operation is very inexpensive, it may
216/// cause excessive allocation later when you add data. If you have an idea of
217/// how much data the `String` will hold, consider the [`with_capacity`] method
218/// to prevent excessive re-allocation.
219///
220/// [`with_capacity`]: String::with_capacity
221///
222/// # Examples
223///
224/// Basic usage:
225///
226/// ```rune
227/// let s = String::new();
228/// ```
#[rune::function(free, path = String::new)]
fn string_new() -> String {
    // Exposed to scripts as the free function `String::new`.
    String::new()
}
233
234/// Creates a new empty `String` with at least the specified capacity.
235///
236/// `String`s have an internal buffer to hold their data. The capacity is the
237/// length of that buffer, and can be queried with the [`capacity`] method. This
238/// method creates an empty `String`, but one with an initial buffer that can
239/// hold at least `capacity` bytes. This is useful when you may be appending a
240/// bunch of data to the `String`, reducing the number of reallocations it needs
241/// to do.
242///
243/// [`capacity`]: String::capacity
244///
245/// If the given capacity is `0`, no allocation will occur, and this method is
246/// identical to the [`new`] method.
247///
248/// [`new`]: String::new
249///
250/// # Examples
251///
252/// Basic usage:
253///
254/// ```rune
255/// let s = String::with_capacity(10);
256///
257/// // The String contains no chars, even though it has capacity for more
258/// assert_eq!(s.len(), 0);
259///
260/// // These are all done without reallocating...
261/// let cap = s.capacity();
262///
263/// for _ in 0..10 {
264///     s.push('a');
265/// }
266///
267/// assert_eq!(s.capacity(), cap);
268///
269/// // ...but this may make the string reallocate
270/// s.push('a');
271/// ```
272#[rune::function(free, path = String::with_capacity)]
273fn string_with_capacity(capacity: usize) -> Result<String, VmError> {
274    Ok(String::try_with_capacity(capacity)?)
275}
276
277/// Returns the length of `self`.
278///
279/// This length is in bytes, not [`char`]s or graphemes. In other words, it
280/// might not be what a human considers the length of the string.
281///
282/// [`char`]: prim@char
283///
284/// # Examples
285///
286/// Basic usage:
287///
288/// ```rune
289/// let len = "foo".len();
290/// assert_eq!(3, len);
291///
292/// assert_eq!("ƒoo".len(), 4); // fancy f!
293/// assert_eq!("ƒoo".chars().count(), 3);
294/// ```
295#[rune::function(instance)]
296fn len(this: &str) -> usize {
297    this.len()
298}
299
300/// Returns `true` if the given pattern matches a prefix of this string slice.
301///
302/// Returns `false` if it does not.
303///
/// Only string patterns are supported here: `other` is compared against the
/// start of this string.
306///
307/// [`char`]: prim@char
308/// [pattern]: self::pattern
309///
310/// # Examples
311///
312/// Basic usage:
313///
314/// ```rune
315/// let bananas = "bananas";
316///
317/// assert!(bananas.starts_with("bana"));
318/// assert!(!bananas.starts_with("nana"));
319/// ```
320#[rune::function(instance)]
321fn starts_with(this: &str, other: &str) -> bool {
322    this.starts_with(other)
323}
324
325/// Returns `true` if the given pattern matches a suffix of this string slice.
326///
327/// Returns `false` if it does not.
328///
/// Only string patterns are supported here: `other` is compared against the
/// end of this string.
331///
332/// [`char`]: prim@char
333/// [pattern]: self::pattern
334///
335/// # Examples
336///
337/// Basic usage:
338///
339/// ```rune
340/// let bananas = "bananas";
341///
342/// assert!(bananas.ends_with("anas"));
343/// assert!(!bananas.ends_with("nana"));
344/// ```
345#[rune::function(instance)]
346fn ends_with(this: &str, other: &str) -> bool {
347    this.ends_with(other)
348}
349
350/// Returns this `String`'s capacity, in bytes.
351///
352/// # Examples
353///
354/// Basic usage:
355///
356/// ```rune
357/// let s = String::with_capacity(10);
358///
359/// assert!(s.capacity() >= 10);
360/// ```
361#[rune::function(instance)]
362fn capacity(this: &String) -> usize {
363    this.capacity()
364}
365
366/// Truncates this `String`, removing all contents.
367///
368/// While this means the `String` will have a length of zero, it does not touch
369/// its capacity.
370///
371/// # Examples
372///
373/// Basic usage:
374///
375/// ```rune
376/// let s = "foo";
377///
378/// s.clear();
379///
380/// assert!(s.is_empty());
381/// assert_eq!(0, s.len());
382/// assert_eq!(3, s.capacity());
383/// ```
384#[rune::function(instance)]
385fn clear(this: &mut String) {
386    this.clear();
387}
388
389/// Returns `true` if the given pattern matches a sub-slice of this string
390/// slice.
391///
392/// Returns `false` if it does not.
393///
/// Only string patterns are supported here: `other` is searched for as a
/// substring of this string.
396///
397/// [`char`]: prim@char
398/// [pattern]: self::pattern
399///
400/// # Examples
401///
402/// Basic usage:
403///
404/// ```rune
405/// let bananas = "bananas";
406///
407/// assert!(bananas.contains("nana"));
408/// assert!(!bananas.contains("apples"));
409/// ```
410#[rune::function(instance)]
411fn contains(this: &str, other: &str) -> bool {
412    this.contains(other)
413}
414
415/// Appends the given [`char`] to the end of this `String`.
416///
417/// # Examples
418///
419/// Basic usage:
420///
421/// ```rune
422/// let s = "abc";
423///
424/// s.push('1');
425/// s.push('2');
426/// s.push('3');
427///
428/// assert_eq!("abc123", s);
429/// ```
430#[rune::function(instance)]
431fn push(this: &mut String, c: char) -> Result<(), VmError> {
432    this.try_push(c)?;
433    Ok(())
434}
435
436/// Appends a given string slice onto the end of this `String`.
437///
438/// # Examples
439///
440/// Basic usage:
441///
442/// ```rune
443/// let s = "foo";
444///
445/// s.push_str("bar");
446///
447/// assert_eq!("foobar", s);
448/// ```
449#[rune::function(instance)]
450fn push_str(this: &mut String, other: &str) -> Result<(), VmError> {
451    this.try_push_str(other)?;
452    Ok(())
453}
454
455/// Reserves capacity for at least `additional` bytes more than the current
456/// length. The allocator may reserve more space to speculatively avoid frequent
457/// allocations. After calling `reserve`, capacity will be greater than or equal
458/// to `self.len() + additional`. Does nothing if capacity is already
459/// sufficient.
460///
/// # Errors
///
/// Does not panic: if the capacity cannot be reserved (for example because the
/// new capacity overflows [`usize`]), the failure is raised as a VM error.
464///
465/// # Examples
466///
467/// Basic usage:
468///
469/// ```rune
470/// let s = String::new();
471///
472/// s.reserve(10);
473///
474/// assert!(s.capacity() >= 10);
475/// ```
476///
477/// This might not actually increase the capacity:
478///
479/// ```rune
480/// let s = String::with_capacity(10);
481/// s.push('a');
482/// s.push('b');
483///
484/// // s now has a length of 2 and a capacity of at least 10
485/// let capacity = s.capacity();
486/// assert_eq!(2, s.len());
487/// assert!(capacity >= 10);
488///
489/// // Since we already have at least an extra 8 capacity, calling this...
490/// s.reserve(8);
491///
492/// // ... doesn't actually increase.
493/// assert_eq!(capacity, s.capacity());
494/// ```
495#[rune::function(instance)]
496fn reserve(this: &mut String, additional: usize) -> Result<(), VmError> {
497    this.try_reserve(additional)?;
498    Ok(())
499}
500
501/// Reserves the minimum capacity for at least `additional` bytes more than the
502/// current length. Unlike [`reserve`], this will not deliberately over-allocate
503/// to speculatively avoid frequent allocations. After calling `reserve_exact`,
504/// capacity will be greater than or equal to `self.len() + additional`. Does
505/// nothing if the capacity is already sufficient.
506///
507/// [`reserve`]: String::reserve
508///
/// # Errors
///
/// Does not panic: if the capacity cannot be reserved (for example because the
/// new capacity overflows [`usize`]), an error is returned instead.
512///
513/// # Examples
514///
515/// Basic usage:
516///
517/// ```rune
518/// let s = String::new();
519///
520/// s.reserve_exact(10);
521///
522/// assert!(s.capacity() >= 10);
523/// ```
524///
525/// This might not actually increase the capacity:
526///
527/// ```rune
528/// let s = String::with_capacity(10);
529/// s.push('a');
530/// s.push('b');
531///
532/// // s now has a length of 2 and a capacity of at least 10
533/// let capacity = s.capacity();
534/// assert_eq!(2, s.len());
535/// assert!(capacity >= 10);
536///
537/// // Since we already have at least an extra 8 capacity, calling this...
538/// s.reserve_exact(8);
539///
540/// // ... doesn't actually increase.
541/// assert_eq!(capacity, s.capacity());
542/// ```
543#[rune::function(instance)]
544fn reserve_exact(this: &mut String, additional: usize) -> alloc::Result<()> {
545    this.try_reserve_exact(additional)
546}
547
548/// Returns a byte slice of this `String`'s contents while moving the string.
549///
550/// The inverse of this method is [`from_utf8`].
551///
552/// [`from_utf8`]: String::from_utf8
553///
554/// # Examples
555///
556/// Basic usage:
557///
558/// ```rune
559/// let s = "hello";
560/// assert_eq!(b"hello", s.into_bytes());
561/// assert!(!is_readable(s));
562/// ```
563#[rune::function(instance)]
564fn into_bytes(s: String) -> Bytes {
565    Bytes::from_vec(s.into_bytes())
566}
567
568/// Checks that `index`-th byte is the first byte in a UTF-8 code point sequence
569/// or the end of the string.
570///
571/// The start and end of the string (when `index == self.len()`) are considered
572/// to be boundaries.
573///
574/// Returns `false` if `index` is greater than `self.len()`.
575///
576/// # Examples
577///
578/// ```rune
579/// let s = "Löwe 老虎 Léopard";
580/// assert!(s.is_char_boundary(0));
581/// // start of `老`
582/// assert!(s.is_char_boundary(6));
583/// assert!(s.is_char_boundary(s.len()));
584///
585/// // second byte of `ö`
586/// assert!(!s.is_char_boundary(2));
587///
588/// // third byte of `老`
589/// assert!(!s.is_char_boundary(8));
590/// ```
591#[rune::function(instance)]
592fn is_char_boundary(s: &str, index: usize) -> bool {
593    s.is_char_boundary(index)
594}
595
596/// Access the character at the given byte index.
597///
598/// Returns `None` if the index is out of bounds or not a character boundary.
599///
600/// # Examples
601///
602/// ```rune
603/// let s = "おはよう";
604/// assert_eq!(s.char_at(0), Some('お'));
605/// assert_eq!(s.char_at(1), None);
606/// assert_eq!(s.char_at(2), None);
607/// assert_eq!(s.char_at(3), Some('は'));
608/// ```
609#[rune::function(instance)]
610fn char_at(s: &str, index: usize) -> Option<char> {
611    if !s.is_char_boundary(index) {
612        return None;
613    }
614
615    s[index..].chars().next()
616}
617
618/// Clones the string and its underlying storage.
619///
620/// # Examples
621///
622/// Basic usage:
623///
624/// ```rune
625/// let a = "h";
626/// let b = a;
627/// b.push('i');
628///
629/// // `a` and `b` refer to the same underlying string.
630/// assert_eq!(a, b);
631///
632/// let c = b.clone();
633/// c.push('!');
634/// assert_ne!(a, c);
635/// ```
636#[rune::function(keep, instance, protocol = CLONE)]
637fn clone(this: &String) -> alloc::Result<String> {
638    this.try_clone()
639}
640
641/// Test two strings for partial equality.
642///
643/// # Examples
644///
645/// ```rune
646/// use std::ops::partial_eq;
647///
648/// assert_eq!(partial_eq("a", "a"), true);
649/// assert_eq!(partial_eq("a", "ab"), false);
650/// assert_eq!(partial_eq("ab", "a"), false);
651/// ```
652#[rune::function(keep, instance, protocol = PARTIAL_EQ)]
653#[inline]
654fn partial_eq(this: &str, rhs: &str) -> bool {
655    this.eq(rhs)
656}
657
658/// Test two strings for total equality.
659///
660/// # Examples
661///
662/// ```rune
663/// use std::ops::eq;
664///
665/// assert_eq!(eq("a", "a"), true);
666/// assert_eq!(eq("a", "ab"), false);
667/// assert_eq!(eq("ab", "a"), false);
668/// ```
669#[rune::function(keep, instance, protocol = EQ)]
670#[inline]
671fn eq(this: &str, rhs: &str) -> bool {
672    this.eq(rhs)
673}
674
675/// Perform a partial ordered comparison between two strings.
676///
677/// # Examples
678///
679/// ```rune
680/// assert!("a" < "ab");
681/// assert!("ab" > "a");
682/// assert!("a" == "a");
683/// ```
684///
685/// Using explicit functions:
686///
687/// ```rune
688/// use std::cmp::Ordering;
689/// use std::ops::partial_cmp;
690///
691/// assert_eq!(partial_cmp("a", "ab"), Some(Ordering::Less));
692/// assert_eq!(partial_cmp("ab", "a"), Some(Ordering::Greater));
693/// assert_eq!(partial_cmp("a", "a"), Some(Ordering::Equal));
694/// ```
695#[rune::function(keep, instance, protocol = PARTIAL_CMP)]
696#[inline]
697fn partial_cmp(this: &str, rhs: &str) -> Option<Ordering> {
698    this.partial_cmp(rhs)
699}
700
701/// Perform a totally ordered comparison between two strings.
702///
703/// # Examples
704///
705/// ```rune
706/// use std::cmp::Ordering;
707/// use std::ops::cmp;
708///
709/// assert_eq!(cmp("a", "ab"), Ordering::Less);
710/// assert_eq!(cmp("ab", "a"), Ordering::Greater);
711/// assert_eq!(cmp("a", "a"), Ordering::Equal);
712/// ```
713#[rune::function(keep, instance, protocol = CMP)]
714#[inline]
715fn cmp(this: &str, rhs: &str) -> Ordering {
716    this.cmp(rhs)
717}
718
719/// Hash the string.
720///
721/// # Examples
722///
723/// ```rune
724/// use std::ops::hash;
725///
726/// let a = "hello";
727/// let b = "hello";
728///
729/// assert_eq!(hash(a), hash(b));
730/// ```
731#[rune::function(keep, instance, protocol = HASH)]
732fn hash(this: &str, hasher: &mut Hasher) {
733    hasher.write_str(this);
734}
735
736/// Write a display representation of a string.
737///
738/// # Examples
739///
740/// ```rune
741/// println!("{}", "Hello");
742/// ```
743#[rune::function(keep, instance, protocol = DISPLAY_FMT)]
744#[inline]
745fn display_fmt(this: &str, f: &mut Formatter) -> alloc::Result<()> {
746    write!(f, "{this}")
747}
748
749/// Write a debug representation of a string.
750///
751/// # Examples
752///
753/// ```rune
754/// println!("{:?}", "Hello");
755/// ```
756#[rune::function(keep, instance, protocol = DEBUG_FMT)]
757#[inline]
758fn debug_fmt(this: &str, f: &mut Formatter) -> alloc::Result<()> {
759    write!(f, "{this:?}")
760}
761
762/// Shrinks the capacity of this `String` to match its length.
763///
764/// # Examples
765///
766/// Basic usage:
767///
768/// ```rune
769/// let s = "foo";
770///
771/// s.reserve(100);
772/// assert!(s.capacity() >= 100);
773///
774/// s.shrink_to_fit();
775/// assert_eq!(3, s.capacity());
776/// ```
777#[rune::function(instance)]
778fn shrink_to_fit(s: &mut String) -> alloc::Result<()> {
779    s.try_shrink_to_fit()
780}
781
782/// An iterator over substrings of this string slice, separated by
783/// characters matched by a pattern.
784///
/// The [pattern] can be a string, a [`char`], or a function or closure that
/// determines if a character matches.
787///
788/// [`char`]: prim@char
789/// [pattern]: self::pattern
790///
791/// # Iterator behavior
792///
793/// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
794/// allows a reverse search and forward/reverse search yields the same
795/// elements. This is true for, e.g., [`char`], but not for `&str`.
796///
797/// If the pattern allows a reverse search but its results might differ
798/// from a forward search, the [`rsplit`] method can be used.
799///
800/// [`rsplit`]: str::rsplit
801///
802/// # Examples
803///
804/// Simple patterns:
805///
806/// ```rune
807/// let v = "Mary had a little lamb".split(' ').collect::<Vec>();
808/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
809///
810/// let v = "".split('X').collect::<Vec>();
811/// assert_eq!(v, [""]);
812///
813/// let v = "lionXXtigerXleopard".split('X').collect::<Vec>();
814/// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
815///
816/// let v = "lion::tiger::leopard".split("::").collect::<Vec>();
817/// assert_eq!(v, ["lion", "tiger", "leopard"]);
818///
819/// let v = "abc1def2ghi".split(char::is_numeric).collect::<Vec>();
820/// assert_eq!(v, ["abc", "def", "ghi"]);
821///
822/// let v = "lionXtigerXleopard".split(char::is_uppercase).collect::<Vec>();
823/// assert_eq!(v, ["lion", "tiger", "leopard"]);
824/// ```
825///
826/// A more complex pattern, using a closure:
827///
828/// ```rune
829/// let v = "abc1defXghi".split(|c| c == '1' || c == 'X').collect::<Vec>();
830/// assert_eq!(v, ["abc", "def", "ghi"]);
831/// ```
832///
833/// If a string contains multiple contiguous separators, you will end up
834/// with empty strings in the output:
835///
836/// ```rune
837/// let x = "||||a||b|c";
838/// let d = x.split('|').collect::<Vec>();
839///
840/// assert_eq!(d, ["", "", "", "", "a", "", "b", "c"]);
841/// ```
842///
843/// Contiguous separators are separated by the empty string.
844///
845/// ```rune
846/// let x = "(///)";
847/// let d = x.split('/').collect::<Vec>();
848///
849/// assert_eq!(d, ["(", "", "", ")"]);
850/// ```
851///
852/// Separators at the start or end of a string are neighbored
853/// by empty strings.
854///
855/// ```rune
856/// let d = "010".split("0").collect::<Vec>();
857/// assert_eq!(d, ["", "1", ""]);
858/// ```
859///
860/// When the empty string is used as a separator, it separates
861/// every character in the string, along with the beginning
862/// and end of the string.
863///
864/// ```rune
865/// let f = "rust".split("").collect::<Vec>();
866/// assert_eq!(f, ["", "r", "u", "s", "t", ""]);
867/// ```
868///
869/// Contiguous separators can lead to possibly surprising behavior
870/// when whitespace is used as the separator. This code is correct:
871///
872/// ```rune
873/// let x = "    a  b c";
874/// let d = x.split(' ').collect::<Vec>();
875///
876/// assert_eq!(d, ["", "", "", "", "a", "", "b", "c"]);
877/// ```
878///
879/// It does _not_ give you:
880///
881/// ```rune,ignore
882/// assert_eq!(d, ["a", "b", "c"]);
883/// ```
884///
885/// Use [`split_whitespace`] for this behavior.
886///
887/// [`split_whitespace`]: str::split_whitespace
888#[rune::function(instance, deprecated = "Use String::split instead")]
889fn split(this: Ref<str>, value: Value) -> Result<Value, VmError> {
890    match value.as_ref() {
891        Repr::Inline(Inline::Char(c)) => Ok(rune::to_value(Split::new(this, *c))?),
892        Repr::Inline(value) => Err(VmError::from([
893            VmErrorKind::expected::<String>(value.type_info()),
894            VmErrorKind::bad_argument(0),
895        ])),
896        Repr::Dynamic(value) => Err(VmError::from([
897            VmErrorKind::expected::<String>(value.type_info()),
898            VmErrorKind::bad_argument(0),
899        ])),
900        Repr::Any(value) => match value.type_hash() {
901            String::HASH => {
902                let s = value.borrow_ref::<String>()?;
903
904                let split = rune::to_value(Split::new(this, String::try_from(s.as_str())?))?;
905
906                Ok(split)
907            }
908            Function::HASH => {
909                let f = value.borrow_ref::<Function>()?;
910                let split = rune::to_value(Split::new(this, f.try_clone()?))?;
911                Ok(split)
912            }
913            _ => Err(VmError::from([
914                VmErrorKind::expected::<String>(value.type_info()),
915                VmErrorKind::bad_argument(0),
916            ])),
917        },
918    }
919}
920
921/// Splits the string on the first occurrence of the specified delimiter and
922/// returns prefix before delimiter and suffix after delimiter.
923///
924/// # Examples
925///
926/// ```rune
927/// assert_eq!("cfg".split_once('='), None);
928/// assert_eq!("cfg=".split_once('='), Some(("cfg", "")));
929/// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
930/// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
931/// ```
932#[rune::function(instance)]
933fn split_once(this: &str, value: Value) -> Result<Option<(String, String)>, VmError> {
934    let outcome = match value.as_ref() {
935        Repr::Inline(Inline::Char(pat)) => this.split_once(*pat),
936        Repr::Inline(value) => {
937            return Err(VmError::from([
938                VmErrorKind::expected::<String>(value.type_info()),
939                VmErrorKind::bad_argument(0),
940            ]));
941        }
942        Repr::Dynamic(value) => {
943            return Err(VmError::from([
944                VmErrorKind::expected::<String>(value.type_info()),
945                VmErrorKind::bad_argument(0),
946            ]));
947        }
948        Repr::Any(value) => match value.type_hash() {
949            String::HASH => {
950                let s = value.borrow_ref::<String>()?;
951                this.split_once(s.as_str())
952            }
953            Function::HASH => {
954                let f = value.borrow_ref::<Function>()?;
955                let mut err = None;
956
957                let outcome = this.split_once(|c: char| match f.call::<bool>((c,)) {
958                    Ok(b) => b,
959                    Err(e) => {
960                        if err.is_none() {
961                            err = Some(e);
962                        }
963
964                        false
965                    }
966                });
967
968                if let Some(e) = err.take() {
969                    return Err(e);
970                }
971
972                outcome
973            }
974            _ => {
975                return Err(VmError::from([
976                    VmErrorKind::expected::<String>(value.type_info()),
977                    VmErrorKind::bad_argument(0),
978                ]));
979            }
980        },
981    };
982
983    let Some((a, b)) = outcome else {
984        return Ok(None);
985    };
986
987    Ok(Some((a.try_to_owned()?, b.try_to_owned()?)))
988}
989
/// Returns a new string with leading and trailing whitespace removed.
991///
992/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
993/// Property `White_Space`, which includes newlines.
994///
995/// # Examples
996///
997/// Basic usage:
998///
999/// ```rune
1000/// let s = "\n Hello\tworld\t\n";
1001///
1002/// assert_eq!("Hello\tworld", s.trim());
1003/// ```
1004#[rune::function(instance)]
1005fn trim(this: &str) -> alloc::Result<String> {
1006    this.trim().try_to_owned()
1007}
1008
/// Returns a new string with trailing whitespace removed.
1010///
1011/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
1012/// Property `White_Space`, which includes newlines.
1013///
1014/// # Text directionality
1015///
1016/// A string is a sequence of bytes. `end` in this context means the last
1017/// position of that byte string; for a left-to-right language like English or
1018/// Russian, this will be right side, and for right-to-left languages like
1019/// Arabic or Hebrew, this will be the left side.
1020///
1021/// # Examples
1022///
1023/// Basic usage:
1024///
1025/// ```rune
1026/// let s = "\n Hello\tworld\t\n";
1027/// assert_eq!("\n Hello\tworld", s.trim_end());
1028/// ```
1029///
1030/// Directionality:
1031///
1032/// ```rune
1033/// let s = "  English  ";
1034/// assert!(Some('h') == s.trim_end().chars().rev().next());
1035///
1036/// let s = "  עברית  ";
1037/// assert!(Some('ת') == s.trim_end().chars().rev().next());
1038/// ```
1039#[rune::function(instance)]
1040fn trim_end(this: &str) -> alloc::Result<String> {
1041    this.trim_end().try_to_owned()
1042}
1043
1044/// Returns `true` if `self` has a length of zero bytes.
1045///
1046/// # Examples
1047///
1048/// Basic usage:
1049///
1050/// ```rune
1051/// let s = "";
1052/// assert!(s.is_empty());
1053///
1054/// let s = "not empty";
1055/// assert!(!s.is_empty());
1056/// ```
1057#[rune::function(instance)]
1058fn is_empty(this: &str) -> bool {
1059    this.is_empty()
1060}
1061
1062/// Replaces all matches of a pattern with another string.
1063///
1064/// `replace` creates a new [`String`], and copies the data from this string
1065/// slice into it. While doing so, it attempts to find matches of a pattern. If
1066/// it finds any, it replaces them with the replacement string slice.
1067///
1068/// # Examples
1069///
1070/// Basic usage:
1071///
1072/// ```rune
1073/// let s = "this is old";
1074///
1075/// assert_eq!("this is new", s.replace("old", "new"));
1076/// assert_eq!("than an old", s.replace("is", "an"));
1077/// ```
1078///
1079/// When the pattern doesn't match, it returns this string slice as [`String`]:
1080///
1081/// ```rune
1082/// let s = "this is old";
1083/// assert_eq!(s, s.replace("cookie monster", "little lamb"));
1084/// ```
#[rune::function(instance)]
fn replace(this: &str, from: &str, to: &str) -> alloc::Result<String> {
    // Delegates to the crate's fallible `replace`, whose result is returned
    // unchanged.
    alloc::str::replace(this, from, to)
}
1089
/// Returns an iterator over the [`char`]s of a string slice.
///
/// As a string slice consists of valid UTF-8, we can iterate through a string
/// slice by [`char`]. This method returns such an iterator.
///
/// It's important to remember that [`char`] represents a Unicode Scalar Value,
/// and might not match your idea of what a 'character' is. Iteration over
/// grapheme clusters may be what you actually want. This functionality is not
/// provided by Rust's standard library, check crates.io instead.
///
/// # Examples
///
/// Basic usage:
///
/// ```rune
/// let word = "goodbye";
///
/// let count = word.chars().count();
/// assert_eq!(7, count);
///
/// let chars = word.chars();
///
/// assert_eq!(Some('g'), chars.next());
/// assert_eq!(Some('o'), chars.next());
/// assert_eq!(Some('o'), chars.next());
/// assert_eq!(Some('d'), chars.next());
/// assert_eq!(Some('b'), chars.next());
/// assert_eq!(Some('y'), chars.next());
/// assert_eq!(Some('e'), chars.next());
///
/// assert_eq!(None, chars.next());
/// ```
///
/// Remember, [`char`]s might not match your intuition about characters:
///
/// ```rune
/// let y = "y̆";
///
/// let chars = y.chars();
///
/// assert_eq!(Some('y'), chars.next()); // not 'y̆'
/// assert_eq!(Some('\u{0306}'), chars.next());
///
/// assert_eq!(None, chars.next());
/// ```
///
/// [`char`]: prim@char
#[rune::function(instance)]
fn chars(s: Ref<str>) -> Chars {
    // The iterator takes ownership of the string reference it walks over.
    Chars::new(s)
}
1141
1142/// Returns a subslice of `str`.
1143///
1144/// This is the non-panicking alternative to indexing the `str`. Returns
1145/// [`None`] whenever equivalent indexing operation would panic.
1146///
1147/// # Examples
1148///
1149/// ```rune
1150/// let v = "🗻∈🌏";
1151///
1152/// assert_eq!(Some("🗻"), v.get(0..4));
1153///
1154/// // indices not on UTF-8 sequence boundaries
1155/// assert!(v.get(1..).is_none());
1156/// assert!(v.get(..8).is_none());
1157///
1158/// // out of bounds
1159/// assert!(v.get(..42).is_none());
1160/// ```
1161#[rune::function(keep, instance)]
1162fn get(this: &str, key: Value) -> Result<Option<String>, VmError> {
1163    use crate::runtime::TypeOf;
1164
1165    let slice = match key.as_any() {
1166        Some(value) => match value.type_hash() {
1167            RangeFrom::HASH => {
1168                let range = value.borrow_ref::<RangeFrom>()?;
1169                let start = range.start.as_usize()?;
1170                this.get(start..)
1171            }
1172            RangeFull::HASH => {
1173                _ = value.borrow_ref::<RangeFull>()?;
1174                this.get(..)
1175            }
1176            RangeInclusive::HASH => {
1177                let range = value.borrow_ref::<RangeInclusive>()?;
1178                let start = range.start.as_usize()?;
1179                let end = range.end.as_usize()?;
1180                this.get(start..=end)
1181            }
1182            RangeToInclusive::HASH => {
1183                let range = value.borrow_ref::<RangeToInclusive>()?;
1184                let end = range.end.as_usize()?;
1185                this.get(..=end)
1186            }
1187            RangeTo::HASH => {
1188                let range = value.borrow_ref::<RangeTo>()?;
1189                let end = range.end.as_usize()?;
1190                this.get(..end)
1191            }
1192            Range::HASH => {
1193                let range = value.borrow_ref::<Range>()?;
1194                let start = range.start.as_usize()?;
1195                let end = range.end.as_usize()?;
1196                this.get(start..end)
1197            }
1198            _ => {
1199                return Err(VmError::from(VmErrorKind::UnsupportedIndexGet {
1200                    target: String::type_info(),
1201                    index: value.type_info(),
1202                }))
1203            }
1204        },
1205        _ => {
1206            return Err(VmError::from(VmErrorKind::UnsupportedIndexGet {
1207                target: String::type_info(),
1208                index: key.type_info(),
1209            }))
1210        }
1211    };
1212
1213    let Some(slice) = slice else {
1214        return Ok(None);
1215    };
1216
1217    Ok(Some(slice.try_to_owned()?))
1218}
1219
1220/// The add operation for strings.
1221#[rune::function(instance, protocol = ADD)]
1222fn add(a: &str, b: &str) -> Result<String, VmError> {
1223    let mut string = String::try_with_capacity(a.len() + b.len())?;
1224    string.try_push_str(a)?;
1225    string.try_push_str(b)?;
1226    Ok(string)
1227}
1228
1229/// The add assign operation for strings.
1230#[rune::function(instance, protocol = ADD_ASSIGN)]
1231fn add_assign(this: &mut String, other: &str) -> Result<(), VmError> {
1232    this.try_push_str(other)?;
1233    Ok(())
1234}
1235
1236/// Get a specific string index.
1237#[rune::function(instance, protocol = INDEX_GET)]
1238fn index_get(s: &str, key: Value) -> Result<String, VmError> {
1239    match get(s, key)? {
1240        Some(slice) => Ok(slice),
1241        None => Err(VmError::panic("missing string slice")),
1242    }
1243}
1244
1245/// Parses this string into an integer.
1246///
1247/// # Errors
1248///
1249/// Will return [`Err`] if it's not possible to parse this string slice into an
1250/// integer.
1251///
1252/// # Examples
1253///
1254/// Basic usage
1255///
1256/// ```rune
1257/// let four = "4".parse::<i64>()?;
1258/// assert_eq!(4, four);
1259/// ```
1260#[rune::function(instance, path = parse::<i64>)]
1261fn parse_int(s: &str) -> Result<i64, ParseIntError> {
1262    str::parse::<i64>(s)
1263}
1264
1265/// Parses this string into a float.
1266///
1267/// # Errors
1268///
1269/// Will return [`Err`] if it's not possible to parse this string slice into an
1270/// float.
1271///
1272/// # Examples
1273///
1274/// Basic usage
1275///
1276/// ```rune
1277/// let pi = "3.1415".parse::<f64>()?;
1278/// assert_eq!(3.1415, pi);
1279/// ```
1280#[rune::function(instance, path = parse::<f64>)]
1281fn parse_float(s: &str) -> Result<f64, ParseFloatError> {
1282    str::parse::<f64>(s)
1283}
1284
1285/// Parses this string into a character.
1286///
1287/// # Errors
1288///
1289/// Will return [`Err`] if it's not possible to parse this string slice into an
1290/// integer.
1291///
1292/// # Examples
1293///
1294/// Basic usage
1295///
1296/// ```rune
1297/// let a = "a".parse::<char>()?;
1298/// assert_eq!('a', a);
1299/// ```
1300#[rune::function(instance, path = parse::<char>)]
1301fn parse_char(s: &str) -> Result<char, char::ParseCharError> {
1302    str::parse::<char>(s)
1303}
1304
1305/// Returns the lowercase equivalent of this string slice, as a new [`String`].
1306///
1307/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1308/// `Lowercase`.
1309///
1310/// Since some characters can expand into multiple characters when changing
1311/// the case, this function returns a [`String`] instead of modifying the
1312/// parameter in-place.
1313///
1314/// # Examples
1315///
1316/// Basic usage:
1317///
1318/// ```rune
1319/// let s = "HELLO";
1320///
1321/// assert_eq!("hello", s.to_lowercase());
1322/// ```
1323///
1324/// A tricky example, with sigma:
1325///
1326/// ```rune
1327/// let sigma = "Σ";
1328///
1329/// assert_eq!("σ", sigma.to_lowercase());
1330///
1331/// // but at the end of a word, it's ς, not σ:
1332/// let odysseus = "ὈΔΥΣΣΕΎΣ";
1333///
1334/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
1335/// ```
1336///
1337/// Languages without case are not changed:
1338///
1339/// ```rune
1340/// let new_year = "农历新年";
1341///
1342/// assert_eq!(new_year, new_year.to_lowercase());
1343/// ```
1344#[rune::function(instance)]
1345fn to_lowercase(s: &str) -> Result<String, VmError> {
1346    let mut lowercase = String::try_with_capacity(s.len())?;
1347
1348    for (i, c) in s.char_indices() {
1349        // Inlined code to from std::str to handle upper-case sigma,
1350        // since it is the only Unicode character that is context-dependent
1351        // See https://github.com/rust-lang/rust/issues/26035 for more context
1352        if c == 'Σ' {
1353            lowercase.try_push_str(map_uppercase_sigma(s, i))?;
1354        } else {
1355            lowercase.try_extend(c.to_lowercase())?;
1356        }
1357    }
1358
1359    return Ok(lowercase);
1360
1361    fn map_uppercase_sigma(from: &str, i: usize) -> &'static str {
1362        // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1363        // for the definition of `Final_Sigma`.
1364        debug_assert!('Σ'.len_utf8() == 2);
1365        let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
1366            && !case_ignorable_then_cased(from[i + 2..].chars());
1367        if is_word_final {
1368            "ς"
1369        } else {
1370            "σ"
1371        }
1372    }
1373
1374    fn case_ignorable_then_cased<I: core::iter::Iterator<Item = char>>(mut iter: I) -> bool {
1375        match iter.find(|&c| !unicode::case_ignorable::lookup(c)) {
1376            Some(c) => unicode::cased::lookup(c),
1377            None => false,
1378        }
1379    }
1380}
1381
1382/// Returns the uppercase equivalent of this string slice, as a new [`String`].
1383///
1384/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1385/// `Uppercase`.
1386///
1387/// Since some characters can expand into multiple characters when changing
1388/// the case, this function returns a [`String`] instead of modifying the
1389/// parameter in-place.
1390///
1391/// # Examples
1392///
1393/// Basic usage:
1394///
1395/// ```rune
1396/// let s = "hello";
1397///
1398/// assert_eq!("HELLO", s.to_uppercase());
1399/// ```
1400///
1401/// Scripts without case are not changed:
1402///
1403/// ```rune
1404/// let new_year = "农历新年";
1405///
1406/// assert_eq!(new_year, new_year.to_uppercase());
1407/// ```
1408///
1409/// One character can become multiple:
1410/// ```rune
1411/// let s = "tschüß";
1412///
1413/// assert_eq!("TSCHÜSS", s.to_uppercase());
1414/// ```
1415#[rune::function(instance)]
1416fn to_uppercase(s: &str) -> Result<String, VmError> {
1417    let mut uppercase = String::try_with_capacity(s.len())?;
1418    uppercase.try_extend(s.chars().flat_map(|c| c.to_uppercase()))?;
1419    Ok(uppercase)
1420}
1421
/// An iterator over the characters of a string.
///
/// The part of the string which has not been yielded yet is tracked as a pair
/// of byte offsets, which allows characters to be taken from either end.
#[derive(Any)]
#[rune(item = ::std::string)]
struct Chars {
    // The string being iterated over.
    string: Ref<str>,
    // Byte offset of the next character yielded from the front; begins at 0
    // and grows by the encoded length of every character taken.
    start: usize,
    // Byte offset one past the next character yielded from the back; begins at
    // the length of the string and shrinks by the encoded length of every
    // character taken.
    end: usize,
}
1429
1430impl Chars {
1431    fn new(string: Ref<str>) -> Self {
1432        let end = string.len();
1433        Self {
1434            string,
1435            start: 0,
1436            end,
1437        }
1438    }
1439
1440    #[rune::function(keep, protocol = NEXT)]
1441    fn next(&mut self) -> Option<char> {
1442        let string = self.string.get(self.start..self.end)?;
1443        let c = string.chars().next()?;
1444        self.start += c.len_utf8();
1445        Some(c)
1446    }
1447
1448    #[rune::function(keep, protocol = NEXT_BACK)]
1449    fn next_back(&mut self) -> Option<char> {
1450        let string = self.string.get(self.start..self.end)?;
1451        let c = string.chars().next_back()?;
1452        self.end -= c.len_utf8();
1453        Some(c)
1454    }
1455}
1456
/// A pattern which `Split` can use to locate separators in a string.
trait Pattern: 'static + TryClone + Named + FromValue + ToValue + MaybeTypeOf + TypeOf {
    /// Tests whether the pattern matches at the very beginning of `tail`.
    ///
    /// The boolean tells whether there was a match. The number is how many
    /// bytes of `tail` the caller should step over: in the implementations in
    /// this file that is the length of the match, or the encoded length of
    /// the first character of `tail` when there was none, or `0` if `tail` is
    /// empty.
    fn test(&self, tail: &str) -> Result<(bool, usize), VmError>;

    /// Tests whether the pattern itself is empty.
    ///
    /// Of the implementations in this file only a string pattern can be
    /// empty; `Split` consults this after scanning to decide whether it is
    /// finished.
    fn is_empty(&self) -> bool;
}
1462
1463impl Pattern for String {
1464    fn test(&self, tail: &str) -> Result<(bool, usize), VmError> {
1465        if tail.starts_with(self.as_str()) {
1466            Ok((true, self.len()))
1467        } else {
1468            let Some(c) = tail.chars().next() else {
1469                return Ok((false, 0));
1470            };
1471
1472            Ok((false, c.len_utf8()))
1473        }
1474    }
1475
1476    #[inline]
1477    fn is_empty(&self) -> bool {
1478        String::is_empty(self)
1479    }
1480}
1481
1482impl Pattern for char {
1483    fn test(&self, tail: &str) -> Result<(bool, usize), VmError> {
1484        let Some(c) = tail.chars().next() else {
1485            return Ok((false, 0));
1486        };
1487
1488        Ok((c == *self, c.len_utf8()))
1489    }
1490
1491    #[inline]
1492    fn is_empty(&self) -> bool {
1493        false
1494    }
1495}
1496
1497impl Pattern for Function {
1498    fn test(&self, tail: &str) -> Result<(bool, usize), VmError> {
1499        let Some(c) = tail.chars().next() else {
1500            return Ok((false, 0));
1501        };
1502
1503        Ok((self.call((c,))?, c.len_utf8()))
1504    }
1505
1506    #[inline]
1507    fn is_empty(&self) -> bool {
1508        false
1509    }
1510}
1511
/// An iterator over the parts of a string separated by a [`Pattern`].
#[derive(Any)]
#[rune(item = ::std::string)]
struct Split<T>
where
    T: Pattern,
{
    // The string being split; cleared once the iterator has finished, after
    // which nothing more is yielded.
    string: Option<Ref<str>>,
    // The pattern used to recognize separators.
    pattern: T,
    // Byte offset at which the part currently being collected begins.
    from: usize,
    // Byte offset up to which the string has been scanned so far.
    to: usize,
}
1523
1524impl<T> Split<T>
1525where
1526    T: Pattern,
1527{
1528    fn new(string: Ref<str>, pattern: T) -> Self {
1529        Self {
1530            string: Some(string),
1531            pattern,
1532            from: 0,
1533            to: 0,
1534        }
1535    }
1536
1537    #[rune::function(keep, protocol = NEXT)]
1538    fn next(&mut self) -> Result<Option<String>, VmError> {
1539        let Some(string) = &self.string else {
1540            return Ok(None);
1541        };
1542
1543        if self.from == string.len() && self.from == self.to {
1544            self.string = None;
1545            let out = "".try_to_owned()?;
1546            return Ok(Some(out));
1547        }
1548
1549        while self.to < string.len() {
1550            let Some(tail) = string.get(self.to..) else {
1551                return Ok(None);
1552            };
1553
1554            let (m, len) = self.pattern.test(tail)?;
1555
1556            if m {
1557                let head = string.get(self.from..self.to).unwrap_or_default();
1558                let out = head.try_to_owned()?;
1559
1560                if len == 0 {
1561                    self.from = self.to;
1562                    self.to += tail.chars().next().map_or(0, |c| c.len_utf8());
1563                } else {
1564                    self.to += len;
1565                    self.from = self.to;
1566                }
1567
1568                return Ok(Some(out));
1569            } else {
1570                self.to += len;
1571            }
1572        }
1573
1574        let tail = string.get(self.from..self.to).unwrap_or_default();
1575        self.from = self.to;
1576        let out = tail.try_to_owned()?;
1577
1578        if !self.pattern.is_empty() {
1579            self.string = None;
1580        }
1581
1582        Ok(Some(out))
1583    }
1584
1585    #[rune::function(keep, protocol = INTO_ITER)]
1586    fn into_iter(self) -> Self {
1587        self
1588    }
1589}
1590
// Lookup tables and search routine inlined from core::unicode, since using
// that module directly is marked as using an unstable library feature.
mod unicode {
    /// Extracts the prefix sum stored in the lower 21 bits of a run header.
    fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
        const PREFIX_SUM_MASK: u32 = (1 << 21) - 1;
        short_offset_run_header & PREFIX_SUM_MASK
    }

    /// Extracts the offset index stored in the bits above the prefix sum of a
    /// run header.
    fn decode_length(short_offset_run_header: u32) -> usize {
        (short_offset_run_header >> 21) as usize
    }

    /// Tests whether `needle` belongs to the set encoded by the two tables.
    ///
    /// The run headers are searched for the run which covers `needle`, after
    /// which the offsets of that run are summed up until the sum passes it.
    /// The needle is a member if the walk stops at an odd offset index.
    #[inline(always)]
    fn skip_search<const SOR: usize, const OFFSETS: usize>(
        needle: u32,
        short_offset_runs: &[u32; SOR],
        offsets: &[u8; OFFSETS],
    ) -> bool {
        // Shifting left by 11 drops the bits above the 21 bit prefix sum, so
        // headers are compared on their prefix sum alone.
        //
        // Note that the result *cannot* be past the end of the array, as the
        // last element is greater than std::char::MAX (the largest possible
        // needle).
        //
        // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the
        // correct location cannot be past it, so Err(idx) != length either.
        let key = needle << 11;
        let run = match short_offset_runs.binary_search_by_key(&key, |header| header << 11) {
            Ok(exact) => exact + 1,
            Err(insertion) => insertion,
        };

        // The offsets belonging to the run span from its own offset index up
        // to the one of the following run, or to the end of the table for the
        // last run.
        let first = decode_length(short_offset_runs[run]);
        let past_last = match short_offset_runs.get(run + 1) {
            Some(&following) => decode_length(following),
            None => offsets.len(),
        };
        let length = past_last - first;

        // Offsets are relative to the prefix sum of the preceding run.
        let base = match run.checked_sub(1) {
            Some(previous) => decode_prefix_sum(short_offset_runs[previous]),
            None => 0,
        };
        let target = needle - base;

        let mut cursor = first;
        let mut accumulated = 0u32;

        for _ in 0..(length - 1) {
            accumulated += u32::from(offsets[cursor]);

            if accumulated > target {
                break;
            }

            cursor += 1;
        }

        cursor % 2 == 1
    }

    #[rustfmt::skip]
    pub mod case_ignorable {
        static SHORT_OFFSET_RUNS: [u32; 35] = [
            688, 44045149, 572528402, 576724925, 807414908, 878718981, 903913493, 929080568, 933275148,
            937491230, 1138818560, 1147208189, 1210124160, 1222707713, 1235291428, 1260457643,
            1264654383, 1499535675, 1507925040, 1566646003, 1629566000, 1650551536, 1658941263,
            1671540720, 1688321181, 1700908800, 1709298023, 1717688832, 1738661888, 1763828398,
            1797383403, 1805773008, 1809970171, 1819148289, 1824457200,
        ];
        static OFFSETS: [u8; 875] = [
            39, 1, 6, 1, 11, 1, 35, 1, 1, 1, 71, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2,
            1, 1, 251, 7, 207, 1, 5, 1, 49, 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35,
            1, 10, 21, 16, 1, 101, 8, 1, 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24,
            24, 43, 3, 44, 1, 7, 2, 6, 8, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1,
            58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2,
            57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1,
            61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1,
            5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9,
            98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1,
            102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3,
            29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118,
            3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 31,
            49, 4, 48, 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3,
            58, 8, 2, 2, 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1,
            3, 11, 3, 13, 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1,
            16, 13, 51, 33, 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6,
            93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 103, 3, 3,
            2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4,
            2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2,
            1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3,
            1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3,
            0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 0, 2,
            80, 3, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1,
            50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1,
            3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 195, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2,
            238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 3, 2, 4, 1, 5,
            0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46,
            13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2,
            3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 0, 17, 6, 15, 0, 5, 59, 7, 9, 4, 0, 1, 63, 17,
            64, 2, 1, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3,
            0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160,
            14, 0, 1, 61, 4, 0, 5, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
        ];

        /// Tests whether `c` is part of the set encoded by the tables of
        /// this module.
        pub fn lookup(c: char) -> bool {
            super::skip_search(u32::from(c), &SHORT_OFFSET_RUNS, &OFFSETS)
        }
    }

    #[rustfmt::skip]
    pub mod cased {
        static SHORT_OFFSET_RUNS: [u32; 22] = [
            4256, 115348384, 136322176, 144711446, 163587254, 320875520, 325101120, 350268208,
            392231680, 404815649, 413205504, 421595008, 467733632, 484513952, 492924480, 497144832,
            501339814, 578936576, 627171376, 639756544, 643952944, 649261450,
        ];
        static OFFSETS: [u8; 315] = [
            65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5,
            96, 1, 42, 4, 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9,
            41, 0, 38, 1, 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 9, 7, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2,
            38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13,
            5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4,
            1, 6, 4, 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1,
            0, 46, 18, 30, 132, 102, 3, 4, 1, 59, 5, 2, 1, 1, 1, 5, 24, 5, 1, 3, 0, 43, 1, 14, 6, 80, 0,
            7, 12, 5, 0, 26, 6, 26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1,
            7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, 51, 13, 51, 0, 64, 0, 64, 0, 85, 1, 71, 1, 2, 2, 1, 2,
            2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2,
            25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6,
            0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
        ];

        /// Tests whether `c` is part of the set encoded by the tables of
        /// this module.
        pub fn lookup(c: char) -> bool {
            super::skip_search(u32::from(c), &SHORT_OFFSET_RUNS, &OFFSETS)
        }
    }
}