rune/modules/string.rs
1//! Strings.
2
3use core::char;
4use core::cmp::Ordering;
5use core::num::{ParseFloatError, ParseIntError};
6
7use crate as rune;
8use crate::alloc;
9use crate::alloc::fmt::TryWrite;
10use crate::alloc::prelude::*;
11use crate::alloc::string::FromUtf8Error;
12use crate::compile::Named;
13use crate::runtime::{
14 Bytes, Formatter, FromValue, Function, Hasher, Inline, MaybeTypeOf, Range, RangeFrom,
15 RangeFull, RangeInclusive, RangeTo, RangeToInclusive, Ref, Repr, ToValue, TypeOf, Value,
16 VmError, VmErrorKind,
17};
18use crate::{Any, ContextError, Module, TypeHash};
19
20/// Strings.
21///
22/// Strings in Rune are declared with the literal `"string"` syntax, but can also be
23/// interacted with through the fundamental [`String`] type.
24///
25/// ```rune
26/// let string1 = "Hello";
27/// let string2 = String::new();
28/// string2.push_str("Hello");
29///
30/// assert_eq!(string1, string2);
31/// ```
32#[rune::module(::std::string)]
33pub fn module() -> Result<Module, ContextError> {
34 let mut m = Module::from_meta(self::module__meta)?;
35
36 m.ty::<String>()?;
37
38 m.function_meta(string_from)?;
39 m.function_meta(string_from_str)?;
40 m.function_meta(string_new)?;
41 m.function_meta(string_with_capacity)?;
42 m.function_meta(len)?;
43 m.function_meta(starts_with)?;
44 m.function_meta(ends_with)?;
45 m.function_meta(capacity)?;
46 m.function_meta(clear)?;
47 m.function_meta(contains)?;
48 m.function_meta(push)?;
49 m.function_meta(push_str)?;
50 m.function_meta(reserve)?;
51 m.function_meta(reserve_exact)?;
52 m.function_meta(from_utf8)?;
53 m.function_meta(as_bytes)?;
54 m.function_meta(into_bytes)?;
55 m.function_meta(shrink_to_fit)?;
56 m.function_meta(char_at)?;
57 m.function_meta(split)?;
58 m.function_meta(split_once)?;
59 m.associated_function("split_str", __rune_fn__split)?;
60 m.function_meta(trim)?;
61 m.function_meta(trim_end)?;
62 m.function_meta(replace)?;
63 m.function_meta(is_empty)?;
64 m.function_meta(chars)?;
65 m.function_meta(get__meta)?;
66 m.function_meta(parse_int)?;
67 m.function_meta(parse_float)?;
68 m.function_meta(parse_char)?;
69 m.function_meta(to_lowercase)?;
70 m.function_meta(to_uppercase)?;
71
72 m.function_meta(add)?;
73 m.function_meta(add_assign)?;
74 m.function_meta(index_get)?;
75
76 m.function_meta(clone__meta)?;
77 m.implement_trait::<String>(rune::item!(::std::clone::Clone))?;
78
79 m.function_meta(partial_eq__meta)?;
80 m.implement_trait::<String>(rune::item!(::std::cmp::PartialEq))?;
81
82 m.function_meta(eq__meta)?;
83 m.implement_trait::<String>(rune::item!(::std::cmp::Eq))?;
84
85 m.function_meta(partial_cmp__meta)?;
86 m.implement_trait::<String>(rune::item!(::std::cmp::PartialOrd))?;
87
88 m.function_meta(cmp__meta)?;
89 m.implement_trait::<String>(rune::item!(::std::cmp::Ord))?;
90
91 m.function_meta(hash__meta)?;
92
93 m.function_meta(display_fmt__meta)?;
94 m.function_meta(debug_fmt__meta)?;
95
96 m.ty::<Chars>()?;
97 m.function_meta(Chars::next__meta)?;
98 m.function_meta(Chars::next_back__meta)?;
99 m.implement_trait::<Chars>(rune::item!(::std::iter::Iterator))?;
100 m.implement_trait::<Chars>(rune::item!(::std::iter::DoubleEndedIterator))?;
101
102 macro_rules! split {
103 ($ty:ty) => {
104 m.ty::<Split<$ty>>()?;
105 m.function_meta(Split::<$ty>::next__meta)?;
106 m.implement_trait::<Split<$ty>>(rune::item!(::std::iter::Iterator))?;
107 };
108 }
109
110 split!(Function);
111 split!(String);
112 split!(char);
113 Ok(m)
114}
115
116/// Converts a vector of bytes to a `String`.
117///
118/// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes
119/// ([`Vec<u8>`]) is made of bytes, so this function converts between the two.
120/// Not all byte slices are valid `String`s, however: `String` requires that it
121/// is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
122/// UTF-8, and then does the conversion.
123///
124/// If you are sure that the byte slice is valid UTF-8, and you don't want to
125/// incur the overhead of the validity check, there is an unsafe version of this
126/// function, [`from_utf8_unchecked`], which has the same behavior but skips the
127/// check.
128///
129/// The inverse of this method is [`into_bytes`].
130///
131/// # Errors
132///
133/// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
134/// provided bytes are not UTF-8. The vector you moved in is also included.
135///
136/// # Examples
137///
138/// Basic usage:
139///
140/// ```rune
141/// // some bytes, in a vector
142/// let sparkle_heart = Bytes::from_vec([240u8, 159u8, 146u8, 150u8]);
143///
144/// // We know these bytes are valid, so we'll use `unwrap()`.
145/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
146///
147/// assert_eq!("💖", sparkle_heart);
148/// ```
149///
150/// Incorrect bytes:
151///
152/// ```rune
153/// // some invalid bytes, in a vector
154/// let sparkle_heart = Bytes::from_vec([0u8, 159u8, 146u8, 150u8]);
155///
156/// assert!(String::from_utf8(sparkle_heart).is_err());
157/// ```
158///
159/// See the docs for [`FromUtf8Error`] for more details on what you can do with
160/// this error.
161///
162/// [`from_utf8_unchecked`]: String::from_utf8_unchecked
163/// [`Vec<u8>`]: crate::vec::Vec "Vec"
164/// [`&str`]: prim@str "&str"
165/// [`into_bytes`]: String::into_bytes
166#[rune::function(free, path = String::from_utf8)]
167fn from_utf8(bytes: &[u8]) -> Result<Result<String, FromUtf8Error>, VmError> {
168 let vec = Vec::try_from(bytes)?;
169 Ok(String::from_utf8(vec))
170}
171
172/// Returns a byte slice of this `String`'s contents.
173///
174/// The inverse of this method is [`from_utf8`].
175///
176/// [`from_utf8`]: String::from_utf8
177///
178/// # Examples
179///
180/// Basic usage:
181///
182/// ```rune
183/// let s = "hello";
184/// assert_eq!(b"hello", s.as_bytes());
185/// assert!(is_readable(s));
186/// ```
187#[rune::function(instance)]
188fn as_bytes(s: &str) -> Result<Bytes, VmError> {
189 Ok(Bytes::from_vec(Vec::try_from(s.as_bytes())?))
190}
191
192/// Constructs a string from another string.
193///
194/// # Examples
195///
196/// Basic usage:
197///
198/// ```rune
199/// let s = String::from("hello");
200/// assert_eq!(s, "hello");
201/// ```
202#[rune::function(free, path = String::from)]
203fn string_from(value: &str) -> Result<String, VmError> {
204 Ok(String::try_from(value)?)
205}
206
207#[rune::function(free, path = String::from_str, deprecated = "Use String::from instead")]
208fn string_from_str(value: &str) -> Result<String, VmError> {
209 Ok(String::try_from(value)?)
210}
211
212/// Creates a new empty `String`.
213///
214/// Given that the `String` is empty, this will not allocate any initial buffer.
215/// While that means that this initial operation is very inexpensive, it may
216/// cause excessive allocation later when you add data. If you have an idea of
217/// how much data the `String` will hold, consider the [`with_capacity`] method
218/// to prevent excessive re-allocation.
219///
220/// [`with_capacity`]: String::with_capacity
221///
222/// # Examples
223///
224/// Basic usage:
225///
226/// ```rune
227/// let s = String::new();
228/// ```
#[rune::function(free, path = String::new)]
fn string_new() -> String {
    // Delegates directly to the constructor of the `String` type in scope.
    String::new()
}
233
234/// Creates a new empty `String` with at least the specified capacity.
235///
236/// `String`s have an internal buffer to hold their data. The capacity is the
237/// length of that buffer, and can be queried with the [`capacity`] method. This
238/// method creates an empty `String`, but one with an initial buffer that can
239/// hold at least `capacity` bytes. This is useful when you may be appending a
240/// bunch of data to the `String`, reducing the number of reallocations it needs
241/// to do.
242///
243/// [`capacity`]: String::capacity
244///
245/// If the given capacity is `0`, no allocation will occur, and this method is
246/// identical to the [`new`] method.
247///
248/// [`new`]: String::new
249///
250/// # Examples
251///
252/// Basic usage:
253///
254/// ```rune
255/// let s = String::with_capacity(10);
256///
257/// // The String contains no chars, even though it has capacity for more
258/// assert_eq!(s.len(), 0);
259///
260/// // These are all done without reallocating...
261/// let cap = s.capacity();
262///
263/// for _ in 0..10 {
264/// s.push('a');
265/// }
266///
267/// assert_eq!(s.capacity(), cap);
268///
269/// // ...but this may make the string reallocate
270/// s.push('a');
271/// ```
272#[rune::function(free, path = String::with_capacity)]
273fn string_with_capacity(capacity: usize) -> Result<String, VmError> {
274 Ok(String::try_with_capacity(capacity)?)
275}
276
277/// Returns the length of `self`.
278///
279/// This length is in bytes, not [`char`]s or graphemes. In other words, it
280/// might not be what a human considers the length of the string.
281///
282/// [`char`]: prim@char
283///
284/// # Examples
285///
286/// Basic usage:
287///
288/// ```rune
289/// let len = "foo".len();
290/// assert_eq!(3, len);
291///
292/// assert_eq!("ƒoo".len(), 4); // fancy f!
293/// assert_eq!("ƒoo".chars().count(), 3);
294/// ```
295#[rune::function(instance)]
296fn len(this: &str) -> usize {
297 this.len()
298}
299
300/// Returns `true` if the given pattern matches a prefix of this string slice.
301///
302/// Returns `false` if it does not.
303///
/// The pattern must be a string; unlike Rust's [pattern] API, [`char`]s and
/// functions are not accepted by this method.
306///
307/// [`char`]: prim@char
308/// [pattern]: self::pattern
309///
310/// # Examples
311///
312/// Basic usage:
313///
314/// ```rune
315/// let bananas = "bananas";
316///
317/// assert!(bananas.starts_with("bana"));
318/// assert!(!bananas.starts_with("nana"));
319/// ```
320#[rune::function(instance)]
321fn starts_with(this: &str, other: &str) -> bool {
322 this.starts_with(other)
323}
324
325/// Returns `true` if the given pattern matches a suffix of this string slice.
326///
327/// Returns `false` if it does not.
328///
/// The pattern must be a string; unlike Rust's [pattern] API, [`char`]s and
/// functions are not accepted by this method.
331///
332/// [`char`]: prim@char
333/// [pattern]: self::pattern
334///
335/// # Examples
336///
337/// Basic usage:
338///
339/// ```rune
340/// let bananas = "bananas";
341///
342/// assert!(bananas.ends_with("anas"));
343/// assert!(!bananas.ends_with("nana"));
344/// ```
345#[rune::function(instance)]
346fn ends_with(this: &str, other: &str) -> bool {
347 this.ends_with(other)
348}
349
350/// Returns this `String`'s capacity, in bytes.
351///
352/// # Examples
353///
354/// Basic usage:
355///
356/// ```rune
357/// let s = String::with_capacity(10);
358///
359/// assert!(s.capacity() >= 10);
360/// ```
361#[rune::function(instance)]
362fn capacity(this: &String) -> usize {
363 this.capacity()
364}
365
366/// Truncates this `String`, removing all contents.
367///
368/// While this means the `String` will have a length of zero, it does not touch
369/// its capacity.
370///
371/// # Examples
372///
373/// Basic usage:
374///
375/// ```rune
376/// let s = "foo";
377///
378/// s.clear();
379///
380/// assert!(s.is_empty());
381/// assert_eq!(0, s.len());
382/// assert_eq!(3, s.capacity());
383/// ```
384#[rune::function(instance)]
385fn clear(this: &mut String) {
386 this.clear();
387}
388
389/// Returns `true` if the given pattern matches a sub-slice of this string
390/// slice.
391///
392/// Returns `false` if it does not.
393///
/// The pattern must be a string; unlike Rust's [pattern] API, [`char`]s and
/// functions are not accepted by this method.
396///
397/// [`char`]: prim@char
398/// [pattern]: self::pattern
399///
400/// # Examples
401///
402/// Basic usage:
403///
404/// ```rune
405/// let bananas = "bananas";
406///
407/// assert!(bananas.contains("nana"));
408/// assert!(!bananas.contains("apples"));
409/// ```
410#[rune::function(instance)]
411fn contains(this: &str, other: &str) -> bool {
412 this.contains(other)
413}
414
415/// Appends the given [`char`] to the end of this `String`.
416///
417/// # Examples
418///
419/// Basic usage:
420///
421/// ```rune
422/// let s = "abc";
423///
424/// s.push('1');
425/// s.push('2');
426/// s.push('3');
427///
428/// assert_eq!("abc123", s);
429/// ```
430#[rune::function(instance)]
431fn push(this: &mut String, c: char) -> Result<(), VmError> {
432 this.try_push(c)?;
433 Ok(())
434}
435
436/// Appends a given string slice onto the end of this `String`.
437///
438/// # Examples
439///
440/// Basic usage:
441///
442/// ```rune
443/// let s = "foo";
444///
445/// s.push_str("bar");
446///
447/// assert_eq!("foobar", s);
448/// ```
449#[rune::function(instance)]
450fn push_str(this: &mut String, other: &str) -> Result<(), VmError> {
451 this.try_push_str(other)?;
452 Ok(())
453}
454
455/// Reserves capacity for at least `additional` bytes more than the current
456/// length. The allocator may reserve more space to speculatively avoid frequent
457/// allocations. After calling `reserve`, capacity will be greater than or equal
458/// to `self.len() + additional`. Does nothing if capacity is already
459/// sufficient.
460///
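/// # Errors
///
/// This goes through a fallible reservation: a failure to reserve (such as
/// the new capacity overflowing [`usize`]) is raised as an error rather than
/// a panic.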
464///
465/// # Examples
466///
467/// Basic usage:
468///
469/// ```rune
470/// let s = String::new();
471///
472/// s.reserve(10);
473///
474/// assert!(s.capacity() >= 10);
475/// ```
476///
477/// This might not actually increase the capacity:
478///
479/// ```rune
480/// let s = String::with_capacity(10);
481/// s.push('a');
482/// s.push('b');
483///
484/// // s now has a length of 2 and a capacity of at least 10
485/// let capacity = s.capacity();
486/// assert_eq!(2, s.len());
487/// assert!(capacity >= 10);
488///
489/// // Since we already have at least an extra 8 capacity, calling this...
490/// s.reserve(8);
491///
492/// // ... doesn't actually increase.
493/// assert_eq!(capacity, s.capacity());
494/// ```
495#[rune::function(instance)]
496fn reserve(this: &mut String, additional: usize) -> Result<(), VmError> {
497 this.try_reserve(additional)?;
498 Ok(())
499}
500
501/// Reserves the minimum capacity for at least `additional` bytes more than the
502/// current length. Unlike [`reserve`], this will not deliberately over-allocate
503/// to speculatively avoid frequent allocations. After calling `reserve_exact`,
504/// capacity will be greater than or equal to `self.len() + additional`. Does
505/// nothing if the capacity is already sufficient.
506///
507/// [`reserve`]: String::reserve
508///
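/// # Errors
///
/// This goes through a fallible reservation: a failure to reserve (such as
/// the new capacity overflowing [`usize`]) is raised as an error rather than
/// a panic.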
512///
513/// # Examples
514///
515/// Basic usage:
516///
517/// ```rune
518/// let s = String::new();
519///
520/// s.reserve_exact(10);
521///
522/// assert!(s.capacity() >= 10);
523/// ```
524///
525/// This might not actually increase the capacity:
526///
527/// ```rune
528/// let s = String::with_capacity(10);
529/// s.push('a');
530/// s.push('b');
531///
532/// // s now has a length of 2 and a capacity of at least 10
533/// let capacity = s.capacity();
534/// assert_eq!(2, s.len());
535/// assert!(capacity >= 10);
536///
537/// // Since we already have at least an extra 8 capacity, calling this...
538/// s.reserve_exact(8);
539///
540/// // ... doesn't actually increase.
541/// assert_eq!(capacity, s.capacity());
542/// ```
543#[rune::function(instance)]
544fn reserve_exact(this: &mut String, additional: usize) -> alloc::Result<()> {
545 this.try_reserve_exact(additional)
546}
547
548/// Returns a byte slice of this `String`'s contents while moving the string.
549///
550/// The inverse of this method is [`from_utf8`].
551///
552/// [`from_utf8`]: String::from_utf8
553///
554/// # Examples
555///
556/// Basic usage:
557///
558/// ```rune
559/// let s = "hello";
560/// assert_eq!(b"hello", s.into_bytes());
561/// assert!(!is_readable(s));
562/// ```
563#[rune::function(instance)]
564fn into_bytes(s: String) -> Bytes {
565 Bytes::from_vec(s.into_bytes())
566}
567
568/// Checks that `index`-th byte is the first byte in a UTF-8 code point sequence
569/// or the end of the string.
570///
571/// The start and end of the string (when `index == self.len()`) are considered
572/// to be boundaries.
573///
574/// Returns `false` if `index` is greater than `self.len()`.
575///
576/// # Examples
577///
578/// ```rune
579/// let s = "Löwe 老虎 Léopard";
580/// assert!(s.is_char_boundary(0));
581/// // start of `老`
582/// assert!(s.is_char_boundary(6));
583/// assert!(s.is_char_boundary(s.len()));
584///
585/// // second byte of `ö`
586/// assert!(!s.is_char_boundary(2));
587///
588/// // third byte of `老`
589/// assert!(!s.is_char_boundary(8));
590/// ```
591#[rune::function(instance)]
592fn is_char_boundary(s: &str, index: usize) -> bool {
593 s.is_char_boundary(index)
594}
595
596/// Access the character at the given byte index.
597///
598/// Returns `None` if the index is out of bounds or not a character boundary.
599///
600/// # Examples
601///
602/// ```rune
603/// let s = "おはよう";
604/// assert_eq!(s.char_at(0), Some('お'));
605/// assert_eq!(s.char_at(1), None);
606/// assert_eq!(s.char_at(2), None);
607/// assert_eq!(s.char_at(3), Some('は'));
608/// ```
609#[rune::function(instance)]
610fn char_at(s: &str, index: usize) -> Option<char> {
611 if !s.is_char_boundary(index) {
612 return None;
613 }
614
615 s[index..].chars().next()
616}
617
618/// Clones the string and its underlying storage.
619///
620/// # Examples
621///
622/// Basic usage:
623///
624/// ```rune
625/// let a = "h";
626/// let b = a;
627/// b.push('i');
628///
629/// // `a` and `b` refer to the same underlying string.
630/// assert_eq!(a, b);
631///
632/// let c = b.clone();
633/// c.push('!');
634/// assert_ne!(a, c);
635/// ```
636#[rune::function(keep, instance, protocol = CLONE)]
637fn clone(this: &String) -> alloc::Result<String> {
638 this.try_clone()
639}
640
641/// Test two strings for partial equality.
642///
643/// # Examples
644///
645/// ```rune
646/// use std::ops::partial_eq;
647///
648/// assert_eq!(partial_eq("a", "a"), true);
649/// assert_eq!(partial_eq("a", "ab"), false);
650/// assert_eq!(partial_eq("ab", "a"), false);
651/// ```
652#[rune::function(keep, instance, protocol = PARTIAL_EQ)]
653#[inline]
654fn partial_eq(this: &str, rhs: &str) -> bool {
655 this.eq(rhs)
656}
657
658/// Test two strings for total equality.
659///
660/// # Examples
661///
662/// ```rune
663/// use std::ops::eq;
664///
665/// assert_eq!(eq("a", "a"), true);
666/// assert_eq!(eq("a", "ab"), false);
667/// assert_eq!(eq("ab", "a"), false);
668/// ```
669#[rune::function(keep, instance, protocol = EQ)]
670#[inline]
671fn eq(this: &str, rhs: &str) -> bool {
672 this.eq(rhs)
673}
674
675/// Perform a partial ordered comparison between two strings.
676///
677/// # Examples
678///
679/// ```rune
680/// assert!("a" < "ab");
681/// assert!("ab" > "a");
682/// assert!("a" == "a");
683/// ```
684///
685/// Using explicit functions:
686///
687/// ```rune
688/// use std::cmp::Ordering;
689/// use std::ops::partial_cmp;
690///
691/// assert_eq!(partial_cmp("a", "ab"), Some(Ordering::Less));
692/// assert_eq!(partial_cmp("ab", "a"), Some(Ordering::Greater));
693/// assert_eq!(partial_cmp("a", "a"), Some(Ordering::Equal));
694/// ```
695#[rune::function(keep, instance, protocol = PARTIAL_CMP)]
696#[inline]
697fn partial_cmp(this: &str, rhs: &str) -> Option<Ordering> {
698 this.partial_cmp(rhs)
699}
700
701/// Perform a totally ordered comparison between two strings.
702///
703/// # Examples
704///
705/// ```rune
706/// use std::cmp::Ordering;
707/// use std::ops::cmp;
708///
709/// assert_eq!(cmp("a", "ab"), Ordering::Less);
710/// assert_eq!(cmp("ab", "a"), Ordering::Greater);
711/// assert_eq!(cmp("a", "a"), Ordering::Equal);
712/// ```
713#[rune::function(keep, instance, protocol = CMP)]
714#[inline]
715fn cmp(this: &str, rhs: &str) -> Ordering {
716 this.cmp(rhs)
717}
718
719/// Hash the string.
720///
721/// # Examples
722///
723/// ```rune
724/// use std::ops::hash;
725///
726/// let a = "hello";
727/// let b = "hello";
728///
729/// assert_eq!(hash(a), hash(b));
730/// ```
731#[rune::function(keep, instance, protocol = HASH)]
732fn hash(this: &str, hasher: &mut Hasher) {
733 hasher.write_str(this);
734}
735
736/// Write a display representation of a string.
737///
738/// # Examples
739///
740/// ```rune
741/// println!("{}", "Hello");
742/// ```
743#[rune::function(keep, instance, protocol = DISPLAY_FMT)]
744#[inline]
745fn display_fmt(this: &str, f: &mut Formatter) -> alloc::Result<()> {
746 write!(f, "{this}")
747}
748
749/// Write a debug representation of a string.
750///
751/// # Examples
752///
753/// ```rune
754/// println!("{:?}", "Hello");
755/// ```
756#[rune::function(keep, instance, protocol = DEBUG_FMT)]
757#[inline]
758fn debug_fmt(this: &str, f: &mut Formatter) -> alloc::Result<()> {
759 write!(f, "{this:?}")
760}
761
762/// Shrinks the capacity of this `String` to match its length.
763///
764/// # Examples
765///
766/// Basic usage:
767///
768/// ```rune
769/// let s = "foo";
770///
771/// s.reserve(100);
772/// assert!(s.capacity() >= 100);
773///
774/// s.shrink_to_fit();
775/// assert_eq!(3, s.capacity());
776/// ```
777#[rune::function(instance)]
778fn shrink_to_fit(s: &mut String) -> alloc::Result<()> {
779 s.try_shrink_to_fit()
780}
781
782/// An iterator over substrings of this string slice, separated by
783/// characters matched by a pattern.
784///
/// The [pattern] can be a `String`, a [`char`], or a function or closure that
/// determines if a character matches.
787///
788/// [`char`]: prim@char
789/// [pattern]: self::pattern
790///
791/// # Iterator behavior
792///
793/// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
794/// allows a reverse search and forward/reverse search yields the same
795/// elements. This is true for, e.g., [`char`], but not for `&str`.
796///
797/// If the pattern allows a reverse search but its results might differ
798/// from a forward search, the [`rsplit`] method can be used.
799///
800/// [`rsplit`]: str::rsplit
801///
802/// # Examples
803///
804/// Simple patterns:
805///
806/// ```rune
807/// let v = "Mary had a little lamb".split(' ').collect::<Vec>();
808/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
809///
810/// let v = "".split('X').collect::<Vec>();
811/// assert_eq!(v, [""]);
812///
813/// let v = "lionXXtigerXleopard".split('X').collect::<Vec>();
814/// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
815///
816/// let v = "lion::tiger::leopard".split("::").collect::<Vec>();
817/// assert_eq!(v, ["lion", "tiger", "leopard"]);
818///
819/// let v = "abc1def2ghi".split(char::is_numeric).collect::<Vec>();
820/// assert_eq!(v, ["abc", "def", "ghi"]);
821///
822/// let v = "lionXtigerXleopard".split(char::is_uppercase).collect::<Vec>();
823/// assert_eq!(v, ["lion", "tiger", "leopard"]);
824/// ```
825///
826/// A more complex pattern, using a closure:
827///
828/// ```rune
829/// let v = "abc1defXghi".split(|c| c == '1' || c == 'X').collect::<Vec>();
830/// assert_eq!(v, ["abc", "def", "ghi"]);
831/// ```
832///
833/// If a string contains multiple contiguous separators, you will end up
834/// with empty strings in the output:
835///
836/// ```rune
837/// let x = "||||a||b|c";
838/// let d = x.split('|').collect::<Vec>();
839///
840/// assert_eq!(d, ["", "", "", "", "a", "", "b", "c"]);
841/// ```
842///
843/// Contiguous separators are separated by the empty string.
844///
845/// ```rune
846/// let x = "(///)";
847/// let d = x.split('/').collect::<Vec>();
848///
849/// assert_eq!(d, ["(", "", "", ")"]);
850/// ```
851///
852/// Separators at the start or end of a string are neighbored
853/// by empty strings.
854///
855/// ```rune
856/// let d = "010".split("0").collect::<Vec>();
857/// assert_eq!(d, ["", "1", ""]);
858/// ```
859///
860/// When the empty string is used as a separator, it separates
861/// every character in the string, along with the beginning
862/// and end of the string.
863///
864/// ```rune
865/// let f = "rust".split("").collect::<Vec>();
866/// assert_eq!(f, ["", "r", "u", "s", "t", ""]);
867/// ```
868///
869/// Contiguous separators can lead to possibly surprising behavior
870/// when whitespace is used as the separator. This code is correct:
871///
872/// ```rune
/// let x = "    a  b c";
874/// let d = x.split(' ').collect::<Vec>();
875///
876/// assert_eq!(d, ["", "", "", "", "a", "", "b", "c"]);
877/// ```
878///
879/// It does _not_ give you:
880///
881/// ```rune,ignore
882/// assert_eq!(d, ["a", "b", "c"]);
883/// ```
884///
885/// Use [`split_whitespace`] for this behavior.
886///
887/// [`split_whitespace`]: str::split_whitespace
888#[rune::function(instance, deprecated = "Use String::split instead")]
889fn split(this: Ref<str>, value: Value) -> Result<Value, VmError> {
890 match value.as_ref() {
891 Repr::Inline(Inline::Char(c)) => Ok(rune::to_value(Split::new(this, *c))?),
892 Repr::Inline(value) => Err(VmError::from([
893 VmErrorKind::expected::<String>(value.type_info()),
894 VmErrorKind::bad_argument(0),
895 ])),
896 Repr::Dynamic(value) => Err(VmError::from([
897 VmErrorKind::expected::<String>(value.type_info()),
898 VmErrorKind::bad_argument(0),
899 ])),
900 Repr::Any(value) => match value.type_hash() {
901 String::HASH => {
902 let s = value.borrow_ref::<String>()?;
903
904 let split = rune::to_value(Split::new(this, String::try_from(s.as_str())?))?;
905
906 Ok(split)
907 }
908 Function::HASH => {
909 let f = value.borrow_ref::<Function>()?;
910 let split = rune::to_value(Split::new(this, f.try_clone()?))?;
911 Ok(split)
912 }
913 _ => Err(VmError::from([
914 VmErrorKind::expected::<String>(value.type_info()),
915 VmErrorKind::bad_argument(0),
916 ])),
917 },
918 }
919}
920
921/// Splits the string on the first occurrence of the specified delimiter and
922/// returns prefix before delimiter and suffix after delimiter.
923///
924/// # Examples
925///
926/// ```rune
927/// assert_eq!("cfg".split_once('='), None);
928/// assert_eq!("cfg=".split_once('='), Some(("cfg", "")));
929/// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
930/// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
931/// ```
932#[rune::function(instance)]
933fn split_once(this: &str, value: Value) -> Result<Option<(String, String)>, VmError> {
934 let outcome = match value.as_ref() {
935 Repr::Inline(Inline::Char(pat)) => this.split_once(*pat),
936 Repr::Inline(value) => {
937 return Err(VmError::from([
938 VmErrorKind::expected::<String>(value.type_info()),
939 VmErrorKind::bad_argument(0),
940 ]));
941 }
942 Repr::Dynamic(value) => {
943 return Err(VmError::from([
944 VmErrorKind::expected::<String>(value.type_info()),
945 VmErrorKind::bad_argument(0),
946 ]));
947 }
948 Repr::Any(value) => match value.type_hash() {
949 String::HASH => {
950 let s = value.borrow_ref::<String>()?;
951 this.split_once(s.as_str())
952 }
953 Function::HASH => {
954 let f = value.borrow_ref::<Function>()?;
955 let mut err = None;
956
957 let outcome = this.split_once(|c: char| match f.call::<bool>((c,)) {
958 Ok(b) => b,
959 Err(e) => {
960 if err.is_none() {
961 err = Some(e);
962 }
963
964 false
965 }
966 });
967
968 if let Some(e) = err.take() {
969 return Err(e);
970 }
971
972 outcome
973 }
974 _ => {
975 return Err(VmError::from([
976 VmErrorKind::expected::<String>(value.type_info()),
977 VmErrorKind::bad_argument(0),
978 ]));
979 }
980 },
981 };
982
983 let Some((a, b)) = outcome else {
984 return Ok(None);
985 };
986
987 Ok(Some((a.try_to_owned()?, b.try_to_owned()?)))
988}
989
990/// Returns a string slice with leading and trailing whitespace removed.
991///
992/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
993/// Property `White_Space`, which includes newlines.
994///
995/// # Examples
996///
997/// Basic usage:
998///
999/// ```rune
1000/// let s = "\n Hello\tworld\t\n";
1001///
1002/// assert_eq!("Hello\tworld", s.trim());
1003/// ```
1004#[rune::function(instance)]
1005fn trim(this: &str) -> alloc::Result<String> {
1006 this.trim().try_to_owned()
1007}
1008
1009/// Returns a string slice with trailing whitespace removed.
1010///
1011/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
1012/// Property `White_Space`, which includes newlines.
1013///
1014/// # Text directionality
1015///
1016/// A string is a sequence of bytes. `end` in this context means the last
1017/// position of that byte string; for a left-to-right language like English or
1018/// Russian, this will be right side, and for right-to-left languages like
1019/// Arabic or Hebrew, this will be the left side.
1020///
1021/// # Examples
1022///
1023/// Basic usage:
1024///
1025/// ```rune
1026/// let s = "\n Hello\tworld\t\n";
1027/// assert_eq!("\n Hello\tworld", s.trim_end());
1028/// ```
1029///
1030/// Directionality:
1031///
1032/// ```rune
1033/// let s = " English ";
1034/// assert!(Some('h') == s.trim_end().chars().rev().next());
1035///
1036/// let s = " עברית ";
1037/// assert!(Some('ת') == s.trim_end().chars().rev().next());
1038/// ```
1039#[rune::function(instance)]
1040fn trim_end(this: &str) -> alloc::Result<String> {
1041 this.trim_end().try_to_owned()
1042}
1043
1044/// Returns `true` if `self` has a length of zero bytes.
1045///
1046/// # Examples
1047///
1048/// Basic usage:
1049///
1050/// ```rune
1051/// let s = "";
1052/// assert!(s.is_empty());
1053///
1054/// let s = "not empty";
1055/// assert!(!s.is_empty());
1056/// ```
1057#[rune::function(instance)]
1058fn is_empty(this: &str) -> bool {
1059 this.is_empty()
1060}
1061
1062/// Replaces all matches of a pattern with another string.
1063///
1064/// `replace` creates a new [`String`], and copies the data from this string
1065/// slice into it. While doing so, it attempts to find matches of a pattern. If
1066/// it finds any, it replaces them with the replacement string slice.
1067///
1068/// # Examples
1069///
1070/// Basic usage:
1071///
1072/// ```rune
1073/// let s = "this is old";
1074///
1075/// assert_eq!("this is new", s.replace("old", "new"));
1076/// assert_eq!("than an old", s.replace("is", "an"));
1077/// ```
1078///
1079/// When the pattern doesn't match, it returns this string slice as [`String`]:
1080///
1081/// ```rune
1082/// let s = "this is old";
1083/// assert_eq!(s, s.replace("cookie monster", "little lamb"));
1084/// ```
1085#[rune::function(instance)]
1086fn replace(this: &str, from: &str, to: &str) -> alloc::Result<String> {
1087 alloc::str::replace(this, from, to)
1088}
1089
1090/// Returns an iterator over the [`char`]s of a string slice.
1091///
1092/// As a string slice consists of valid UTF-8, we can iterate through a string
1093/// slice by [`char`]. This method returns such an iterator.
1094///
1095/// It's important to remember that [`char`] represents a Unicode Scalar Value,
1096/// and might not match your idea of what a 'character' is. Iteration over
1097/// grapheme clusters may be what you actually want. This functionality is not
1098/// provided by Rust's standard library, check crates.io instead.
1099///
1100/// # Examples
1101///
1102/// Basic usage:
1103///
1104/// ```rune
1105/// let word = "goodbye";
1106///
1107/// let count = word.chars().count();
1108/// assert_eq!(7, count);
1109///
1110/// let chars = word.chars();
1111///
1112/// assert_eq!(Some('g'), chars.next());
1113/// assert_eq!(Some('o'), chars.next());
1114/// assert_eq!(Some('o'), chars.next());
1115/// assert_eq!(Some('d'), chars.next());
1116/// assert_eq!(Some('b'), chars.next());
1117/// assert_eq!(Some('y'), chars.next());
1118/// assert_eq!(Some('e'), chars.next());
1119///
1120/// assert_eq!(None, chars.next());
1121/// ```
1122///
1123/// Remember, [`char`]s might not match your intuition about characters:
1124///
1125/// [`char`]: prim@char
1126///
1127/// ```rune
1128/// let y = "y̆";
1129///
1130/// let chars = y.chars();
1131///
1132/// assert_eq!(Some('y'), chars.next()); // not 'y̆'
1133/// assert_eq!(Some('\u{0306}'), chars.next());
1134///
1135/// assert_eq!(None, chars.next());
1136/// ```
1137#[rune::function(instance)]
1138fn chars(s: Ref<str>) -> Chars {
1139 Chars::new(s)
1140}
1141
1142/// Returns a subslice of `str`.
1143///
1144/// This is the non-panicking alternative to indexing the `str`. Returns
1145/// [`None`] whenever equivalent indexing operation would panic.
1146///
1147/// # Examples
1148///
1149/// ```rune
1150/// let v = "🗻∈🌏";
1151///
1152/// assert_eq!(Some("🗻"), v.get(0..4));
1153///
1154/// // indices not on UTF-8 sequence boundaries
1155/// assert!(v.get(1..).is_none());
1156/// assert!(v.get(..8).is_none());
1157///
1158/// // out of bounds
1159/// assert!(v.get(..42).is_none());
1160/// ```
1161#[rune::function(keep, instance)]
1162fn get(this: &str, key: Value) -> Result<Option<String>, VmError> {
1163 use crate::runtime::TypeOf;
1164
1165 let slice = match key.as_any() {
1166 Some(value) => match value.type_hash() {
1167 RangeFrom::HASH => {
1168 let range = value.borrow_ref::<RangeFrom>()?;
1169 let start = range.start.as_usize()?;
1170 this.get(start..)
1171 }
1172 RangeFull::HASH => {
1173 _ = value.borrow_ref::<RangeFull>()?;
1174 this.get(..)
1175 }
1176 RangeInclusive::HASH => {
1177 let range = value.borrow_ref::<RangeInclusive>()?;
1178 let start = range.start.as_usize()?;
1179 let end = range.end.as_usize()?;
1180 this.get(start..=end)
1181 }
1182 RangeToInclusive::HASH => {
1183 let range = value.borrow_ref::<RangeToInclusive>()?;
1184 let end = range.end.as_usize()?;
1185 this.get(..=end)
1186 }
1187 RangeTo::HASH => {
1188 let range = value.borrow_ref::<RangeTo>()?;
1189 let end = range.end.as_usize()?;
1190 this.get(..end)
1191 }
1192 Range::HASH => {
1193 let range = value.borrow_ref::<Range>()?;
1194 let start = range.start.as_usize()?;
1195 let end = range.end.as_usize()?;
1196 this.get(start..end)
1197 }
1198 _ => {
1199 return Err(VmError::from(VmErrorKind::UnsupportedIndexGet {
1200 target: String::type_info(),
1201 index: value.type_info(),
1202 }))
1203 }
1204 },
1205 _ => {
1206 return Err(VmError::from(VmErrorKind::UnsupportedIndexGet {
1207 target: String::type_info(),
1208 index: key.type_info(),
1209 }))
1210 }
1211 };
1212
1213 let Some(slice) = slice else {
1214 return Ok(None);
1215 };
1216
1217 Ok(Some(slice.try_to_owned()?))
1218}
1219
1220/// The add operation for strings.
1221#[rune::function(instance, protocol = ADD)]
1222fn add(a: &str, b: &str) -> Result<String, VmError> {
1223 let mut string = String::try_with_capacity(a.len() + b.len())?;
1224 string.try_push_str(a)?;
1225 string.try_push_str(b)?;
1226 Ok(string)
1227}
1228
1229/// The add assign operation for strings.
1230#[rune::function(instance, protocol = ADD_ASSIGN)]
1231fn add_assign(this: &mut String, other: &str) -> Result<(), VmError> {
1232 this.try_push_str(other)?;
1233 Ok(())
1234}
1235
1236/// Get a specific string index.
1237#[rune::function(instance, protocol = INDEX_GET)]
1238fn index_get(s: &str, key: Value) -> Result<String, VmError> {
1239 match get(s, key)? {
1240 Some(slice) => Ok(slice),
1241 None => Err(VmError::panic("missing string slice")),
1242 }
1243}
1244
1245/// Parses this string into an integer.
1246///
1247/// # Errors
1248///
1249/// Will return [`Err`] if it's not possible to parse this string slice into an
1250/// integer.
1251///
1252/// # Examples
1253///
1254/// Basic usage
1255///
1256/// ```rune
1257/// let four = "4".parse::<i64>()?;
1258/// assert_eq!(4, four);
1259/// ```
1260#[rune::function(instance, path = parse::<i64>)]
1261fn parse_int(s: &str) -> Result<i64, ParseIntError> {
1262 str::parse::<i64>(s)
1263}
1264
1265/// Parses this string into a float.
1266///
1267/// # Errors
1268///
1269/// Will return [`Err`] if it's not possible to parse this string slice into an
1270/// float.
1271///
1272/// # Examples
1273///
1274/// Basic usage
1275///
1276/// ```rune
1277/// let pi = "3.1415".parse::<f64>()?;
1278/// assert_eq!(3.1415, pi);
1279/// ```
1280#[rune::function(instance, path = parse::<f64>)]
1281fn parse_float(s: &str) -> Result<f64, ParseFloatError> {
1282 str::parse::<f64>(s)
1283}
1284
1285/// Parses this string into a character.
1286///
1287/// # Errors
1288///
1289/// Will return [`Err`] if it's not possible to parse this string slice into an
1290/// integer.
1291///
1292/// # Examples
1293///
1294/// Basic usage
1295///
1296/// ```rune
1297/// let a = "a".parse::<char>()?;
1298/// assert_eq!('a', a);
1299/// ```
1300#[rune::function(instance, path = parse::<char>)]
1301fn parse_char(s: &str) -> Result<char, char::ParseCharError> {
1302 str::parse::<char>(s)
1303}
1304
1305/// Returns the lowercase equivalent of this string slice, as a new [`String`].
1306///
1307/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1308/// `Lowercase`.
1309///
1310/// Since some characters can expand into multiple characters when changing
1311/// the case, this function returns a [`String`] instead of modifying the
1312/// parameter in-place.
1313///
1314/// # Examples
1315///
1316/// Basic usage:
1317///
1318/// ```rune
1319/// let s = "HELLO";
1320///
1321/// assert_eq!("hello", s.to_lowercase());
1322/// ```
1323///
1324/// A tricky example, with sigma:
1325///
1326/// ```rune
1327/// let sigma = "Σ";
1328///
1329/// assert_eq!("σ", sigma.to_lowercase());
1330///
1331/// // but at the end of a word, it's ς, not σ:
1332/// let odysseus = "ὈΔΥΣΣΕΎΣ";
1333///
1334/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
1335/// ```
1336///
1337/// Languages without case are not changed:
1338///
1339/// ```rune
1340/// let new_year = "农历新年";
1341///
1342/// assert_eq!(new_year, new_year.to_lowercase());
1343/// ```
1344#[rune::function(instance)]
1345fn to_lowercase(s: &str) -> Result<String, VmError> {
1346 let mut lowercase = String::try_with_capacity(s.len())?;
1347
1348 for (i, c) in s.char_indices() {
1349 // Inlined code to from std::str to handle upper-case sigma,
1350 // since it is the only Unicode character that is context-dependent
1351 // See https://github.com/rust-lang/rust/issues/26035 for more context
1352 if c == 'Σ' {
1353 lowercase.try_push_str(map_uppercase_sigma(s, i))?;
1354 } else {
1355 lowercase.try_extend(c.to_lowercase())?;
1356 }
1357 }
1358
1359 return Ok(lowercase);
1360
1361 fn map_uppercase_sigma(from: &str, i: usize) -> &'static str {
1362 // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1363 // for the definition of `Final_Sigma`.
1364 debug_assert!('Σ'.len_utf8() == 2);
1365 let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
1366 && !case_ignorable_then_cased(from[i + 2..].chars());
1367 if is_word_final {
1368 "ς"
1369 } else {
1370 "σ"
1371 }
1372 }
1373
1374 fn case_ignorable_then_cased<I: core::iter::Iterator<Item = char>>(mut iter: I) -> bool {
1375 match iter.find(|&c| !unicode::case_ignorable::lookup(c)) {
1376 Some(c) => unicode::cased::lookup(c),
1377 None => false,
1378 }
1379 }
1380}
1381
1382/// Returns the uppercase equivalent of this string slice, as a new [`String`].
1383///
1384/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1385/// `Uppercase`.
1386///
1387/// Since some characters can expand into multiple characters when changing
1388/// the case, this function returns a [`String`] instead of modifying the
1389/// parameter in-place.
1390///
1391/// # Examples
1392///
1393/// Basic usage:
1394///
1395/// ```rune
1396/// let s = "hello";
1397///
1398/// assert_eq!("HELLO", s.to_uppercase());
1399/// ```
1400///
1401/// Scripts without case are not changed:
1402///
1403/// ```rune
1404/// let new_year = "农历新年";
1405///
1406/// assert_eq!(new_year, new_year.to_uppercase());
1407/// ```
1408///
1409/// One character can become multiple:
1410/// ```rune
1411/// let s = "tschüß";
1412///
1413/// assert_eq!("TSCHÜSS", s.to_uppercase());
1414/// ```
1415#[rune::function(instance)]
1416fn to_uppercase(s: &str) -> Result<String, VmError> {
1417 let mut uppercase = String::try_with_capacity(s.len())?;
1418 uppercase.try_extend(s.chars().flat_map(|c| c.to_uppercase()))?;
1419 Ok(uppercase)
1420}
1421
1422#[derive(Any)]
1423#[rune(item = ::std::string)]
1424struct Chars {
1425 string: Ref<str>,
1426 start: usize,
1427 end: usize,
1428}
1429
1430impl Chars {
1431 fn new(string: Ref<str>) -> Self {
1432 let end = string.len();
1433 Self {
1434 string,
1435 start: 0,
1436 end,
1437 }
1438 }
1439
1440 #[rune::function(keep, protocol = NEXT)]
1441 fn next(&mut self) -> Option<char> {
1442 let string = self.string.get(self.start..self.end)?;
1443 let c = string.chars().next()?;
1444 self.start += c.len_utf8();
1445 Some(c)
1446 }
1447
1448 #[rune::function(keep, protocol = NEXT_BACK)]
1449 fn next_back(&mut self) -> Option<char> {
1450 let string = self.string.get(self.start..self.end)?;
1451 let c = string.chars().next_back()?;
1452 self.end -= c.len_utf8();
1453 Some(c)
1454 }
1455}
1456
/// A pattern that `Split` probes at successive positions of a string.
trait Pattern: 'static + TryClone + Named + FromValue + ToValue + MaybeTypeOf + TypeOf {
    /// Tests whether the pattern matches at the start of `tail`.
    ///
    /// Returns whether it matched, together with a byte count which the
    /// caller adds to its position in the string. In the implementations in
    /// this file that count is the length of the match on success, the UTF-8
    /// length of the first character of `tail` on failure, and `0` when
    /// `tail` is empty.
    fn test(&self, tail: &str) -> Result<(bool, usize), VmError>;

    /// Reports whether the pattern itself is empty; only the string pattern
    /// in this file can be.
    fn is_empty(&self) -> bool;
}
1462
1463impl Pattern for String {
1464 fn test(&self, tail: &str) -> Result<(bool, usize), VmError> {
1465 if tail.starts_with(self.as_str()) {
1466 Ok((true, self.len()))
1467 } else {
1468 let Some(c) = tail.chars().next() else {
1469 return Ok((false, 0));
1470 };
1471
1472 Ok((false, c.len_utf8()))
1473 }
1474 }
1475
1476 #[inline]
1477 fn is_empty(&self) -> bool {
1478 String::is_empty(self)
1479 }
1480}
1481
1482impl Pattern for char {
1483 fn test(&self, tail: &str) -> Result<(bool, usize), VmError> {
1484 let Some(c) = tail.chars().next() else {
1485 return Ok((false, 0));
1486 };
1487
1488 Ok((c == *self, c.len_utf8()))
1489 }
1490
1491 #[inline]
1492 fn is_empty(&self) -> bool {
1493 false
1494 }
1495}
1496
1497impl Pattern for Function {
1498 fn test(&self, tail: &str) -> Result<(bool, usize), VmError> {
1499 let Some(c) = tail.chars().next() else {
1500 return Ok((false, 0));
1501 };
1502
1503 Ok((self.call((c,))?, c.len_utf8()))
1504 }
1505
1506 #[inline]
1507 fn is_empty(&self) -> bool {
1508 false
1509 }
1510}
1511
1512#[derive(Any)]
1513#[rune(item = ::std::string)]
1514struct Split<T>
1515where
1516 T: Pattern,
1517{
1518 string: Option<Ref<str>>,
1519 pattern: T,
1520 from: usize,
1521 to: usize,
1522}
1523
1524impl<T> Split<T>
1525where
1526 T: Pattern,
1527{
1528 fn new(string: Ref<str>, pattern: T) -> Self {
1529 Self {
1530 string: Some(string),
1531 pattern,
1532 from: 0,
1533 to: 0,
1534 }
1535 }
1536
1537 #[rune::function(keep, protocol = NEXT)]
1538 fn next(&mut self) -> Result<Option<String>, VmError> {
1539 let Some(string) = &self.string else {
1540 return Ok(None);
1541 };
1542
1543 if self.from == string.len() && self.from == self.to {
1544 self.string = None;
1545 let out = "".try_to_owned()?;
1546 return Ok(Some(out));
1547 }
1548
1549 while self.to < string.len() {
1550 let Some(tail) = string.get(self.to..) else {
1551 return Ok(None);
1552 };
1553
1554 let (m, len) = self.pattern.test(tail)?;
1555
1556 if m {
1557 let head = string.get(self.from..self.to).unwrap_or_default();
1558 let out = head.try_to_owned()?;
1559
1560 if len == 0 {
1561 self.from = self.to;
1562 self.to += tail.chars().next().map_or(0, |c| c.len_utf8());
1563 } else {
1564 self.to += len;
1565 self.from = self.to;
1566 }
1567
1568 return Ok(Some(out));
1569 } else {
1570 self.to += len;
1571 }
1572 }
1573
1574 let tail = string.get(self.from..self.to).unwrap_or_default();
1575 self.from = self.to;
1576 let out = tail.try_to_owned()?;
1577
1578 if !self.pattern.is_empty() {
1579 self.string = None;
1580 }
1581
1582 Ok(Some(out))
1583 }
1584
1585 #[rune::function(keep, protocol = INTO_ITER)]
1586 fn into_iter(self) -> Self {
1587 self
1588 }
1589}
1590
// The search routine and tables below are inlined from core::unicode, since
// using it directly is marked as using an unstable library feature.
mod unicode {
    /// Number of low bits in a run header which hold the prefix sum; the
    /// remaining high bits hold an index into the offsets table.
    const PREFIX_SUM_BITS: u32 = 21;

    /// Extracts the prefix sum stored in the low bits of a run header.
    fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
        let mask: u32 = (1 << PREFIX_SUM_BITS) - 1;
        short_offset_run_header & mask
    }

    /// Extracts the offsets index stored in the high bits of a run header.
    fn decode_length(short_offset_run_header: u32) -> usize {
        (short_offset_run_header >> PREFIX_SUM_BITS) as usize
    }

    /// Reports whether `needle` is a member of the set encoded by
    /// `short_offset_runs` and `offsets`.
    ///
    /// The run headers locate the stretch of `offsets` which covers `needle`.
    /// The offsets in that stretch are then summed up until the sum exceeds
    /// the distance of `needle` from the start of the stretch. The needle is
    /// a member when the index at which the walk stops is odd.
    #[inline(always)]
    fn skip_search<const SOR: usize, const OFFSETS: usize>(
        needle: u32,
        short_offset_runs: &[u32; SOR],
        offsets: &[u8; OFFSETS],
    ) -> bool {
        // Shifting left by 11 drops the index bits, so that only the prefix
        // sums take part in the comparison.
        //
        // The prefix sum of the last header is greater than char::MAX (the
        // largest possible needle), so the needle is never found at the last
        // position nor sorted after it. `run` is therefore always a valid
        // index.
        let run = match short_offset_runs
            .binary_search_by_key(&(needle << 11), |header| header << 11)
        {
            Ok(found) => found + 1,
            Err(insert_at) => insert_at,
        };

        // The stretch of offsets belonging to this run reaches up to where
        // the next run starts, or to the end of the table for the last run.
        let first = decode_length(short_offset_runs[run]);
        let length = match short_offset_runs.get(run + 1) {
            Some(&next) => decode_length(next) - first,
            None => offsets.len() - first,
        };

        // Code point at which this run begins, zero for the very first run.
        let base = match run.checked_sub(1) {
            Some(prev) => decode_prefix_sum(short_offset_runs[prev]),
            None => 0,
        };

        let total = needle - base;
        let mut offset_idx = first;
        let mut prefix_sum = 0u32;

        for &offset in offsets[first..].iter().take(length - 1) {
            prefix_sum += u32::from(offset);

            if prefix_sum > total {
                break;
            }

            offset_idx += 1;
        }

        offset_idx % 2 == 1
    }

    /// Lookup table for case-ignorable characters.
    #[rustfmt::skip]
    pub mod case_ignorable {
        static SHORT_OFFSET_RUNS: [u32; 35] = [
            688, 44045149, 572528402, 576724925, 807414908, 878718981, 903913493, 929080568, 933275148,
            937491230, 1138818560, 1147208189, 1210124160, 1222707713, 1235291428, 1260457643,
            1264654383, 1499535675, 1507925040, 1566646003, 1629566000, 1650551536, 1658941263,
            1671540720, 1688321181, 1700908800, 1709298023, 1717688832, 1738661888, 1763828398,
            1797383403, 1805773008, 1809970171, 1819148289, 1824457200,
        ];
        static OFFSETS: [u8; 875] = [
            39, 1, 6, 1, 11, 1, 35, 1, 1, 1, 71, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2,
            1, 1, 251, 7, 207, 1, 5, 1, 49, 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35,
            1, 10, 21, 16, 1, 101, 8, 1, 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24,
            24, 43, 3, 44, 1, 7, 2, 6, 8, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1,
            58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2,
            57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1,
            61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1,
            5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9,
            98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1,
            102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3,
            29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118,
            3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 31,
            49, 4, 48, 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3,
            58, 8, 2, 2, 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1,
            3, 11, 3, 13, 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1,
            16, 13, 51, 33, 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6,
            93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 103, 3, 3,
            2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4,
            2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2,
            1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3,
            1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3,
            0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 0, 2,
            80, 3, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1,
            50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1,
            3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 195, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2,
            238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 3, 2, 4, 1, 5,
            0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46,
            13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2,
            3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 0, 17, 6, 15, 0, 5, 59, 7, 9, 4, 0, 1, 63, 17,
            64, 2, 1, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3,
            0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160,
            14, 0, 1, 61, 4, 0, 5, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
        ];

        /// Reports whether `c` is contained in the case-ignorable table.
        pub fn lookup(c: char) -> bool {
            let needle = c as u32;
            super::skip_search(needle, &SHORT_OFFSET_RUNS, &OFFSETS)
        }
    }

    /// Lookup table for cased characters.
    #[rustfmt::skip]
    pub mod cased {
        static SHORT_OFFSET_RUNS: [u32; 22] = [
            4256, 115348384, 136322176, 144711446, 163587254, 320875520, 325101120, 350268208,
            392231680, 404815649, 413205504, 421595008, 467733632, 484513952, 492924480, 497144832,
            501339814, 578936576, 627171376, 639756544, 643952944, 649261450,
        ];
        static OFFSETS: [u8; 315] = [
            65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5,
            96, 1, 42, 4, 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9,
            41, 0, 38, 1, 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 9, 7, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2,
            38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13,
            5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4,
            1, 6, 4, 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1,
            0, 46, 18, 30, 132, 102, 3, 4, 1, 59, 5, 2, 1, 1, 1, 5, 24, 5, 1, 3, 0, 43, 1, 14, 6, 80, 0,
            7, 12, 5, 0, 26, 6, 26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1,
            7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, 51, 13, 51, 0, 64, 0, 64, 0, 85, 1, 71, 1, 2, 2, 1, 2,
            2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2,
            25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6,
            0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
        ];

        /// Reports whether `c` is contained in the cased table.
        pub fn lookup(c: char) -> bool {
            let needle = c as u32;
            super::skip_search(needle, &SHORT_OFFSET_RUNS, &OFFSETS)
        }
    }
}