rune/modules/string.rs
1//! Strings.
2
3use core::char;
4use core::cmp::Ordering;
5use core::num::{ParseFloatError, ParseIntError};
6
7use crate as rune;
8use crate::alloc::fmt::TryWrite;
9use crate::alloc::prelude::*;
10use crate::alloc::string::FromUtf8Error;
11use crate::alloc::{String, Vec};
12use crate::compile::Named;
13use crate::runtime::{
14 Bytes, Formatter, FromValue, Function, Hasher, Inline, MaybeTypeOf, Panic, Range, RangeFrom,
15 RangeFull, RangeInclusive, RangeTo, RangeToInclusive, Ref, Repr, ToValue, TypeOf, Value,
16 VmErrorKind, VmResult,
17};
18use crate::{Any, ContextError, Module, TypeHash};
19
20/// Strings.
21///
22/// Strings in Rune are declared with the literal `"string"` syntax, but can also be
23/// interacted with through the fundamental [`String`] type.
24///
25/// ```rune
26/// let string1 = "Hello";
27/// let string2 = String::new();
28/// string2.push_str("Hello");
29///
30/// assert_eq!(string1, string2);
31/// ```
32#[rune::module(::std::string)]
33pub fn module() -> Result<Module, ContextError> {
34 let mut m = Module::from_meta(self::module_meta)?;
35
36 m.ty::<String>()?;
37
38 m.function_meta(string_from)?;
39 m.function_meta(string_from_str)?;
40 m.function_meta(string_new)?;
41 m.function_meta(string_with_capacity)?;
42 m.function_meta(len)?;
43 m.function_meta(starts_with)?;
44 m.function_meta(ends_with)?;
45 m.function_meta(capacity)?;
46 m.function_meta(clear)?;
47 m.function_meta(contains)?;
48 m.function_meta(push)?;
49 m.function_meta(push_str)?;
50 m.function_meta(reserve)?;
51 m.function_meta(reserve_exact)?;
52 m.function_meta(from_utf8)?;
53 m.function_meta(as_bytes)?;
54 m.function_meta(into_bytes)?;
55 m.function_meta(shrink_to_fit)?;
56 m.function_meta(char_at)?;
57 m.function_meta(split)?;
58 m.function_meta(split_once)?;
59 m.associated_function("split_str", __rune_fn__split)?;
60 m.function_meta(trim)?;
61 m.function_meta(trim_end)?;
62 m.function_meta(replace)?;
63 m.function_meta(is_empty)?;
64 m.function_meta(chars)?;
65 m.function_meta(get)?;
66 m.function_meta(parse_int)?;
67 m.function_meta(parse_float)?;
68 m.function_meta(parse_char)?;
69 m.function_meta(to_lowercase)?;
70 m.function_meta(to_uppercase)?;
71
72 m.function_meta(add)?;
73 m.function_meta(add_assign)?;
74 m.function_meta(index_get)?;
75
76 m.function_meta(clone__meta)?;
77 m.implement_trait::<String>(rune::item!(::std::clone::Clone))?;
78
79 m.function_meta(partial_eq__meta)?;
80 m.implement_trait::<String>(rune::item!(::std::cmp::PartialEq))?;
81
82 m.function_meta(eq__meta)?;
83 m.implement_trait::<String>(rune::item!(::std::cmp::Eq))?;
84
85 m.function_meta(partial_cmp__meta)?;
86 m.implement_trait::<String>(rune::item!(::std::cmp::PartialOrd))?;
87
88 m.function_meta(cmp__meta)?;
89 m.implement_trait::<String>(rune::item!(::std::cmp::Ord))?;
90
91 m.function_meta(hash__meta)?;
92
93 m.function_meta(display_fmt__meta)?;
94 m.function_meta(debug_fmt__meta)?;
95
96 m.ty::<Chars>()?;
97 m.function_meta(Chars::next__meta)?;
98 m.function_meta(Chars::next_back__meta)?;
99 m.implement_trait::<Chars>(rune::item!(::std::iter::Iterator))?;
100 m.implement_trait::<Chars>(rune::item!(::std::iter::DoubleEndedIterator))?;
101
102 macro_rules! split {
103 ($ty:ty) => {
104 m.ty::<Split<$ty>>()?;
105 m.function_meta(Split::<$ty>::next__meta)?;
106 m.implement_trait::<Split<$ty>>(rune::item!(::std::iter::Iterator))?;
107 };
108 }
109
110 split!(Function);
111 split!(String);
112 split!(char);
113 Ok(m)
114}
115
116/// Converts a vector of bytes to a `String`.
117///
118/// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes
119/// ([`Vec<u8>`]) is made of bytes, so this function converts between the two.
120/// Not all byte slices are valid `String`s, however: `String` requires that it
121/// is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid
122/// UTF-8, and then does the conversion.
123///
124/// If you are sure that the byte slice is valid UTF-8, and you don't want to
125/// incur the overhead of the validity check, there is an unsafe version of this
126/// function, [`from_utf8_unchecked`], which has the same behavior but skips the
127/// check.
128///
129/// The inverse of this method is [`into_bytes`].
130///
131/// # Errors
132///
133/// Returns [`Err`] if the slice is not UTF-8 with a description as to why the
134/// provided bytes are not UTF-8. The vector you moved in is also included.
135///
136/// # Examples
137///
138/// Basic usage:
139///
140/// ```rune
141/// // some bytes, in a vector
142/// let sparkle_heart = Bytes::from_vec([240u8, 159u8, 146u8, 150u8]);
143///
144/// // We know these bytes are valid, so we'll use `unwrap()`.
145/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap();
146///
147/// assert_eq!("💖", sparkle_heart);
148/// ```
149///
150/// Incorrect bytes:
151///
152/// ```rune
153/// // some invalid bytes, in a vector
154/// let sparkle_heart = Bytes::from_vec([0u8, 159u8, 146u8, 150u8]);
155///
156/// assert!(String::from_utf8(sparkle_heart).is_err());
157/// ```
158///
159/// See the docs for [`FromUtf8Error`] for more details on what you can do with
160/// this error.
161///
162/// [`from_utf8_unchecked`]: String::from_utf8_unchecked
163/// [`Vec<u8>`]: crate::vec::Vec "Vec"
164/// [`&str`]: prim@str "&str"
165/// [`into_bytes`]: String::into_bytes
166#[rune::function(free, path = String::from_utf8)]
167fn from_utf8(bytes: &[u8]) -> VmResult<Result<String, FromUtf8Error>> {
168 let vec = vm_try!(Vec::try_from(bytes));
169 VmResult::Ok(String::from_utf8(vec))
170}
171
172/// Returns a byte slice of this `String`'s contents.
173///
174/// The inverse of this method is [`from_utf8`].
175///
176/// [`from_utf8`]: String::from_utf8
177///
178/// # Examples
179///
180/// Basic usage:
181///
182/// ```rune
183/// let s = "hello";
184/// assert_eq!(b"hello", s.as_bytes());
185/// assert!(is_readable(s));
186/// ```
187#[rune::function(instance)]
188fn as_bytes(s: &str) -> VmResult<Bytes> {
189 VmResult::Ok(Bytes::from_vec(vm_try!(Vec::try_from(s.as_bytes()))))
190}
191
192/// Constructs a string from another string.
193///
194/// # Examples
195///
196/// Basic usage:
197///
198/// ```rune
199/// let s = String::from("hello");
200/// assert_eq!(s, "hello");
201/// ```
202#[rune::function(free, path = String::from)]
203fn string_from(value: &str) -> VmResult<String> {
204 VmResult::Ok(vm_try!(String::try_from(value)))
205}
206
207#[rune::function(free, path = String::from_str, deprecated = "Use String::from instead")]
208fn string_from_str(value: &str) -> VmResult<String> {
209 VmResult::Ok(vm_try!(String::try_from(value)))
210}
211
/// Creates a new empty `String`.
///
/// Given that the `String` is empty, this will not allocate any initial buffer.
/// While that means that this initial operation is very inexpensive, it may
/// cause excessive allocation later when you add data. If you have an idea of
/// how much data the `String` will hold, consider the [`with_capacity`] method
/// to prevent excessive re-allocation.
///
/// [`with_capacity`]: String::with_capacity
///
/// # Examples
///
/// Basic usage:
///
/// ```rune
/// let s = String::new();
/// ```
#[rune::function(free, path = String::new)]
fn string_new() -> String {
    // Infallible, so no `VmResult` wrapper is needed here.
    String::new()
}
233
234/// Creates a new empty `String` with at least the specified capacity.
235///
236/// `String`s have an internal buffer to hold their data. The capacity is the
237/// length of that buffer, and can be queried with the [`capacity`] method. This
238/// method creates an empty `String`, but one with an initial buffer that can
239/// hold at least `capacity` bytes. This is useful when you may be appending a
240/// bunch of data to the `String`, reducing the number of reallocations it needs
241/// to do.
242///
243/// [`capacity`]: String::capacity
244///
245/// If the given capacity is `0`, no allocation will occur, and this method is
246/// identical to the [`new`] method.
247///
248/// [`new`]: String::new
249///
250/// # Examples
251///
252/// Basic usage:
253///
254/// ```rune
255/// let s = String::with_capacity(10);
256///
257/// // The String contains no chars, even though it has capacity for more
258/// assert_eq!(s.len(), 0);
259///
260/// // These are all done without reallocating...
261/// let cap = s.capacity();
262///
263/// for _ in 0..10 {
264/// s.push('a');
265/// }
266///
267/// assert_eq!(s.capacity(), cap);
268///
269/// // ...but this may make the string reallocate
270/// s.push('a');
271/// ```
272#[rune::function(free, path = String::with_capacity)]
273fn string_with_capacity(capacity: usize) -> VmResult<String> {
274 VmResult::Ok(vm_try!(String::try_with_capacity(capacity)))
275}
276
277/// Returns the length of `self`.
278///
279/// This length is in bytes, not [`char`]s or graphemes. In other words, it
280/// might not be what a human considers the length of the string.
281///
282/// [`char`]: prim@char
283///
284/// # Examples
285///
286/// Basic usage:
287///
288/// ```rune
289/// let len = "foo".len();
290/// assert_eq!(3, len);
291///
292/// assert_eq!("ƒoo".len(), 4); // fancy f!
293/// assert_eq!("ƒoo".chars().count(), 3);
294/// ```
295#[rune::function(instance)]
296fn len(this: &str) -> usize {
297 this.len()
298}
299
300/// Returns `true` if the given pattern matches a prefix of this string slice.
301///
302/// Returns `false` if it does not.
303///
304/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a function
305/// or closure that determines if a character matches.
306///
307/// [`char`]: prim@char
308/// [pattern]: self::pattern
309///
310/// # Examples
311///
312/// Basic usage:
313///
314/// ```rune
315/// let bananas = "bananas";
316///
317/// assert!(bananas.starts_with("bana"));
318/// assert!(!bananas.starts_with("nana"));
319/// ```
320#[rune::function(instance)]
321fn starts_with(this: &str, other: &str) -> bool {
322 this.starts_with(other)
323}
324
325/// Returns `true` if the given pattern matches a suffix of this string slice.
326///
327/// Returns `false` if it does not.
328///
329/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a function
330/// or closure that determines if a character matches.
331///
332/// [`char`]: prim@char
333/// [pattern]: self::pattern
334///
335/// # Examples
336///
337/// Basic usage:
338///
339/// ```rune
340/// let bananas = "bananas";
341///
342/// assert!(bananas.ends_with("anas"));
343/// assert!(!bananas.ends_with("nana"));
344/// ```
345#[rune::function(instance)]
346fn ends_with(this: &str, other: &str) -> bool {
347 this.ends_with(other)
348}
349
350/// Returns this `String`'s capacity, in bytes.
351///
352/// # Examples
353///
354/// Basic usage:
355///
356/// ```rune
357/// let s = String::with_capacity(10);
358///
359/// assert!(s.capacity() >= 10);
360/// ```
361#[rune::function(instance)]
362fn capacity(this: &String) -> usize {
363 this.capacity()
364}
365
366/// Truncates this `String`, removing all contents.
367///
368/// While this means the `String` will have a length of zero, it does not touch
369/// its capacity.
370///
371/// # Examples
372///
373/// Basic usage:
374///
375/// ```rune
376/// let s = "foo";
377///
378/// s.clear();
379///
380/// assert!(s.is_empty());
381/// assert_eq!(0, s.len());
382/// assert_eq!(3, s.capacity());
383/// ```
384#[rune::function(instance)]
385fn clear(this: &mut String) {
386 this.clear();
387}
388
389/// Returns `true` if the given pattern matches a sub-slice of this string
390/// slice.
391///
392/// Returns `false` if it does not.
393///
394/// The [pattern] can be a `String`, [`char`], or a function or closure that
395/// determines if a character matches.
396///
397/// [`char`]: prim@char
398/// [pattern]: self::pattern
399///
400/// # Examples
401///
402/// Basic usage:
403///
404/// ```rune
405/// let bananas = "bananas";
406///
407/// assert!(bananas.contains("nana"));
408/// assert!(!bananas.contains("apples"));
409/// ```
410#[rune::function(instance)]
411fn contains(this: &str, other: &str) -> bool {
412 this.contains(other)
413}
414
415/// Appends the given [`char`] to the end of this `String`.
416///
417/// # Examples
418///
419/// Basic usage:
420///
421/// ```rune
422/// let s = "abc";
423///
424/// s.push('1');
425/// s.push('2');
426/// s.push('3');
427///
428/// assert_eq!("abc123", s);
429/// ```
430#[rune::function(instance)]
431fn push(this: &mut String, c: char) -> VmResult<()> {
432 vm_try!(this.try_push(c));
433 VmResult::Ok(())
434}
435
436/// Appends a given string slice onto the end of this `String`.
437///
438/// # Examples
439///
440/// Basic usage:
441///
442/// ```rune
443/// let s = "foo";
444///
445/// s.push_str("bar");
446///
447/// assert_eq!("foobar", s);
448/// ```
449#[rune::function(instance)]
450fn push_str(this: &mut String, other: &str) -> VmResult<()> {
451 vm_try!(this.try_push_str(other));
452 VmResult::Ok(())
453}
454
455/// Reserves capacity for at least `additional` bytes more than the current
456/// length. The allocator may reserve more space to speculatively avoid frequent
457/// allocations. After calling `reserve`, capacity will be greater than or equal
458/// to `self.len() + additional`. Does nothing if capacity is already
459/// sufficient.
460///
461/// # Panics
462///
463/// Panics if the new capacity overflows [`usize`].
464///
465/// # Examples
466///
467/// Basic usage:
468///
469/// ```rune
470/// let s = String::new();
471///
472/// s.reserve(10);
473///
474/// assert!(s.capacity() >= 10);
475/// ```
476///
477/// This might not actually increase the capacity:
478///
479/// ```rune
480/// let s = String::with_capacity(10);
481/// s.push('a');
482/// s.push('b');
483///
484/// // s now has a length of 2 and a capacity of at least 10
485/// let capacity = s.capacity();
486/// assert_eq!(2, s.len());
487/// assert!(capacity >= 10);
488///
489/// // Since we already have at least an extra 8 capacity, calling this...
490/// s.reserve(8);
491///
492/// // ... doesn't actually increase.
493/// assert_eq!(capacity, s.capacity());
494/// ```
495#[rune::function(instance)]
496fn reserve(this: &mut String, additional: usize) -> VmResult<()> {
497 vm_try!(this.try_reserve(additional));
498 VmResult::Ok(())
499}
500
501/// Reserves the minimum capacity for at least `additional` bytes more than the
502/// current length. Unlike [`reserve`], this will not deliberately over-allocate
503/// to speculatively avoid frequent allocations. After calling `reserve_exact`,
504/// capacity will be greater than or equal to `self.len() + additional`. Does
505/// nothing if the capacity is already sufficient.
506///
507/// [`reserve`]: String::reserve
508///
509/// # Panics
510///
511/// Panics if the new capacity overflows [`usize`].
512///
513/// # Examples
514///
515/// Basic usage:
516///
517/// ```rune
518/// let s = String::new();
519///
520/// s.reserve_exact(10);
521///
522/// assert!(s.capacity() >= 10);
523/// ```
524///
525/// This might not actually increase the capacity:
526///
527/// ```rune
528/// let s = String::with_capacity(10);
529/// s.push('a');
530/// s.push('b');
531///
532/// // s now has a length of 2 and a capacity of at least 10
533/// let capacity = s.capacity();
534/// assert_eq!(2, s.len());
535/// assert!(capacity >= 10);
536///
537/// // Since we already have at least an extra 8 capacity, calling this...
538/// s.reserve_exact(8);
539///
540/// // ... doesn't actually increase.
541/// assert_eq!(capacity, s.capacity());
542/// ```
543#[rune::function(instance)]
544fn reserve_exact(this: &mut String, additional: usize) -> VmResult<()> {
545 vm_try!(this.try_reserve_exact(additional));
546 VmResult::Ok(())
547}
548
549/// Returns a byte slice of this `String`'s contents while moving the string.
550///
551/// The inverse of this method is [`from_utf8`].
552///
553/// [`from_utf8`]: String::from_utf8
554///
555/// # Examples
556///
557/// Basic usage:
558///
559/// ```rune
560/// let s = "hello";
561/// assert_eq!(b"hello", s.into_bytes());
562/// assert!(!is_readable(s));
563/// ```
564#[rune::function(instance)]
565fn into_bytes(s: String) -> Bytes {
566 Bytes::from_vec(s.into_bytes())
567}
568
569/// Checks that `index`-th byte is the first byte in a UTF-8 code point sequence
570/// or the end of the string.
571///
572/// The start and end of the string (when `index == self.len()`) are considered
573/// to be boundaries.
574///
575/// Returns `false` if `index` is greater than `self.len()`.
576///
577/// # Examples
578///
579/// ```rune
580/// let s = "Löwe 老虎 Léopard";
581/// assert!(s.is_char_boundary(0));
582/// // start of `老`
583/// assert!(s.is_char_boundary(6));
584/// assert!(s.is_char_boundary(s.len()));
585///
586/// // second byte of `ö`
587/// assert!(!s.is_char_boundary(2));
588///
589/// // third byte of `老`
590/// assert!(!s.is_char_boundary(8));
591/// ```
592#[rune::function(instance)]
593fn is_char_boundary(s: &str, index: usize) -> bool {
594 s.is_char_boundary(index)
595}
596
597/// Access the character at the given byte index.
598///
599/// Returns `None` if the index is out of bounds or not a character boundary.
600///
601/// # Examples
602///
603/// ```rune
604/// let s = "おはよう";
605/// assert_eq!(s.char_at(0), Some('お'));
606/// assert_eq!(s.char_at(1), None);
607/// assert_eq!(s.char_at(2), None);
608/// assert_eq!(s.char_at(3), Some('は'));
609/// ```
610#[rune::function(instance)]
611fn char_at(s: &str, index: usize) -> Option<char> {
612 if !s.is_char_boundary(index) {
613 return None;
614 }
615
616 s[index..].chars().next()
617}
618
619/// Clones the string and its underlying storage.
620///
621/// # Examples
622///
623/// Basic usage:
624///
625/// ```rune
626/// let a = "h";
627/// let b = a;
628/// b.push('i');
629///
630/// // `a` and `b` refer to the same underlying string.
631/// assert_eq!(a, b);
632///
633/// let c = b.clone();
634/// c.push('!');
635/// assert_ne!(a, c);
636/// ```
637#[rune::function(keep, instance, protocol = CLONE)]
638fn clone(this: &String) -> VmResult<String> {
639 VmResult::Ok(vm_try!(this.try_clone()))
640}
641
642/// Test two strings for partial equality.
643///
644/// # Examples
645///
646/// ```rune
647/// use std::ops::partial_eq;
648///
649/// assert_eq!(partial_eq("a", "a"), true);
650/// assert_eq!(partial_eq("a", "ab"), false);
651/// assert_eq!(partial_eq("ab", "a"), false);
652/// ```
653#[rune::function(keep, instance, protocol = PARTIAL_EQ)]
654#[inline]
655fn partial_eq(this: &str, rhs: &str) -> bool {
656 this.eq(rhs)
657}
658
659/// Test two strings for total equality.
660///
661/// # Examples
662///
663/// ```rune
664/// use std::ops::eq;
665///
666/// assert_eq!(eq("a", "a"), true);
667/// assert_eq!(eq("a", "ab"), false);
668/// assert_eq!(eq("ab", "a"), false);
669/// ```
670#[rune::function(keep, instance, protocol = EQ)]
671#[inline]
672fn eq(this: &str, rhs: &str) -> bool {
673 this.eq(rhs)
674}
675
676/// Perform a partial ordered comparison between two strings.
677///
678/// # Examples
679///
680/// ```rune
681/// assert!("a" < "ab");
682/// assert!("ab" > "a");
683/// assert!("a" == "a");
684/// ```
685///
686/// Using explicit functions:
687///
688/// ```rune
689/// use std::cmp::Ordering;
690/// use std::ops::partial_cmp;
691///
692/// assert_eq!(partial_cmp("a", "ab"), Some(Ordering::Less));
693/// assert_eq!(partial_cmp("ab", "a"), Some(Ordering::Greater));
694/// assert_eq!(partial_cmp("a", "a"), Some(Ordering::Equal));
695/// ```
696#[rune::function(keep, instance, protocol = PARTIAL_CMP)]
697#[inline]
698fn partial_cmp(this: &str, rhs: &str) -> Option<Ordering> {
699 this.partial_cmp(rhs)
700}
701
702/// Perform a totally ordered comparison between two strings.
703///
704/// # Examples
705///
706/// ```rune
707/// use std::cmp::Ordering;
708/// use std::ops::cmp;
709///
710/// assert_eq!(cmp("a", "ab"), Ordering::Less);
711/// assert_eq!(cmp("ab", "a"), Ordering::Greater);
712/// assert_eq!(cmp("a", "a"), Ordering::Equal);
713/// ```
714#[rune::function(keep, instance, protocol = CMP)]
715#[inline]
716fn cmp(this: &str, rhs: &str) -> Ordering {
717 this.cmp(rhs)
718}
719
/// Hash the string.
///
/// The string's contents are written into the provided hasher, so strings
/// with equal contents feed the hasher identically.
///
/// # Examples
///
/// ```rune
/// use std::ops::hash;
///
/// let a = "hello";
/// let b = "hello";
///
/// assert_eq!(hash(a), hash(b));
/// ```
#[rune::function(keep, instance, protocol = HASH)]
fn hash(this: &str, hasher: &mut Hasher) {
    hasher.write_str(this);
}
736
/// Write a display representation of a string.
///
/// The string is written to the formatter using its `Display` formatting
/// (`{}`).
///
/// # Examples
///
/// ```rune
/// println!("{}", "Hello");
/// ```
#[rune::function(keep, instance, protocol = DISPLAY_FMT)]
#[inline]
fn display_fmt(this: &str, f: &mut Formatter) -> VmResult<()> {
    rune::vm_write!(f, "{this}")
}
749
/// Write a debug representation of a string.
///
/// The string is written to the formatter using its `Debug` formatting
/// (`{:?}`).
///
/// # Examples
///
/// ```rune
/// println!("{:?}", "Hello");
/// ```
#[rune::function(keep, instance, protocol = DEBUG_FMT)]
#[inline]
fn debug_fmt(this: &str, f: &mut Formatter) -> VmResult<()> {
    rune::vm_write!(f, "{this:?}")
}
762
763/// Shrinks the capacity of this `String` to match its length.
764///
765/// # Examples
766///
767/// Basic usage:
768///
769/// ```rune
770/// let s = "foo";
771///
772/// s.reserve(100);
773/// assert!(s.capacity() >= 100);
774///
775/// s.shrink_to_fit();
776/// assert_eq!(3, s.capacity());
777/// ```
778#[rune::function(instance)]
779fn shrink_to_fit(s: &mut String) -> VmResult<()> {
780 vm_try!(s.try_shrink_to_fit());
781 VmResult::Ok(())
782}
783
784/// An iterator over substrings of this string slice, separated by
785/// characters matched by a pattern.
786///
787/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
788/// function or closure that determines if a character matches.
789///
790/// [`char`]: prim@char
791/// [pattern]: self::pattern
792///
793/// # Iterator behavior
794///
795/// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
796/// allows a reverse search and forward/reverse search yields the same
797/// elements. This is true for, e.g., [`char`], but not for `&str`.
798///
799/// If the pattern allows a reverse search but its results might differ
800/// from a forward search, the [`rsplit`] method can be used.
801///
802/// [`rsplit`]: str::rsplit
803///
804/// # Examples
805///
806/// Simple patterns:
807///
808/// ```rune
809/// let v = "Mary had a little lamb".split(' ').collect::<Vec>();
810/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
811///
812/// let v = "".split('X').collect::<Vec>();
813/// assert_eq!(v, [""]);
814///
815/// let v = "lionXXtigerXleopard".split('X').collect::<Vec>();
816/// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
817///
818/// let v = "lion::tiger::leopard".split("::").collect::<Vec>();
819/// assert_eq!(v, ["lion", "tiger", "leopard"]);
820///
821/// let v = "abc1def2ghi".split(char::is_numeric).collect::<Vec>();
822/// assert_eq!(v, ["abc", "def", "ghi"]);
823///
824/// let v = "lionXtigerXleopard".split(char::is_uppercase).collect::<Vec>();
825/// assert_eq!(v, ["lion", "tiger", "leopard"]);
826/// ```
827///
828/// A more complex pattern, using a closure:
829///
830/// ```rune
831/// let v = "abc1defXghi".split(|c| c == '1' || c == 'X').collect::<Vec>();
832/// assert_eq!(v, ["abc", "def", "ghi"]);
833/// ```
834///
835/// If a string contains multiple contiguous separators, you will end up
836/// with empty strings in the output:
837///
838/// ```rune
839/// let x = "||||a||b|c";
840/// let d = x.split('|').collect::<Vec>();
841///
842/// assert_eq!(d, ["", "", "", "", "a", "", "b", "c"]);
843/// ```
844///
845/// Contiguous separators are separated by the empty string.
846///
847/// ```rune
848/// let x = "(///)";
849/// let d = x.split('/').collect::<Vec>();
850///
851/// assert_eq!(d, ["(", "", "", ")"]);
852/// ```
853///
854/// Separators at the start or end of a string are neighbored
855/// by empty strings.
856///
857/// ```rune
858/// let d = "010".split("0").collect::<Vec>();
859/// assert_eq!(d, ["", "1", ""]);
860/// ```
861///
862/// When the empty string is used as a separator, it separates
863/// every character in the string, along with the beginning
864/// and end of the string.
865///
866/// ```rune
867/// let f = "rust".split("").collect::<Vec>();
868/// assert_eq!(f, ["", "r", "u", "s", "t", ""]);
869/// ```
870///
871/// Contiguous separators can lead to possibly surprising behavior
872/// when whitespace is used as the separator. This code is correct:
873///
874/// ```rune
875/// let x = " a b c";
876/// let d = x.split(' ').collect::<Vec>();
877///
878/// assert_eq!(d, ["", "", "", "", "a", "", "b", "c"]);
879/// ```
880///
881/// It does _not_ give you:
882///
883/// ```rune,ignore
884/// assert_eq!(d, ["a", "b", "c"]);
885/// ```
886///
887/// Use [`split_whitespace`] for this behavior.
888///
889/// [`split_whitespace`]: str::split_whitespace
890#[rune::function(instance, deprecated = "Use String::split instead")]
891fn split(this: Ref<str>, value: Value) -> VmResult<Value> {
892 match value.as_ref() {
893 Repr::Inline(Inline::Char(c)) => {
894 VmResult::Ok(vm_try!(rune::to_value(Split::new(this, *c))))
895 }
896 Repr::Inline(value) => VmResult::err([
897 VmErrorKind::expected::<String>(value.type_info()),
898 VmErrorKind::bad_argument(0),
899 ]),
900 Repr::Dynamic(value) => VmResult::err([
901 VmErrorKind::expected::<String>(value.type_info()),
902 VmErrorKind::bad_argument(0),
903 ]),
904 Repr::Any(value) => match value.type_hash() {
905 String::HASH => {
906 let s = vm_try!(value.borrow_ref::<String>());
907
908 let split = vm_try!(rune::to_value(Split::new(
909 this,
910 vm_try!(String::try_from(s.as_str()))
911 )));
912
913 VmResult::Ok(split)
914 }
915 Function::HASH => {
916 let f = vm_try!(value.borrow_ref::<Function>());
917 let split = vm_try!(rune::to_value(Split::new(this, vm_try!(f.try_clone()))));
918 VmResult::Ok(split)
919 }
920 _ => VmResult::err([
921 VmErrorKind::expected::<String>(value.type_info()),
922 VmErrorKind::bad_argument(0),
923 ]),
924 },
925 }
926}
927
928/// Splits the string on the first occurrence of the specified delimiter and
929/// returns prefix before delimiter and suffix after delimiter.
930///
931/// # Examples
932///
933/// ```rune
934/// assert_eq!("cfg".split_once('='), None);
935/// assert_eq!("cfg=".split_once('='), Some(("cfg", "")));
936/// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
937/// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
938/// ```
939#[rune::function(instance)]
940fn split_once(this: &str, value: Value) -> VmResult<Option<(String, String)>> {
941 let outcome = match value.as_ref() {
942 Repr::Inline(Inline::Char(pat)) => this.split_once(*pat),
943 Repr::Inline(value) => {
944 return VmResult::err([
945 VmErrorKind::expected::<String>(value.type_info()),
946 VmErrorKind::bad_argument(0),
947 ]);
948 }
949 Repr::Dynamic(value) => {
950 return VmResult::err([
951 VmErrorKind::expected::<String>(value.type_info()),
952 VmErrorKind::bad_argument(0),
953 ]);
954 }
955 Repr::Any(value) => match value.type_hash() {
956 String::HASH => {
957 let s = vm_try!(value.borrow_ref::<String>());
958 this.split_once(s.as_str())
959 }
960 Function::HASH => {
961 let f = vm_try!(value.borrow_ref::<Function>());
962 let mut err = None;
963
964 let outcome = this.split_once(|c: char| match f.call::<bool>((c,)) {
965 VmResult::Ok(b) => b,
966 VmResult::Err(e) => {
967 if err.is_none() {
968 err = Some(e);
969 }
970
971 false
972 }
973 });
974
975 if let Some(e) = err.take() {
976 return VmResult::Err(e);
977 }
978
979 outcome
980 }
981 _ => {
982 return VmResult::err([
983 VmErrorKind::expected::<String>(value.type_info()),
984 VmErrorKind::bad_argument(0),
985 ]);
986 }
987 },
988 };
989
990 let Some((a, b)) = outcome else {
991 return VmResult::Ok(None);
992 };
993
994 VmResult::Ok(Some((vm_try!(a.try_to_owned()), vm_try!(b.try_to_owned()))))
995}
996
997/// Returns a string slice with leading and trailing whitespace removed.
998///
999/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
1000/// Property `White_Space`, which includes newlines.
1001///
1002/// # Examples
1003///
1004/// Basic usage:
1005///
1006/// ```rune
1007/// let s = "\n Hello\tworld\t\n";
1008///
1009/// assert_eq!("Hello\tworld", s.trim());
1010/// ```
1011#[rune::function(instance)]
1012fn trim(this: &str) -> VmResult<String> {
1013 VmResult::Ok(vm_try!(this.trim().try_to_owned()))
1014}
1015
1016/// Returns a string slice with trailing whitespace removed.
1017///
1018/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
1019/// Property `White_Space`, which includes newlines.
1020///
1021/// # Text directionality
1022///
1023/// A string is a sequence of bytes. `end` in this context means the last
1024/// position of that byte string; for a left-to-right language like English or
1025/// Russian, this will be right side, and for right-to-left languages like
1026/// Arabic or Hebrew, this will be the left side.
1027///
1028/// # Examples
1029///
1030/// Basic usage:
1031///
1032/// ```rune
1033/// let s = "\n Hello\tworld\t\n";
1034/// assert_eq!("\n Hello\tworld", s.trim_end());
1035/// ```
1036///
1037/// Directionality:
1038///
1039/// ```rune
1040/// let s = " English ";
1041/// assert!(Some('h') == s.trim_end().chars().rev().next());
1042///
1043/// let s = " עברית ";
1044/// assert!(Some('ת') == s.trim_end().chars().rev().next());
1045/// ```
1046#[rune::function(instance)]
1047fn trim_end(this: &str) -> VmResult<String> {
1048 VmResult::Ok(vm_try!(this.trim_end().try_to_owned()))
1049}
1050
1051/// Returns `true` if `self` has a length of zero bytes.
1052///
1053/// # Examples
1054///
1055/// Basic usage:
1056///
1057/// ```rune
1058/// let s = "";
1059/// assert!(s.is_empty());
1060///
1061/// let s = "not empty";
1062/// assert!(!s.is_empty());
1063/// ```
1064#[rune::function(instance)]
1065fn is_empty(this: &str) -> bool {
1066 this.is_empty()
1067}
1068
1069/// Replaces all matches of a pattern with another string.
1070///
1071/// `replace` creates a new [`String`], and copies the data from this string
1072/// slice into it. While doing so, it attempts to find matches of a pattern. If
1073/// it finds any, it replaces them with the replacement string slice.
1074///
1075/// # Examples
1076///
1077/// Basic usage:
1078///
1079/// ```rune
1080/// let s = "this is old";
1081///
1082/// assert_eq!("this is new", s.replace("old", "new"));
1083/// assert_eq!("than an old", s.replace("is", "an"));
1084/// ```
1085///
1086/// When the pattern doesn't match, it returns this string slice as [`String`]:
1087///
1088/// ```rune
1089/// let s = "this is old";
1090/// assert_eq!(s, s.replace("cookie monster", "little lamb"));
1091/// ```
1092#[rune::function(instance)]
1093fn replace(a: &str, from: &str, to: &str) -> VmResult<String> {
1094 VmResult::Ok(vm_try!(String::try_from(a.replace(from, to))))
1095}
1096
/// Returns an iterator over the [`char`]s of a string slice.
///
/// As a string slice consists of valid UTF-8, we can iterate through a string
/// slice by [`char`]. This method returns such an iterator.
///
/// It's important to remember that [`char`] represents a Unicode Scalar Value,
/// and might not match your idea of what a 'character' is. Iteration over
/// grapheme clusters may be what you actually want, which this function does
/// not provide.
///
/// # Examples
///
/// Basic usage:
///
/// ```rune
/// let word = "goodbye";
///
/// let count = word.chars().count();
/// assert_eq!(7, count);
///
/// let chars = word.chars();
///
/// assert_eq!(Some('g'), chars.next());
/// assert_eq!(Some('o'), chars.next());
/// assert_eq!(Some('o'), chars.next());
/// assert_eq!(Some('d'), chars.next());
/// assert_eq!(Some('b'), chars.next());
/// assert_eq!(Some('y'), chars.next());
/// assert_eq!(Some('e'), chars.next());
///
/// assert_eq!(None, chars.next());
/// ```
///
/// Remember, [`char`]s might not match your intuition about characters:
///
/// [`char`]: prim@char
///
/// ```rune
/// let y = "y̆";
///
/// let chars = y.chars();
///
/// assert_eq!(Some('y'), chars.next()); // not 'y̆'
/// assert_eq!(Some('\u{0306}'), chars.next());
///
/// assert_eq!(None, chars.next());
/// ```
#[rune::function(instance)]
fn chars(s: Ref<str>) -> Chars {
    // The iterator takes over the string reference it was given.
    Chars::new(s)
}
1148
1149/// Returns a subslice of `str`.
1150///
1151/// This is the non-panicking alternative to indexing the `str`. Returns
1152/// [`None`] whenever equivalent indexing operation would panic.
1153///
1154/// # Examples
1155///
1156/// ```rune
1157/// let v = "🗻∈🌏";
1158///
1159/// assert_eq!(Some("🗻"), v.get(0..4));
1160///
1161/// // indices not on UTF-8 sequence boundaries
1162/// assert!(v.get(1..).is_none());
1163/// assert!(v.get(..8).is_none());
1164///
1165/// // out of bounds
1166/// assert!(v.get(..42).is_none());
1167/// ```
1168#[rune::function(instance)]
1169fn get(this: &str, key: Value) -> VmResult<Option<String>> {
1170 use crate::runtime::TypeOf;
1171
1172 let slice = match key.as_any() {
1173 Some(value) => match value.type_hash() {
1174 RangeFrom::HASH => {
1175 let range = vm_try!(value.borrow_ref::<RangeFrom>());
1176 let start = vm_try!(range.start.as_usize());
1177 this.get(start..)
1178 }
1179 RangeFull::HASH => {
1180 _ = vm_try!(value.borrow_ref::<RangeFull>());
1181 this.get(..)
1182 }
1183 RangeInclusive::HASH => {
1184 let range = vm_try!(value.borrow_ref::<RangeInclusive>());
1185 let start = vm_try!(range.start.as_usize());
1186 let end = vm_try!(range.end.as_usize());
1187 this.get(start..=end)
1188 }
1189 RangeToInclusive::HASH => {
1190 let range = vm_try!(value.borrow_ref::<RangeToInclusive>());
1191 let end = vm_try!(range.end.as_usize());
1192 this.get(..=end)
1193 }
1194 RangeTo::HASH => {
1195 let range = vm_try!(value.borrow_ref::<RangeTo>());
1196 let end = vm_try!(range.end.as_usize());
1197 this.get(..end)
1198 }
1199 Range::HASH => {
1200 let range = vm_try!(value.borrow_ref::<Range>());
1201 let start = vm_try!(range.start.as_usize());
1202 let end = vm_try!(range.end.as_usize());
1203 this.get(start..end)
1204 }
1205 _ => {
1206 return VmResult::err(VmErrorKind::UnsupportedIndexGet {
1207 target: String::type_info(),
1208 index: value.type_info(),
1209 })
1210 }
1211 },
1212 _ => {
1213 return VmResult::err(VmErrorKind::UnsupportedIndexGet {
1214 target: String::type_info(),
1215 index: key.type_info(),
1216 })
1217 }
1218 };
1219
1220 let Some(slice) = slice else {
1221 return VmResult::Ok(None);
1222 };
1223
1224 VmResult::Ok(Some(vm_try!(slice.try_to_owned())))
1225}
1226
1227/// The add operation for strings.
1228#[rune::function(instance, protocol = ADD)]
1229fn add(a: &str, b: &str) -> VmResult<String> {
1230 let mut string = vm_try!(String::try_with_capacity(a.len() + b.len()));
1231 vm_try!(string.try_push_str(a));
1232 vm_try!(string.try_push_str(b));
1233 VmResult::Ok(string)
1234}
1235
1236/// The add assign operation for strings.
1237#[rune::function(instance, protocol = ADD_ASSIGN)]
1238fn add_assign(this: &mut String, other: &str) -> VmResult<()> {
1239 vm_try!(this.try_push_str(other));
1240 VmResult::Ok(())
1241}
1242
1243/// Get a specific string index.
1244#[rune::function(instance, protocol = INDEX_GET)]
1245fn index_get(s: &str, key: Value) -> VmResult<String> {
1246 match vm_try!(__rune_fn__get(s, key)) {
1247 Some(slice) => VmResult::Ok(slice),
1248 None => VmResult::err(Panic::custom("missing string slice")),
1249 }
1250}
1251
1252/// Parses this string into an integer.
1253///
1254/// # Errors
1255///
1256/// Will return [`Err`] if it's not possible to parse this string slice into an
1257/// integer.
1258///
1259/// # Examples
1260///
1261/// Basic usage
1262///
1263/// ```rune
1264/// let four = "4".parse::<i64>()?;
1265/// assert_eq!(4, four);
1266/// ```
1267#[rune::function(instance, path = parse::<i64>)]
1268fn parse_int(s: &str) -> Result<i64, ParseIntError> {
1269 str::parse::<i64>(s)
1270}
1271
1272/// Parses this string into a float.
1273///
1274/// # Errors
1275///
1276/// Will return [`Err`] if it's not possible to parse this string slice into an
1277/// float.
1278///
1279/// # Examples
1280///
1281/// Basic usage
1282///
1283/// ```rune
1284/// let pi = "3.1415".parse::<f64>()?;
1285/// assert_eq!(3.1415, pi);
1286/// ```
1287#[rune::function(instance, path = parse::<f64>)]
1288fn parse_float(s: &str) -> Result<f64, ParseFloatError> {
1289 str::parse::<f64>(s)
1290}
1291
1292/// Parses this string into a character.
1293///
1294/// # Errors
1295///
1296/// Will return [`Err`] if it's not possible to parse this string slice into an
1297/// integer.
1298///
1299/// # Examples
1300///
1301/// Basic usage
1302///
1303/// ```rune
1304/// let a = "a".parse::<char>()?;
1305/// assert_eq!('a', a);
1306/// ```
1307#[rune::function(instance, path = parse::<char>)]
1308fn parse_char(s: &str) -> Result<char, char::ParseCharError> {
1309 str::parse::<char>(s)
1310}
1311
1312/// Returns the lowercase equivalent of this string slice, as a new [`String`].
1313///
1314/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1315/// `Lowercase`.
1316///
1317/// Since some characters can expand into multiple characters when changing
1318/// the case, this function returns a [`String`] instead of modifying the
1319/// parameter in-place.
1320///
1321/// # Examples
1322///
1323/// Basic usage:
1324///
1325/// ```rune
1326/// let s = "HELLO";
1327///
1328/// assert_eq!("hello", s.to_lowercase());
1329/// ```
1330///
1331/// A tricky example, with sigma:
1332///
1333/// ```rune
1334/// let sigma = "Σ";
1335///
1336/// assert_eq!("σ", sigma.to_lowercase());
1337///
1338/// // but at the end of a word, it's ς, not σ:
1339/// let odysseus = "ὈΔΥΣΣΕΎΣ";
1340///
1341/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
1342/// ```
1343///
1344/// Languages without case are not changed:
1345///
1346/// ```rune
1347/// let new_year = "农历新年";
1348///
1349/// assert_eq!(new_year, new_year.to_lowercase());
1350/// ```
1351#[rune::function(instance)]
1352fn to_lowercase(s: &str) -> VmResult<String> {
1353 let mut lowercase = vm_try!(String::try_with_capacity(s.len()));
1354 for (i, c) in s.char_indices() {
1355 // Inlined code to from std::str to handle upper-case sigma,
1356 // since it is the only Unicode character that is context-dependent
1357 // See https://github.com/rust-lang/rust/issues/26035 for more context
1358 if c == 'Σ' {
1359 vm_try!(lowercase.try_push_str(map_uppercase_sigma(s, i)));
1360 } else {
1361 vm_try!(lowercase.try_extend(c.to_lowercase()));
1362 }
1363 }
1364
1365 return VmResult::Ok(lowercase);
1366
1367 fn map_uppercase_sigma(from: &str, i: usize) -> &'static str {
1368 // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1369 // for the definition of `Final_Sigma`.
1370 debug_assert!('Σ'.len_utf8() == 2);
1371 let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
1372 && !case_ignorable_then_cased(from[i + 2..].chars());
1373 if is_word_final {
1374 "ς"
1375 } else {
1376 "σ"
1377 }
1378 }
1379
1380 fn case_ignorable_then_cased<I: core::iter::Iterator<Item = char>>(mut iter: I) -> bool {
1381 match iter.find(|&c| !unicode::case_ignorable::lookup(c)) {
1382 Some(c) => unicode::cased::lookup(c),
1383 None => false,
1384 }
1385 }
1386}
1387
1388/// Returns the uppercase equivalent of this string slice, as a new [`String`].
1389///
1390/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1391/// `Uppercase`.
1392///
1393/// Since some characters can expand into multiple characters when changing
1394/// the case, this function returns a [`String`] instead of modifying the
1395/// parameter in-place.
1396///
1397/// # Examples
1398///
1399/// Basic usage:
1400///
1401/// ```rune
1402/// let s = "hello";
1403///
1404/// assert_eq!("HELLO", s.to_uppercase());
1405/// ```
1406///
1407/// Scripts without case are not changed:
1408///
1409/// ```rune
1410/// let new_year = "农历新年";
1411///
1412/// assert_eq!(new_year, new_year.to_uppercase());
1413/// ```
1414///
1415/// One character can become multiple:
1416/// ```rune
1417/// let s = "tschüß";
1418///
1419/// assert_eq!("TSCHÜSS", s.to_uppercase());
1420/// ```
1421#[rune::function(instance)]
1422fn to_uppercase(s: &str) -> VmResult<String> {
1423 let mut uppercase = vm_try!(String::try_with_capacity(s.len()));
1424 vm_try!(uppercase.try_extend(s.chars().flat_map(|c| c.to_uppercase())));
1425 VmResult::Ok(uppercase)
1426}
1427
/// An iterator over the characters of a string, which can be advanced from
/// both the front and the back.
#[derive(Any)]
#[rune(item = ::std::string)]
struct Chars {
    /// The string being iterated over.
    string: Ref<str>,
    /// Byte offset of the next character to yield from the front.
    start: usize,
    /// Byte offset just past the next character to yield from the back.
    end: usize,
}
1435
1436impl Chars {
1437 fn new(string: Ref<str>) -> Self {
1438 let end = string.len();
1439 Self {
1440 string,
1441 start: 0,
1442 end,
1443 }
1444 }
1445
1446 #[rune::function(keep, protocol = NEXT)]
1447 fn next(&mut self) -> Option<char> {
1448 let string = self.string.get(self.start..self.end)?;
1449 let c = string.chars().next()?;
1450 self.start += c.len_utf8();
1451 Some(c)
1452 }
1453
1454 #[rune::function(keep, protocol = NEXT_BACK)]
1455 fn next_back(&mut self) -> Option<char> {
1456 let string = self.string.get(self.start..self.end)?;
1457 let c = string.chars().next_back()?;
1458 self.end -= c.len_utf8();
1459 Some(c)
1460 }
1461}
1462
/// A pattern which can be tested against the start of a string.
///
/// Implemented in this file for [`String`], [`char`] and [`Function`], and
/// used by [`Split`] to locate the boundaries between pieces.
trait Pattern: 'static + TryClone + Named + FromValue + ToValue + MaybeTypeOf + TypeOf {
    /// Tests the pattern against the beginning of `tail`.
    ///
    /// Returns whether the pattern matched, together with a length in bytes.
    /// The [`Split`] iterator adds that length to its scan position, both
    /// after a match and after a failed match.
    fn test(&self, tail: &str) -> VmResult<(bool, usize)>;

    /// Tests whether the pattern is empty.
    fn is_empty(&self) -> bool;
}
1468
1469impl Pattern for String {
1470 fn test(&self, tail: &str) -> VmResult<(bool, usize)> {
1471 if tail.starts_with(self.as_str()) {
1472 VmResult::Ok((true, self.len()))
1473 } else {
1474 let Some(c) = tail.chars().next() else {
1475 return VmResult::Ok((false, 0));
1476 };
1477
1478 VmResult::Ok((false, c.len_utf8()))
1479 }
1480 }
1481
1482 #[inline]
1483 fn is_empty(&self) -> bool {
1484 String::is_empty(self)
1485 }
1486}
1487
1488impl Pattern for char {
1489 fn test(&self, tail: &str) -> VmResult<(bool, usize)> {
1490 let Some(c) = tail.chars().next() else {
1491 return VmResult::Ok((false, 0));
1492 };
1493
1494 VmResult::Ok((c == *self, c.len_utf8()))
1495 }
1496
1497 #[inline]
1498 fn is_empty(&self) -> bool {
1499 false
1500 }
1501}
1502
1503impl Pattern for Function {
1504 fn test(&self, tail: &str) -> VmResult<(bool, usize)> {
1505 let Some(c) = tail.chars().next() else {
1506 return VmResult::Ok((false, 0));
1507 };
1508
1509 VmResult::Ok((vm_try!(self.call::<bool>((c,))), c.len_utf8()))
1510 }
1511
1512 #[inline]
1513 fn is_empty(&self) -> bool {
1514 false
1515 }
1516}
1517
/// An iterator over the pieces of a string separated by a [`Pattern`].
#[derive(Any)]
#[rune(item = ::std::string)]
struct Split<T>
where
    T: Pattern,
{
    /// The string being split, set to `None` once the iterator is done.
    string: Option<Ref<str>>,
    /// The pattern separating the pieces.
    pattern: T,
    /// Byte offset at which the piece currently being scanned starts.
    from: usize,
    /// Byte offset of the scan position, where the pattern is tested next.
    to: usize,
}
1529
1530impl<T> Split<T>
1531where
1532 T: Pattern,
1533{
1534 fn new(string: Ref<str>, pattern: T) -> Self {
1535 Self {
1536 string: Some(string),
1537 pattern,
1538 from: 0,
1539 to: 0,
1540 }
1541 }
1542
1543 #[rune::function(keep, protocol = NEXT)]
1544 fn next(&mut self) -> VmResult<Option<String>> {
1545 let Some(string) = &self.string else {
1546 return VmResult::Ok(None);
1547 };
1548
1549 if self.from == string.len() && self.from == self.to {
1550 self.string = None;
1551 let out = vm_try!("".try_to_owned());
1552 return VmResult::Ok(Some(out));
1553 }
1554
1555 while self.to < string.len() {
1556 let Some(tail) = string.get(self.to..) else {
1557 return VmResult::Ok(None);
1558 };
1559
1560 let (m, len) = vm_try!(self.pattern.test(tail));
1561
1562 if m {
1563 let head = string.get(self.from..self.to).unwrap_or_default();
1564 let out = vm_try!(head.try_to_owned());
1565
1566 if len == 0 {
1567 self.from = self.to;
1568 self.to += tail.chars().next().map_or(0, |c| c.len_utf8());
1569 } else {
1570 self.to += len;
1571 self.from = self.to;
1572 }
1573
1574 return VmResult::Ok(Some(out));
1575 } else {
1576 self.to += len;
1577 }
1578 }
1579
1580 let tail = string.get(self.from..self.to).unwrap_or_default();
1581 self.from = self.to;
1582 let out = vm_try!(tail.try_to_owned());
1583
1584 if !self.pattern.is_empty() {
1585 self.string = None;
1586 }
1587
1588 VmResult::Ok(Some(out))
1589 }
1590
1591 #[rune::function(keep, protocol = INTO_ITER)]
1592 fn into_iter(self) -> Self {
1593 self
1594 }
1595}
1596
// Inlined code from core::unicode, since using it directly is marked as using an
// unstable library feature
mod unicode {
    /// Extracts the prefix sum stored in the low 21 bits of a run header.
    fn decode_prefix_sum(short_offset_run_header: u32) -> u32 {
        short_offset_run_header & ((1 << 21) - 1)
    }

    /// Extracts the index into the offsets table stored in the high 11 bits
    /// of a run header.
    fn decode_length(short_offset_run_header: u32) -> usize {
        (short_offset_run_header >> 21) as usize
    }

    /// Tests whether `needle` is a member of the set encoded by the two
    /// tables.
    ///
    /// `short_offset_runs` is searched for the run covering `needle`, then
    /// the entries of `offsets` belonging to that run are summed up until the
    /// sum exceeds the distance of `needle` from the start of the run. The
    /// parity of the index reached decides membership.
    #[inline(always)]
    fn skip_search<const SOR: usize, const OFFSETS: usize>(
        needle: u32,
        short_offset_runs: &[u32; SOR],
        offsets: &[u8; OFFSETS],
    ) -> bool {
        // Shifting left by 11 discards the offset index held in the high
        // bits, so headers and needle are compared by prefix sum only.
        //
        // Note that the resulting index *cannot* be past the end of the
        // array, as the last element is greater than std::char::MAX (the
        // largest possible needle).
        //
        // So, we cannot have found it (i.e. Ok(idx) + 1 != length) and the
        // correct location cannot be past it, so Err(idx) != length either.
        let search = short_offset_runs.binary_search_by_key(&(needle << 11), |header| header << 11);
        let run_idx = match search {
            Ok(found) => found + 1,
            Err(insert_at) => insert_at,
        };

        // The run's entries span from its own offset index up to the one of
        // the following run, or to the end of the table for the last run.
        let mut offset_idx = decode_length(short_offset_runs[run_idx]);
        let run_end = match short_offset_runs.get(run_idx + 1) {
            Some(next) => decode_length(*next),
            None => offsets.len(),
        };
        let length = run_end - offset_idx;

        // Code point at which this run starts, zero for the very first run.
        let prev = match run_idx.checked_sub(1) {
            Some(prev_idx) => decode_prefix_sum(short_offset_runs[prev_idx]),
            None => 0,
        };

        let total = needle - prev;
        let mut prefix_sum = 0;

        for _ in 0..(length - 1) {
            prefix_sum += offsets[offset_idx] as u32;

            if prefix_sum > total {
                break;
            }

            offset_idx += 1;
        }

        offset_idx % 2 == 1
    }

    #[rustfmt::skip]
    pub mod case_ignorable {
        static SHORT_OFFSET_RUNS: [u32; 35] = [
            688, 44045149, 572528402, 576724925, 807414908, 878718981, 903913493, 929080568, 933275148,
            937491230, 1138818560, 1147208189, 1210124160, 1222707713, 1235291428, 1260457643,
            1264654383, 1499535675, 1507925040, 1566646003, 1629566000, 1650551536, 1658941263,
            1671540720, 1688321181, 1700908800, 1709298023, 1717688832, 1738661888, 1763828398,
            1797383403, 1805773008, 1809970171, 1819148289, 1824457200,
        ];
        static OFFSETS: [u8; 875] = [
            39, 1, 6, 1, 11, 1, 35, 1, 1, 1, 71, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2,
            1, 1, 251, 7, 207, 1, 5, 1, 49, 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35,
            1, 10, 21, 16, 1, 101, 8, 1, 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24,
            24, 43, 3, 44, 1, 7, 2, 6, 8, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1,
            58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2,
            57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1,
            61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1,
            5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9,
            98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1,
            102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3,
            29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118,
            3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 31,
            49, 4, 48, 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3,
            58, 8, 2, 2, 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1,
            3, 11, 3, 13, 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1,
            16, 13, 51, 33, 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6,
            93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 103, 3, 3,
            2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4,
            2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2,
            1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3,
            1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3,
            0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 0, 2,
            80, 3, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1,
            50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1,
            3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 195, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2,
            238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 3, 2, 4, 1, 5,
            0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46,
            13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2,
            3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 0, 17, 6, 15, 0, 5, 59, 7, 9, 4, 0, 1, 63, 17,
            64, 2, 1, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3,
            0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160,
            14, 0, 1, 61, 4, 0, 5, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
        ];
        pub fn lookup(c: char) -> bool {
            super::skip_search(
                c as u32,
                &SHORT_OFFSET_RUNS,
                &OFFSETS,
            )
        }
    }

    #[rustfmt::skip]
    pub mod cased {
        static SHORT_OFFSET_RUNS: [u32; 22] = [
            4256, 115348384, 136322176, 144711446, 163587254, 320875520, 325101120, 350268208,
            392231680, 404815649, 413205504, 421595008, 467733632, 484513952, 492924480, 497144832,
            501339814, 578936576, 627171376, 639756544, 643952944, 649261450,
        ];
        static OFFSETS: [u8; 315] = [
            65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5,
            96, 1, 42, 4, 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9,
            41, 0, 38, 1, 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 9, 7, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2,
            38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13,
            5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4,
            1, 6, 4, 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1,
            0, 46, 18, 30, 132, 102, 3, 4, 1, 59, 5, 2, 1, 1, 1, 5, 24, 5, 1, 3, 0, 43, 1, 14, 6, 80, 0,
            7, 12, 5, 0, 26, 6, 26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1,
            7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, 51, 13, 51, 0, 64, 0, 64, 0, 85, 1, 71, 1, 2, 2, 1, 2,
            2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2,
            25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6,
            0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
        ];
        pub fn lookup(c: char) -> bool {
            super::skip_search(
                c as u32,
                &SHORT_OFFSET_RUNS,
                &OFFSETS,
            )
        }
    }
}