pub trait ByteSlice: Sealed {
Show 44 methods
// Provided methods
fn as_bstr(&self) -> &BStr { ... }
fn as_bstr_mut(&mut self) -> &mut BStr { ... }
fn to_str(&self) -> Result<&str, Utf8Error> { ... }
unsafe fn to_str_unchecked(&self) -> &str { ... }
fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool { ... }
fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool { ... }
fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool { ... }
fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { ... }
fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { ... }
fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
&'h self,
needle: &'n B,
) -> Find<'h, 'n> ⓘ { ... }
fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
&'h self,
needle: &'n B,
) -> FindReverse<'h, 'n> ⓘ { ... }
fn find_byte(&self, byte: u8) -> Option<usize> { ... }
fn rfind_byte(&self, byte: u8) -> Option<usize> { ... }
fn find_char(&self, ch: char) -> Option<usize> { ... }
fn rfind_char(&self, ch: char) -> Option<usize> { ... }
fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { ... }
fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { ... }
fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { ... }
fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { ... }
fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> ⓘ { ... }
fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
splitter: &'s B,
) -> Split<'h, 's> ⓘ { ... }
fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
splitter: &'s B,
) -> SplitReverse<'h, 's> ⓘ { ... }
fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
&'a self,
splitter: &B,
) -> Option<(&'a [u8], &'a [u8])> { ... }
fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
&'a self,
splitter: &B,
) -> Option<(&'a [u8], &'a [u8])> { ... }
fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
limit: usize,
splitter: &'s B,
) -> SplitN<'h, 's> ⓘ { ... }
fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
limit: usize,
splitter: &'s B,
) -> SplitNReverse<'h, 's> ⓘ { ... }
fn bytes(&self) -> Bytes<'_> ⓘ { ... }
fn chars(&self) -> Chars<'_> ⓘ { ... }
fn char_indices(&self) -> CharIndices<'_> ⓘ { ... }
fn utf8_chunks(&self) -> Utf8Chunks<'_> ⓘ { ... }
fn lines(&self) -> Lines<'_> ⓘ { ... }
fn lines_with_terminator(&self) -> LinesWithTerminator<'_> ⓘ { ... }
fn trim_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ { ... }
fn trim_start_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ { ... }
fn trim_end_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ { ... }
fn make_ascii_lowercase(&mut self) { ... }
fn make_ascii_uppercase(&mut self) { ... }
fn escape_bytes(&self) -> EscapeBytes<'_> ⓘ { ... }
fn reverse_bytes(&mut self) { ... }
fn reverse_chars(&mut self) { ... }
fn is_ascii(&self) -> bool { ... }
fn is_utf8(&self) -> bool { ... }
fn last_byte(&self) -> Option<u8> { ... }
fn find_non_ascii_byte(&self) -> Option<usize> { ... }
}
Expand description
A trait that extends &[u8]
with string oriented methods.
This trait is sealed and cannot be implemented outside of bstr
.
Provided Methods§
Sourcefn as_bstr(&self) -> &BStr
fn as_bstr(&self) -> &BStr
Return this byte slice as a &BStr
.
Using a &BStr
is useful because of its fmt::Debug
representation
and various other trait implementations (such as PartialEq
and
PartialOrd
). In particular, the Debug
implementation for BStr
shows its bytes as a normal string. For invalid UTF-8, hex escape
sequences are used.
§Examples
Basic usage:
use bstr::ByteSlice;
println!("{:?}", b"foo\xFFbar".as_bstr());
Sourcefn as_bstr_mut(&mut self) -> &mut BStr
fn as_bstr_mut(&mut self) -> &mut BStr
Return this byte slice as a &mut BStr
.
Using a &mut BStr
is useful because of its fmt::Debug
representation
and various other trait implementations (such as PartialEq
and
PartialOrd
). In particular, the Debug
implementation for BStr
shows its bytes as a normal string. For invalid UTF-8, hex escape
sequences are used.
§Examples
Basic usage:
use bstr::ByteSlice;
let mut bytes = *b"foo\xFFbar";
println!("{:?}", &mut bytes.as_bstr_mut());
Sourcefn to_str(&self) -> Result<&str, Utf8Error>
fn to_str(&self) -> Result<&str, Utf8Error>
Safely convert this byte string into a &str
if it’s valid UTF-8.
If this byte string is not valid UTF-8, then an error is returned. The error returned indicates the first invalid byte found and the length of the error.
In cases where a lossy conversion to &str
is acceptable, then use one
of the to_str_lossy
or
to_str_lossy_into
methods.
§Examples
Basic usage:
use bstr::{B, ByteSlice, ByteVec};
let s = B("☃βツ").to_str()?;
assert_eq!("☃βツ", s);
let mut bstring = <Vec<u8>>::from("☃βツ");
bstring.push(b'\xFF');
let err = bstring.to_str().unwrap_err();
assert_eq!(8, err.valid_up_to());
Sourceunsafe fn to_str_unchecked(&self) -> &str
unsafe fn to_str_unchecked(&self) -> &str
Unsafely convert this byte string into a &str
, without checking for
valid UTF-8.
§Safety
Callers must ensure that this byte string is valid UTF-8 before
calling this method. Converting a byte string into a &str
that is
not valid UTF-8 is considered undefined behavior.
This routine is useful in performance sensitive contexts where the
UTF-8 validity of the byte string is already known and it is
undesirable to pay the cost of an additional UTF-8 validation check
that to_str
performs.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
// SAFETY: This is safe because string literals are guaranteed to be
// valid UTF-8 by the Rust compiler.
let s = unsafe { B("☃βツ").to_str_unchecked() };
assert_eq!("☃βツ", s);
Sourcefn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool
fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool
Returns true if and only if this byte string contains the given needle.
§Examples
Basic usage:
use bstr::ByteSlice;
assert!(b"foo bar".contains_str("foo"));
assert!(b"foo bar".contains_str("bar"));
assert!(!b"foo".contains_str("foobar"));
Sourcefn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool
fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool
Returns true if and only if this byte string has the given prefix.
§Examples
Basic usage:
use bstr::ByteSlice;
assert!(b"foo bar".starts_with_str("foo"));
assert!(!b"foo bar".starts_with_str("bar"));
assert!(!b"foo".starts_with_str("foobar"));
Sourcefn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool
fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool
Returns true if and only if this byte string has the given suffix.
§Examples
Basic usage:
use bstr::ByteSlice;
assert!(b"foo bar".ends_with_str("bar"));
assert!(!b"foo bar".ends_with_str("foo"));
assert!(!b"bar".ends_with_str("foobar"));
Sourcefn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>
fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>
Returns the index of the first occurrence of the given needle.
The needle may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
Note that if you’re searching for the same needle in many
different small haystacks, it may be faster to initialize a
Finder
once, and reuse it for each search.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the needle and the haystack. That is, this runs
in O(needle.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
let s = b"foo bar baz";
assert_eq!(Some(0), s.find("foo"));
assert_eq!(Some(4), s.find("bar"));
assert_eq!(None, s.find("quux"));
Sourcefn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>
fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>
Returns the index of the last occurrence of the given needle.
The needle may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
Note that if you’re searching for the same needle in many
different small haystacks, it may be faster to initialize a
FinderReverse
once, and reuse it for
each search.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the needle and the haystack. That is, this runs
in O(needle.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
let s = b"foo bar baz";
assert_eq!(Some(0), s.rfind("foo"));
assert_eq!(Some(4), s.rfind("bar"));
assert_eq!(Some(8), s.rfind("ba"));
assert_eq!(None, s.rfind("quux"));
Sourcefn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
&'h self,
needle: &'n B,
) -> Find<'h, 'n> ⓘ
fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>( &'h self, needle: &'n B, ) -> Find<'h, 'n> ⓘ
Returns an iterator of the non-overlapping occurrences of the given needle. The iterator yields byte offset positions indicating the start of each match.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the needle and the haystack. That is, this runs
in O(needle.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
let s = b"foo bar foo foo quux foo";
let matches: Vec<usize> = s.find_iter("foo").collect();
assert_eq!(matches, vec![0, 8, 12, 21]);
An empty string matches at every position, including the position immediately following the last byte:
use bstr::ByteSlice;
let matches: Vec<usize> = b"foo".find_iter("").collect();
assert_eq!(matches, vec![0, 1, 2, 3]);
let matches: Vec<usize> = b"".find_iter("").collect();
assert_eq!(matches, vec![0]);
Sourcefn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
&'h self,
needle: &'n B,
) -> FindReverse<'h, 'n> ⓘ
fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>( &'h self, needle: &'n B, ) -> FindReverse<'h, 'n> ⓘ
Returns an iterator of the non-overlapping occurrences of the given needle in reverse. The iterator yields byte offset positions indicating the start of each match.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the needle and the haystack. That is, this runs
in O(needle.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
let s = b"foo bar foo foo quux foo";
let matches: Vec<usize> = s.rfind_iter("foo").collect();
assert_eq!(matches, vec![21, 12, 8, 0]);
An empty string matches at every position, including the position immediately following the last byte:
use bstr::ByteSlice;
let matches: Vec<usize> = b"foo".rfind_iter("").collect();
assert_eq!(matches, vec![3, 2, 1, 0]);
let matches: Vec<usize> = b"".rfind_iter("").collect();
assert_eq!(matches, vec![0]);
Sourcefn find_byte(&self, byte: u8) -> Option<usize>
fn find_byte(&self, byte: u8) -> Option<usize>
Returns the index of the first occurrence of the given byte. If the
byte does not occur in this byte string, then None
is returned.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
assert_eq!(None, b"foo bar baz".find_byte(b'y'));
Sourcefn rfind_byte(&self, byte: u8) -> Option<usize>
fn rfind_byte(&self, byte: u8) -> Option<usize>
Returns the index of the last occurrence of the given byte. If the
byte does not occur in this byte string, then None
is returned.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
Sourcefn find_char(&self, ch: char) -> Option<usize>
fn find_char(&self, ch: char) -> Option<usize>
Returns the index of the first occurrence of the given codepoint.
If the codepoint does not occur in this byte string, then None
is
returned.
Note that if one searches for the replacement codepoint, \u{FFFD}
,
then only explicit occurrences of that encoding will be found. Invalid
UTF-8 sequences will not be matched.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
assert_eq!(Some(10), b"foo bar baz".find_char('z'));
assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
assert_eq!(None, b"foo bar baz".find_char('y'));
Sourcefn rfind_char(&self, ch: char) -> Option<usize>
fn rfind_char(&self, ch: char) -> Option<usize>
Returns the index of the last occurrence of the given codepoint.
If the codepoint does not occur in this byte string, then None
is
returned.
Note that if one searches for the replacement codepoint, \u{FFFD}
,
then only explicit occurrences of that encoding will be found. Invalid
UTF-8 sequences will not be matched.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
assert_eq!(None, b"foo bar baz".rfind_char('y'));
Sourcefn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
Returns the index of the first occurrence of any of the bytes in the provided set.
The byteset
may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
, but
note that passing a &str
which contains multibyte characters may not
behave as you expect: each byte in the &str
is treated as an
individual member of the byte set.
Note that order is irrelevant for the byteset
parameter, and
duplicate bytes present in its body are ignored.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the set of bytes and the haystack. That is, this
runs in O(byteset.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
// The empty byteset never matches.
assert_eq!(None, b"abc".find_byteset(b""));
assert_eq!(None, b"".find_byteset(b""));
Sourcefn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
Returns the index of the first occurrence of a byte that is not a member of the provided set.
The byteset
may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
, but
note that passing a &str
which contains multibyte characters may not
behave as you expect: each byte in the &str
is treated as an
individual member of the byte set.
Note that order is irrelevant for the byteset
parameter, and
duplicate bytes present in its body are ignored.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the set of bytes and the haystack. That is, this
runs in O(byteset.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
// The negation of the empty byteset matches everything.
assert_eq!(Some(0), b"abc".find_not_byteset(b""));
// But an empty string never contains anything.
assert_eq!(None, b"".find_not_byteset(b""));
Sourcefn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
Returns the index of the last occurrence of any of the bytes in the provided set.
The byteset
may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
, but
note that passing a &str
which contains multibyte characters may not
behave as you expect: each byte in the &str
is treated as an
individual member of the byte set.
Note that order is irrelevant for the byteset
parameter, and duplicate
bytes present in its body are ignored.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the set of bytes and the haystack. That is, this
runs in O(byteset.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
Sourcefn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>
Returns the index of the last occurrence of a byte that is not a member of the provided set.
The byteset
may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
, but
note that passing a &str
which contains multibyte characters may not
behave as you expect: each byte in the &str
is treated as an
individual member of the byte set.
Note that order is irrelevant for the byteset
parameter, and
duplicate bytes present in its body are ignored.
§Complexity
This routine is guaranteed to have worst case linear time complexity
with respect to both the set of bytes and the haystack. That is, this
runs in O(byteset.len() + haystack.len())
time.
This routine is also guaranteed to have worst case constant space complexity.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
Sourcefn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> ⓘ
fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> ⓘ
Returns an iterator over the fields in a byte string, separated by contiguous codepoints satisfying the given predicate.
If this byte string is not valid UTF-8, then the given closure will be called with a Unicode replacement codepoint when invalid UTF-8 bytes are seen.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let s = b"123foo999999bar1quux123456";
let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
A byte string consisting of all codepoints satisfying the predicate yields no elements:
use bstr::ByteSlice;
assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
Sourcefn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
splitter: &'s B,
) -> Split<'h, 's> ⓘ
fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, splitter: &'s B, ) -> Split<'h, 's> ⓘ
Returns an iterator over substrings of this byte string, separated by the given byte string. Each element yielded is guaranteed not to include the splitter substring.
The splitter may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
assert_eq!(x, vec![
B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
]);
let x: Vec<&[u8]> = b"".split_str("X").collect();
assert_eq!(x, vec![b""]);
let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
If a string contains multiple contiguous separators, you will end up with empty strings yielded by the iterator:
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
assert_eq!(x, vec![
B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
]);
let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
Separators at the start or end of a string are neighbored by empty strings.
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"010".split_str("0").collect();
assert_eq!(x, vec![B(""), B("1"), B("")]);
When the empty string is used as a separator, it splits every byte in the byte string, along with the beginning and end of the byte string.
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"rust".split_str("").collect();
assert_eq!(x, vec![
B(""), B("r"), B("u"), B("s"), B("t"), B(""),
]);
// Splitting by an empty string is not UTF-8 aware. Elements yielded
// may not be valid UTF-8!
let x: Vec<&[u8]> = B("☃").split_str("").collect();
assert_eq!(x, vec![
B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
]);
Contiguous separators, especially whitespace, can lead to possibly surprising behavior. For example, this code is correct:
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
assert_eq!(x, vec![
B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
]);
It does not give you ["a", "b", "c"]
. For that behavior, use
fields
instead.
Sourcefn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
splitter: &'s B,
) -> SplitReverse<'h, 's> ⓘ
fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, splitter: &'s B, ) -> SplitReverse<'h, 's> ⓘ
Returns an iterator over substrings of this byte string, separated by the given byte string, in reverse. Each element yielded is guaranteed not to include the splitter substring.
The splitter may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> =
b"Mary had a little lamb".rsplit_str(" ").collect();
assert_eq!(x, vec![
B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
]);
let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
assert_eq!(x, vec![b""]);
let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
If a string contains multiple contiguous separators, you will end up with empty strings yielded by the iterator:
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
assert_eq!(x, vec![
B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
]);
let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
Separators at the start or end of a string are neighbored by empty strings.
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
assert_eq!(x, vec![B(""), B("1"), B("")]);
When the empty string is used as a separator, it splits every byte in the byte string, along with the beginning and end of the byte string.
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
assert_eq!(x, vec![
B(""), B("t"), B("s"), B("u"), B("r"), B(""),
]);
// Splitting by an empty string is not UTF-8 aware. Elements yielded
// may not be valid UTF-8!
let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
Contiguous separators, especially whitespace, can lead to possibly surprising behavior. For example, this code is correct:
use bstr::{B, ByteSlice};
let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
assert_eq!(x, vec![
B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
]);
It does not give you ["a", "b", "c"]
.
Sourcefn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
&'a self,
splitter: &B,
) -> Option<(&'a [u8], &'a [u8])>
fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, splitter: &B, ) -> Option<(&'a [u8], &'a [u8])>
Split this byte string at the first occurrence of splitter
.
If the splitter
is found in the byte string, returns a tuple
containing the parts of the string before and after the first occurrence
of splitter
, respectively. Otherwise, if there are no occurrences of
splitter
in the byte string, returns None
.
The splitter may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
If you need to split on the last instance of a delimiter instead, see
the ByteSlice::rsplit_once_str
method.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
assert_eq!(
B("foo,bar").split_once_str(","),
Some((B("foo"), B("bar"))),
);
assert_eq!(
B("foo,bar,baz").split_once_str(","),
Some((B("foo"), B("bar,baz"))),
);
assert_eq!(B("foo").split_once_str(","), None);
assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
Sourcefn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
&'a self,
splitter: &B,
) -> Option<(&'a [u8], &'a [u8])>
fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, splitter: &B, ) -> Option<(&'a [u8], &'a [u8])>
Split this byte string at the last occurrence of splitter
.
If the splitter
is found in the byte string, returns a tuple
containing the parts of the string before and after the last occurrence
of splitter
, respectively. Otherwise, if there are no occurrences of
splitter
in the byte string, returns None
.
The splitter may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
If you need to split on the first instance of a delimiter instead, see
the ByteSlice::split_once_str
method.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
assert_eq!(
B("foo,bar").rsplit_once_str(","),
Some((B("foo"), B("bar"))),
);
assert_eq!(
B("foo,bar,baz").rsplit_once_str(","),
Some((B("foo,bar"), B("baz"))),
);
assert_eq!(B("foo").rsplit_once_str(","), None);
assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
Sourcefn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
limit: usize,
splitter: &'s B,
) -> SplitN<'h, 's> ⓘ
fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, limit: usize, splitter: &'s B, ) -> SplitN<'h, 's> ⓘ
Returns an iterator of at most limit
substrings of this byte string,
separated by the given byte string. If limit
substrings are yielded,
then the last substring will contain the remainder of this byte string.
The splitter may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
let x: Vec<_> = b"".splitn_str(3, "X").collect();
assert_eq!(x, vec![b""]);
let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
assert_eq!(x, vec![B("abcXdef")]);
let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
assert_eq!(x, vec![B("abcdef")]);
let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
assert!(x.is_empty());
Sourcefn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
&'h self,
limit: usize,
splitter: &'s B,
) -> SplitNReverse<'h, 's> ⓘ
fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, limit: usize, splitter: &'s B, ) -> SplitNReverse<'h, 's> ⓘ
Returns an iterator of at most limit
substrings of this byte string,
separated by the given byte string, in reverse. If limit
substrings
are yielded, then the last substring will contain the remainder of this
byte string.
The splitter may be any type that can be cheaply converted into a
&[u8]
. This includes, but is not limited to, &str
and &[u8]
.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let x: Vec<_> =
b"Mary had a little lamb".rsplitn_str(3, " ").collect();
assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
assert_eq!(x, vec![b""]);
let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
assert_eq!(x, vec![B("abcXdef")]);
let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
assert_eq!(x, vec![B("abcdef")]);
let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
assert!(x.is_empty());
Sourcefn bytes(&self) -> Bytes<'_> ⓘ
fn bytes(&self) -> Bytes<'_> ⓘ
Returns an iterator over the bytes in this byte string.
§Examples
Basic usage:
use bstr::ByteSlice;
let bs = b"foobar";
let bytes: Vec<u8> = bs.bytes().collect();
assert_eq!(bytes, bs);
Sourcefn chars(&self) -> Chars<'_> ⓘ
fn chars(&self) -> Chars<'_> ⓘ
Returns an iterator over the Unicode scalar values in this byte string. If invalid UTF-8 is encountered, then the Unicode replacement codepoint is yielded instead.
§Examples
Basic usage:
use bstr::ByteSlice;
let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
let chars: Vec<char> = bs.chars().collect();
assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
Codepoints can also be iterated over in reverse:
use bstr::ByteSlice;
let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
let chars: Vec<char> = bs.chars().rev().collect();
assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
Sourcefn char_indices(&self) -> CharIndices<'_> ⓘ
fn char_indices(&self) -> CharIndices<'_> ⓘ
Returns an iterator over the Unicode scalar values in this byte string along with their starting and ending byte index positions. If invalid UTF-8 is encountered, then the Unicode replacement codepoint is yielded instead.
Note that this is slightly different from the CharIndices
iterator
provided by the standard library. Aside from working on possibly
invalid UTF-8, this iterator provides both the corresponding starting
and ending byte indices of each codepoint yielded. The ending position
is necessary to slice the original byte string when invalid UTF-8 bytes
are converted into a Unicode replacement codepoint, since a single
replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
(inclusive).
§Examples
Basic usage:
use bstr::ByteSlice;
let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
assert_eq!(chars, vec![
(0, 3, '☃'),
(3, 4, '\u{FFFD}'),
(4, 8, '𝞃'),
(8, 10, '\u{FFFD}'),
(10, 11, 'a'),
]);
Codepoints can also be iterated over in reverse:
use bstr::ByteSlice;
let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
let chars: Vec<(usize, usize, char)> = bs
.char_indices()
.rev()
.collect();
assert_eq!(chars, vec![
(10, 11, 'a'),
(8, 10, '\u{FFFD}'),
(4, 8, '𝞃'),
(3, 4, '\u{FFFD}'),
(0, 3, '☃'),
]);
Sourcefn utf8_chunks(&self) -> Utf8Chunks<'_> ⓘ
fn utf8_chunks(&self) -> Utf8Chunks<'_> ⓘ
Iterate over chunks of valid UTF-8.
The iterator returned yields chunks of valid UTF-8 separated by invalid
UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
which are determined via the “substitution of maximal subparts”
strategy described in the docs for the
ByteSlice::to_str_lossy
method.
§Examples
This example shows how to gather all valid and invalid chunks from a byte slice:
use bstr::{ByteSlice, Utf8Chunk};
let bytes = b"foo\xFD\xFEbar\xFF";
let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
for chunk in bytes.utf8_chunks() {
if !chunk.valid().is_empty() {
valid_chunks.push(chunk.valid());
}
if !chunk.invalid().is_empty() {
invalid_chunks.push(chunk.invalid());
}
}
assert_eq!(valid_chunks, vec!["foo", "bar"]);
assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
Sourcefn lines(&self) -> Lines<'_> ⓘ
fn lines(&self) -> Lines<'_> ⓘ
An iterator over all lines in a byte string, without their terminators.
For this iterator, the only line terminators recognized are `\r\n` and `\n`.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let s = b"\
foo

bar\r
baz


quux";
let lines: Vec<&[u8]> = s.lines().collect();
assert_eq!(lines, vec![
B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
]);
Sourcefn lines_with_terminator(&self) -> LinesWithTerminator<'_> ⓘ
fn lines_with_terminator(&self) -> LinesWithTerminator<'_> ⓘ
An iterator over all lines in a byte string, including their terminators.
For this iterator, the only line terminator recognized is `\n`. (Since
line terminators are included, this also handles `\r\n` line endings.)
Line terminators are only included if they are present in the original byte string. For example, the last line in a byte string may not end with a line terminator.
Concatenating all elements yielded by this iterator is guaranteed to yield the original byte string.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let s = b"\
foo

bar\r
baz


quux";
let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
assert_eq!(lines, vec![
B("foo\n"),
B("\n"),
B("bar\r\n"),
B("baz\n"),
B("\n"),
B("\n"),
B("quux"),
]);
Sourcefn trim_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ
fn trim_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ
Return a byte string slice with leading and trailing characters satisfying the given predicate removed.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let s = b"123foo5bar789";
assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
Sourcefn trim_start_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ
fn trim_start_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ
Return a byte string slice with leading characters satisfying the given predicate removed.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let s = b"123foo5bar789";
assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
Sourcefn trim_end_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ
fn trim_end_with<F: FnMut(char) -> bool>(&self, trim: F) -> &[u8] ⓘ
Return a byte string slice with trailing characters satisfying the given predicate removed.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let s = b"123foo5bar789";
assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
Sourcefn make_ascii_lowercase(&mut self)
fn make_ascii_lowercase(&mut self)
Convert this byte string to its lowercase ASCII equivalent in place.
In this case, lowercase is only defined in ASCII letters. Namely, the
letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
If you don’t need to do the conversion in place and instead prefer
convenience, then use `to_ascii_lowercase` instead.
§Examples
Basic usage:
use bstr::ByteSlice;
let mut s = <Vec<u8>>::from("HELLO Β");
s.make_ascii_lowercase();
assert_eq!(s, "hello Β".as_bytes());
Invalid UTF-8 remains as is:
use bstr::{B, ByteSlice, ByteVec};
let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
s.make_ascii_lowercase();
assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
Sourcefn make_ascii_uppercase(&mut self)
fn make_ascii_uppercase(&mut self)
Convert this byte string to its uppercase ASCII equivalent in place.
In this case, uppercase is only defined in ASCII letters. Namely, the
letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
If you don’t need to do the conversion in place and instead prefer
convenience, then use `to_ascii_uppercase` instead.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
let mut s = <Vec<u8>>::from("hello β");
s.make_ascii_uppercase();
assert_eq!(s, B("HELLO β"));
Invalid UTF-8 remains as is:
use bstr::{B, ByteSlice, ByteVec};
let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
s.make_ascii_uppercase();
assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
Sourcefn escape_bytes(&self) -> EscapeBytes<'_> ⓘ
fn escape_bytes(&self) -> EscapeBytes<'_> ⓘ
Escapes this byte string into a sequence of `char` values.
When the sequence of `char` values is concatenated into a string, the
result is always valid UTF-8. Any unprintable or invalid UTF-8 in this
byte string is escaped using `\xNN` notation. Moreover, the characters
`\0`, `\r`, `\n`, `\t` and `\` are escaped as well.
This is useful when one wants to get a human readable view of the raw bytes that is also valid UTF-8.
The iterator returned implements the `Display` trait. So one can do
`b"foo\xFFbar".escape_bytes().to_string()` to get a `String` with its
bytes escaped.
The dual of this function is `ByteVec::unescape_bytes`.
Note that this is similar to, but not equivalent to the `Debug`
implementation on `BStr` and `BString`. The `Debug` implementations
also use the debug representation for all Unicode codepoints. However,
this escaping routine only escapes individual bytes. All Unicode
codepoints above `U+007F` are passed through unchanged without any
escaping.
§Examples
use bstr::{B, ByteSlice};
assert_eq!(r"foo\xFFbar", b"foo\xFFbar".escape_bytes().to_string());
assert_eq!(r"foo\nbar", b"foo\nbar".escape_bytes().to_string());
assert_eq!(r"foo\tbar", b"foo\tbar".escape_bytes().to_string());
assert_eq!(r"foo\\bar", b"foo\\bar".escape_bytes().to_string());
assert_eq!(r"foo☃bar", B("foo☃bar").escape_bytes().to_string());
Sourcefn reverse_bytes(&mut self)
fn reverse_bytes(&mut self)
Reverse the bytes in this string, in place.
This is not necessarily a well formed operation! For example, if this byte string contains valid UTF-8 that isn’t ASCII, then reversing the string will likely result in invalid UTF-8 and otherwise non-sensical content.
Note that this is equivalent to the generic `[u8]::reverse` method.
This method is provided to permit callers to explicitly differentiate
between reversing bytes, codepoints and graphemes.
§Examples
Basic usage:
use bstr::ByteSlice;
let mut s = <Vec<u8>>::from("hello");
s.reverse_bytes();
assert_eq!(s, "olleh".as_bytes());
Sourcefn reverse_chars(&mut self)
fn reverse_chars(&mut self)
Reverse the codepoints in this string, in place.
If this byte string is valid UTF-8, then its reversal by codepoint is also guaranteed to be valid UTF-8.
This operation is equivalent to the following, but without allocating:
use bstr::ByteSlice;
let mut s = <Vec<u8>>::from("foo☃bar");
let mut chars: Vec<char> = s.chars().collect();
chars.reverse();
let reversed: String = chars.into_iter().collect();
assert_eq!(reversed, "rab☃oof");
Note that this is not necessarily a well formed operation. For example,
if this byte string contains grapheme clusters with more than one
codepoint, then those grapheme clusters will not necessarily be
preserved. If you’d like to preserve grapheme clusters, then use
`reverse_graphemes` instead.
§Examples
Basic usage:
use bstr::ByteSlice;
let mut s = <Vec<u8>>::from("foo☃bar");
s.reverse_chars();
assert_eq!(s, "rab☃oof".as_bytes());
This example shows that not all reversals lead to a well formed string. For example, in this case, combining marks are used to put accents over some letters, and those accent marks must appear after the codepoints they modify.
use bstr::{B, ByteSlice};
let mut s = <Vec<u8>>::from("re\u{301}sume\u{301}");
s.reverse_chars();
assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
A word of warning: the above example relies on the fact that
`résumé` is in decomposed normal form, which means there are separate
codepoints for the accents above `e`. If it is instead in composed
normal form, then the example works:
use bstr::{B, ByteSlice};
let mut s = <Vec<u8>>::from("résumé");
s.reverse_chars();
assert_eq!(s, B("émusér"));
The point here is to be cautious and not assume that just because
`reverse_chars` works in one case, that it therefore works in all
cases.
Sourcefn is_ascii(&self) -> bool
fn is_ascii(&self) -> bool
Returns true if and only if every byte in this byte string is ASCII.
ASCII is an encoding that defines 128 codepoints. A byte corresponds to
an ASCII codepoint if and only if it is in the inclusive range `[0, 127]`.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
assert!(B("abc").is_ascii());
assert!(!B("☃βツ").is_ascii());
assert!(!B(b"\xFF").is_ascii());
Sourcefn is_utf8(&self) -> bool
fn is_utf8(&self) -> bool
Returns true if and only if the entire byte string is valid UTF-8.
If you need location information about where a byte string’s first
invalid UTF-8 byte is, then use the `to_str` method.
§Examples
Basic usage:
use bstr::{B, ByteSlice};
assert!(B("abc").is_utf8());
assert!(B("☃βツ").is_utf8());
// invalid bytes
assert!(!B(b"abc\xFF").is_utf8());
// surrogate encoding
assert!(!B(b"\xED\xA0\x80").is_utf8());
// incomplete sequence
assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
// overlong sequence
assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
Sourcefn last_byte(&self) -> Option<u8>
fn last_byte(&self) -> Option<u8>
Returns the last byte in this byte string, if it’s non-empty. If this
byte string is empty, this returns `None`.
Note that this is like the generic `[u8]::last`, except this returns
the byte by value instead of a reference to the byte.
§Examples
Basic usage:
use bstr::ByteSlice;
assert_eq!(Some(b'z'), b"baz".last_byte());
assert_eq!(None, b"".last_byte());
Sourcefn find_non_ascii_byte(&self) -> Option<usize>
fn find_non_ascii_byte(&self) -> Option<usize>
Returns the index of the first non-ASCII byte in this byte string (if
any such byte exists). Specifically, it returns the index of the
first byte with a value greater than or equal to `0x80`.
§Examples
Basic usage:
use bstr::{ByteSlice, B};
assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
assert_eq!(None, b"abcde".find_non_ascii_byte());
assert_eq!(Some(0), B("😀").find_non_ascii_byte());
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.