Skip to content

Commit 504f6df

Browse files
committed
✨ v2.0.0
1 parent e04bf2e commit 504f6df

File tree

3 files changed

+127
-63
lines changed

3 files changed

+127
-63
lines changed

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
[package]
22
name = "split-every"
3-
version = "1.0.0"
3+
version = "2.0.0"
44
edition = "2021"
55
authors = ["JumperBot_"]
6-
description = "Split for every n occurences of a pattern iteratively!"
6+
description = "Split for every n occurrences of a pattern iteratively!"
77
license = "MIT"
8-
keywords = ["split", "string", "iterator", "pattern", "occurences"]
8+
keywords = ["split", "string", "iterator", "pattern", "occurrences"]
99
categories = ["development-tools", "text-processing", "visualization", "parsing", "value-formatting"]
1010
repository = "https://github.com/JumperBot/split-every/"
1111

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ use split_every::{SplitEveryImpl, SplitEvery};
1717
// "I don't really"
1818
// "know what to"
1919
// "say".
20-
let mut splitter: SplitEvery =
21-
"Oh hi there I don't really know what to say".split_every_n_of_str(" ", 3);
20+
let mut splitter: SplitEvery<&str> =
21+
"Oh hi there I don't really know what to say".split_every_n_times(" ", 3);
2222
println!("{}", splitter.next().unwrap());
2323
println!("{}", splitter.next().unwrap());
2424
println!("{}", splitter.next().unwrap());
@@ -27,9 +27,9 @@ println!("{}", splitter.next().unwrap());
2727

2828
---
2929

30-
## ✨ Split For Every N Occurences Of A Pattern Iteratively
30+
## ✨ Split For Every N Occurrences Of A Pattern Iteratively
3131

32-
This crate **helps you** split a `string` for every `n` occurences of a `pattern`.
32+
This crate **helps you** split a `string` for every `n` occurrences of a `pattern`.
3333
It contains an exclusive `iterator`.
3434

3535
---

src/lib.rs

Lines changed: 120 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
//! Split for every n occurences of a pattern iteratively.
2-
//! This crate **helps you** split a `string` for every `n` occurences of a `pattern`.
1+
//! Split for every n occurrences of a pattern iteratively.
2+
//! This crate **helps you** split a `string` for every `n` occurrences of a `pattern`.
33
//! It contains an exclusive `iterator`.
44
//!
55
//! # Examples
@@ -10,39 +10,30 @@
1010
//! // "I don't really"
1111
//! // "know what to"
1212
//! // "say".
13-
//! let mut splitter: SplitEvery =
14-
//! "Oh hi there I don't really know what to say".split_every_n_of_str(" ", 3);
13+
//! let mut splitter: SplitEvery<&str> =
14+
//! "Oh hi there I don't really know what to say".split_every_n_times(" ", 3);
1515
//! println!("{}", splitter.next().unwrap());
1616
//! println!("{}", splitter.next().unwrap());
1717
//! println!("{}", splitter.next().unwrap());
1818
//! println!("{}", splitter.next().unwrap());
1919
//! ```
2020
21+
/// Import all necessary traits and structs.
22+
pub mod prelude {
23+
pub use crate::{SplitEvery, SplitEveryImpl};
24+
}
25+
2126
/// A trait containing all `string` split-every functions.
2227
pub trait SplitEveryImpl: AsRef<str> {
2328
/// This splits a `string` every `n` times a `string` is found.
2429
/// This splits exclusively.
25-
/// The `string` must be `utf8-encoded`.
26-
#[must_use]
27-
fn split_every_n_of_str<'a>(&'a self, pat: &'a str, n: usize) -> SplitEvery<'a> {
28-
assert!(n > 0, "n must be greater than 0");
29-
SplitEvery {
30-
inner: self.as_ref(),
31-
pat: Pattern::Str(pat),
32-
n,
33-
index: 0,
34-
}
35-
}
36-
37-
/// This splits a `string` every `n` times a `char` is found.
38-
/// This splits exclusively.
39-
/// The `string` must be `utf8-encoded`.
4030
#[must_use]
41-
fn split_every_n_of_char(&self, pat: char, n: usize) -> SplitEvery<'_> {
42-
assert!(n > 0, "n must be greater than 0");
31+
#[allow(private_bounds)]
32+
fn split_every_n_times<'a, T: Pattern<'a>>(&'a self, pat: T, n: usize) -> SplitEvery<'a, T> {
4333
SplitEvery {
4434
inner: self.as_ref(),
45-
pat: Pattern::Ch(pat),
35+
pat_byte_len: pat.byte_len(),
36+
pat,
4637
n,
4738
index: 0,
4839
}
@@ -52,76 +43,149 @@ pub trait SplitEveryImpl: AsRef<str> {
5243
impl<T: AsRef<str>> SplitEveryImpl for T {}
5344

5445
/// A convenient substitute for `std::str::pattern::Pattern`.
55-
enum Pattern<'a> {
56-
Str(&'a str),
57-
Ch(char),
58-
}
46+
trait Pattern<'a> {
47+
/// A convenient `find` method.
48+
fn find_in(&self, haystack: &str) -> Option<usize>;
5949

60-
impl<'a> Pattern<'a> {
6150
/// A convenient `len` method.
62-
fn len(&self) -> usize {
63-
match self {
64-
Self::Str(inner) => inner.len(),
65-
Self::Ch(inner) => inner.len_utf8(),
66-
}
51+
fn byte_len(&self) -> Option<usize>;
52+
}
53+
54+
impl<'a> Pattern<'a> for &'a str {
55+
fn find_in(&self, haystack: &str) -> Option<usize> {
56+
haystack.find(self)
57+
}
58+
59+
fn byte_len(&self) -> Option<usize> {
60+
Some(self.len())
6761
}
6862
}
6963

70-
/// An `Iterator` struct for splitting a `string` every `n` occurences of a `pattern`.
71-
pub struct SplitEvery<'a> {
64+
impl<'a> Pattern<'a> for char {
65+
fn find_in(&self, haystack: &str) -> Option<usize> {
66+
haystack.find(*self)
67+
}
68+
69+
fn byte_len(&self) -> Option<usize> {
70+
Some(self.len_utf8())
71+
}
72+
}
73+
74+
impl<'a> Pattern<'a> for &'a [char] {
75+
fn find_in(&self, haystack: &str) -> Option<usize> {
76+
haystack.find(*self)
77+
}
78+
79+
fn byte_len(&self) -> Option<usize> {
80+
Some(self.iter().map(|ch| ch.len_utf8()).sum())
81+
}
82+
}
83+
84+
impl<'a, F: FnMut(char) -> bool + Copy> Pattern<'a> for F {
85+
fn find_in(&self, haystack: &str) -> Option<usize> {
86+
haystack.find(*self)
87+
}
88+
89+
fn byte_len(&self) -> Option<usize> {
90+
None
91+
}
92+
}
93+
94+
impl<'a> Pattern<'a> for &'a String {
95+
fn find_in(&self, haystack: &str) -> Option<usize> {
96+
haystack.find(*self)
97+
}
98+
99+
fn byte_len(&self) -> Option<usize> {
100+
Some(self.len())
101+
}
102+
}
103+
104+
/// An `Iterator` struct for splitting a `string` every `n` occurrences of a `pattern`.
105+
#[allow(private_bounds)]
106+
pub struct SplitEvery<'a, T: Pattern<'a>> {
72107
inner: &'a str,
73-
pat: Pattern<'a>,
108+
pat: T,
109+
pat_byte_len: Option<usize>,
74110
n: usize,
75111
index: usize,
76112
}
77113

78-
impl<'a> Iterator for SplitEvery<'a> {
114+
impl<'a, T: Pattern<'a>> Iterator for SplitEvery<'a, T> {
79115
type Item = &'a str;
80116

81117
fn next(&mut self) -> Option<Self::Item> {
82118
if self.index == self.inner.len() {
83119
return None;
84120
}
85-
let haystack: &str = unsafe { self.inner.get_unchecked(self.index..) };
121+
let iter_haystack: &str = unsafe { self.inner.get_unchecked(self.index..) };
86122
let mut len: usize = 0;
87-
for ind in 0..self.n {
88-
let haystack: &str = unsafe { haystack.get_unchecked(len..) };
89-
if let Some(byte_ind) = match self.pat {
90-
Pattern::Str(inner) => haystack.find(inner),
91-
Pattern::Ch(inner) => haystack.find(inner),
92-
} {
93-
len = unsafe { len.unchecked_add(byte_ind).unchecked_add(self.pat.len()) };
94-
continue;
123+
if let Some(pat_byte_len) = self.pat_byte_len {
124+
for ind in 0..self.n {
125+
let haystack: &str = unsafe { iter_haystack.get_unchecked(len..) };
126+
if let Some(byte_ind) = self.pat.find_in(haystack) {
127+
len = unsafe { len.unchecked_add(byte_ind).unchecked_add(pat_byte_len) };
128+
continue;
129+
}
130+
if ind == 0 {
131+
self.index = self.inner.len();
132+
return Some(haystack);
133+
}
134+
break;
95135
}
96-
if ind == 0 {
97-
self.index = self.inner.len();
98-
return Some(haystack);
136+
self.index = unsafe { self.index.unchecked_add(len) };
137+
Some(unsafe { iter_haystack.get_unchecked(..len.unchecked_sub(pat_byte_len)) })
138+
} else {
139+
let mut last_pat_len: usize = 0;
140+
for ind in 0..self.n {
141+
let haystack: &str = unsafe { iter_haystack.get_unchecked(len..) };
142+
if let Some(byte_ind) = self.pat.find_in(haystack) {
143+
last_pat_len = match unsafe { haystack.as_bytes().get_unchecked(byte_ind) } {
144+
0b0000_0000..=0b0111_1111 => 1,
145+
0b1000_0000..=0b1101_1111 => 2,
146+
0b1110_0000..=0b1110_1111 => 3,
147+
0b1111_0000..=0b1111_1111 => 4,
148+
};
149+
len = unsafe { len.unchecked_add(byte_ind).unchecked_add(last_pat_len) };
150+
continue;
151+
}
152+
if ind == 0 {
153+
self.index = self.inner.len();
154+
return Some(haystack);
155+
}
156+
break;
99157
}
100-
break;
158+
self.index = unsafe { self.index.unchecked_add(len) };
159+
Some(unsafe { iter_haystack.get_unchecked(..len.unchecked_sub(last_pat_len)) })
101160
}
102-
self.index = unsafe { self.index.unchecked_add(len) };
103-
Some(unsafe { haystack.get_unchecked(..len.unchecked_sub(self.pat.len())) })
104161
}
105162
}
106163

107164
#[test]
108165
fn test() {
109-
let mut splitter: SplitEvery = "oh oh oh oh oh".split_every_n_of_str(" ", 2);
166+
let mut splitter: SplitEvery<&str> = "oh oh oh oh oh".split_every_n_times(" ", 2);
110167
assert_eq!(splitter.next(), Some("oh oh"));
111168
assert_eq!(splitter.next(), Some("oh oh"));
112169
assert_eq!(splitter.next(), Some("oh"));
113170
assert_eq!(splitter.next(), None);
114171
assert_eq!(splitter.next(), None);
115172

116-
let mut splitter: SplitEvery = "oboooobobobobob".split_every_n_of_char('o', 3);
173+
let mut splitter = "oh—oh—oh—oh—oh".split_every_n_times(|ch| ch == '—', 2);
174+
assert_eq!(splitter.next(), Some("oh—oh"));
175+
assert_eq!(splitter.next(), Some("oh—oh"));
176+
assert_eq!(splitter.next(), Some("oh"));
177+
assert_eq!(splitter.next(), None);
178+
assert_eq!(splitter.next(), None);
179+
180+
let mut splitter: SplitEvery<char> = "oboooobobobobob".split_every_n_times('o', 3);
117181
assert_eq!(splitter.next(), Some("obo"));
118182
assert_eq!(splitter.next(), Some("oob"));
119183
assert_eq!(splitter.next(), Some("bobob"));
120184
assert_eq!(splitter.next(), Some("b"));
121185
assert_eq!(splitter.next(), None);
122186
assert_eq!(splitter.next(), None);
123187

124-
let mut splitter: SplitEvery = "hhhahahahaha".split_every_n_of_char('h', 1);
188+
let mut splitter: SplitEvery<char> = "hhhahahahaha".split_every_n_times('h', 1);
125189
assert_eq!(splitter.next(), Some(""));
126190
assert_eq!(splitter.next(), Some(""));
127191
assert_eq!(splitter.next(), Some(""));
@@ -133,8 +197,8 @@ fn test() {
133197
assert_eq!(splitter.next(), None);
134198
assert_eq!(splitter.next(), None);
135199

136-
let mut splitter: SplitEvery =
137-
"Oh hi there I don't really know what to say".split_every_n_of_str(" ", 3);
200+
let mut splitter: SplitEvery<&str> =
201+
"Oh hi there I don't really know what to say".split_every_n_times(" ", 3);
138202
assert_eq!(splitter.next().unwrap(), "Oh hi there");
139203
assert_eq!(splitter.next().unwrap(), "I don't really");
140204
assert_eq!(splitter.next().unwrap(), "know what to");

0 commit comments

Comments
 (0)