1- //! Split for every n occurences of a pattern iteratively.
2- //! This crate **helps you** split a `string` for every `n` occurences of a `pattern`.
1+ //! Split for every n occurrences of a pattern iteratively.
2+ //! This crate **helps you** split a `string` for every `n` occurrences of a `pattern`.
33//! It contains an exclusive `iterator`.
44//!
55//! # Examples
1010//! // "I don't really"
1111//! // "know what to"
1212//! // "say".
13- //! let mut splitter: SplitEvery =
14- //! "Oh hi there I don't really know what to say".split_every_n_of_str (" ", 3);
13+ //! let mut splitter: SplitEvery<&str> =
14+ //! "Oh hi there I don't really know what to say".split_every_n_times (" ", 3);
1515//! println!("{}", splitter.next().unwrap());
1616//! println!("{}", splitter.next().unwrap());
1717//! println!("{}", splitter.next().unwrap());
1818//! println!("{}", splitter.next().unwrap());
1919//! ```
2020
21+ /// Import all necessary traits and structs.
22+ pub mod prelude {
23+ pub use crate :: { SplitEvery , SplitEveryImpl } ;
24+ }
25+
/// A trait containing all `string` split-every functions.
///
/// It is implemented for every type that is `AsRef<str>`.
pub trait SplitEveryImpl: AsRef<str> {
    /// Splits a `string` every `n` times `pat` is found.
    ///
    /// The split is exclusive: the occurrence of `pat` that terminates a
    /// chunk is not part of any yielded chunk, while the occurrences inside
    /// a chunk are kept. When fewer than `n` occurrences remain, the rest of
    /// the `string` is yielded as the final chunk.
    ///
    /// `pat` may be a `&str`, a `&String`, a `char`, a `&[char]` (matches any
    /// of the listed characters) or a `Copy` closure `FnMut(char) -> bool`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is `0`.
    #[must_use]
    // `Pattern` is deliberately private so that it cannot be implemented outside this file.
    #[allow(private_bounds)]
    fn split_every_n_times<'a, T: Pattern<'a>>(&'a self, pat: T, n: usize) -> SplitEvery<'a, T> {
        // A chunk made of zero occurrences is meaningless and would break the
        // iterator's bookkeeping, so reject it up front.
        assert!(n > 0, "n must be greater than 0");
        SplitEvery {
            inner: self.as_ref(),
            pat_byte_len: pat.byte_len(),
            pat,
            n,
            index: 0,
        }
    }
}

impl<T: AsRef<str>> SplitEveryImpl for T {}

/// A convenient substitution to `std::str::pattern::Pattern`.
trait Pattern<'a> {
    /// A convenient `find` method: byte offset of the first match in `haystack`, if any.
    fn find_in(&self, haystack: &str) -> Option<usize>;

    /// A convenient `len` method.
    ///
    /// Returns `Some(len)` when every match has the same, known byte length,
    /// and `None` when a match is a single character whose width is only
    /// known once it has been found.
    fn byte_len(&self) -> Option<usize>;
}

impl<'a> Pattern<'a> for &'a str {
    fn find_in(&self, haystack: &str) -> Option<usize> {
        haystack.find(*self)
    }

    fn byte_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'a> Pattern<'a> for char {
    fn find_in(&self, haystack: &str) -> Option<usize> {
        haystack.find(*self)
    }

    fn byte_len(&self) -> Option<usize> {
        Some(self.len_utf8())
    }
}

impl<'a> Pattern<'a> for &'a [char] {
    fn find_in(&self, haystack: &str) -> Option<usize> {
        haystack.find(*self)
    }

    fn byte_len(&self) -> Option<usize> {
        // `str::find` with a char slice matches exactly ONE of the listed
        // characters, so the match width is that character's width, not the
        // sum of all widths. It has to be measured per match.
        None
    }
}

impl<'a, F: FnMut(char) -> bool + Copy> Pattern<'a> for F {
    fn find_in(&self, haystack: &str) -> Option<usize> {
        haystack.find(*self)
    }

    fn byte_len(&self) -> Option<usize> {
        // The predicate can accept characters of different widths.
        None
    }
}

impl<'a> Pattern<'a> for &'a String {
    fn find_in(&self, haystack: &str) -> Option<usize> {
        haystack.find(*self)
    }

    fn byte_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

/// An `Iterator` struct for splitting a `string` every `n` occurrences of a `pattern`.
// `Pattern` is deliberately private so that it cannot be implemented outside this file.
#[allow(private_bounds)]
pub struct SplitEvery<'a, T: Pattern<'a>> {
    /// The whole `string` being split.
    inner: &'a str,
    /// The pattern whose occurrences are counted.
    pat: T,
    /// Byte length of a match when it is the same for every match, else `None`.
    pat_byte_len: Option<usize>,
    /// Number of occurrences that make up one chunk (always greater than `0`).
    n: usize,
    /// Byte offset into `inner` at which the next chunk starts.
    index: usize,
}

impl<'a, T: Pattern<'a>> Iterator for SplitEvery<'a, T> {
    type Item = &'a str;

    /// Yields the next chunk, or `None` once the whole `string` has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        let inner: &'a str = self.inner;
        if self.index == inner.len() {
            return None;
        }
        let rest = &inner[self.index..];
        // Bytes of `rest` consumed so far, including every matched occurrence.
        let mut consumed = 0;
        // Width of the most recent match; it is cut off the end of the chunk.
        let mut last_match_len = 0;
        for _ in 0..self.n {
            let tail = &rest[consumed..];
            let found = self.pat.find_in(tail).map(|offset| {
                let match_len = self.pat_byte_len.unwrap_or_else(|| {
                    // Single-character match: measure the character that was found.
                    tail[offset..].chars().next().map_or(0, char::len_utf8)
                });
                (offset, match_len)
            });
            match found {
                Some((offset, match_len)) if match_len > 0 => {
                    consumed += offset + match_len;
                    last_match_len = match_len;
                }
                // Either fewer than `n` occurrences are left, or the pattern
                // is empty and can never advance: the remainder is the final
                // chunk and the iterator is exhausted afterwards.
                _ => {
                    self.index = inner.len();
                    return Some(rest);
                }
            }
        }
        self.index += consumed;
        Some(&rest[..consumed - last_match_len])
    }
}
106163
107164#[ test]
108165fn test ( ) {
109- let mut splitter: SplitEvery = "oh oh oh oh oh" . split_every_n_of_str ( " " , 2 ) ;
166+ let mut splitter: SplitEvery < & str > = "oh oh oh oh oh" . split_every_n_times ( " " , 2 ) ;
110167 assert_eq ! ( splitter. next( ) , Some ( "oh oh" ) ) ;
111168 assert_eq ! ( splitter. next( ) , Some ( "oh oh" ) ) ;
112169 assert_eq ! ( splitter. next( ) , Some ( "oh" ) ) ;
113170 assert_eq ! ( splitter. next( ) , None ) ;
114171 assert_eq ! ( splitter. next( ) , None ) ;
115172
116- let mut splitter: SplitEvery = "oboooobobobobob" . split_every_n_of_char ( 'o' , 3 ) ;
173+ let mut splitter = "oh—oh—oh—oh—oh" . split_every_n_times ( |ch| ch == '—' , 2 ) ;
174+ assert_eq ! ( splitter. next( ) , Some ( "oh—oh" ) ) ;
175+ assert_eq ! ( splitter. next( ) , Some ( "oh—oh" ) ) ;
176+ assert_eq ! ( splitter. next( ) , Some ( "oh" ) ) ;
177+ assert_eq ! ( splitter. next( ) , None ) ;
178+ assert_eq ! ( splitter. next( ) , None ) ;
179+
180+ let mut splitter: SplitEvery < char > = "oboooobobobobob" . split_every_n_times ( 'o' , 3 ) ;
117181 assert_eq ! ( splitter. next( ) , Some ( "obo" ) ) ;
118182 assert_eq ! ( splitter. next( ) , Some ( "oob" ) ) ;
119183 assert_eq ! ( splitter. next( ) , Some ( "bobob" ) ) ;
120184 assert_eq ! ( splitter. next( ) , Some ( "b" ) ) ;
121185 assert_eq ! ( splitter. next( ) , None ) ;
122186 assert_eq ! ( splitter. next( ) , None ) ;
123187
124- let mut splitter: SplitEvery = "hhhahahahaha" . split_every_n_of_char ( 'h' , 1 ) ;
188+ let mut splitter: SplitEvery < char > = "hhhahahahaha" . split_every_n_times ( 'h' , 1 ) ;
125189 assert_eq ! ( splitter. next( ) , Some ( "" ) ) ;
126190 assert_eq ! ( splitter. next( ) , Some ( "" ) ) ;
127191 assert_eq ! ( splitter. next( ) , Some ( "" ) ) ;
@@ -133,8 +197,8 @@ fn test() {
133197 assert_eq ! ( splitter. next( ) , None ) ;
134198 assert_eq ! ( splitter. next( ) , None ) ;
135199
136- let mut splitter: SplitEvery =
137- "Oh hi there I don't really know what to say" . split_every_n_of_str ( " " , 3 ) ;
200+ let mut splitter: SplitEvery < & str > =
201+ "Oh hi there I don't really know what to say" . split_every_n_times ( " " , 3 ) ;
138202 assert_eq ! ( splitter. next( ) . unwrap( ) , "Oh hi there" ) ;
139203 assert_eq ! ( splitter. next( ) . unwrap( ) , "I don't really" ) ;
140204 assert_eq ! ( splitter. next( ) . unwrap( ) , "know what to" ) ;
0 commit comments