From f28e4655ce64d2569141a73c0d5707b6d3670747 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=88=B1=E5=9B=A0=E6=96=AF=E5=94=90?= <1745525+einsitang@users.noreply.github.com>
Date: Mon, 30 Jun 2025 02:04:15 +0000
Subject: [PATCH 1/4] feat: token case insensitivity and support for
 standalone (alone) token matching

---
 parser.go    | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 tokenizer.go | 27 +++++++++++++++++++---
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/parser.go b/parser.go
index 003608e..2de550d 100644
--- a/parser.go
+++ b/parser.go
@@ -338,6 +338,48 @@ func (p *parsing) parseNumber() bool {
 	return true
 }
 
+// ignoreCaseEquals compares two bytes ignoring ASCII case
+func ignoreCaseEquals(a byte, b byte) bool {
+	if a >= 'a' && a <= 'z' {
+		a -= 32
+	}
+
+	if b >= 'a' && b <= 'z' {
+		b -= 32
+	}
+
+	return a == b
+}
+
+// ignoreCaseMatch compares next bytes from data with `r`, ignoring ASCII case
+//
+// a copy of the match method with case-insensitive comparison
+func (p *parsing) ignoreCaseMatch(r []byte, seek bool) bool {
+	if ignoreCaseEquals(r[0], p.curr) {
+		if len(r) > 1 {
+			if p.ensureBytes(len(r) - 1) {
+				var i = 1
+				for ; i < len(r); i++ {
+					if !ignoreCaseEquals(r[i], p.str[p.pos+i]) {
+						return false
+					}
+				}
+				if seek {
+					p.pos += i - 1
+					p.next()
+				}
+				return true
+			}
+			return false
+		}
+		if seek {
+			p.next()
+		}
+		return true
+	}
+	return false
+}
+
 // match compares next bytes from data with `r`
 func (p *parsing) match(r []byte, seek bool) bool {
 	if r[0] == p.curr {
@@ -435,13 +477,32 @@ func (p *parsing) parseToken() bool {
 	if toks != nil {
 		start := p.pos
 		for _, t := range toks {
-			if p.match(t.Token, true) {
+
+				var matchFn func(r []byte, seek bool) bool
+				if t.IgnoreCase {
+					matchFn = p.ignoreCaseMatch
+				} else {
+					matchFn = p.match
+				}
+
+				if matchFn(t.Token, true) {
+					// alone token: require a non-identifier byte after the match
+					if t.Alone && len(p.str) > start+len(t.Token) {
+						nt := p.str[start+len(t.Token)]
+						if nt >= '0' && nt <= '9' || nt >= 'a' && nt <= 'z' || nt >= 'A' && nt <= 'Z' || nt == '_' {
+							// roll back the position and try the next token
+							p.pos = p.pos - (len(t.Token))
+							p.curr = p.str[p.pos]
+							continue
+						}
+					}
 				p.token.key = t.Key
 				p.token.offset = p.offset + start
 				p.token.value = t.Token
 				p.emmitToken()
 				return true
 			}
+
 			}
 		}
 	}
diff --git a/tokenizer.go b/tokenizer.go
index 22092c6..99d0e8b 100644
--- a/tokenizer.go
+++ b/tokenizer.go
@@ -70,6 +70,10 @@ type tokenRef struct {
 	Key TokenKey
 	// Token value as is. Should be unique.
 	Token []byte
+	// Alone, when true, matches the token only when it is not followed by an identifier byte
+	Alone bool
+	// IgnoreCase, when true, matches the token case-insensitively
+	IgnoreCase bool
 }
 
 // QuoteInjectSettings describes open injection token and close injection token.
@@ -206,18 +210,35 @@ func (t *Tokenizer) AllowNumberUnderscore() *Tokenizer {
 	return t
 }
 
+type DefineTokenOption func(*tokenRef)
+
+func AloneTokenOption(ref *tokenRef) {
+	ref.Alone = true
+}
+
+func InsensitiveTokenOption(ref *tokenRef) {
+	ref.IgnoreCase = true
+}
+
 // DefineTokens add custom token.
 // The `key` is the identifier of `tokens`, `tokens` — slice of tokens as string.
 // If a key already exists, tokens will be rewritten.
-func (t *Tokenizer) DefineTokens(key TokenKey, tokens []string) *Tokenizer {
+func (t *Tokenizer) DefineTokens(key TokenKey, tokens []string, options ...DefineTokenOption) *Tokenizer {
 	var tks []*tokenRef
 	if key < 1 {
 		return t
 	}
 	for _, token := range tokens {
 		ref := tokenRef{
-			Key:   key,
-			Token: s2b(token),
+			Key:        key,
+			Token:      s2b(token),
+			Alone:      false,
+			IgnoreCase: false,
+		}
+		if len(options) > 0 {
+			for _, option := range options {
+				option(&ref)
+			}
 		}
 		head := ref.Token[0]
 		tks = append(tks, &ref)

From 296e054a608559967c4197568c686a5192797f76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=88=B1=E5=9B=A0=E6=96=AF=E5=94=90?= <1745525+einsitang@users.noreply.github.com>
Date: Mon, 30 Jun 2025 02:12:52 +0000
Subject: [PATCH 2/4] Initial commit

From 982a7a9de7189567bfcce8d22607850b9eb8e81e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=88=B1=E5=9B=A0=E6=96=AF=E5=94=90?= <1745525+einsitang@users.noreply.github.com>
Date: Mon, 30 Jun 2025 11:05:57 +0000
Subject: [PATCH 3/4] feat: add ignore-case support for token parsing and
 enhance tests

refactor: rename ignoreCaseAlphabet to upperCaseAlphabet for clarity

---
 parser.go         | 24 ++++++++++++++--------
 tokenizer.go      | 31 ++++++++++++++++++++---------
 tokenizer_test.go | 24 +++++++++++++++++++++++-
 3 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/parser.go b/parser.go
index 2de550d..002835e 100644
--- a/parser.go
+++ b/parser.go
@@ -338,17 +338,20 @@ func (p *parsing) parseNumber() bool {
 	return true
 }
 
-// ignoreCaseEquals compares two bytes ignoring ASCII case
+// ignoreCaseEquals reports whether a and b are equal ignoring ASCII case
 func ignoreCaseEquals(a byte, b byte) bool {
-	if a >= 'a' && a <= 'z' {
-		a -= 32
-	}
+	return upperCaseAlphabet(a) == upperCaseAlphabet(b)
+}
 
-	if b >= 'a' && b <= 'z' {
-		b -= 32
+func upperCaseAlphabet(c byte) byte {
+	if c >= 'a' && c <= 'z' {
+		c -= 32
 	}
+	return c
+}
 
-	return a == b
+func isAlphabet(c byte) bool {
+	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
 }
 
 // ignoreCaseMatch compares next bytes from data with `r`, ignoring ASCII case
@@ -474,10 +477,15 @@ func (p *parsing) parseQuote() bool {
 func (p *parsing) parseToken() bool {
 	if p.curr != 0 {
 		toks := p.t.index[p.curr]
+		if isAlphabet(p.curr) {
+			upCaseByte := upperCaseAlphabet(p.curr)
+			c := p.t.icIndex[upCaseByte]
+			toks = append(toks, c...)
+		}
 		if toks != nil {
 			start := p.pos
 			for _, t := range toks {
-
+
 				var matchFn func(r []byte, seek bool) bool
 				if t.IgnoreCase {
 					matchFn = p.ignoreCaseMatch
diff --git a/tokenizer.go b/tokenizer.go
index 99d0e8b..db291ad 100644
--- a/tokenizer.go
+++ b/tokenizer.go
@@ -133,8 +133,10 @@ type Tokenizer struct {
 	stopOnUnknown         bool
 	allowNumberUnderscore bool
 	// all defined custom tokens {key: [token1, token2, ...], ...}
-	tokens map[TokenKey][]*tokenRef
-	index  map[byte][]*tokenRef
+	tokens  map[TokenKey][]*tokenRef
+	index   map[byte][]*tokenRef
+	// index of case-insensitive tokens, keyed by upper-cased first byte
+	icIndex map[byte][]*tokenRef
 	quotes []*StringSettings
 	wSpaces []byte
 	kwMajorSymbols []rune
@@ -148,6 +150,7 @@ func New() *Tokenizer {
 		// flags: 0,
 		tokens: map[TokenKey][]*tokenRef{},
 		index:  map[byte][]*tokenRef{},
+		icIndex: map[byte][]*tokenRef{},
 		quotes: []*StringSettings{},
 		wSpaces: DefaultWhiteSpaces,
 	}
@@ -216,7 +219,7 @@ func AloneTokenOption(ref *tokenRef) {
 	ref.Alone = true
 }
 
-func InsensitiveTokenOption(ref *tokenRef) {
+func IgnoreCaseTokenOption(ref *tokenRef) {
 	ref.IgnoreCase = true
 }
 
@@ -240,14 +243,24 @@ func (t *Tokenizer) DefineTokens(key TokenKey, tokens []string, options ...DefineTokenOption) *Tokenizer {
 				option(&ref)
 			}
 		}
-		head := ref.Token[0]
+
 		tks = append(tks, &ref)
-		if t.index[head] == nil {
-			t.index[head] = []*tokenRef{}
+		var index map[byte][]*tokenRef
+		var head byte
+
+		if ref.IgnoreCase {
+			index = t.icIndex
+			head = upperCaseAlphabet(ref.Token[0])
+		} else {
+			index = t.index
+			head = ref.Token[0]
+		}
+		if index[head] == nil {
+			index[head] = []*tokenRef{}
 		}
-		t.index[head] = append(t.index[head], &ref)
-		sort.Slice(t.index[head], func(i, j int) bool {
-			return len(t.index[head][i].Token) > len(t.index[head][j].Token)
+		index[head] = append(index[head], &ref)
+		sort.Slice(index[head], func(i, j int) bool {
+			return len(index[head][i].Token) > len(index[head][j].Token)
 		})
 	}
 	t.tokens[key] = tks
diff --git a/tokenizer_test.go b/tokenizer_test.go
index 55eeadf..a209573 100644
--- a/tokenizer_test.go
+++ b/tokenizer_test.go
@@ -2,11 +2,32 @@ package tokenizer
 
 import (
 	"bytes"
-	"github.com/stretchr/testify/require"
 	"strings"
 	"testing"
+
+	"github.com/stretchr/testify/require"
 )
 
+func TestTokenizeParse(t *testing.T) {
+	TXKey := TokenKey(100)
+	TRoleKey := TokenKey(101)
+	TAndKey := TokenKey(102)
+
+	tokenizer := New()
+	// ignore case
+	tokenizer.DefineTokens(TXKey, []string{"hello"}, IgnoreCaseTokenOption)
+	tokenizer.DefineTokens(TRoleKey, []string{"Role"}, AloneTokenOption)
+	tokenizer.DefineTokens(TAndKey, []string{"and"})
+	input := "heLlOHhellox and 1 == 0.5+0.5 Role xRolex xandx"
+	stream := tokenizer.ParseString(input)
+	for stream.IsValid() {
+		token := stream.CurrentToken()
+		t.Logf("[%d:%d] %s %v", token.Line(), token.Offset(), token.ValueString(), token.Key())
+		stream.GoNext()
+	}
+
+}
+
 func TestTokenize(t *testing.T) {
 	type item struct {
 		value interface{}
@@ -17,6 +38,7 @@ func TestTokenize(t *testing.T) {
 	wordTokenKey := TokenKey(11)
 	dquoteKey := TokenKey(14)
 	tokenizer.AllowNumberUnderscore()
+
 	tokenizer.DefineTokens(condTokenKey, []string{">=", "<=", "==", ">", "<"})
 	tokenizer.DefineTokens(wordTokenKey, []string{"or", "или"})
 	tokenizer.SetWhiteSpaces([]byte{' ', '\t', '\n'})

From 547b4ffb49cdf32ec48add746be6f2cf9b37a7c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=88=B1=E5=9B=A0=E6=96=AF=E5=94=90?= <1745525+einsitang@users.noreply.github.com>
Date: Mon, 30 Jun 2025 11:39:06 +0000
Subject: [PATCH 4/4] test: update TestTokenizeParse cases for ignore-case and
 alone tokens

---
 tokenizer_test.go | 12 +++++++-----
 1 file changed, 7
insertions(+), 5 deletions(-) diff --git a/tokenizer_test.go b/tokenizer_test.go index a209573..50444b1 100644 --- a/tokenizer_test.go +++ b/tokenizer_test.go @@ -9,16 +9,18 @@ import ( ) func TestTokenizeParse(t *testing.T) { - TXKey := TokenKey(100) - TRoleKey := TokenKey(101) - TAndKey := TokenKey(102) + THello := TokenKey(100) + TWorld := TokenKey(101) + TRoleKey := TokenKey(105) + TAndKey := TokenKey(106) tokenizer := New() // ignore case - tokenizer.DefineTokens(TXKey, []string{"hello"}, IgnoreCaseTokenOption) + tokenizer.DefineTokens(THello, []string{"hello"}, IgnoreCaseTokenOption) tokenizer.DefineTokens(TRoleKey, []string{"Role"}, AloneTokenOption) + tokenizer.DefineTokens(TWorld, []string{"world"}, IgnoreCaseTokenOption, AloneTokenOption) tokenizer.DefineTokens(TAndKey, []string{"and"}) - input := "heLlOHhellox and 1 == 0.5+0.5 Role xRolex xandx" + input := "HeLLoWoRlD can match,prefixWorld role and roles both not match,but Role and WorLd is match will" stream := tokenizer.ParseString(input) for stream.IsValid() { token := stream.CurrentToken()
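---

A minimal usage sketch of the two new DefineTokens options, mirroring the API
exercised by TestTokenizeParse above. The import path (github.com/bzick/tokenizer)
and the stream.Close() call are assumptions about the surrounding library rather
than anything this series adds; adjust the path to wherever the patched fork lives.

package main

import (
	"fmt"

	"github.com/bzick/tokenizer" // assumed import path for the patched library
)

const (
	THello tokenizer.TokenKey = iota + 1 // DefineTokens ignores keys < 1
	TRole
)

func main() {
	parser := tokenizer.New()

	// IgnoreCaseTokenOption: "hello" matches regardless of case (hello, HELLO, heLlO, ...).
	parser.DefineTokens(THello, []string{"hello"}, tokenizer.IgnoreCaseTokenOption)

	// AloneTokenOption: "Role" is emitted only when the byte that follows it is not
	// an identifier byte ([0-9a-zA-Z_]), so the "Role" inside "xRolex" is skipped.
	parser.DefineTokens(TRole, []string{"Role"}, tokenizer.AloneTokenOption)

	stream := parser.ParseString("heLlO Role xRolex")
	defer stream.Close()
	for stream.IsValid() {
		token := stream.CurrentToken()
		fmt.Printf("key=%d value=%q\n", token.Key(), token.ValueString())
		stream.GoNext()
	}
}

Both options compose: passing IgnoreCaseTokenOption and AloneTokenOption together,
as patch 4 does for "world", yields a case-insensitive token that still requires a
non-identifier byte after the match.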