Skip to content
This repository was archived by the owner on Jan 13, 2024. It is now read-only.

Commit bddee4e

Browse files
committed
Add configurable severity levels.
1 parent 39a57b6 commit bddee4e

File tree

8 files changed

+462
-424
lines changed

8 files changed

+462
-424
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ Accuracy was evaluated based on the first 100,000 items from this [dataset of mo
5353

5454
|**Package**|**Time**|**Accuracy**|**Comment**|
5555
|:-----:|:-----:|:-----:|:-----:|
56-
|[finnbear/moderation](https://github.com/finnbear/moderation)|1.49s|90.76%|Current API version is not stable|
57-
|[TwinProduction/go-away](https://github.com/TwinProduction/go-away)|2.23s|82.10%|Many false positives from combined words like "push it"|
56+
|[finnbear/moderation](https://github.com/finnbear/moderation)|1.49s|90.80%|Current API version is not stable|
57+
|[TwinProduction/go-away](https://github.com/TwinProduction/go-away)|2.11s|82.11%|Many false positives from combined words like "push it"|
5858

5959

6060
## Acknowledgements

comparison/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ go 1.15
55
replace github.com/finnbear/moderation => ../
66

77
require (
8-
github.com/TwinProduction/go-away v1.1.1
8+
github.com/TwinProduction/go-away v1.1.2
99
github.com/finnbear/moderation v0.5.0
1010
golang.org/x/text v0.3.5 // indirect
1111
)

comparison/go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ github.com/TwinProduction/go-away v1.1.0 h1:AhkmMxDIxI4Dr0/Hki/qtFfLh/02MOtmDEqL
44
github.com/TwinProduction/go-away v1.1.0/go.mod h1:VB/lNzhkzh7Xw2QgU+tYBjMheldukJaIJzVaIx2rh30=
55
github.com/TwinProduction/go-away v1.1.1 h1:hqESSRzR3HiBhXQeH6i7Cshn5AQQN0wRfp3v07zw7dk=
66
github.com/TwinProduction/go-away v1.1.1/go.mod h1:rhlrmkf0W6BXWsJoj96OYT0FU/Z7mtvfrAW3JezrWeA=
7+
github.com/TwinProduction/go-away v1.1.2 h1:cUaSUKwNuDEEWKQayPhhWdGgIALpOiexy5yFZZqv2Zg=
8+
github.com/TwinProduction/go-away v1.1.2/go.mod h1:rhlrmkf0W6BXWsJoj96OYT0FU/Z7mtvfrAW3JezrWeA=
79
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
810
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
911
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=

generator/profanity.csv

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ fascist,0,2,0,0
5353
fashist,0,2,0,0
5454
fag,0,3,1,0
5555
fatty,0,2,0,2
56+
fcken,2,0,2,0
5657
feck,2,0,2,0
5758
felching,0,0,3,0
5859
felate,0,0,3,0
@@ -82,7 +83,7 @@ honkie,0,2,0,0
8283
hooker,0,0,3,0
8384
horny,0,0,3,0
8485
idiot,0,0,0,1
85-
imecile,0,0,0,2
86+
imbecile,0,0,0,2
8687
incest,0,0,3,0
8788
jackass,1,0,0,1
8889
jerk,0,0,0,1
@@ -140,14 +141,14 @@ sex,0,0,1,0
140141
shagger,0,0,3,0
141142
shagging,0,0,3,0
142143
shit,2,0,0,0
143-
sissy,0,0,0,2
144+
sissy,0,0,0,1
144145
skank,0,2,3,0
145146
slut,0,2,2,0
146147
sodomize,0,0,3,0
147148
sodomy,0,0,3,0
148149
spunk,0,0,3,0
149150
stfu,0,0,0,2
150-
stupid,0,0,0,2
151+
stupid,0,0,0,1
151152
suckit,0,0,2,2
152153
suckmy,0,0,3,0
153154
testical,0,0,3,0

moderation.go

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,44 @@ import (
99
"unicode"
1010
)
1111

12-
// Types of inappropriateness
12+
// Types and severities of inappropriateness
1313
//
14-
// For compability, only pass a single Type or a bitwise OR of multiple Type's,
15-
// and always reference Type's by name as their value may change from version
16-
// to version. Other operations on Type's are not supported.
14+
// For compatibility, always reference them by name as their value may change
15+
// from version to version.
16+
//
17+
// Use a bitwise OR of multiple profanity classifications, and a bitwise AND to
18+
// specify a severity level (default Mild). The definition of Inappropriate
19+
// (mildly profane, mildly offensive, mildly sexual, or severely mean) serves
20+
// as a good example.
21+
//
22+
// Other operations on Type's are NOT supported.
23+
//
24+
// Severities should be interpreted on an "at least" basis, e.g. Mild means
25+
// Mild, Moderate, OR Severe.
1726
type Type uint32
1827

1928
const (
20-
Profane Type = 1 << iota
29+
Profane Type = 0b111 << (iota * 3)
2130
Offensive
2231
Sexual
2332
Mean
2433
Spam
25-
Inappropriate = Profane | Offensive | Sexual
34+
Inappropriate = Profane | Offensive | Sexual | (Mean & Severe)
2635
Any = Profane | Offensive | Sexual | Spam | Mean
36+
)
2737

38+
const (
39+
Mild Type = 0b111_111_111_111_111
40+
Moderate = 0b110_110_110_110_110
41+
Severe = 0b100_100_100_100_100
42+
)
43+
44+
const (
2845
countableTypes = 4
2946

30-
minMatchable rune = 0x0020
31-
maxMatchable rune = 0x007E
47+
// A subset of the ASCII range that requires no sanitization
48+
minNormal rune = 0x0020
49+
maxNormal rune = 0x007E
3250
)
3351

3452
var (
@@ -59,15 +77,15 @@ func IsInappropriate(text string) bool {
5977
// Is returns whether a phrase contains words matching the types flag, useful if
6078
// checking only one type or set of types is needed
6179
func Is(text string, types Type) bool {
62-
return Scan(text)&types > 0
80+
return Scan(text)&types != 0
6381
}
6482

6583
// Scan returns a bitmask of all types, useful if checking multiple types or
6684
// sets of types is needed, without multiple calls to Is(text, types)
6785
func Scan(text string) (types Type) {
6886
// Figure out if sanitization is needed, and if so, do it
6987
for _, textRune := range text {
70-
if textRune < minMatchable || maxMatchable < textRune {
88+
if textRune < minNormal || maxNormal < textRune {
7189
// Sanitize
7290
buf := make([]byte, 0, len(text))
7391
_, n, _ := transform.Append(removeAccentsTransform, buf, []byte(text))
@@ -99,7 +117,7 @@ func Scan(text string) (types Type) {
99117
var replacement string
100118
if int(textByte) < len(replacements) {
101119
replacement = replacements[textByte]
102-
} else if textRune > maxMatchable {
120+
} else if textRune > maxNormal {
103121
replacement = runeReplacements[textRune]
104122
if replacement == "" {
105123
lowerRune := unicode.ToLower(textRune)
@@ -118,19 +136,13 @@ func Scan(text string) (types Type) {
118136
textByte = replacement[0]
119137
textBytes = replacement
120138
matchable = true
121-
case textRune < minMatchable || maxMatchable < textRune:
122-
// Unhandled runes (not printable, not representable as byte, etc.)
123-
// matchable = false implied
124-
switch textRune {
125-
case '\n', '\r', '\t':
126-
skippable = true
127-
}
128139
default:
140+
// matchable = false implied
129141
switch textByte {
130142
case '*': // these count as replacements
131143
replaced = true
132144
fallthrough
133-
case ' ', '~', '-', '_', '.', ',': // false positives may contain these
145+
case ' ', '~', '-', '_', '.', ',', '\n', '\r', '\t': // false positives may contain these
134146
skippable = true
135147
}
136148
}
@@ -204,18 +216,30 @@ func Scan(text string) (types Type) {
204216
separate = skippable || !matchable
205217
}
206218

207-
for i := 0; i < countableTypes; i++ {
208-
if countableTypeLevels[i] > 0 {
209-
types |= 1 << i
219+
for i, level := range countableTypeLevels {
220+
var severity Type
221+
222+
if level >= 3 {
223+
severity = 0b100 // severe
224+
} else if level == 2 {
225+
severity = 0b010 // moderate
226+
} else if level == 1 {
227+
severity = 0b001 // mild
210228
}
229+
230+
types |= severity << (i * 3)
211231
}
212232

213233
// Min length is arbitrary, but must be > 0 to avoid dividing by zero
214234
if len(text) > 5 {
215235
spamPercent := (100 / 2) * (upperCount + repetitionCount) / len(text)
216236

217-
if spamPercent > 30 {
218-
types |= Spam
237+
// TODO: Define severe spam
238+
239+
if spamPercent > 50 {
240+
types |= Spam & Moderate
241+
} else if spamPercent > 30 {
242+
types |= Spam & Mild
219243
}
220244
}
221245

moderation_test.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,15 @@ func ExampleIsInappropriate_sh1t() {
120120
// Output: true
121121
}
122122

123+
func ExampleIs_severity() {
124+
fmt.Println(Is("sh1t", Profane), Is("sh1t", Profane&Severe))
125+
// Output: true false
126+
}
127+
123128
func ExampleScan() {
124129
types := Scan("you're a dumbass")
125-
fmt.Println(types&Profane > 0, types&Offensive > 0, types&Sexual > 0, types&Mean > 0)
126-
// Output: true false false true
130+
fmt.Println(types&Profane != 0, types&Offensive != 0, types&Sexual != 0, types&Mean != 0, types&(Mean&Severe) != 0)
131+
// Output: true false false true false
127132
}
128133

129134
func TestAnalyzeWikipedia(t *testing.T) {
@@ -162,10 +167,10 @@ func TestAnalyzeWikipedia(t *testing.T) {
162167
}
163168

164169
/*
165-
censored, _ := Censor(phrase, Inappropriate | Mean)
166-
if offensive {
167-
fmt.Printf("\"%s\" -> \"%s\"\n", phrase, censored)
168-
}
170+
censored, _ := Censor(phrase, Inappropriate | Mean)
171+
if offensive {
172+
fmt.Printf("\"%s\" -> \"%s\"\n", phrase, censored)
173+
}
169174
*/
170175

171176
total++

0 commit comments

Comments
 (0)