@@ -210,4 +210,104 @@ public static string ToHtml(string message, MessageEntity[]? entities)
[return: NotNullIfNotNull(nameof(text))]
public static string? Escape(string? text)
    // The ampersand must be replaced first: otherwise the '&' of the entities
    // produced for '<' and '>' would itself be escaped a second time.
    // A null input yields null (null-conditional call chain).
    => text?.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;");
213+
/// <summary>Compute how many characters of plain text the HTML text contains (tags are not counted)</summary>
/// <param name="html">HTML text</param>
/// <returns>Number of UTF-16 characters; each HTML entity counts as a single character</returns>
public static int PlainLength(string html)
{
    ReadOnlySpan<char> rest = html.AsSpan();
    int total = 0;
    for (; ; )
    {
        int start = rest.IndexOfAny('&', '<');
        if (start < 0)
            return total + rest.Length; // no more markup: everything left is plain text
        total += start;

        char terminator;
        if (rest[start] == '<')
            terminator = '>';
        else
        {
            terminator = ';';
            total++; // the whole entity stands for one character
        }

        rest = rest.Slice(start + 1);
        int stop = rest.IndexOf(terminator);
        if (stop < 0)
            return total; // unterminated tag or entity: the remainder is not counted
        rest = rest.Slice(stop + 1);
    }
}
233+
/// <summary>Strip the tags from the HTML text and return the remaining plain text</summary>
/// <param name="html">HTML text</param>
/// <returns>Plain text; only the lt, gt, amp and quot entities are decoded, any other entity is copied as-is</returns>
public static string ToPlain(string html)
{
    var plain = new StringBuilder(html.Length);
    ReadOnlySpan<char> rest = html.AsSpan();
    for (; ; )
    {
        int start = rest.IndexOfAny('&', '<');
        if (start < 0)
            break;
        plain.Append(rest.Slice(0, start));

        bool isEntity = rest[start] == '&';
        rest = rest.Slice(start + 1);
        int stop = rest.IndexOf(isEntity ? ';' : '>');
        if (stop < 0)
        {
            // unterminated tag or entity: the remainder is dropped
            rest = default;
            break;
        }

        if (isEntity)
        {
            ReadOnlySpan<char> name = rest.Slice(0, stop);
            if (name.SequenceEqual("lt".AsSpan()))
                plain.Append('<');
            else if (name.SequenceEqual("gt".AsSpan()))
                plain.Append('>');
            else if (name.SequenceEqual("amp".AsSpan()))
                plain.Append('&');
            else if (name.SequenceEqual("quot".AsSpan()))
                plain.Append('"');
            else
                plain.Append('&').Append(rest.Slice(0, stop + 1)); // unknown entity: keep it verbatim, ';' included
        }
        // a tag contributes nothing to the plain text

        rest = rest.Slice(stop + 1);
    }
    plain.Append(rest);
    return plain.ToString();
}
261+
/// <summary>Truncate the HTML text so that its plain text does not exceed the specified number of characters</summary>
/// <remarks>
/// Tags are never counted, an HTML entity counts as 1 character and so does a surrogate pair.
/// When the text is cut, the suffix is appended, followed by the closing tags of the elements still open at the cut.
/// The HTML is returned unchanged when its plain text already fits in <paramref name="count"/>.
/// Malformed input is tolerated: a lone ampersand counts as 1 character, an unmatched closing tag is ignored,
/// and an unterminated tag is dropped together with everything after it.
/// </remarks>
/// <param name="html">HTML text</param>
/// <param name="count">Maximum count of plain-text characters in the result (including the suffix)</param>
/// <param name="suffix">Suffix to append if a truncation was done</param>
/// <returns>The HTML, truncated if necessary</returns>
/// <exception cref="ArgumentException"><paramref name="count"/> is smaller than the suffix length</exception>
public static string Truncate(string html, int count, string suffix = "…")
{
    int len = html.Length;
    if (len <= count) return html; // the plain text is never longer than the HTML itself
    int budget = count - suffix.Length; // plain-text characters kept in front of the suffix
    if (budget < 0) throw new ArgumentException("Invalid count", nameof(count));

    var closingTags = new StringBuilder(); // closing tags of the currently open elements, innermost first
    int visible = 0;                        // plain-text characters scanned so far
    int cut = budget == 0 ? 0 : -1;         // where to cut the HTML if a truncation turns out to be needed
    string cutClosingTags = "";             // snapshot of closingTags taken at the cut position
    int index = 0;
    while (index < len)
    {
        char c = html[index];
        if (c == '<')
        {
            int end = html.IndexOf('>', index + 1);
            if (end < 0)
            {
                // unterminated tag: drop it and whatever follows
                if (cut < 0) { cut = index; cutClosingTags = closingTags.ToString(); }
                break;
            }
            if (html[index + 1] == '/')
            {
                // closing tag: forget the innermost open element (nothing to do if none is open)
                int close = 0;
                while (close < closingTags.Length && closingTags[close] != '>') close++;
                if (close < closingTags.Length) closingTags.Remove(0, close + 1);
            }
            else if (html[end - 1] != '/') // self-closing tags need no closing tag
            {
                // the tag name ends at the first space (start of the attributes) or at '>'
                int gap = end - index > 2 ? html.IndexOf(' ', index + 2, end - index - 2) : -1;
                int nameEnd = gap < 0 ? end : gap;
                if (nameEnd > index + 1)
                    closingTags.Insert(0, "</" + html.Substring(index + 1, nameEnd - index - 1) + ">");
            }
            index = end + 1;
            continue;
        }

        // one more plain-text character while the limit is already reached: a truncation is required
        if (visible == count) break;

        if (c == '&')
        {
            int semicolon = html.IndexOf(';', index + 1);
            index = semicolon < 0 ? index + 1 : semicolon + 1; // a lone '&' is taken literally
        }
        else if (char.IsHighSurrogate(c) && index + 1 < len && char.IsLowSurrogate(html[index + 1]))
            index += 2; // keep both halves of a surrogate pair together
        else
            index++;
        visible++;

        if (cut < 0 && visible == budget)
        {
            cut = index;
            cutClosingTags = closingTags.ToString();
        }
    }
    if (index >= len) return html; // the whole plain text fits: nothing to truncate
    return html.Substring(0, cut) + suffix + cutClosingTags;
}
308+
#if !NET6_0_OR_GREATER
// Span-taking helpers, compiled only when NET6_0_OR_GREATER is not defined.
// NOTE(review): presumably needed because StringBuilder on those targets has no ReadOnlySpan<char> overloads — confirm.

// Appends the characters of the span by first copying them into a string.
private static StringBuilder Append(this StringBuilder sb, ReadOnlySpan<char> value)
{
    string text = value.ToString();
    return sb.Append(text);
}

// Inserts the characters of the span at the given position by first copying them into a string.
private static StringBuilder Insert(this StringBuilder sb, int index, ReadOnlySpan<char> value)
{
    string text = value.ToString();
    return sb.Insert(index, text);
}
#endif
213313}
0 commit comments