Skip to content

Commit 3175ad0

Browse files
committed
Added helper methods PlainLength, ToPlain and Truncate in HtmlText
1 parent 36be67a commit 3175ad0

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

src/Telegram.Bot/Extensions/FormatExtensions.cs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,4 +210,104 @@ public static string ToHtml(string message, MessageEntity[]? entities)
210210
[return: NotNullIfNotNull(nameof(text))]
public static string? Escape(string? text)
{
    if (text is null)
        return null;

    // '&' is replaced first so the ampersands introduced by the next two replacements are not escaped again
    var escaped = text.Replace("&", "&amp;");
    escaped = escaped.Replace("<", "&lt;");
    return escaped.Replace(">", "&gt;");
}
213+
214+
/// <summary>Compute how many plain-text characters the HTML text contains, ignoring the &lt;tags&gt;</summary>
/// <param name="html">HTML text</param>
/// <returns>Number of characters (each HTML &amp;entity; counts as 1)</returns>
public static int PlainLength(string html)
{
    var remaining = html.AsSpan();
    int total = 0;
    while (true)
    {
        int start = remaining.IndexOfAny('&', '<');
        if (start < 0)
            return total + remaining.Length; // no more markup: everything left is plain text

        total += start; // plain characters located before the tag/entity
        bool isEntity = remaining[start] == '&';
        if (isEntity)
            total++; // the whole entity stands for a single character

        remaining = remaining.Slice(start + 1);
        int stop = remaining.IndexOf(isEntity ? ';' : '>');
        if (stop < 0)
            return total; // unterminated tag/entity: the rest of the text is not counted

        remaining = remaining.Slice(stop + 1);
    }
}
233+
234+
/// <summary>Strip the &lt;tags&gt; from the HTML text and return the resulting plain text</summary>
/// <param name="html">HTML text</param>
/// <returns>Plain text (only &amp;lt; &amp;gt; &amp;amp; &amp;quot; entities are decoded, other entities are kept as-is)</returns>
public static string ToPlain(string html)
{
    var result = new StringBuilder(html.Length);
    var rest = html.AsSpan();
    for (int pos = rest.IndexOfAny('&', '<'); pos >= 0; pos = rest.IndexOfAny('&', '<'))
    {
        result.Append(rest.Slice(0, pos)); // plain text located before the tag/entity
        bool isEntity = rest[pos] == '&';
        rest = rest.Slice(pos + 1);
        int stop = rest.IndexOf(isEntity ? ';' : '>');
        if (stop < 0)
        {
            // unterminated tag/entity: the rest of the text is dropped
            rest = default;
            break;
        }
        if (isEntity)
        {
            var name = rest.Slice(0, stop);
            if (name.SequenceEqual("lt".AsSpan()))
                result.Append('<');
            else if (name.SequenceEqual("gt".AsSpan()))
                result.Append('>');
            else if (name.SequenceEqual("amp".AsSpan()))
                result.Append('&');
            else if (name.SequenceEqual("quot".AsSpan()))
                result.Append('"');
            else
                result.Append('&').Append(rest.Slice(0, stop + 1)); // unknown entity: copied verbatim, including '&' and ';'
        }
        rest = rest.Slice(stop + 1);
    }
    result.Append(rest);
    return result.ToString();
}
261+
262+
/// <summary>Truncate the HTML text to the specified number of plain-text characters</summary>
/// <remarks>Tags do not count, an HTML &amp;entity; counts as 1 character and so does a surrogate pair.
/// Elements still open at the cut position are closed after the suffix, so the result stays well-formed.
/// An unterminated '&amp;' or '&lt;' is treated as an ordinary character.</remarks>
/// <param name="html">HTML text</param>
/// <param name="count">Target count of Unicode characters (including the suffix)</param>
/// <param name="suffix">Suffix to append if a truncation was done</param>
/// <returns>The original HTML if its plain text fits in <paramref name="count"/> characters, otherwise the truncated HTML</returns>
/// <exception cref="ArgumentException">Truncation may be needed but <paramref name="count"/> is smaller than the suffix length</exception>
public static string Truncate(string html, int count, string suffix = "…")
{
    int len = html.Length;
    // the raw length is an upper bound of the plain-text length, so this is a cheap early exit
    if (len <= count) return html;
    int budget = count - suffix.Length; // plain characters that can be kept in front of the suffix
    if (budget < 0) throw new ArgumentException("Invalid count", nameof(count));

    string closingTags = ""; // closing tags of the elements open at the cut position, innermost first
    int plain = 0;           // plain-text characters scanned so far
    int cut = -1;            // where to cut if the text turns out to be too long
    int index = 0;
    while (index < len)
    {
        // the cut position is right after the last character allowed by the budget (before any following tag)
        if (cut < 0 && plain == budget) cut = index;

        char c = html[index];
        if (c == '<')
        {
            int end = html.IndexOf('>', index + 1);
            if (end >= 0)
            {
                if (cut < 0) // past the cut position, open elements no longer matter
                {
                    if (html[index + 1] == '/')
                    {
                        // closing tag: forget the innermost open element (nothing to do if the tag is unmatched)
                        int gt = closingTags.IndexOf('>');
                        if (gt >= 0) closingTags = closingTags[(gt + 1)..];
                    }
                    else if (html[end - 1] != '/') // self-closing tags need no closing tag
                    {
                        // the tag name ends at the first space (start of the attributes) or at '>'
                        int nameEnd = html.IndexOf(' ', index + 1, end - index - 1);
                        if (nameEnd < 0) nameEnd = end;
                        closingTags = "</" + html[(index + 1)..nameEnd] + ">" + closingTags;
                    }
                }
                index = end + 1;
                continue; // tags are not part of the plain text
            }
            // no '>' found: fall through and count '<' as an ordinary character
        }
        else if (c == '&')
        {
            // a terminated entity counts as a single character; without ';' only the '&' is consumed
            int end = html.IndexOf(';', index + 1);
            if (end >= 0) index = end;
        }
        else if (char.IsHighSurrogate(c) && index + 1 < len && char.IsLowSurrogate(html[index + 1]))
        {
            index++; // consume both halves so the cut can never fall inside a surrogate pair
        }
        index++;

        // only truncate once we know the plain text really exceeds the requested count
        if (++plain > count) return html[..cut] + suffix + closingTags;
    }
    return html;
}
308+
309+
#if !NET6_0_OR_GREATER
// Span-accepting helpers, compiled only when NET6_0_OR_GREATER is not defined:
// the span is materialized as a string and forwarded to the string-based StringBuilder method.
private static StringBuilder Append(this StringBuilder sb, ReadOnlySpan<char> value)
{
    string text = value.ToString();
    return sb.Append(text);
}

private static StringBuilder Insert(this StringBuilder sb, int index, ReadOnlySpan<char> value)
{
    string text = value.ToString();
    return sb.Insert(index, text);
}
#endif
213313
}

0 commit comments

Comments
 (0)