2727 mtkHtml, # Raw HTML content
2828 mtkTable, # Table
2929 mtkParagraph, # Paragraph
30+ mtkFootnoteRef, # Footnote reference
31+ mtkFootnoteDef, # Footnote definition
3032 mtkDocument, # Root document node
3133 mtkUnknown # Unknown or unsupported token
3234 mtkEOF # End of file/input
4547 current* : char
4648 pos* , line* , col* : int
4749 strbuf* : string
50+ pendingTokens: seq [MarkdownTokenTuple ] # Buffer for tokens split from text
4851
4952#
5053# Markdown Lexer
@@ -88,6 +91,37 @@ proc initToken(lex: var MarkdownLexer, kind: MarkdownTokenKind, value: sink stri
proc newTokenTuple(lex: MarkdownLexer, kind: MarkdownTokenKind,
                   token: string = "", wsno: int = 0,
                   attrs: Option[seq[string]] = none(seq[string])): MarkdownTokenTuple =
  ## Build a token tuple of the given `kind`, stamped with the lexer's
  ## current line and position.
  ##
  ## The column stored in the tuple is `lex.col` minus the length of `token`;
  ## `wsno` and `attrs` are stored in the tuple unchanged.
  let startCol = lex.col - token.len
  (kind, token, lex.line, startCol, lex.pos, wsno, attrs)
9093
proc handleAutoLink(lex: var MarkdownLexer, wsno: int): MarkdownTokenTuple =
  ## Consume a bare URL beginning at the lexer's current character and return
  ## it as an `mtkLink` token.
  ##
  ## The URL extends up to the next space, tab, newline, carriage return or
  ## NUL. A run of trailing punctuation (`? ! . , : * _ ~`) that sits directly
  ## before that terminator is left unconsumed, so sentence punctuation and
  ## closing emphasis delimiters are lexed separately instead of becoming part
  ## of the link target. Punctuation in the interior of the URL is kept.
  ##
  ## The returned token has empty text; `attrs` holds the URL twice
  ## (presumably target and label -- confirm against the parser).
  const
    urlEnd = {' ', '\t', '\n', '\r', '\0'}
    trailingPunct = {'?', '!', '.', ',', ':', '*', '_', '~'}
  var url = ""
  while lex.current notin urlEnd:
    if lex.current in trailingPunct:
      # Look past the whole punctuation run; if it is followed by the end of
      # the URL, the run is trailing and must not be part of the link.
      # NOTE(review): relies on lex.peek(n) returning the character n
      # positions ahead of the current one, as its other call sites suggest.
      var ahead = 1
      while lex.peek(ahead) in trailingPunct:
        inc ahead
      if lex.peek(ahead) in urlEnd:
        break
    url.add(lex.current)
    lex.advance()
  return newTokenTuple(lex, mtkLink, wsno = wsno, attrs = some(@[url, url]))
101+
proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTuple] =
  ## Lex a run of plain text, splitting it into `mtkText` tokens and `mtkLink`
  ## tokens for every `http://` or `https://` URL encountered along the way.
  ##
  ## Scanning stops at a line ending, NUL, or one of the characters
  ## `* _ [ ] ! ` <`. Tokens are returned in source order; the result is empty
  ## when the lexer already sits on a stop character.
  const stopChars = {'\n', '\r', '\0', '*', '_', '[', ']', '!', '`', '<'}
  var pendingText = ""
  while lex.current notin stopChars:
    # True when the input at the current position reads "http://" or
    # "https://".
    let atUrlScheme =
      lex.current == 'h' and lex.peek() == 't' and
      lex.peek(2) == 't' and lex.peek(3) == 'p' and
      ((lex.peek(4) == ':' and lex.peek(5) == '/' and lex.peek(6) == '/') or
       (lex.peek(4) == 's' and lex.peek(5) == ':' and
        lex.peek(6) == '/' and lex.peek(7) == '/'))
    if not atUrlScheme:
      pendingText.add(lex.current)
      lex.advance()
    else:
      # Emit any text gathered so far before the link so order is preserved.
      if pendingText.len > 0:
        result.add(newTokenTuple(lex, mtkText, pendingText, wsno = wsno))
        pendingText.setLen(0)
      result.add(lex.handleAutoLink(wsno))
  # Whatever text remains after the last link (or the whole run, if no link).
  if pendingText.len > 0:
    result.add(newTokenTuple(lex, mtkText, pendingText, wsno = wsno))
124+
91125proc nextToken * (lex: var MarkdownLexer ): MarkdownTokenTuple =
92126 # # Lex the next token from the input
93127 var wsno = 0
@@ -117,6 +151,12 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
117151
118152 # let startCol = wsno # not needed anymore
119153
154+ # Return buffered tokens if present
155+ if lex.pendingTokens.len > 0 :
156+ let tok = lex.pendingTokens[0 ]
157+ lex.pendingTokens = lex.pendingTokens[1 ..^ 1 ]
158+ return tok
159+
120160 case lex.current
121161 of '#' :
122162 # Headings (e.g., ## Heading 2)
@@ -179,6 +219,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
179219 lex.advance (); lex.advance () # skip both delimiters
180220 return newTokenTuple (lex, mtkStrong, wsno= wsno)
181221 else :
222+ lex.advance ();
182223 return newTokenTuple (lex, mtkEmphasis, wsno= wsno)
183224 else :
184225 return newTokenTuple (lex, mtkText, repeat (ch, count), wsno= wsno)
@@ -294,7 +335,34 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
294335 lex.advance ()
295336 return newTokenTuple (lex, mtkText, text, wsno= wsno)
296337 of '[' :
297- # Link or Checkbox
338+ # Link, Checkbox, or Footnote
339+ if lex.peek () == '^' :
340+ # Footnote reference or definition
341+ lex.advance () # skip '['
342+ lex.advance () # skip '^'
343+ lex.strbuf.setLen (0 )
344+ while lex.current != ']' and lex.current != '\0 ' :
345+ lex.strbuf.add (lex.current)
346+ lex.advance ()
347+ let footId = lex.strbuf
348+ if lex.current == ']' :
349+ lex.advance ()
350+ if lex.current == ':' and (lex.peek () == ' ' or lex.peek () == '\t ' ):
351+ # Footnote definition: [^id]: text
352+ lex.advance () # skip ':'
353+ while lex.current == ' ' or lex.current == '\t ' :
354+ lex.advance ()
355+ lex.strbuf.setLen (0 )
356+ while lex.current notin {'\n ' , '\r ' , '\0 ' }:
357+ lex.strbuf.add (lex.current)
358+ lex.advance ()
359+ return newTokenTuple (lex, mtkFootnoteDef,
360+ lex.strbuf.strip (), wsno= wsno, attrs= some (@ [footId]))
361+ else :
362+ # Footnote reference: [^id]
363+ return newTokenTuple (lex, mtkFootnoteRef, " " ,
364+ wsno= wsno, attrs= some (@ [footId]))
365+ # Regular link or checkbox
298366 lex.advance ()
299367 lex.strbuf.setLen (0 )
300368 while lex.current != ']' and lex.current != '\0 ' :
@@ -348,6 +416,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
348416 lex.advance (); lex.advance ()
349417 return newTokenTuple (lex, mtkStrong, wsno= wsno)
350418 else :
419+ lex.advance ();
351420 return newTokenTuple (lex, mtkEmphasis, wsno= wsno)
352421 of ' ' :
353422 # Line break (two or more spaces at end of line)
@@ -390,11 +459,10 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
390459 return newTokenTuple (lex, mtkTable, lex.strbuf, wsno= wsno)
391460 else :
392461 # Paragraph or plain text
393- lex.strbuf.setLen (0 )
394- # Stop at markdown delimiters
395- while lex.current notin {'\n ' , '\r ' , '\0 ' , '*' , '_' , '[' , ']' , '!' , '`' , '<' }:
396- lex.strbuf.add (lex.current)
397- lex.advance ()
398- if lex.strbuf.len > 0 :
399- return newTokenTuple (lex, mtkText, lex.strbuf, wsno= wsno)
462+ # Scan for auto links anywhere in the text
463+ let tokens = lex.scanTextWithLinks (wsno)
464+ if tokens.len > 0 :
465+ if tokens.len > 1 :
466+ lex.pendingTokens = tokens[1 ..^ 1 ]
467+ return tokens[0 ]
400468 return newTokenTuple (lex, mtkUnknown, wsno= wsno)
0 commit comments