Skip to content

Commit ca249d2

Browse files
committed
Handle footnotes and automatic URL linking
Signed-off-by: George Lemon <georgelemon@protonmail.com>
1 parent 8dd49bb commit ca249d2

File tree

3 files changed

+157
-11
lines changed

3 files changed

+157
-11
lines changed

src/marvdown/ast.nim

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ type
2727
mdkHtml, # Raw HTML content
2828
mdkTable, # Table
2929
mdkParagraph, # Paragraph
30+
mdkFootnoteDef, # Footnote definition
31+
mdkFootnoteRef, # Footnote reference
3032
mdkDocument, # Root document node
3133
mdkUnknown # Unknown or unsupported node
3234

@@ -82,6 +84,12 @@ type
8284
## Table rows
8385
of mdkUnknown:
8486
info*: string # For unknown or unsupported nodes
87+
of mdkFootnoteRef:
88+
footnoteRefId*: string
89+
## Identifier for the footnote reference
90+
of mdkFootnoteDef:
91+
footnoteId*: string
92+
## Identifier for the footnote definition
8593
else: discard
8694
children*: MarkdownNodeList
8795
## Child nodes (for container nodes)

src/marvdown/lexer.nim

Lines changed: 76 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ type
2727
mtkHtml, # Raw HTML content
2828
mtkTable, # Table
2929
mtkParagraph, # Paragraph
30+
mtkFootnoteRef, # Footnote reference
31+
mtkFootnoteDef, # Footnote definition
3032
mtkDocument, # Root document node
3133
mtkUnknown # Unknown or unsupported token
3234
mtkEOF # End of file/input
@@ -45,6 +47,7 @@ type
4547
current*: char
4648
pos*, line*, col*: int
4749
strbuf*: string
50+
pendingTokens: seq[MarkdownTokenTuple] # Buffer for tokens split from text
4851

4952
#
5053
# Markdown Lexer
@@ -88,6 +91,37 @@ proc initToken(lex: var MarkdownLexer, kind: MarkdownTokenKind, value: sink stri
8891
proc newTokenTuple(lex: MarkdownLexer, kind: MarkdownTokenKind, token: string = "", wsno: int = 0, attrs: Option[seq[string]] = none(seq[string])): MarkdownTokenTuple =
8992
(kind, token, lex.line, lex.col - token.len, lex.pos, wsno, attrs)
9093

94+
proc handleAutoLink(lex: var MarkdownLexer, wsno: int): MarkdownTokenTuple =
  ## Consume a bare URL starting at the lexer's current character and return
  ## it as an `mtkLink` token.
  ##
  ## Characters are collected until a space, tab, newline, carriage return or
  ## NUL is reached; the lexer is left positioned on that terminating
  ## character. The token text itself is left empty and the collected URL is
  ## stored twice in `attrs`.
  ## NOTE(review): the two identical `attrs` entries are presumably the link
  ## target and its label -- confirm against the parser's `mtkLink` handling.
  ##
  ## `wsno` is forwarded unchanged to the produced token.
  var url = ""
  while lex.current notin {' ', '\t', '\n', '\r', '\0'}:
    url.add(lex.current)
    lex.advance()
  newTokenTuple(lex, mtkLink, wsno = wsno, attrs = some(@[url, url]))
101+
102+
proc scanTextWithLinks(lex: var MarkdownLexer, wsno: int): seq[MarkdownTokenTuple] =
  ## Lex a run of plain text, splitting out every bare `http://` or
  ## `https://` URL as its own `mtkLink` token.
  ##
  ## Scanning stops (without consuming the character) at a line break, NUL,
  ## or one of the inline delimiter characters `*`, `_`, `[`, `]`, `!`,
  ## backtick or `<`. The returned sequence holds `mtkText` and `mtkLink`
  ## tokens in source order; it is empty when the lexer already sits on a
  ## stop character. Every produced token receives the same `wsno`.
  const stopChars = {'\n', '\r', '\0', '*', '_', '[', ']', '!', '`', '<'}
  var text = ""
  while lex.current notin stopChars:
    let looksLikeScheme =
      lex.current == 'h' and lex.peek() == 't' and
      lex.peek(2) == 't' and lex.peek(3) == 'p'
    if looksLikeScheme:
      let plain =
        lex.peek(4) == ':' and lex.peek(5) == '/' and lex.peek(6) == '/'
      let secure =
        lex.peek(4) == 's' and lex.peek(5) == ':' and
        lex.peek(6) == '/' and lex.peek(7) == '/'
      if plain or secure:
        # Emit the text gathered so far before the link token
        if text.len > 0:
          result.add(newTokenTuple(lex, mtkText, text, wsno = wsno))
          text.setLen(0)
        # handleAutoLink advances the lexer past the URL itself
        result.add(lex.handleAutoLink(wsno))
        continue
    text.add(lex.current)
    lex.advance()
  # Trailing text after the last link (or the whole run when no link found)
  if text.len > 0:
    result.add(newTokenTuple(lex, mtkText, text, wsno = wsno))
124+
91125
proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
92126
## Lex the next token from the input
93127
var wsno = 0
@@ -117,6 +151,12 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
117151

118152
# let startCol = wsno # not needed anymore
119153

154+
# Return buffered tokens if present
155+
if lex.pendingTokens.len > 0:
156+
let tok = lex.pendingTokens[0]
157+
lex.pendingTokens = lex.pendingTokens[1..^1]
158+
return tok
159+
120160
case lex.current
121161
of '#':
122162
# Headings (e.g., ## Heading 2)
@@ -179,6 +219,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
179219
lex.advance(); lex.advance() # skip both delimiters
180220
return newTokenTuple(lex, mtkStrong, wsno=wsno)
181221
else:
222+
lex.advance();
182223
return newTokenTuple(lex, mtkEmphasis, wsno=wsno)
183224
else:
184225
return newTokenTuple(lex, mtkText, repeat(ch, count), wsno=wsno)
@@ -294,7 +335,34 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
294335
lex.advance()
295336
return newTokenTuple(lex, mtkText, text, wsno=wsno)
296337
of '[':
297-
# Link or Checkbox
338+
# Link, Checkbox, or Footnote
339+
if lex.peek() == '^':
340+
# Footnote reference or definition
341+
lex.advance() # skip '['
342+
lex.advance() # skip '^'
343+
lex.strbuf.setLen(0)
344+
while lex.current != ']' and lex.current != '\0':
345+
lex.strbuf.add(lex.current)
346+
lex.advance()
347+
let footId = lex.strbuf
348+
if lex.current == ']':
349+
lex.advance()
350+
if lex.current == ':' and (lex.peek() == ' ' or lex.peek() == '\t'):
351+
# Footnote definition: [^id]: text
352+
lex.advance() # skip ':'
353+
while lex.current == ' ' or lex.current == '\t':
354+
lex.advance()
355+
lex.strbuf.setLen(0)
356+
while lex.current notin {'\n', '\r', '\0'}:
357+
lex.strbuf.add(lex.current)
358+
lex.advance()
359+
return newTokenTuple(lex, mtkFootnoteDef,
360+
lex.strbuf.strip(), wsno=wsno, attrs=some(@[footId]))
361+
else:
362+
# Footnote reference: [^id]
363+
return newTokenTuple(lex, mtkFootnoteRef, "",
364+
wsno=wsno, attrs=some(@[footId]))
365+
# Regular link or checkbox
298366
lex.advance()
299367
lex.strbuf.setLen(0)
300368
while lex.current != ']' and lex.current != '\0':
@@ -348,6 +416,7 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
348416
lex.advance(); lex.advance()
349417
return newTokenTuple(lex, mtkStrong, wsno=wsno)
350418
else:
419+
lex.advance();
351420
return newTokenTuple(lex, mtkEmphasis, wsno=wsno)
352421
of ' ':
353422
# Line break (two or more spaces at end of line)
@@ -390,11 +459,10 @@ proc nextToken*(lex: var MarkdownLexer): MarkdownTokenTuple =
390459
return newTokenTuple(lex, mtkTable, lex.strbuf, wsno=wsno)
391460
else:
392461
# Paragraph or plain text
393-
lex.strbuf.setLen(0)
394-
# Stop at markdown delimiters
395-
while lex.current notin {'\n', '\r', '\0', '*', '_', '[', ']', '!', '`', '<'}:
396-
lex.strbuf.add(lex.current)
397-
lex.advance()
398-
if lex.strbuf.len > 0:
399-
return newTokenTuple(lex, mtkText, lex.strbuf, wsno=wsno)
462+
# Scan for auto links anywhere in the text
463+
let tokens = lex.scanTextWithLinks(wsno)
464+
if tokens.len > 0:
465+
if tokens.len > 1:
466+
lex.pendingTokens = tokens[1..^1]
467+
return tokens[0]
400468
return newTokenTuple(lex, mtkUnknown, wsno=wsno)

src/marvdown/parser.nim

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ type
3131
## Internal: Counter for generating unique selectors
3232
ast*: seq[MarkdownNode]
3333
## The abstract syntax tree (AST) of the parsed markdown document
34+
footnotes: OrderedTableRef[string, MarkdownNode]
35+
## Footnote definitions parsed from the document
36+
footnotesHtml*: string
37+
## Generated HTML for footnotes at the end of the document
3438

3539
TagType* = enum
3640
tagNone, # No tags allowed
@@ -54,6 +58,10 @@ type
5458
## For allowing use of `style` attribute, enable `allowInlineStyle`.
5559
enableAnchors*: bool
5660
## Enable anchor generation in title blocks (enabled by default)
61+
anchorIcon*: string = "🔗"
62+
## Icon used for anchor links in headings
63+
showFootnotes*: bool = true
64+
## Insert footnotes HTML at the end of the document (default: true)
5765

5866
#
5967
# forward declarations
@@ -503,6 +511,26 @@ let defaultOptions = MarkdownOptions(
503511
enableAnchors: true
504512
)
505513

514+
proc parseFootnoteDef(md: var Markdown): MarkdownNode =
  ## Build an `mdkFootnoteDef` node from the current token and register it
  ## on the Markdown instance.
  ##
  ## The footnote id is the first entry of the token's `attrs`. The token
  ## text, stripped of surrounding whitespace, is parsed as inline markdown
  ## and the resulting nodes become the children of the returned node.
  ## The node is also stored in `md.footnotes` under its id (the table is
  ## created on first use), so a later definition with the same id replaces
  ## the earlier table entry. This proc itself does not call `md.advance()`.
  let
    footId = md.parser.curr.attrs.get()[0]
    body = md.parser.curr.token.strip()
  result = MarkdownNode(
    kind: mdkFootnoteDef,
    footnoteId: footId,
    children: MarkdownNodeList(),
    line: md.parser.curr.line,
    wsno: md.parser.curr.wsno
  )
  # Inline content of the definition becomes the node's children
  for inlineNode in md.parseInline(body):
    result.children.items.add(inlineNode)

  # Lazily create the footnote table, then record this definition
  if md.footnotes == nil:
    md.footnotes = newOrderedTable[string, MarkdownNode]()
  md.footnotes[footId] = result
533+
506534
proc parseMarkdown(md: var Markdown, currentParagraph: var MarkdownNode) =
507535
while md.parser.curr.kind != mtkEOF:
508536
let curr = md.parser.curr
@@ -619,6 +647,22 @@ proc parseMarkdown(md: var Markdown, currentParagraph: var MarkdownNode) =
619647
closeCurrentParagraph()
620648
let bqNode = md.parseBlockquote()
621649
md.ast.add(bqNode)
650+
of mtkFootnoteRef:
651+
withCurrentParagraph do:
652+
let id = curr.attrs.get()[0]
653+
let fnNode = MarkdownNode(
654+
kind: mdkFootnoteRef,
655+
footnoteRefId: id,
656+
line: curr.line,
657+
wsno: curr.wsno
658+
)
659+
currentParagraph.children.items.add(fnNode)
660+
md.advance()
661+
of mtkFootnoteDef:
662+
closeCurrentParagraph() # close any open paragraph
663+
let node = md.parseFootnoteDef()
664+
md.ast.add(node)
665+
md.advance()
622666
else:
623667
closeCurrentParagraph()
624668
md.advance()
@@ -645,14 +689,24 @@ proc toHtml*(md: var Markdown): string =
645689
## Convert the parsed Markdown AST to HTML
646690
for node in md.ast:
647691
add result, md.renderNode(node)
692+
if md.opts.showFootnotes and md.footnotesHtml.len > 0:
693+
add result, "<hr><div class=\"footnotes\">" & md.footnotesHtml & "</div>"
648694

649695
proc getSelectors*(md: Markdown): OrderedTableRef[string, string] =
  ## Return the table of headline selectors (anchors) held by `md`.
  ## The stored reference is returned as-is, without copying.
  result = md.selectors
652698

653699
proc hasSelectors*(md: Markdown): bool =
  ## Report whether `md` holds at least one headline selector (anchor).
  ## Returns `false` when the selector table is nil.
  if md.selectors.isNil: false
  else: md.selectors.len > 0
702+
703+
proc getFootnotes*(md: Markdown): OrderedTableRef[string, MarkdownNode] =
  ## Return the table of footnote definitions held by `md`, keyed by
  ## footnote id. The stored reference is returned as-is and may be nil.
  result = md.footnotes
706+
707+
proc hasFootnotes*(md: Markdown): bool =
  ## Report whether `md` holds at least one footnote definition.
  ## Returns `false` when the footnote table is nil.
  if md.footnotes.isNil: false
  else: md.footnotes.len > 0
656710

657711
proc getTitle*(md: Markdown): string =
658712
## Retrieve the first heading as the document title
@@ -685,7 +739,11 @@ proc renderNode(md: var Markdown, node: MarkdownNode): string =
685739
var linkContent = ""
686740
for child in node.children.items:
687741
linkContent.add(md.renderNode(child))
688-
result = a(href=node.linkHref, title=node.linkTitle, linkContent)
742+
result =
743+
if node.linkTitle.len > 0:
744+
a(href=node.linkHref, title=node.linkTitle, linkContent)
745+
else:
746+
a(href=node.linkHref, linkContent)
689747
of mdkImage:
690748
result = img(src=node.imageSrc, alt=node.imageAlt, title=node.imageTitle)
691749
of mdkList:
@@ -719,7 +777,9 @@ proc renderNode(md: var Markdown, node: MarkdownNode): string =
719777
else: # first occurrence
720778
md.selectorCounter[anchor] = 1
721779
md.selectors[anchor] = anchor
722-
let anchorlink = a(href="#" & anchor, `class`="anchor-link", "🔗")
780+
let anchorlink =
781+
a(href="#" & anchor, `class`="anchor-link",
782+
md.opts.anchorIcon)
723783
add result,
724784
case node.level
725785
of 1: h1(id=anchor, anchorlink, innerContent)
@@ -768,5 +828,15 @@ proc renderNode(md: var Markdown, node: MarkdownNode): string =
768828
for child in node.children.items:
769829
bqContent.add(md.renderNode(child))
770830
result = "<blockquote>" & bqContent & "</blockquote>"
831+
of mdkFootnoteRef:
832+
# Footnote reference rendering
833+
result = "<sup class=\"footnote-ref\"><a href=\"#fn-" & node.footnoteRefId & "\">" & node.footnoteRefId & "</a></sup>"
834+
of mdkFootnoteDef:
835+
# Footnote definitions produce no output in place: their HTML is appended to md.footnotesHtml, which toHtml emits at the end of the document
836+
var fnContent = ""
837+
for child in node.children.items:
838+
fnContent.add(md.renderNode(child))
839+
md.footnotesHtml.add("<div class=\"footnote\" id=\"fn-" & node.footnoteId & "\">" &
840+
"<sup>" & node.footnoteId & "</sup> " & fnContent & "</div>")
771841
else:
772842
discard

0 commit comments

Comments
 (0)