Skip to content

Commit f6cf53b

Browse files
authored
clean markdown output (#24008)
- **site: remove front matter from generated md output**
- **site: add markdown output for samples pages**
- **hack: resolve links in the markdown output**

---------

Signed-off-by: David Karlsson <35727626+dvdksn@users.noreply.github.com>
2 parents 9103dff + d0128a8 commit f6cf53b

File tree

9 files changed

+252
-159
lines changed

9 files changed

+252
-159
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ ARG DOCS_URL="https://docs.docker.com"
5151
ENV HUGO_CACHEDIR="/tmp/hugo_cache"
5252
RUN --mount=type=cache,target=/tmp/hugo_cache \
5353
hugo --gc --minify -e $HUGO_ENV -b $DOCS_URL
54-
RUN ./hack/flatten-markdown.sh public
54+
RUN ./hack/flatten-and-resolve.js public
5555

5656
# lint lints markdown files
5757
FROM ghcr.io/igorshubovych/markdownlint-cli:v0.45.0 AS lint

hack/flatten-and-resolve.js

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Flattens markdown directory structure and resolves all links to absolute paths.
5+
*
6+
* This script:
7+
* 1. Moves index.md files up one level (ai/model-runner/index.md -> ai/model-runner.md)
8+
* 2. Fixes _index.md and index.md references in links
9+
* 3. Strips /manuals/ prefix from paths (Hugo config removes this)
10+
* 4. Resolves all relative links to absolute HTML paths for RAG ingestion
11+
*
12+
* Usage: node flatten-and-resolve.js [public-dir]
13+
*/
14+
15+
const fs = require('fs');
16+
const path = require('path');
17+
18+
// Root of the generated site: first CLI argument, defaulting to ./public.
const PUBLIC_DIR = path.resolve(process.argv[2] || 'public');

// Fail fast unless the target is an existing directory. existsSync() alone
// also accepts regular files, which would only surface later as an ENOTDIR
// exception thrown from readdirSync().
if (!fs.existsSync(PUBLIC_DIR) || !fs.statSync(PUBLIC_DIR).isDirectory()) {
  console.error(`Error: Directory ${PUBLIC_DIR} does not exist or is not a directory`);
  process.exit(1);
}
24+
25+
/**
 * Recursively collect every regular file below `dir` whose base name
 * satisfies `predicate`.
 *
 * @param {string} dir - Directory to walk.
 * @param {(name: string) => boolean} predicate - Called with the entry's base
 *   name (not its full path); return true to include the file.
 * @returns {string[]} Matching paths, each built by joining `dir` with the
 *   entry's relative location, in depth-first readdir order.
 */
function findFiles(dir, predicate) {
  const results = [];

  // One accumulator is shared by the whole walk. The previous
  // `results.push(...findFiles(sub))` spread an entire subtree into call
  // arguments, which can throw a RangeError once a subtree gets very large.
  const walk = (current) => {
    for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
      const fullPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile() && predicate(entry.name)) {
        results.push(fullPath);
      }
    }
  };

  walk(dir);
  return results;
}
43+
44+
/**
 * Step 1: Flatten index.md files
 * Move path/to/section/index.md -> path/to/section.md
 *
 * The page moves up one directory, so every link that was relative to
 * `section/` is re-based before the move (e.g. "get-started.md" ->
 * "section/get-started.md", "../other.md" -> "other.md"). The root index.md
 * is left where it is.
 *
 * @returns {number} Number of index.md files that were moved.
 */
function flattenIndexFiles() {
  const rootIndex = path.join(PUBLIC_DIR, 'index.md');
  const indexFiles = findFiles(PUBLIC_DIR, name => name === 'index.md');
  let count = 0;

  for (const file of indexFiles) {
    // Skip root index.md
    if (file === rootIndex) {
      continue;
    }

    const dir = path.dirname(file);
    const dirname = path.basename(dir);

    // Rewrite links while the file is still at its old location, then move it.
    const content = rebaseLinksForFlatten(fs.readFileSync(file, 'utf8'), dirname);
    fs.writeFileSync(file, content, 'utf8');

    // Move file up one level
    fs.renameSync(file, path.join(path.dirname(dir), `${dirname}.md`));

    count++;
  }

  console.log(`Flattened ${count} index.md files`);
  return count;
}

/**
 * Re-base the relative links of a page that is about to move from
 * `<dirname>/index.md` to `<dirname>.md`.
 *
 * @param {string} content - Markdown source of the index page.
 * @param {string} dirname - Base name of the directory the page is leaving.
 * @returns {string} Content with inline and reference-style links re-based.
 */
function rebaseLinksForFlatten(content, dirname) {
  const rebase = (dest) => {
    // Only plain relative paths are touched. Absolute paths ("/x"), anchors
    // ("#x"), URLs with a scheme ("https:", "mailto:", ...) and anything
    // starting with an unexpected character are returned unchanged.
    const looksRelative =
      /^[\w.-]/.test(dest) && !/^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(dest);
    if (!looksRelative) {
      return dest;
    }

    let target = dest;
    while (target.startsWith('./')) {
      target = target.slice(2);
    }
    // "<dirname>/.." is the page's new directory, so one leading "../"
    // simply cancels out against the directory being left.
    if (target === '..') {
      return '.';
    }
    if (target.startsWith('../')) {
      return target.slice(3) || './';
    }
    return `${dirname}/${target}`;
  };

  // Inline links: [text](dest) or [text](dest "title"). The optional title is
  // captured separately so it is never mistaken for part of the path.
  const inlineRebased = content.replace(
    /\[([^\]]+)\]\(([^)\s]+)([^)]*)\)/g,
    (match, text, dest, rest) => `[${text}](${rebase(dest)}${rest})`
  );

  // Reference-style definitions: [ref]: dest or [ref]: dest "title".
  // Footnote definitions ([^1]: ...) and lines carrying more than a
  // destination plus optional title are prose, not links, and are skipped.
  return inlineRebased.replace(
    /^\[(?!\^)([^\]]+)\]:[ \t]+(\S+)([ \t]+(?:"[^"]*"|'[^']*'|\([^)]*\)))?[ \t]*$/gm,
    (match, ref, dest, title) => `[${ref}]: ${rebase(dest)}${title || ''}`
  );
}
98+
99+
/**
 * Step 2: Fix _index.md and index.md references in all files
 * Also strip /manuals/ prefix from paths
 *
 * Every path-like token ending in `_index.md` or `index.md` is rewritten to
 * the flattened file produced by step 1 (`dir/_index.md` -> `dir.md`,
 * `_index.md` -> `../<dirname>.md`, `../_index.md` -> `../../<parent>.md`).
 * Tokens that are part of a URL with a scheme, or of a longer file name such
 * as `myindex.md`, are left alone.
 *
 * @returns {number} Number of files whose content changed.
 */
function fixIndexReferences() {
  const mdFiles = findFiles(PUBLIC_DIR, name => name.endsWith('.md'));
  let count = 0;

  // Group 1: boundary character (or line start) proving we are at the start
  // of a path token and not inside a URL or a longer name.
  // Group 2: directory prefix, zero or more "segment/" parts.
  const indexLink = /(^|[^\w.\/:-])((?:[\w.-]*\/)*)_?index\.md(?!\w)/gm;

  for (const file of mdFiles) {
    const dir = path.dirname(file);
    const original = fs.readFileSync(file, 'utf8');

    let content = original.replace(
      indexLink,
      (match, lead, prefix) => lead + flattenedIndexTarget(prefix, dir)
    );

    // Strip /manuals/ prefix (both /manuals/ and manuals/). This applies to
    // every occurrence in the file, not only to link destinations.
    content = content.replace(/\/?manuals\//g, '/');

    if (content !== original) {
      fs.writeFileSync(file, content, 'utf8');
      count++;
    }
  }

  console.log(`Fixed _index.md references in ${count} files`);
  return count;
}

/**
 * Compute the replacement for a link to `<prefix>_index.md` / `<prefix>index.md`.
 *
 * @param {string} prefix - Directory part of the link including its trailing
 *   slash ("" for a bare index link, "../", "a/b/", "/a/b/", ...).
 * @param {string} dir - Absolute directory of the file containing the link.
 * @returns {string} Link to the flattened section file, or "/" when the link
 *   designates the site root (whose index.md is never flattened).
 */
function flattenedIndexTarget(prefix, dir) {
  const segments = prefix.split('/'); // "a/b/" -> ['a', 'b', '']
  const last = segments.length > 1 ? segments[segments.length - 2] : '';

  // The prefix names the section directly: "a/b/_index.md" -> "a/b.md".
  if (last !== '' && last !== '.' && last !== '..') {
    return `${prefix.slice(0, -1)}.md`;
  }

  // Otherwise the section is only known relative to the file ("", "./",
  // "../", ...), so resolve it on disk to learn its name.
  const isAbsolute = prefix.startsWith('/');
  const sectionDir = isAbsolute
    ? path.resolve(PUBLIC_DIR, `.${prefix}`)
    : path.resolve(dir, prefix);

  const fromRoot = path.relative(PUBLIC_DIR, sectionDir);
  if (fromRoot === '' || fromRoot === '..' || fromRoot.startsWith(`..${path.sep}`)) {
    return '/';
  }
  if (isAbsolute) {
    return `/${fromRoot.split(path.sep).join('/')}.md`;
  }

  // The flattened file lives next to the section directory, one level above it.
  const up = path.relative(dir, path.dirname(sectionDir)).split(path.sep).join('/');
  const name = `${path.basename(sectionDir)}.md`;
  return up ? `${up}/${name}` : name;
}
139+
140+
/**
 * Step 3: Resolve all relative links to absolute HTML paths
 *
 * Rewrites the destination of every inline link and reference-style link
 * definition in every .md file through resolveLinkPath(). Link titles are
 * preserved, and footnote definitions are not treated as links.
 *
 * @returns {number} Number of files whose content changed.
 */
function resolveLinks() {
  const mdFiles = findFiles(PUBLIC_DIR, name => name.endsWith('.md'));
  let count = 0;

  for (const file of mdFiles) {
    const original = fs.readFileSync(file, 'utf8');

    // Process inline links: [text](path) or [text](path "title").
    // Only the destination is resolved; an optional title is carried over
    // verbatim instead of being resolved as if it were part of the path.
    let content = original.replace(
      /\[([^\]]+)\]\(([^)\s]+)([^)]*)\)/g,
      (match, text, dest, rest) => `[${text}](${resolveLinkPath(dest, file)}${rest})`
    );

    // Process reference-style links: [ref]: path or [ref]: path "title".
    // Footnote definitions ([^1]: ...) and lines with more than a destination
    // plus optional title are prose and must not be turned into paths.
    content = content.replace(
      /^\[(?!\^)([^\]]+)\]:[ \t]+(\S+)([ \t]+(?:"[^"]*"|'[^']*'|\([^)]*\)))?[ \t]*$/gm,
      (match, ref, dest, title) => `[${ref}]: ${resolveLinkPath(dest, file)}${title || ''}`
    );

    if (content !== original) {
      fs.writeFileSync(file, content, 'utf8');
      count++;
    }
  }

  console.log(`Resolved links in ${count} files`);
  return count;
}
172+
173+
/**
 * Resolve a link path to absolute HTML format
 *
 * @param {string} linkPath - Link destination as written in the markdown file.
 * @param {string} currentFile - Absolute path of the file containing the link;
 *   relative links are resolved against its directory.
 * @returns {string} Site-absolute URL path (via toHtmlPath) with any
 *   `?query` / `#anchor` suffix re-attached. Same-page anchors, URLs with a
 *   scheme and protocol-relative URLs are returned unchanged.
 */
function resolveLinkPath(linkPath, currentFile) {
  // Skip same-page anchors, anything with a URI scheme (http:, https:,
  // mailto:, tel:, ftp:, ...) and protocol-relative URLs (//host/path).
  if (
    linkPath.startsWith('#') ||
    linkPath.startsWith('//') ||
    /^[a-z][a-z0-9+.-]*:/i.test(linkPath)
  ) {
    return linkPath;
  }

  // Split the path from its query/anchor suffix so a trailing ".md" is still
  // recognised in links such as "page.md?x=1#section".
  const suffixIndex = linkPath.search(/[?#]/);
  const pathPart = suffixIndex >= 0 ? linkPath.slice(0, suffixIndex) : linkPath;
  const suffix = suffixIndex >= 0 ? linkPath.slice(suffixIndex) : '';

  if (!pathPart) {
    // Just a query or an anchor
    return linkPath;
  }

  // Handle absolute paths - just convert to HTML format
  if (pathPart.startsWith('/')) {
    return toHtmlPath(pathPart) + suffix;
  }

  // Resolve relative path to absolute
  const absolutePath = path.resolve(path.dirname(currentFile), pathPart);
  const relativePath = path.relative(PUBLIC_DIR, absolutePath);

  // Convert to URL path (forward slashes). Normalizing the rooted path drops
  // any ".." that would climb above the site root, the way URL resolution
  // does, instead of emitting "/../page/".
  const urlPath = path.posix.normalize('/' + relativePath.split(path.sep).join('/'));

  return toHtmlPath(urlPath) + suffix;
}
213+
214+
/**
 * Map a markdown path onto the URL of its rendered page.
 *
 * A path ending in ".md" has that suffix replaced by a trailing "/";
 * any other path is handed back unchanged.
 *
 * @param {string} urlPath - Path to convert.
 * @returns {string} The converted path.
 */
function toHtmlPath(urlPath) {
  const markdownSuffix = '.md';
  const isMarkdown = urlPath.endsWith(markdownSuffix);

  return isMarkdown
    ? `${urlPath.slice(0, -markdownSuffix.length)}/`
    : urlPath;
}
223+
224+
// Main execution: run the three passes in order, then print a summary.
console.log('Starting markdown flattening and link resolution...');
console.log('');

// Order matters: files are moved first, index references are fixed against
// the new layout, and only then are links made absolute.
const flattenCount = flattenIndexFiles();
const fixCount = fixIndexReferences();
const resolveCount = resolveLinks();

const report = [
  '',
  'Done!',
  `- Flattened: ${flattenCount} files`,
  `- Fixed references: ${fixCount} files`,
  `- Resolved links: ${resolveCount} files`,
];
for (const line of report) {
  console.log(line);
}

hack/flatten-markdown.sh

Lines changed: 0 additions & 22 deletions
This file was deleted.

layouts/_default/api.markdown.md

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,5 @@
1-
---
2-
title: {{ .Title }}
3-
url: {{ .Permalink }}
4-
{{- range .Ancestors }}
5-
{{- if and (not .IsHome) .Permalink }}
6-
parent:
7-
title: {{ .Title }}
8-
url: {{ .Permalink }}
9-
{{- break }}
10-
{{- end }}
11-
{{- end }}
12-
{{- if .Ancestors }}
13-
breadcrumbs:
14-
{{- range .Ancestors.Reverse }}
15-
{{- if and (not .IsHome) .Permalink }}
16-
- title: {{ .Title }}
17-
url: {{ .Permalink }}
18-
{{- end }}
19-
{{- end }}
20-
- title: {{ .Title }}
21-
url: {{ .Permalink }}
22-
{{- end }}
23-
{{- with .NextInSection }}
24-
next:
25-
title: {{ .Title }}
26-
url: {{ .Permalink }}
27-
{{- end }}
28-
{{- with .PrevInSection }}
29-
prev:
30-
title: {{ .Title }}
31-
url: {{ .Permalink }}
32-
{{- end }}
33-
{{- $specURL := urls.Parse (printf "/%s%s.yaml" .File.Dir .File.ContentBaseName) }}
34-
openapi_spec: {{ $specURL.String | absURL }}
35-
---
1+
{{- $specURL := urls.Parse (printf "/%s%s.yaml" .File.Dir .File.ContentBaseName) -}}
2+
# {{ .Title }}
363

374
{{ .Content }}
385

layouts/_default/cli.markdown.md

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,7 @@
44
{{- else }}
55
{{- $data = index site.Data .Params.datafile }}
66
{{- end -}}
7-
---
8-
title: {{ .Title }}
9-
url: {{ .Permalink }}
10-
{{- range .Ancestors }}
11-
{{- if and (not .IsHome) .Permalink }}
12-
parent:
13-
title: {{ .Title }}
14-
url: {{ .Permalink }}
15-
{{- break }}
16-
{{- end }}
17-
{{- end }}
18-
{{- if .Ancestors }}
19-
breadcrumbs:
20-
{{- range .Ancestors.Reverse }}
21-
{{- if and (not .IsHome) .Permalink }}
22-
- title: {{ .Title }}
23-
url: {{ .Permalink }}
24-
{{- end }}
25-
{{- end }}
26-
- title: {{ .Title }}
27-
url: {{ .Permalink }}
28-
{{- end }}
29-
{{- with .NextInSection }}
30-
next:
31-
title: {{ .Title }}
32-
url: {{ .Permalink }}
33-
{{- end }}
34-
{{- with .PrevInSection }}
35-
prev:
36-
title: {{ .Title }}
37-
url: {{ .Permalink }}
38-
{{- end }}
39-
---
7+
# {{ .Title }}
408

419
{{ with $data.short }}**Description:** {{ . }}{{ end }}
4210

layouts/_default/list.markdown.md

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,3 @@
1-
---
2-
title: {{ .Title }}
3-
url: {{ .Permalink }}
4-
{{- range .Ancestors }}
5-
{{- if and (not .IsHome) .Permalink }}
6-
parent:
7-
title: {{ .Title }}
8-
url: {{ .Permalink }}
9-
{{- break }}
10-
{{- end }}
11-
{{- end }}
12-
{{- if .Ancestors }}
13-
breadcrumbs:
14-
{{- range .Ancestors.Reverse }}
15-
{{- if and (not .IsHome) .Permalink }}
16-
- title: {{ .Title }}
17-
url: {{ .Permalink }}
18-
{{- end }}
19-
{{- end }}
20-
- title: {{ .Title }}
21-
url: {{ .Permalink }}
22-
{{- end }}
23-
{{- $children := where .Pages "Permalink" "ne" "" }}
24-
{{- if $children }}
25-
children:
26-
{{- range $children }}
27-
- title: {{ .Title }}
28-
url: {{ .Permalink }}
29-
{{- with .Description }}
30-
description: {{ . }}
31-
{{- end }}
32-
{{- end }}
33-
{{- end }}
34-
---
1+
# {{ .Title }}
352

363
{{ .RenderShortcodes }}

0 commit comments

Comments
 (0)