-
Notifications
You must be signed in to change notification settings - Fork 5
feat: add sitemap.xml and robots.txt for SEO #55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
alltheseas
wants to merge
6
commits into
damus-io:master
Choose a base branch
from
alltheseas:feat/sitemap-seo
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
fe16378
feat: add sitemap.xml and robots.txt for SEO
alltheseas 29b0ba7
feat: add prometheus metrics for sitemap generation
alltheseas a71ff71
fix: address sitemap code review findings
alltheseas 08ebfa5
fix: cache base URL and use longer lookback for articles
alltheseas e366c00
fix: normalize base url and allow nostr.json
alltheseas 34db03a
refactor: flatten nested conditionals with guard clauses
alltheseas File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,356 @@ | ||
| //! Sitemap generation for SEO | ||
| //! | ||
| //! Generates XML sitemaps from cached events in nostrdb to help search engines | ||
| //! discover and index Nostr content rendered by notecrumbs. | ||
|
|
||
| use nostr_sdk::ToBech32; | ||
| use nostrdb::{Filter, Ndb, Transaction}; | ||
| use std::fmt::Write; | ||
| use std::sync::OnceLock; | ||
| use std::time::Instant; | ||
|
|
||
/// Result cap applied to each sitemap query (notes, articles, profiles).
///
/// Kept well below the XML sitemap standard limit of 50,000 URLs per file.
const MAX_SITEMAP_URLS: u64 = 10_000;

/// How far back (in days) short text notes are considered; notes are timely
/// content, so the window is short.
const NOTES_LOOKBACK_DAYS: u64 = 90;

/// How far back (in days) long-form articles are considered; articles are
/// evergreen content, so the window is longer.
const ARTICLES_LOOKBACK_DAYS: u64 = 365;

/// Base URL cache, filled in on first access and reused afterwards.
static BASE_URL: OnceLock<String> = OnceLock::new();
|
|
||
| /// Get the base URL from environment or default | ||
| /// Logs a warning once if not explicitly configured | ||
| fn get_base_url() -> &'static str { | ||
| BASE_URL.get_or_init(|| { | ||
| let url = match std::env::var("NOTECRUMBS_BASE_URL") { | ||
| Ok(url) => url, | ||
| Err(_) => { | ||
| tracing::warn!( | ||
| "NOTECRUMBS_BASE_URL not set, defaulting to https://damus.io - \ | ||
| sitemap/robots.txt may point to wrong domain" | ||
| ); | ||
| "https://damus.io".to_string() | ||
| } | ||
| }; | ||
| normalize_base_url(&url) | ||
| }) | ||
| } | ||
|
|
||
/// Return `url` as an owned string with every trailing `/` removed.
fn normalize_base_url(url: &str) -> String {
    let without_trailing_slashes = url.trim_end_matches('/');
    String::from(without_trailing_slashes)
}
|
|
||
/// Calculate the Unix timestamp (in seconds) for `days` days before now.
///
/// The result saturates at `0` when the lookback reaches further back than
/// the Unix epoch. A system clock set before the epoch is treated as the
/// epoch itself.
fn days_ago(days: u64) -> u64 {
    const SECS_PER_DAY: u64 = 24 * 60 * 60;

    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // `saturating_mul` keeps a very large `days` from overflowing (which
    // would panic in debug builds and wrap in release builds).
    now.saturating_sub(days.saturating_mul(SECS_PER_DAY))
}
|
|
||
/// Escape the five characters that have special meaning in XML.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by their predefined entities
/// (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`); every other character is
/// copied through unchanged. The input is not inspected for existing
/// entities, so already-escaped text is escaped again.
fn xml_escape(s: &str) -> String {
    // Escaping only ever grows the string, so the input length is a lower
    // bound for the capacity needed.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
|
||
/// Format a Unix timestamp (seconds, UTC) as an ISO 8601 date (`YYYY-MM-DD`).
///
/// Timestamps come from stored events and are therefore untrusted: values
/// beyond 9999-12-31 are clamped to that date so the function never panics
/// and the year always has exactly four digits.
fn format_lastmod(timestamp: u64) -> String {
    const SECS_PER_DAY: u64 = 86_400;
    /// 9999-12-31T23:59:59Z, the last instant with a four-digit year.
    const MAX_TIMESTAMP: u64 = 253_402_300_799;

    // Simple date formatting without external dependencies. All arithmetic
    // stays in `u64`, so there is no narrowing cast that could go negative.
    let mut remaining_days = timestamp.min(MAX_TIMESTAMP) / SECS_PER_DAY;

    // Peel off whole years starting from 1970.
    let mut year = 1970i32;
    loop {
        let days_in_year: u64 = if is_leap_year(year) { 366 } else { 365 };
        if remaining_days < days_in_year {
            break;
        }
        remaining_days -= days_in_year;
        year += 1;
    }

    // Peel off whole months within the final year.
    let february: u64 = if is_leap_year(year) { 29 } else { 28 };
    let days_in_months: [u64; 12] = [31, february, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

    let mut month = 1u32;
    for days in days_in_months {
        if remaining_days < days {
            break;
        }
        remaining_days -= days;
        month += 1;
    }

    // What is left is the zero-based day within the month.
    let day = remaining_days + 1;

    format!("{:04}-{:02}-{:02}", year, month, day)
}

/// Return `true` if `year` is a leap year in the Gregorian calendar:
/// divisible by 4 and not by 100, or divisible by 400.
fn is_leap_year(year: i32) -> bool {
    (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}
|
|
||
/// One `<url>` entry in the sitemap.
///
/// Every field is written into the XML output verbatim, so values must
/// already be XML-safe when the entry is constructed.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SitemapEntry {
    /// Absolute URL of the page (`<loc>`).
    loc: String,
    /// Last modification date as `YYYY-MM-DD` (`<lastmod>`).
    lastmod: String,
    /// Priority hint written to `<priority>`, e.g. `"0.8"`.
    priority: &'static str,
    /// Change frequency hint written to `<changefreq>`, e.g. `"weekly"`.
    changefreq: &'static str,
}
|
|
||
| /// Generate sitemap XML from cached events in nostrdb | ||
| pub fn generate_sitemap(ndb: &Ndb) -> Result<String, nostrdb::Error> { | ||
| let start = Instant::now(); | ||
| let base_url = get_base_url(); | ||
| let txn = Transaction::new(ndb)?; | ||
|
|
||
| let mut entries: Vec<SitemapEntry> = Vec::new(); | ||
| let mut notes_count: u64 = 0; | ||
| let mut articles_count: u64 = 0; | ||
| let mut profiles_count: u64 = 0; | ||
|
|
||
| // Add homepage | ||
| entries.push(SitemapEntry { | ||
| loc: base_url.to_string(), | ||
| lastmod: format_lastmod( | ||
| std::time::SystemTime::now() | ||
| .duration_since(std::time::UNIX_EPOCH) | ||
| .unwrap_or_default() | ||
| .as_secs(), | ||
| ), | ||
| priority: "1.0", | ||
| changefreq: "daily", | ||
| }); | ||
|
|
||
| // Query recent notes (kind:1 - short text notes) | ||
| // Use since filter to prioritize recent content for SEO freshness | ||
| let notes_filter = Filter::new() | ||
| .kinds([1]) | ||
| .since(days_ago(NOTES_LOOKBACK_DAYS)) | ||
| .limit(MAX_SITEMAP_URLS) | ||
| .build(); | ||
|
|
||
| let results = ndb.query(&txn, &[notes_filter], MAX_SITEMAP_URLS as i32).unwrap_or_default(); | ||
| for result in results { | ||
| let Ok(note) = ndb.get_note_by_key(&txn, result.note_key) else { | ||
| continue; | ||
| }; | ||
| let Some(eid) = nostr_sdk::EventId::from_slice(note.id()).ok() else { | ||
| continue; | ||
| }; | ||
| // to_bech32() returns Result<String, Infallible>, so unwrap is safe | ||
| let bech32 = eid.to_bech32().unwrap(); | ||
| entries.push(SitemapEntry { | ||
| loc: format!("{}/{}", base_url, xml_escape(&bech32)), | ||
| lastmod: format_lastmod(note.created_at()), | ||
| priority: "0.8", | ||
| changefreq: "weekly", | ||
| }); | ||
| notes_count += 1; | ||
| } | ||
|
|
||
| // Query long-form articles (kind:30023) | ||
| // Longer lookback for evergreen content | ||
| let articles_filter = Filter::new() | ||
| .kinds([30023]) | ||
| .since(days_ago(ARTICLES_LOOKBACK_DAYS)) | ||
| .limit(MAX_SITEMAP_URLS) | ||
| .build(); | ||
|
|
||
| let results = ndb.query(&txn, &[articles_filter], MAX_SITEMAP_URLS as i32).unwrap_or_default(); | ||
| for result in results { | ||
| let Ok(note) = ndb.get_note_by_key(&txn, result.note_key) else { | ||
| continue; | ||
| }; | ||
|
|
||
| // Extract d-tag identifier - skip if missing or empty to avoid | ||
| // ambiguous URLs and potential collisions across authors | ||
| let identifier = note | ||
| .tags() | ||
| .iter() | ||
| .find(|tag| tag.count() >= 2 && tag.get_unchecked(0).variant().str() == Some("d")) | ||
| .and_then(|tag| tag.get_unchecked(1).variant().str()); | ||
|
|
||
| let Some(identifier) = identifier else { | ||
| continue; | ||
| }; | ||
| if identifier.is_empty() { | ||
| continue; | ||
| } | ||
|
|
||
| let Some(pk) = nostr_sdk::PublicKey::from_slice(note.pubkey()).ok() else { | ||
| continue; | ||
| }; | ||
|
|
||
| // For addressable events, create naddr | ||
| let kind = nostr::Kind::from(note.kind() as u16); | ||
| let coord = nostr::nips::nip01::Coordinate::new(kind, pk).identifier(identifier); | ||
| let Ok(bech32) = coord.to_bech32() else { | ||
| continue; | ||
| }; | ||
|
|
||
| entries.push(SitemapEntry { | ||
| loc: format!("{}/{}", base_url, xml_escape(&bech32)), | ||
| lastmod: format_lastmod(note.created_at()), | ||
| priority: "0.9", | ||
| changefreq: "weekly", | ||
| }); | ||
| articles_count += 1; | ||
| } | ||
|
|
||
| // Query profiles (kind:0 - metadata) | ||
| // No since filter for profiles - they update less frequently | ||
| let profiles_filter = Filter::new() | ||
| .kinds([0]) | ||
| .limit(MAX_SITEMAP_URLS) | ||
| .build(); | ||
|
|
||
| let results = ndb.query(&txn, &[profiles_filter], MAX_SITEMAP_URLS as i32).unwrap_or_default(); | ||
| for result in results { | ||
| let Ok(note) = ndb.get_note_by_key(&txn, result.note_key) else { | ||
| continue; | ||
| }; | ||
| let Some(pk) = nostr_sdk::PublicKey::from_slice(note.pubkey()).ok() else { | ||
| continue; | ||
| }; | ||
| // to_bech32() returns Result<String, Infallible>, so unwrap is safe | ||
| let bech32 = pk.to_bech32().unwrap(); | ||
| entries.push(SitemapEntry { | ||
| loc: format!("{}/{}", base_url, xml_escape(&bech32)), | ||
| lastmod: format_lastmod(note.created_at()), | ||
| priority: "0.7", | ||
| changefreq: "weekly", | ||
| }); | ||
| profiles_count += 1; | ||
| } | ||
|
|
||
| // Build XML | ||
| let mut xml = String::with_capacity(entries.len() * 200); | ||
| xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); | ||
| xml.push_str("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n"); | ||
|
|
||
| for entry in &entries { | ||
| let _ = write!( | ||
| xml, | ||
| " <url>\n <loc>{}</loc>\n <lastmod>{}</lastmod>\n <changefreq>{}</changefreq>\n <priority>{}</priority>\n </url>\n", | ||
| entry.loc, entry.lastmod, entry.changefreq, entry.priority | ||
| ); | ||
| } | ||
|
|
||
| xml.push_str("</urlset>\n"); | ||
|
|
||
| // Record metrics (aggregate stats, not user-tracking) | ||
| let duration = start.elapsed(); | ||
| metrics::counter!("sitemap_generations_total", 1); | ||
| metrics::gauge!("sitemap_generation_duration_seconds", duration.as_secs_f64()); | ||
| metrics::gauge!("sitemap_urls_total", entries.len() as f64); | ||
| metrics::gauge!("sitemap_notes_count", notes_count as f64); | ||
| metrics::gauge!("sitemap_articles_count", articles_count as f64); | ||
| metrics::gauge!("sitemap_profiles_count", profiles_count as f64); | ||
|
|
||
| Ok(xml) | ||
| } | ||
|
|
||
| /// Generate robots.txt content | ||
| pub fn generate_robots_txt() -> String { | ||
| let base_url = get_base_url(); | ||
| format!( | ||
| "User-agent: *\n\ | ||
| Allow: /\n\ | ||
| Allow: /.well-known/nostr.json\n\ | ||
| Disallow: /metrics\n\ | ||
| Disallow: /*.json\n\ | ||
| \n\ | ||
| Sitemap: {}/sitemap.xml\n", | ||
| base_url | ||
| ) | ||
| } | ||
|
|
||
/// Unit tests for the pure helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Current Unix time in seconds, mirroring what `days_ago` reads.
    fn unix_now() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    }

    #[test]
    fn test_xml_escape() {
        assert_eq!(xml_escape("hello"), "hello");
        assert_eq!(xml_escape("a&b"), "a&amp;b");
        assert_eq!(xml_escape("<tag>"), "&lt;tag&gt;");
        assert_eq!(xml_escape("\"quoted\""), "&quot;quoted&quot;");
        assert_eq!(xml_escape("it's"), "it&apos;s");
    }

    #[test]
    fn test_format_lastmod() {
        // 2024-01-01 00:00:00 UTC = 1704067200
        assert_eq!(format_lastmod(1704067200), "2024-01-01");
        // 2023-06-15 12:00:00 UTC = 1686830400
        assert_eq!(format_lastmod(1686830400), "2023-06-15");
        // 2024-02-29 00:00:00 UTC = 1709164800 (leap day)
        assert_eq!(format_lastmod(1709164800), "2024-02-29");
    }

    #[test]
    fn test_is_leap_year() {
        assert!(is_leap_year(2000));
        assert!(is_leap_year(2024));
        assert!(!is_leap_year(1900));
        assert!(!is_leap_year(2023));
    }

    #[test]
    fn test_normalize_base_url() {
        assert_eq!(normalize_base_url("https://example.com/"), "https://example.com");
        assert_eq!(normalize_base_url("https://example.com"), "https://example.com");
    }

    #[test]
    fn test_days_ago_range() {
        // Sample the clock on both sides of the call so the assertion holds
        // even if a second boundary is crossed in between.
        let start = unix_now();
        let cutoff = days_ago(1);
        let end = unix_now();

        let start_cutoff = start.saturating_sub(86400);
        let end_cutoff = end.saturating_sub(86400);
        assert!(cutoff >= start_cutoff);
        assert!(cutoff <= end_cutoff);
    }

    #[test]
    fn test_robots_txt_format() {
        let robots = generate_robots_txt();
        assert!(robots.contains("User-agent: *"));
        assert!(robots.contains("Allow: /"));
        assert!(robots.contains("Disallow: /metrics"));
        assert!(robots.contains("Sitemap:"));
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: damus-io/notecrumbs
Length of output: 75
🏁 Script executed:
Repository: damus-io/notecrumbs
Length of output: 565
🌐 Web query:
metrics crate Rust 0.21.0 counter macro syntax increment
💡 Result:
Use the counter! macro to get a Counter handle, then call increment(n); or use the convenience macro increment_counter! to add 1 (or pass labels). Examples:
Get handle and increment by n:
let c = counter!("my_metric");
c.increment(5);
[1]
Increment by 1 directly:
increment_counter!("my_metric");
// or with labels:
increment_counter!("my_metric", "service" => "http");
[2]
Docs (macro reference and examples) show the counter! and increment_counter! usage. [1] [2]
Sources:
[1] docs.rs: metrics::counter! macro.
[2] docs.rs / metrics macro increment_counter! (examples).
🏁 Script executed:
Repository: damus-io/notecrumbs
Length of output: 147
🏁 Script executed:
Repository: damus-io/notecrumbs
Length of output: 116
🌐 Web query:
metrics crate Rust gauge macro syntax 0.21
💡 Result:
The gauge! macro (as of metrics 0.21) accepts these forms:
Examples:
(You get a Gauge handle which supports .increment(), .decrement(), .set().) (docs.rs)
If you want the docs page for that macro/version, see the metrics macro docs and the 0.21.0 changelog. (docs.rs)
Citations:
Fix metrics macro syntax for version 0.21.0.
The `counter!()` and `gauge!()` macros in metrics 0.21.0 return handles that require method calls. The current syntax is incorrect:
- `counter!("name", value)` should be `counter!("name").increment(value)` (or use the `increment_counter!("name")` convenience macro)
- `gauge!("name", value)` should be `gauge!("name").set(value)`

All metrics calls in lines 269-276 need to be updated to use the correct API.
🤖 Prompt for AI Agents