Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod nip19;
mod pfp;
mod relay_pool;
mod render;
mod sitemap;

use relay_pool::RelayPool;

Expand Down Expand Up @@ -139,6 +140,31 @@ async fn serve(
"/" => {
return html::serve_homepage(r);
}
"/robots.txt" => {
let body = sitemap::generate_robots_txt();
return Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "text/plain; charset=utf-8")
.header(header::CACHE_CONTROL, "public, max-age=86400")
.body(Full::new(Bytes::from(body)))?);
}
"/sitemap.xml" => {
match sitemap::generate_sitemap(&app.ndb) {
Ok(xml) => {
return Ok(Response::builder()
.status(StatusCode::OK)
.header(header::CONTENT_TYPE, "application/xml; charset=utf-8")
.header(header::CACHE_CONTROL, "public, max-age=3600")
.body(Full::new(Bytes::from(xml)))?);
}
Err(err) => {
error!("Failed to generate sitemap: {err}");
return Ok(Response::builder()
.status(StatusCode::INTERNAL_SERVER_ERROR)
.body(Full::new(Bytes::from("Failed to generate sitemap\n")))?);
}
}
}
_ => {}
}

Expand Down
356 changes: 356 additions & 0 deletions src/sitemap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,356 @@
//! Sitemap generation for SEO
//!
//! Generates XML sitemaps from cached events in nostrdb to help search engines
//! discover and index Nostr content rendered by notecrumbs.

use nostr_sdk::ToBech32;
use nostrdb::{Filter, Ndb, Transaction};
use std::fmt::Write;
use std::sync::OnceLock;
use std::time::Instant;

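/// Maximum number of results requested per content query (notes, articles, profiles).
/// Applied to each query separately; the sitemap protocol allows up to 50,000 URLs per file.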
const MAX_SITEMAP_URLS: u64 = 10000;

/// Lookback period for notes (90 days) - shorter for timely content
const NOTES_LOOKBACK_DAYS: u64 = 90;

/// Lookback period for articles (365 days) - longer for evergreen content
const ARTICLES_LOOKBACK_DAYS: u64 = 365;

/// Cached base URL (computed once at first access)
static BASE_URL: OnceLock<String> = OnceLock::new();

/// Return the public base URL used to build absolute sitemap and robots.txt links.
///
/// The value is read from the `NOTECRUMBS_BASE_URL` environment variable the
/// first time this function is called and cached in `BASE_URL` afterwards.
/// Surrounding whitespace and trailing slashes are removed.
///
/// When the variable is unset, or holds nothing usable (empty, blank, or only
/// slashes), a warning is logged once and `https://damus.io` is used instead.
/// An empty base would otherwise produce relative `<loc>` URLs, which the
/// sitemap protocol does not allow.
fn get_base_url() -> &'static str {
    BASE_URL.get_or_init(|| {
        let configured = std::env::var("NOTECRUMBS_BASE_URL")
            .ok()
            .map(|raw| normalize_base_url(raw.trim()))
            .filter(|normalized| !normalized.is_empty());

        configured.unwrap_or_else(|| {
            tracing::warn!(
                "NOTECRUMBS_BASE_URL not set, defaulting to https://damus.io - \
                 sitemap/robots.txt may point to wrong domain"
            );
            normalize_base_url("https://damus.io")
        })
    })
}

/// Strip every trailing `/` from `url` and return the result as an owned string.
fn normalize_base_url(url: &str) -> String {
    let without_trailing_slashes = url.trim_end_matches('/');
    String::from(without_trailing_slashes)
}

/// Calculate the Unix timestamp (seconds) for `days` days before now.
///
/// The result saturates at `0`: a lookback longer than the time elapsed since
/// the epoch, including one so large that converting it to seconds would
/// overflow, yields `0` rather than panicking or wrapping. A system clock set
/// before the epoch is treated as time `0`.
fn days_ago(days: u64) -> u64 {
    const SECONDS_PER_DAY: u64 = 24 * 60 * 60;

    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // saturating_mul keeps an absurdly large `days` from overflowing u64.
    now_secs.saturating_sub(days.saturating_mul(SECONDS_PER_DAY))
}

/// Replace the five XML-reserved characters (`&`, `<`, `>`, `"`, `'`) with
/// their predefined entities; every other character is copied unchanged.
fn xml_escape(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let entity = match ch {
                '&' => "&amp;",
                '<' => "&lt;",
                '>' => "&gt;",
                '"' => "&quot;",
                '\'' => "&apos;",
                other => {
                    escaped.push(other);
                    return escaped;
                }
            };
            escaped.push_str(entity);
            escaped
        })
}

/// Format a Unix timestamp (seconds, UTC) as an ISO 8601 date (`YYYY-MM-DD`).
///
/// The conversion is pure integer arithmetic on the proleptic Gregorian
/// calendar and never panics, whatever the input. Timestamps come from event
/// data and may be arbitrarily large, so the computation:
///
/// * avoids `SystemTime` arithmetic, which panics when the sum overflows;
/// * keeps the day count in `u64` instead of truncating it to `i32`;
/// * skips whole 400-year cycles up front so the year loop runs at most 400
///   iterations.
///
/// Years beyond 9999 are printed with as many digits as they need.
fn format_lastmod(timestamp: u64) -> String {
    // Every run of 400 consecutive Gregorian years has exactly this many days.
    const DAYS_PER_400_YEARS: u64 = 146_097;

    let days_since_epoch = timestamp / 86_400;

    let full_cycles = days_since_epoch / DAYS_PER_400_YEARS;
    // Always below 146_097, so the narrowing cast cannot truncate.
    let mut remaining_days = (days_since_epoch % DAYS_PER_400_YEARS) as i32;

    // Walk the years of the current cycle. Adding a multiple of 400 does not
    // change leap status, so `cycle_year` stands in for the real year here.
    let mut cycle_year = 1970i32;
    loop {
        let days_in_year = if is_leap_year(cycle_year) { 366 } else { 365 };
        if remaining_days < days_in_year {
            break;
        }
        remaining_days -= days_in_year;
        cycle_year += 1;
    }

    let days_in_months: [i32; 12] = if is_leap_year(cycle_year) {
        [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    } else {
        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    };

    let mut month = 1u32;
    for days in days_in_months {
        if remaining_days < days {
            break;
        }
        remaining_days -= days;
        month += 1;
    }

    let day = remaining_days + 1;
    // `cycle_year` is always within 1970..=2369, so the cast is lossless.
    let year = full_cycles * 400 + cycle_year as u64;

    format!("{:04}-{:02}-{:02}", year, month, day)
}

/// Return `true` when `year` is a leap year in the Gregorian calendar:
/// divisible by 4 but not by 100, or divisible by 400.
fn is_leap_year(year: i32) -> bool {
    (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}

/// One `<url>` element of the generated sitemap.
///
/// All fields are stored as ready-to-print strings; `generate_sitemap` writes
/// them into the XML template without further processing.
struct SitemapEntry {
/// URL written inside `<loc>`.
loc: String,
/// Date string written inside `<lastmod>` (produced by `format_lastmod`).
lastmod: String,
/// Value written inside `<priority>`, e.g. "1.0" for the homepage.
priority: &'static str,
/// Value written inside `<changefreq>`, e.g. "daily" or "weekly".
changefreq: &'static str,
}

/// Generate sitemap XML from cached events in nostrdb.
///
/// Entries are emitted in this order: the homepage, kind:1 notes created within
/// the last `NOTES_LOOKBACK_DAYS` days, kind:30023 articles created within the
/// last `ARTICLES_LOOKBACK_DAYS` days, and kind:0 profiles (no time bound).
/// Each of the three queries is capped at `MAX_SITEMAP_URLS` results.
///
/// # Errors
///
/// Returns an error only when the read transaction cannot be opened. A failed
/// query is not reported: `unwrap_or_default()` replaces it with the default
/// result, so that section is simply absent from the output.
pub fn generate_sitemap(ndb: &Ndb) -> Result<String, nostrdb::Error> {
let start = Instant::now();
let base_url = get_base_url();
let txn = Transaction::new(ndb)?;

let mut entries: Vec<SitemapEntry> = Vec::new();
// Per-category counters, reported as gauges once the XML is built.
let mut notes_count: u64 = 0;
let mut articles_count: u64 = 0;
let mut profiles_count: u64 = 0;

// Add homepage
// NOTE(review): base_url is inserted without xml_escape here and in the
// per-entry `loc` values below; only the bech32 suffix is escaped.
entries.push(SitemapEntry {
loc: base_url.to_string(),
// The homepage is stamped with today's date on every generation.
lastmod: format_lastmod(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs(),
),
priority: "1.0",
changefreq: "daily",
});

// Query recent notes (kind:1 - short text notes)
// Use since filter to prioritize recent content for SEO freshness
let notes_filter = Filter::new()
.kinds([1])
.since(days_ago(NOTES_LOOKBACK_DAYS))
.limit(MAX_SITEMAP_URLS)
.build();

// A query error falls back to the default result instead of failing the sitemap.
let results = ndb.query(&txn, &[notes_filter], MAX_SITEMAP_URLS as i32).unwrap_or_default();
for result in results {
// Notes that cannot be loaded or whose id does not parse are skipped.
let Ok(note) = ndb.get_note_by_key(&txn, result.note_key) else {
continue;
};
let Some(eid) = nostr_sdk::EventId::from_slice(note.id()).ok() else {
continue;
};
// to_bech32() returns Result<String, Infallible>, so unwrap is safe
let bech32 = eid.to_bech32().unwrap();
entries.push(SitemapEntry {
loc: format!("{}/{}", base_url, xml_escape(&bech32)),
lastmod: format_lastmod(note.created_at()),
priority: "0.8",
changefreq: "weekly",
});
notes_count += 1;
}

// Query long-form articles (kind:30023)
// Longer lookback for evergreen content
let articles_filter = Filter::new()
.kinds([30023])
.since(days_ago(ARTICLES_LOOKBACK_DAYS))
.limit(MAX_SITEMAP_URLS)
.build();

// Same error policy as the notes query: failure yields the default result.
let results = ndb.query(&txn, &[articles_filter], MAX_SITEMAP_URLS as i32).unwrap_or_default();
for result in results {
let Ok(note) = ndb.get_note_by_key(&txn, result.note_key) else {
continue;
};

// Extract d-tag identifier - skip if missing or empty to avoid
// ambiguous URLs and potential collisions across authors
// The first tag with at least two elements whose first element is "d" wins.
let identifier = note
.tags()
.iter()
.find(|tag| tag.count() >= 2 && tag.get_unchecked(0).variant().str() == Some("d"))
.and_then(|tag| tag.get_unchecked(1).variant().str());

let Some(identifier) = identifier else {
continue;
};
if identifier.is_empty() {
continue;
}

let Some(pk) = nostr_sdk::PublicKey::from_slice(note.pubkey()).ok() else {
continue;
};

// For addressable events, create naddr
// The coordinate is built from the note's kind, the author key and the d-tag.
let kind = nostr::Kind::from(note.kind() as u16);
let coord = nostr::nips::nip01::Coordinate::new(kind, pk).identifier(identifier);
// Unlike the note/profile encodings, this one is treated as fallible.
let Ok(bech32) = coord.to_bech32() else {
continue;
};

entries.push(SitemapEntry {
loc: format!("{}/{}", base_url, xml_escape(&bech32)),
lastmod: format_lastmod(note.created_at()),
priority: "0.9",
changefreq: "weekly",
});
articles_count += 1;
}

// Query profiles (kind:0 - metadata)
// No since filter for profiles - they update less frequently
let profiles_filter = Filter::new()
.kinds([0])
.limit(MAX_SITEMAP_URLS)
.build();

// Same error policy as above: a failed query yields the default result.
let results = ndb.query(&txn, &[profiles_filter], MAX_SITEMAP_URLS as i32).unwrap_or_default();
for result in results {
let Ok(note) = ndb.get_note_by_key(&txn, result.note_key) else {
continue;
};
let Some(pk) = nostr_sdk::PublicKey::from_slice(note.pubkey()).ok() else {
continue;
};
// to_bech32() returns Result<String, Infallible>, so unwrap is safe
let bech32 = pk.to_bech32().unwrap();
entries.push(SitemapEntry {
loc: format!("{}/{}", base_url, xml_escape(&bech32)),
lastmod: format_lastmod(note.created_at()),
priority: "0.7",
changefreq: "weekly",
});
profiles_count += 1;
}

// Build XML
// Pre-size the buffer with a rough estimate of 200 bytes per entry.
let mut xml = String::with_capacity(entries.len() * 200);
xml.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
xml.push_str("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n");

for entry in &entries {
// The write! result is deliberately discarded.
let _ = write!(
xml,
" <url>\n <loc>{}</loc>\n <lastmod>{}</lastmod>\n <changefreq>{}</changefreq>\n <priority>{}</priority>\n </url>\n",
entry.loc, entry.lastmod, entry.changefreq, entry.priority
);
}

xml.push_str("</urlset>\n");

// Record metrics (aggregate stats, not user-tracking)
// NOTE(review): the accepted macro form depends on the `metrics` crate
// version pinned in Cargo.toml - confirm against that version's docs.
let duration = start.elapsed();
metrics::counter!("sitemap_generations_total", 1);
metrics::gauge!("sitemap_generation_duration_seconds", duration.as_secs_f64());
metrics::gauge!("sitemap_urls_total", entries.len() as f64);
metrics::gauge!("sitemap_notes_count", notes_count as f64);
metrics::gauge!("sitemap_articles_count", articles_count as f64);
metrics::gauge!("sitemap_profiles_count", profiles_count as f64);
Comment on lines +269 to +276
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# Check for metrics crate version in Cargo.toml
find . -name "Cargo.toml" -type f | head -5 | xargs grep -l "metrics" 2>/dev/null

Repository: damus-io/notecrumbs

Length of output: 75


🏁 Script executed:

# Also verify the actual code at the specified lines
if [ -f "src/sitemap.rs" ]; then
  sed -n '269,276p' src/sitemap.rs
fi

Repository: damus-io/notecrumbs

Length of output: 565


🌐 Web query:

metrics crate Rust 0.21.0 counter macro syntax increment

💡 Result:

Use the counter! macro to get a Counter handle, then call increment(n); or use the convenience macro increment_counter! to add 1 (or pass labels). Examples:

  • Get handle and increment by n:
    let c = counter!("my_metric");
    c.increment(5);
    [1]

  • Increment by 1 directly:
    increment_counter!("my_metric");
    // or with labels:
    increment_counter!("my_metric", "service" => "http");
    [2]

Docs (macro reference and examples) show the counter! and increment_counter! usage. [1] [2]

Sources:
[1] docs.rs: metrics::counter! macro.
[2] docs.rs / metrics macro increment_counter! (examples).


🏁 Script executed:

# Read the Cargo.toml to find the metrics version
cat Cargo.toml | grep -A 2 "^\[dependencies\]" | head -20

Repository: damus-io/notecrumbs

Length of output: 147


🏁 Script executed:

# Or search more directly for metrics
grep "metrics" Cargo.toml

Repository: damus-io/notecrumbs

Length of output: 116


🌐 Web query:

metrics crate Rust gauge macro syntax 0.21

💡 Result:

The gauge! macro (as of metrics 0.21) accepts these forms:

  • gauge!( $name:expr $(, $label_key:expr $(=> $label_value:expr)? )* $(,)? )
  • gauge!( level: $level:expr, $name:expr $(, $label_key:expr $(=> $label_value:expr)? )* $(,)? )
  • gauge!( target: $target:expr, $name:expr $(, $label_key:expr $(=> $label_value:expr)? )* $(,)? )
  • gauge!( target: $target:expr, level: $level:expr, $name:expr $(, $label_key:expr $(=> $label_value:expr)? )* $(,)? )

Examples:

  • let g = gauge!("my_gauge");
  • let g = gauge!("my_gauge", "service" => "http");
  • let g = gauge!(level: Level::DEBUG, "my_gauge");
  • let g = gauge!(format!("{}{}", prefix, "g"), &labels);

(You get a Gauge handle which supports .increment(), .decrement(), .set().) (docs.rs)

If you want the docs page for that macro/version, see the metrics macro docs and the 0.21.0 changelog. (docs.rs)

Citations:


Fix metrics macro syntax for version 0.21.0.

The counter!() and gauge!() macros in metrics 0.21.0 return handles that require method calls. The current syntax is incorrect:

  • counter!("name", value) should be counter!("name").increment(value) (or use increment_counter!("name") convenience macro)
  • gauge!("name", value) should be gauge!("name").set(value)

All metrics calls in lines 269-276 need to be updated to use the correct API.

🤖 Prompt for AI Agents
In `@src/sitemap.rs` around lines 269 - 276, Update the metrics calls to use the
metrics 0.21.0 API: replace the incorrect usage of
counter!("sitemap_generations_total", 1) with either the increment_counter!
convenience macro or call counter!("sitemap_generations_total").increment(1),
and replace gauge!("sitemap_generation_duration_seconds",
duration.as_secs_f64()) and the other gauge calls for "sitemap_urls_total",
"sitemap_notes_count", "sitemap_articles_count", and "sitemap_profiles_count"
with gauge!("...").set(value) (or use a suitable gauge-set helper) so the macros
return handles before invoking .increment(...) or .set(...); use the existing
local variables start, duration (computed from start.elapsed()), entries.len(),
notes_count, articles_count, and profiles_count as the values passed to the
handle methods.


Ok(xml)
}

/// Build the robots.txt body.
///
/// The crawl rules are fixed: everything is allowed, including
/// `/.well-known/nostr.json`, except `/metrics` and any other `*.json` path.
/// The final line advertises the sitemap at `<base URL>/sitemap.xml`.
pub fn generate_robots_txt() -> String {
    const CRAWL_RULES: &str = concat!(
        "User-agent: *\n",
        "Allow: /\n",
        "Allow: /.well-known/nostr.json\n",
        "Disallow: /metrics\n",
        "Disallow: /*.json\n",
        "\n",
    );

    let mut robots = String::from(CRAWL_RULES);
    robots.push_str("Sitemap: ");
    robots.push_str(get_base_url());
    robots.push_str("/sitemap.xml\n");
    robots
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every reserved character has its own branch in `xml_escape`; cover all five.
    #[test]
    fn test_xml_escape() {
        assert_eq!(xml_escape("hello"), "hello");
        assert_eq!(xml_escape(""), "");
        assert_eq!(xml_escape("a&b"), "a&amp;b");
        assert_eq!(xml_escape("<tag>"), "&lt;tag&gt;");
        assert_eq!(xml_escape("\"quoted\""), "&quot;quoted&quot;");
        assert_eq!(xml_escape("it's"), "it&apos;s");
        // Already-escaped input is escaped again, not passed through.
        assert_eq!(xml_escape("&amp;"), "&amp;amp;");
    }

    #[test]
    fn test_format_lastmod() {
        // 1970-01-01 00:00:00 UTC = 0
        assert_eq!(format_lastmod(0), "1970-01-01");
        // 2024-01-01 00:00:00 UTC = 1704067200
        assert_eq!(format_lastmod(1704067200), "2024-01-01");
        // 2023-06-15 12:00:00 UTC = 1686830400
        assert_eq!(format_lastmod(1686830400), "2023-06-15");
    }

    /// Leap days and the last day of a leap year exercise the month table.
    #[test]
    fn test_format_lastmod_leap_boundaries() {
        // 2000-02-29 00:00:00 UTC = 951782400 (divisible by 400 => leap)
        assert_eq!(format_lastmod(951782400), "2000-02-29");
        // 2024-02-29 00:00:00 UTC = 1709164800
        assert_eq!(format_lastmod(1709164800), "2024-02-29");
        // 2024-12-31 00:00:00 UTC = 1735603200 (day 366 of a leap year)
        assert_eq!(format_lastmod(1735603200), "2024-12-31");
    }

    #[test]
    fn test_is_leap_year() {
        assert!(is_leap_year(2000));
        assert!(is_leap_year(2024));
        assert!(!is_leap_year(1900));
        assert!(!is_leap_year(2023));
    }

    #[test]
    fn test_normalize_base_url() {
        assert_eq!(normalize_base_url("https://example.com/"), "https://example.com");
        assert_eq!(normalize_base_url("https://example.com"), "https://example.com");
        assert_eq!(normalize_base_url("https://example.com///"), "https://example.com");
    }

    /// `days_ago` reads the clock itself, so bracket it between two readings.
    #[test]
    fn test_days_ago_range() {
        let start = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();
        let cutoff = days_ago(1);
        let end = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();

        let start_cutoff = start.saturating_sub(86400);
        let end_cutoff = end.saturating_sub(86400);
        assert!(cutoff >= start_cutoff);
        assert!(cutoff <= end_cutoff);
    }

    #[test]
    fn test_robots_txt_format() {
        let robots = generate_robots_txt();
        assert!(robots.contains("User-agent: *"));
        assert!(robots.contains("Allow: /"));
        assert!(robots.contains("Disallow: /metrics"));
        assert!(robots.contains("Sitemap:"));
        // The sitemap reference is the final line, whatever the base URL is.
        assert!(robots.ends_with("/sitemap.xml\n"));
    }
}
Loading