Skip to content

Commit deed2b8

Browse files
Refactor aggregator for scalability and LinkedIn posting
Updated user agent and enhanced feed configuration with UTM parameters for analytics. Added LinkedIn auto-posting functionality and improved article processing.
1 parent a9eb484 commit deed2b8

File tree

1 file changed

+144
-32
lines changed

1 file changed

+144
-32
lines changed

src/aggregator.js

Lines changed: 144 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,165 @@
1-
// AI-Pulse RSS Aggregator - Secure & Efficient
1+
// AI-Pulse RSS Aggregator - Scalable & Efficient
22
const Parser = require('rss-parser');
3+
const axios = require('axios');
4+
const { Octokit } = require('@octokit/rest');
5+
36
// Shared rss-parser instance; aggregateCategory calls parser.parseURL() on it for each feed.
// `timeout: 10000` is presumably milliseconds (10 s) — TODO confirm against the rss-parser docs.
// NOTE(review): this span is a diff view — the 'AI-Pulse/1.0' header is the removed line and
// 'AI-Pulse/2.0' is the added one; only one of them exists in the real file.
const parser = new Parser({
47
timeout: 10000,
5-
headers: {'User-Agent': 'AI-Pulse/1.0'}
8+
headers: {'User-Agent': 'AI-Pulse/2.0'}
69
});
710

8-
// Secure RSS sources (NO credentials exposed)
9-
const SOURCES = [
10-
{ name: 'OpenAI Blog', url: 'https://openai.com/blog/rss.xml', category: 'Industry' },
11-
{ name: 'Hugging Face', url: 'https://huggingface.co/blog/feed.xml', category: 'Community' },
12-
{ name: 'Papers with Code', url: 'https://paperswithcode.com/latest/rss', category: 'Research' },
13-
{ name: 'MIT Tech Review AI', url: 'https://www.technologyreview.com/topic/artificial-intelligence/feed', category: 'Analysis' },
14-
{ name: 'Towards Data Science', url: 'https://towardsdatascience.com/feed', category: 'Tutorial' },
15-
{ name: 'Substack AI', url: 'https://substack.com/discover/category/technology/feed', category: 'Newsletter' }
16-
];
17-
18-
// Secure article processing (XSS prevention)
19-
function sanitizeArticle(article) {
11+
// Scalable feed configuration by category
12+
/**
 * Feed registry: maps a category key to the list of RSS feeds in that category.
 *
 * Each feed descriptor has:
 *   - name: label used in log output and stored as the article's `source`
 *   - url:  address handed to parser.parseURL()
 *   - tags: list of strings attached to every article taken from the feed
 *
 * main() iterates this object with Object.entries(), so adding a key adds a category.
 */
const FEED_CATEGORIES = {
13+
ai: [
14+
{ name: 'Medium AI', url: 'https://medium.com/tag/artificial-intelligence/feed', tags: ['AI', 'ML', 'Deep Learning'] },
15+
{ name: 'Towards Data Science', url: 'https://towardsdatascience.com/feed', tags: ['AI', 'Data Science', 'Analytics'] },
16+
],
17+
cybersecurity: [
18+
{ name: 'The Hacker News', url: 'https://feeds.feedburner.com/TheHackersNews', tags: ['Security', 'Vulnerabilities', 'Threats'] },
19+
{ name: 'Bleeping Computer', url: 'https://www.bleepingcomputer.com/feed/', tags: ['Security', 'Malware', 'CVE'] },
20+
{ name: 'Krebs on Security', url: 'https://krebsonsecurity.com/feed/', tags: ['Security', 'Fraud', 'Privacy'] },
21+
]
22+
};
23+
24+
// UTM parameters for analytics tracking
25+
/**
 * Return `url` with UTM analytics parameters added to its query string.
 *
 * The URL is parsed rather than string-concatenated so that:
 *   - parameter values are percent-encoded (a source such as "a&b" cannot
 *     inject extra query keys),
 *   - parameters land in the query, before any `#fragment`, and
 *   - pre-existing utm_* keys are overwritten instead of duplicated.
 *
 * @param {string} url - Absolute URL to decorate.
 * @param {string} source - Value for `utm_source`.
 * @param {string} [medium='rss'] - Value for `utm_medium`.
 * @param {string} [campaign='ai-pulse'] - Value for `utm_campaign`.
 * @returns {string} The serialized URL including the UTM parameters.
 * @throws {TypeError} If `url` is not a string or is not a parseable absolute URL.
 */
function addUTMParams(url, source, medium = 'rss', campaign = 'ai-pulse') {
  if (typeof url !== 'string') {
    throw new TypeError('addUTMParams: url must be a string');
  }

  // `new URL` throws a TypeError on relative or malformed input.
  const target = new URL(url);
  target.searchParams.set('utm_source', source);
  target.searchParams.set('utm_medium', medium);
  target.searchParams.set('utm_campaign', campaign);
  target.searchParams.set('utm_content', 'aggregator');
  return target.toString();
}
29+
30+
// Sanitize and process articles
31+
/**
 * Convert a raw feed item into the normalized article record used by the
 * rest of the aggregator.
 *
 * @param {object} article - Raw feed item; reads `title`, `link`, `pubDate`,
 *   `categories` and `contentSnippet`.
 * @param {string} sourceName - Feed name; stored as `source` and, lower-cased
 *   with whitespace replaced by '-', used as the UTM source.
 * @param {string[]} [tags] - Tags to attach; anything that is not an array becomes [].
 * @returns {{title: string, link: (string|null), pubDate: Date, source: string,
 *   tags: string[], category: *, summary: string}} Normalized article. `link` is
 *   null when the item has no http(s) link.
 */
function sanitizeArticle(article, sourceName, tags) {
  // Only http(s) links are accepted: feed content is untrusted, and schemes such
  // as `javascript:` must never reach the generated Markdown or a social post.
  const hasHttpLink = typeof article.link === 'string' && /^https?:\/\//i.test(article.link);

  // An unparseable date string yields an Invalid Date, which would make the
  // date sort comparator return NaN; fall back to "now", as for a missing date.
  const parsedDate = new Date(article.pubDate || Date.now());
  const pubDate = Number.isNaN(parsedDate.getTime()) ? new Date() : parsedDate;

  return {
    title: article.title?.replace(/<[^>]*>/g, '').slice(0, 200) || 'Untitled',
    link: hasHttpLink
      ? addUTMParams(article.link, sourceName.toLowerCase().replace(/\s/g, '-'))
      : null,
    pubDate,
    source: sourceName,
    tags: Array.isArray(tags) ? tags : [],
    category: article.categories?.[0] || 'General',
    summary: article.contentSnippet?.replace(/<[^>]*>/g, '').slice(0, 300) || '',
  };
}
2942

30-
// Main aggregation function
31-
async function aggregateFeeds() {
43+
// Aggregate feeds by category
44+
/**
 * Fetch every feed of one category and return its articles, newest first.
 *
 * Feeds are fetched one after another. A feed that fails to download or parse
 * is logged and skipped. Within a feed, at most the first 10 items are used;
 * an item that cannot be sanitized is logged and skipped on its own, so it no
 * longer discards the rest of the feed. Articles without a link are dropped.
 *
 * @param {string} categoryName - Category key, used for log output only.
 * @param {Array<{name: string, url: string, tags: string[]}>} feeds - Feeds to fetch.
 * @returns {Promise<object[]>} Sanitized articles sorted by `pubDate`, descending.
 */
async function aggregateCategory(categoryName, feeds) {
  console.log(`\n📡 Aggregating ${categoryName} feeds...`);
  const articles = [];

  for (const feed of feeds) {
    try {
      console.log(` ✓ Fetching: ${feed.name}`);
      const feedData = await parser.parseURL(feed.url);
      // A feed may parse successfully yet carry no items.
      const latestItems = (feedData.items ?? []).slice(0, 10);

      for (const item of latestItems) {
        try {
          const article = sanitizeArticle(item, feed.name, feed.tags);
          // An article that cannot be linked to is of no use downstream.
          if (article.link) {
            articles.push(article);
          }
        } catch (itemError) {
          console.error(` ✗ Skipped malformed item from ${feed.name}: ${itemError.message}`);
        }
      }
    } catch (error) {
      console.error(` ✗ Failed to fetch ${feed.name}: ${error.message}`);
    }
  }

  return articles.sort((a, b) => b.pubDate - a.pubDate);
}
63+
64+
// Generate README with categories
65+
/**
 * Render the aggregated articles as the Markdown text of the README.
 *
 * Each category becomes a `##` section listing at most its first 15 articles.
 * Categories without display metadata get a generic heading derived from their
 * key instead of being mislabelled. Titles and links come from feeds, so the
 * characters that would break Markdown link syntax are escaped.
 *
 * @param {Object<string, object[]>} categorizedArticles - Category key mapped to
 *   articles carrying `title`, `link`, `source`, `tags` and `summary`.
 * @returns {string} The complete README document.
 */
function generateREADME(categorizedArticles) {
  const sectionMeta = new Map([
    ['ai', { emoji: '🤖', title: 'Artificial Intelligence' }],
    ['cybersecurity', { emoji: '🔒', title: 'Cybersecurity' }],
  ]);

  // '[' and ']' would end the link text early; '(' and ')' would end the target early.
  const escapeLinkText = (text) => String(text).replace(/[\[\]]/g, '\\$&');
  const escapeLinkTarget = (href) => String(href).replace(/\(/g, '%28').replace(/\)/g, '%29');

  let readme = `# 🚀 AI-Pulse\n\n`;
  readme += `> Curated AI & Cybersecurity news - Auto-updated every 6 hours\n\n`;
  readme += `**Last Update:** ${new Date().toUTCString()}\n\n`;
  readme += `---\n\n`;

  // Generate one section per category
  for (const [category, articles] of Object.entries(categorizedArticles)) {
    const { emoji, title } = sectionMeta.get(category) ?? {
      emoji: '📰',
      title: category.charAt(0).toUpperCase() + category.slice(1),
    };

    readme += `## ${emoji} ${title}\n\n`;

    if (articles.length === 0) {
      readme += `*No articles available*\n\n`;
      continue;
    }

    articles.slice(0, 15).forEach((article, index) => {
      const tags = (article.tags ?? []).map((t) => `\`${t}\``).join(' ');
      const safeTitle = escapeLinkText(article.title);
      // Without a link, emit a plain heading rather than a broken "(null)" target.
      const heading = article.link
        ? `[${safeTitle}](${escapeLinkTarget(article.link)})`
        : safeTitle;
      readme += `### ${index + 1}. ${heading}\n`;
      readme += `**Source:** ${article.source} | **Tags:** ${tags}\n`;
      readme += `${article.summary}\n\n`;
    });

    readme += `---\n\n`;
  }

  readme += `\n---\n\n`;
  readme += `*Powered by [AI-Pulse](https://github.com/ThePhoenixAgency/AI-Pulse) | 100% Free & Open Source*\n`;

  return readme;
}
98+
99+
// LinkedIn auto-posting function
100+
/**
 * Publish one article as a public LinkedIn share via the UGC Posts endpoint.
 *
 * Best-effort: the function never throws. It skips (with a log line) when
 * LINKEDIN_ACCESS_TOKEN or LINKEDIN_USER_ID is not set, or when the article has
 * no link, and it logs — rather than propagates — any request failure.
 *
 * @param {{title: string, summary: string, link: string}} article - Article to share.
 * @returns {Promise<void>}
 */
async function postToLinkedIn(article) {
  const { LINKEDIN_ACCESS_TOKEN: accessToken, LINKEDIN_USER_ID: userId } = process.env;

  if (!accessToken) {
    console.log('⚠️ LinkedIn token not configured, skipping auto-post');
    return;
  }
  // Without the user id the author URN would be "urn:li:person:undefined".
  if (!userId) {
    console.log('⚠️ LinkedIn user id not configured, skipping auto-post');
    return;
  }
  if (!article?.link) {
    console.log('⚠️ Article has no link, skipping auto-post');
    return;
  }

  const payload = {
    author: `urn:li:person:${userId}`,
    lifecycleState: 'PUBLISHED',
    specificContent: {
      'com.linkedin.ugc.ShareContent': {
        shareCommentary: {
          text: `${article.title}\n\n${article.summary}\n\n🔗 Read more: ${article.link}`,
        },
        shareMediaCategory: 'ARTICLE',
        media: [{
          status: 'READY',
          originalUrl: article.link,
        }],
      },
    },
    visibility: {
      'com.linkedin.ugc.MemberNetworkVisibility': 'PUBLIC',
    },
  };

  try {
    await axios.post('https://api.linkedin.com/v2/ugcPosts', payload, {
      headers: {
        'Authorization': `Bearer ${accessToken}`,
        'Content-Type': 'application/json',
        'X-Restli-Protocol-Version': '2.0.0',
      },
      // Same 10 s bound the feed parser uses, so a stalled request cannot hang the run.
      timeout: 10000,
    });
    console.log('✅ Posted to LinkedIn successfully');
  } catch (error) {
    console.error('❌ LinkedIn posting failed:', error.response?.data || error.message);
  }
}
141+
142+
// Main aggregation function
143+
/**
 * Script entry point: aggregate every configured category, print the
 * generated README to stdout, then share the newest AI article on LinkedIn.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Starting AI-Pulse aggregation...\n');

  // Categories are processed one at a time, in declaration order.
  const categorizedArticles = {};
  for (const categoryName of Object.keys(FEED_CATEGORIES)) {
    const feeds = FEED_CATEGORIES[categoryName];
    categorizedArticles[categoryName] = await aggregateCategory(categoryName, feeds);
  }

  // Build the README and emit it
  const readme = generateREADME(categorizedArticles);
  console.log(readme);

  // Optional: share the most recent AI article
  const aiArticles = categorizedArticles.ai;
  if (aiArticles?.length > 0) {
    await postToLinkedIn(aiArticles[0]);
  }

  console.log('\n✅ Aggregation complete!');
}
52164

53-
module.exports = { aggregateFeeds };
165+
// Run the aggregator. A fatal error is logged and reflected in the exit code,
// so a scheduled job reports the run as failed instead of silently succeeding.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)