README.md (38 additions, 0 deletions)
@@ -31,6 +31,44 @@ We are in transition to a new UI. Some features don't yet exist in the new UI an

Download the latest version from [Releases](https://github.com/brafdlog/caspion/releases) page, or build it from source, with the instructions below. (Mac users, you may follow these [instructions](https://github.com/brafdlog/caspion/issues/276#issuecomment-1282111297)).

### CLI Mode (Headless Scraping)

You can run Caspion from the command line without opening the UI. This is useful for setting up automated scraping via cron jobs.

**Prerequisites:** First configure your accounts and exporters using the normal UI. The CLI mode uses the same configuration.

**From a built/installed app:**

```bash
# macOS
/Applications/caspion.app/Contents/MacOS/caspion --scrape

# Linux
/path/to/caspion --scrape

# Windows
"C:\Program Files\caspion\caspion.exe" --scrape
```

**From source (development):**

```bash
yarn scrape
```

**Example cron job (scrape daily at 6 AM):**

```bash
0 6 * * * /Applications/caspion.app/Contents/MacOS/caspion --scrape >> /var/log/caspion.log 2>&1
```

The CLI mode will:
- Load your existing configuration (accounts, exporters, settings)
- Run all active scrapers
- Export transactions to your configured destinations
- Print progress to stdout
- Exit with code 0 on success, 1 on failure (see the wrapper example below)
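
Because success or failure is reported through the exit code, a cron entry can call a small wrapper script instead of the binary directly. A minimal sketch; the app path, log file, and notification step are placeholders to adapt to your setup:

```bash
#!/usr/bin/env bash
# Hypothetical wrapper around the CLI mode; adjust APP and LOG for your install.
APP="/Applications/caspion.app/Contents/MacOS/caspion"
LOG="$HOME/caspion-scrape.log"

"$APP" --scrape >> "$LOG" 2>&1
status=$?
if [ "$status" -ne 0 ]; then
  echo "$(date): caspion scrape failed with exit code $status" >> "$LOG"
  # add a notification command here if you want to be alerted on failures
fi
exit "$status"
```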

### Proxy Support

If you're behind a corporate proxy or need to use a proxy server, Caspion supports standard proxy environment variables:
package.json (1 addition, 0 deletions)
@@ -16,6 +16,7 @@
"build:preload": "cd ./packages/preload && vite build",
"build:renderer": "cd ./packages/renderer && vite build",
"compile": "cross-env MODE=production yarn build && electron-builder build --config electron-builder.yml --dir",
"scrape": "node scripts/scrape.js",
"test": "yarn test:main && yarn test:preload && yarn test:renderer && yarn test:e2e",
"test:e2e": "yarn build && vitest run",
"test:main": "vitest run -r packages/main --passWithNoTests",
packages/main/src/backend/import/importTransactions.ts (3 additions, 2 deletions)
@@ -27,6 +27,7 @@ import { createOperationLogger, type OperationLogger } from '/@/logging/operatio

type ScrapingConfig = Config['scraping'];

const DEFAULT_MAX_CONCURRENCY = 3;
const TRANSACTION_STATUS_COMPLETED = 'completed';

export async function scrapeFinancialAccountsAndFetchTransactions(
@@ -47,7 +48,7 @@ export async function scrapeFinancialAccountsAndFetchTransactions(
numDaysBack: moment().diff(moment(startDate), 'days'),
showBrowser: scrapingConfig.showBrowser,
timeout: scrapingConfig.timeout,
maxConcurrency: scrapingConfig.maxConcurrency ?? 1,
maxConcurrency: scrapingConfig.maxConcurrency ?? DEFAULT_MAX_CONCURRENCY,
});

if (scrapingConfig.chromiumPath) {
@@ -65,7 +66,7 @@ export async function scrapeFinancialAccountsAndFetchTransactions(
}

const limiter = new Bottleneck({
maxConcurrent: scrapingConfig.maxConcurrency,
maxConcurrent: scrapingConfig.maxConcurrency || DEFAULT_MAX_CONCURRENCY,
});
const scrapePromises = activeAccounts.map(async (accountToScrape) => ({
id: accountToScrape.id,
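For context on the concurrency change above: the scraper fan-out is throttled with Bottleneck, and the new constant serves as the fallback when no value is configured. A standalone sketch of the pattern, with the account list and the fake scraper as illustrative stand-ins rather than the project's real code:

```typescript
import Bottleneck from 'bottleneck';

const DEFAULT_MAX_CONCURRENCY = 3;

// Illustrative stand-in for a real account scraper.
async function scrapeAccount(accountId: string): Promise<string> {
  await new Promise((resolve) => setTimeout(resolve, 100));
  return `scraped ${accountId}`;
}

async function scrapeAll(configuredConcurrency?: number): Promise<string[]> {
  // Fall back to the default when the configured value is undefined or 0.
  const limiter = new Bottleneck({
    maxConcurrent: configuredConcurrency || DEFAULT_MAX_CONCURRENCY,
  });

  // limiter.schedule queues each task so at most maxConcurrent run at once.
  const accountIds = ['bank-a', 'bank-b', 'card-c', 'card-d'];
  return Promise.all(accountIds.map((id) => limiter.schedule(() => scrapeAccount(id))));
}

scrapeAll().then((results) => console.log(results));
```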
packages/main/src/index.ts (41 additions, 9 deletions)
@@ -5,16 +5,25 @@ import { registerHandlers } from './handlers';
import './security-restrictions';
import { restoreOrCreateWindow } from '/@/mainWindow';
import { logAppEvent } from './logging/operationLogger';
import { scrapeAndUpdateOutputVendors } from './backend';
import { getConfig } from './backend/configManager/configManager';
import { BudgetTrackingEventEmitter } from './backend/eventEmitters/EventEmitter';

// Check for CLI mode
const isCliScrape = process.argv.includes('--scrape');

/**
* Prevent electron from running multiple instances.
* Skip this check in CLI mode to allow running from cron while GUI is open.
*/
const isSingleInstance = app.requestSingleInstanceLock();
if (!isSingleInstance) {
app.quit();
process.exit(0);
if (!isCliScrape) {
const isSingleInstance = app.requestSingleInstanceLock();
if (!isSingleInstance) {
app.quit();
process.exit(0);
}
app.on('second-instance', restoreOrCreateWindow);
}
app.on('second-instance', restoreOrCreateWindow);

/**
* Disable Hardware Acceleration to save more system resources.
@@ -37,17 +46,40 @@ app.on('window-all-closed', () => {
app.on('activate', restoreOrCreateWindow);

/**
* Create the application window when the background process is ready.
* Create the application window when the background process is ready,
* or run CLI scraping if --scrape flag is passed.
*/
app
.whenReady()
.then(() => {
.then(async () => {
logAppEvent('APP_READY', {
version: app.getVersion(),
platform,
nodeVersion: process.versions.node,
electronVersion: process.versions.electron,
cliMode: isCliScrape,
});

if (isCliScrape) {
// CLI mode: run scraping and exit
logAppEvent('CLI_SCRAPE_START');
try {
const config = await getConfig();
const eventPublisher = new BudgetTrackingEventEmitter();
eventPublisher.onAny((eventName, eventData) => {
console.log(`[${eventName}]`, eventData?.message ?? '');
});
await scrapeAndUpdateOutputVendors(config, eventPublisher);
logAppEvent('CLI_SCRAPE_SUCCESS');
app.quit();
} catch (error) {
logAppEvent('CLI_SCRAPE_FAILED', { errorMessage: (error as Error).message });
**Comment on lines +67 to +74**

**Collaborator:** I'm on mobile, but what is the difference between console.log and logAppEvent? If logAppEvent is for special lifecycle keys, why aren't those keys an enum?

**Owner (author):** logAppEvent writes structured logs to electron-log (persistent log files), while console.log here prints scraping progress to stdout for CLI users running from a terminal or cron. They serve different purposes: logAppEvent for persistent diagnostics, console for real-time CLI feedback.
Regarding enums: good point, but all existing event keys throughout the codebase (APP_READY, APP_QUIT, UPDATE_CHECK_START, etc.) are plain strings. We can create an enum for all of them in a separate PR to keep this one focused.

console.error('CLI scrape failed:', error);
app.exit(1);
}
return;
}

return restoreOrCreateWindow();
})
.catch((e) => {
@@ -79,14 +111,14 @@

/**
* Check for app updates, install it in background and notify user that new version was installed.
* No reason run this in non-production build.
* No reason to run this in a non-production build or in CLI mode.
* @see https://www.electron.build/auto-update.html#quick-setup-guide
*
* Note: It may throw "ENOENT: no such file app-update.yml"
* if you compile production app without publishing it to distribution server.
* Like `yarn compile` does. It's ok 😅
*/
if (import.meta.env.PROD) {
if (import.meta.env.PROD && !isCliScrape) {
app
.whenReady()
.then(async () => {
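On the enum suggestion from the review thread above, one possible shape for the follow-up PR (hypothetical; the keys are taken from event names that already appear in this PR and the existing codebase):

```typescript
// Hypothetical enum for lifecycle event keys that are currently plain strings.
export enum AppLogEvent {
  AppReady = 'APP_READY',
  AppQuit = 'APP_QUIT',
  UpdateCheckStart = 'UPDATE_CHECK_START',
  CliScrapeStart = 'CLI_SCRAPE_START',
  CliScrapeSuccess = 'CLI_SCRAPE_SUCCESS',
  CliScrapeFailed = 'CLI_SCRAPE_FAILED',
}

// Call sites would stay close to today's usage:
// logAppEvent(AppLogEvent.CliScrapeStart);
```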
scripts/scrape.js (70 additions, 0 deletions)
**Collaborator:** What is this file? Is it for those who want to actually cron it from source, where you're trying to avoid recompiling every time and only rebuild when needed?

**Owner (author):** Exactly.

@@ -0,0 +1,70 @@
import { execSync } from 'child_process';
import { statSync, existsSync, readdirSync } from 'fs';
import { resolve } from 'path';

const projectRoot = resolve(import.meta.dirname, '..');

function findSourceFiles(dir) {
const files = [];
const entries = readdirSync(dir, { withFileTypes: true });

for (const entry of entries) {
const fullPath = resolve(dir, entry.name);
if (entry.isDirectory()) {
files.push(...findSourceFiles(fullPath));
} else if (entry.isFile() && (entry.name.endsWith('.ts') || entry.name.endsWith('.tsx'))) {
files.push(fullPath);
}
}

return files;
}

function getLatestSourceModificationTime(sourceDir) {
try {
const sourceFiles = findSourceFiles(resolve(projectRoot, sourceDir));

let latestTime = 0;
for (const file of sourceFiles) {
const stat = statSync(file);
if (stat.mtimeMs > latestTime) latestTime = stat.mtimeMs;
}
return latestTime;
} catch {
return Date.now(); // If error, assume rebuild needed
}
}

function getBuildOutputModificationTime(distFile) {
try {
if (!existsSync(distFile)) return 0;
return statSync(distFile).mtimeMs;
} catch {
return 0;
}
}

const mainSourceLastModified = getLatestSourceModificationTime('packages/main/src');
const preloadSourceLastModified = getLatestSourceModificationTime('packages/preload/src');
const mainBuildLastModified = getBuildOutputModificationTime(resolve(projectRoot, 'packages/main/dist/index.js'));
const preloadBuildLastModified = getBuildOutputModificationTime(resolve(projectRoot, 'packages/preload/dist/index.js'));

const mainNeedsRebuild = mainSourceLastModified > mainBuildLastModified;
const preloadNeedsRebuild = preloadSourceLastModified > preloadBuildLastModified;

if (mainNeedsRebuild || preloadNeedsRebuild) {
console.log('Source files changed, rebuilding...');
if (mainNeedsRebuild) {
console.log('Building main...');
execSync('yarn build:main', { stdio: 'inherit', cwd: projectRoot });
}
if (preloadNeedsRebuild) {
console.log('Building preload...');
execSync('yarn build:preload', { stdio: 'inherit', cwd: projectRoot });
}
} else {
console.log('Build is up to date, skipping...');
}

console.log('Starting scrape...');
execSync('electron . --scrape', { stdio: 'inherit', cwd: projectRoot });
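
When invoked through `yarn scrape`, the script above is roughly equivalent to the following manual sequence after a source change (npx is shown as one way to resolve the locally installed Electron binary):

```bash
# Rebuild the main and preload bundles, then launch Electron headless.
yarn build:main
yarn build:preload
npx electron . --scrape
```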