From 87490b15b64812b77e00f1aa66103052885c8778 Mon Sep 17 00:00:00 2001 From: Guy Ben-Aharon Date: Wed, 18 Feb 2026 16:50:30 +0200 Subject: [PATCH 1/5] fix: add squadhub extensions in order to add pairing --- apps/web/CLAUDE.md | 12 +- apps/web/src/app/api/chat/abort/route.spec.ts | 14 + apps/web/src/app/api/chat/abort/route.ts | 6 +- .../src/app/api/chat/history/route.spec.ts | 14 + apps/web/src/app/api/chat/history/route.ts | 6 +- apps/web/src/app/api/chat/route.spec.ts | 23 +- apps/web/src/app/api/chat/route.ts | 11 +- apps/web/src/app/api/squadhub/health/route.ts | 9 +- .../web/src/app/api/squadhub/pairing/route.ts | 17 +- .../app/api/tenant/squadhub/restart/route.ts | 6 +- apps/web/src/app/api/tenant/squadhub/route.ts | 6 +- .../app/api/tenant/squadhub/status/route.ts | 6 +- apps/web/src/app/api/tenant/status/route.ts | 27 + apps/web/src/app/setup/provisioning/page.tsx | 36 +- apps/web/src/lib/api/tenant-auth.ts | 17 +- apps/web/src/lib/squadhub/connection.ts | 14 +- packages/backend/convex/lib/auth.ts | 7 +- packages/backend/convex/lib/helpers.ts | 5 +- packages/backend/convex/types.ts | 17 + steps.md | 1546 +++++++++++++++++ 20 files changed, 1753 insertions(+), 46 deletions(-) create mode 100644 apps/web/src/app/api/tenant/status/route.ts create mode 100644 steps.md diff --git a/apps/web/CLAUDE.md b/apps/web/CLAUDE.md index 7890e51..3620c8c 100644 --- a/apps/web/CLAUDE.md +++ b/apps/web/CLAUDE.md @@ -61,17 +61,15 @@ const { data } = useQuery({ ## Types -Document types come from Convex. Use `Doc<"tableName">` from the generated types: +Document types live in `packages/backend/convex/types.ts`. Import from `@clawe/backend/types`: ```tsx -import type { Doc } from "@clawe/backend/server"; - -type Task = Doc<"tasks">; -type Agent = Doc<"agents">; -type Message = Doc<"messages">; +import type { Agent, Tenant } from "@clawe/backend/types"; ``` -Or infer from query results (preferred when using the data directly). +- If a type doesn't exist yet, add it to `types.ts` using `Doc<>` +- Never use `Doc<>` outside of `types.ts` +- Or infer from query results (preferred when using the data directly) **Environment variables:** diff --git a/apps/web/src/app/api/chat/abort/route.spec.ts b/apps/web/src/app/api/chat/abort/route.spec.ts index f10ba65..b1f150a 100644 --- a/apps/web/src/app/api/chat/abort/route.spec.ts +++ b/apps/web/src/app/api/chat/abort/route.spec.ts @@ -2,6 +2,20 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { NextRequest } from "next/server"; import { POST } from "./route"; +// Mock tenant auth +vi.mock("@/lib/api/tenant-auth", () => ({ + getAuthenticatedTenant: vi.fn(async () => ({ + error: null, + convex: {}, + tenant: { + _id: "test-tenant-id", + squadhubUrl: "http://localhost:18790", + squadhubToken: "test-token", + status: "active", + }, + })), +})); + // Mock the shared client const mockRequest = vi.fn(); diff --git a/apps/web/src/app/api/chat/abort/route.ts b/apps/web/src/app/api/chat/abort/route.ts index 2e8c30b..1460a4a 100644 --- a/apps/web/src/app/api/chat/abort/route.ts +++ b/apps/web/src/app/api/chat/abort/route.ts @@ -1,5 +1,6 @@ import { NextRequest, NextResponse } from "next/server"; import { getSharedClient } from "@clawe/shared/squadhub"; +import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; export const runtime = "nodejs"; @@ -15,6 +16,9 @@ type AbortRequestBody = { * Abort an in-progress chat generation. */ export async function POST(request: NextRequest) { + const auth = await getAuthenticatedTenant(request); + if (auth.error) return auth.error; + let body: AbortRequestBody; try { @@ -33,7 +37,7 @@ export async function POST(request: NextRequest) { } try { - const client = await getSharedClient(getConnection()); + const client = await getSharedClient(getConnection(auth.tenant)); await client.request("chat.abort", { sessionKey, diff --git a/apps/web/src/app/api/chat/history/route.spec.ts b/apps/web/src/app/api/chat/history/route.spec.ts index 6c76a70..114b853 100644 --- a/apps/web/src/app/api/chat/history/route.spec.ts +++ b/apps/web/src/app/api/chat/history/route.spec.ts @@ -2,6 +2,20 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { NextRequest } from "next/server"; import { GET } from "./route"; +// Mock tenant auth +vi.mock("@/lib/api/tenant-auth", () => ({ + getAuthenticatedTenant: vi.fn(async () => ({ + error: null, + convex: {}, + tenant: { + _id: "test-tenant-id", + squadhubUrl: "http://localhost:18790", + squadhubToken: "test-token", + status: "active", + }, + })), +})); + // Mock the shared client const mockRequest = vi.fn(); diff --git a/apps/web/src/app/api/chat/history/route.ts b/apps/web/src/app/api/chat/history/route.ts index a6600a0..13a583a 100644 --- a/apps/web/src/app/api/chat/history/route.ts +++ b/apps/web/src/app/api/chat/history/route.ts @@ -1,6 +1,7 @@ import { NextRequest, NextResponse } from "next/server"; import { getSharedClient } from "@clawe/shared/squadhub"; import type { ChatHistoryResponse } from "@clawe/shared/squadhub"; +import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; export const runtime = "nodejs"; @@ -10,6 +11,9 @@ export const dynamic = "force-dynamic"; * GET /api/chat/history?sessionKey=xxx&limit=200 */ export async function GET(request: NextRequest) { + const auth = await getAuthenticatedTenant(request); + if (auth.error) return auth.error; + const searchParams = request.nextUrl.searchParams; const sessionKey = searchParams.get("sessionKey"); const limitParam = searchParams.get("limit"); @@ -30,7 +34,7 @@ export async function GET(request: NextRequest) { } try { - const client = await getSharedClient(getConnection()); + const client = await getSharedClient(getConnection(auth.tenant)); const response = await client.request("chat.history", { sessionKey, diff --git a/apps/web/src/app/api/chat/route.spec.ts b/apps/web/src/app/api/chat/route.spec.ts index 8175344..8fe1596 100644 --- a/apps/web/src/app/api/chat/route.spec.ts +++ b/apps/web/src/app/api/chat/route.spec.ts @@ -1,6 +1,21 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; +import { NextRequest } from "next/server"; import { POST } from "./route"; +// Mock tenant auth +vi.mock("@/lib/api/tenant-auth", () => ({ + getAuthenticatedTenant: vi.fn(async () => ({ + error: null, + convex: {}, + tenant: { + _id: "test-tenant-id", + squadhubUrl: "http://localhost:18790", + squadhubToken: "test-token", + status: "active", + }, + })), +})); + // Mock the AI SDK vi.mock("@ai-sdk/openai", () => ({ createOpenAI: vi.fn(() => ({ @@ -25,7 +40,7 @@ describe("POST /api/chat", () => { }); it("returns 400 when sessionKey is missing", async () => { - const request = new Request("http://localhost/api/chat", { + const request = new NextRequest("http://localhost/api/chat", { method: "POST", body: JSON.stringify({ messages: [{ role: "user", content: "Hello" }] }), }); @@ -38,7 +53,7 @@ describe("POST /api/chat", () => { }); it("returns 400 when messages is missing", async () => { - const request = new Request("http://localhost/api/chat", { + const request = new NextRequest("http://localhost/api/chat", { method: "POST", body: JSON.stringify({ sessionKey: "test-session" }), }); @@ -51,7 +66,7 @@ describe("POST /api/chat", () => { }); it("returns stream response with valid request", async () => { - const request = new Request("http://localhost/api/chat", { + const request = new NextRequest("http://localhost/api/chat", { method: "POST", body: JSON.stringify({ sessionKey: "test-session", @@ -64,7 +79,7 @@ describe("POST /api/chat", () => { }); it("returns 500 on invalid JSON", async () => { - const request = new Request("http://localhost/api/chat", { + const request = new NextRequest("http://localhost/api/chat", { method: "POST", body: "invalid json", }); diff --git a/apps/web/src/app/api/chat/route.ts b/apps/web/src/app/api/chat/route.ts index 2c15835..31d0ee0 100644 --- a/apps/web/src/app/api/chat/route.ts +++ b/apps/web/src/app/api/chat/route.ts @@ -1,5 +1,7 @@ +import type { NextRequest } from "next/server"; import { createOpenAI } from "@ai-sdk/openai"; import { streamText } from "ai"; +import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; export const runtime = "nodejs"; @@ -7,10 +9,13 @@ export const dynamic = "force-dynamic"; /** * POST /api/chat - * Proxy chat requests to the squadhub's OpenAI-compatible endpoint. + * Proxy chat requests to the tenant's squadhub OpenAI-compatible endpoint. */ -export async function POST(request: Request) { +export async function POST(request: NextRequest) { try { + const auth = await getAuthenticatedTenant(request); + if (auth.error) return auth.error; + const body = await request.json(); const { messages, sessionKey } = body; @@ -28,7 +33,7 @@ export async function POST(request: Request) { }); } - const { squadhubUrl, squadhubToken } = getConnection(); + const { squadhubUrl, squadhubToken } = getConnection(auth.tenant); // Create OpenAI-compatible client pointing to squadhub gateway const squadhub = createOpenAI({ diff --git a/apps/web/src/app/api/squadhub/health/route.ts b/apps/web/src/app/api/squadhub/health/route.ts index 2c81c12..58d02e6 100644 --- a/apps/web/src/app/api/squadhub/health/route.ts +++ b/apps/web/src/app/api/squadhub/health/route.ts @@ -1,8 +1,13 @@ import { NextResponse } from "next/server"; +import type { NextRequest } from "next/server"; import { checkHealth } from "@clawe/shared/squadhub"; +import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; -export async function POST() { - const result = await checkHealth(getConnection()); +export async function POST(request: NextRequest) { + const auth = await getAuthenticatedTenant(request); + if (auth.error) return auth.error; + + const result = await checkHealth(getConnection(auth.tenant)); return NextResponse.json(result); } diff --git a/apps/web/src/app/api/squadhub/pairing/route.ts b/apps/web/src/app/api/squadhub/pairing/route.ts index f6dc601..dfb6e43 100644 --- a/apps/web/src/app/api/squadhub/pairing/route.ts +++ b/apps/web/src/app/api/squadhub/pairing/route.ts @@ -1,14 +1,18 @@ import { NextResponse } from "next/server"; +import type { NextRequest } from "next/server"; import { listChannelPairingRequests, approveChannelPairingCode, } from "@clawe/shared/squadhub"; +import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; // GET /api/squadhub/pairing?channel=telegram - List pending pairing requests -export async function GET(request: Request) { - const { searchParams } = new URL(request.url); - const channel = searchParams.get("channel") || "telegram"; +export async function GET(request: NextRequest) { + const auth = await getAuthenticatedTenant(request); + if (auth.error) return auth.error; + + const channel = request.nextUrl.searchParams.get("channel") || "telegram"; const result = await listChannelPairingRequests(channel); @@ -20,7 +24,10 @@ export async function GET(request: Request) { } // POST /api/squadhub/pairing - Approve a pairing code -export async function POST(request: Request) { +export async function POST(request: NextRequest) { + const auth = await getAuthenticatedTenant(request); + if (auth.error) return auth.error; + try { const body = await request.json(); const { channel = "telegram", code } = body as { @@ -33,7 +40,7 @@ export async function POST(request: Request) { } const result = await approveChannelPairingCode( - getConnection(), + getConnection(auth.tenant), channel, code, ); diff --git a/apps/web/src/app/api/tenant/squadhub/restart/route.ts b/apps/web/src/app/api/tenant/squadhub/restart/route.ts index 8712be8..926c2f4 100644 --- a/apps/web/src/app/api/tenant/squadhub/restart/route.ts +++ b/apps/web/src/app/api/tenant/squadhub/restart/route.ts @@ -10,10 +10,8 @@ import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; * Dev: no-op. Cloud: forces new ECS task deployment. */ export const POST = async (request: NextRequest) => { - const result = await getAuthenticatedTenant(request); - if ("error" in result) return result.error; - - const { tenant } = result; + const { error, tenant } = await getAuthenticatedTenant(request); + if (error) return error; try { await loadPlugins(); diff --git a/apps/web/src/app/api/tenant/squadhub/route.ts b/apps/web/src/app/api/tenant/squadhub/route.ts index c893085..6e9789b 100644 --- a/apps/web/src/app/api/tenant/squadhub/route.ts +++ b/apps/web/src/app/api/tenant/squadhub/route.ts @@ -10,10 +10,8 @@ import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; * Dev: no-op. Cloud: deletes ECS service + EFS access point + CloudMap entry. */ export const DELETE = async (request: NextRequest) => { - const result = await getAuthenticatedTenant(request); - if ("error" in result) return result.error; - - const { tenant } = result; + const { error, tenant } = await getAuthenticatedTenant(request); + if (error) return error; try { await loadPlugins(); diff --git a/apps/web/src/app/api/tenant/squadhub/status/route.ts b/apps/web/src/app/api/tenant/squadhub/status/route.ts index c7cead3..fca4887 100644 --- a/apps/web/src/app/api/tenant/squadhub/status/route.ts +++ b/apps/web/src/app/api/tenant/squadhub/status/route.ts @@ -11,10 +11,8 @@ import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; * Cloud: checks ECS service running count + task health. */ export const GET = async (request: NextRequest) => { - const result = await getAuthenticatedTenant(request); - if ("error" in result) return result.error; - - const { tenant } = result; + const { error, tenant } = await getAuthenticatedTenant(request); + if (error) return error; try { await loadPlugins(); diff --git a/apps/web/src/app/api/tenant/status/route.ts b/apps/web/src/app/api/tenant/status/route.ts new file mode 100644 index 0000000..7c3f1fd --- /dev/null +++ b/apps/web/src/app/api/tenant/status/route.ts @@ -0,0 +1,27 @@ +import { NextResponse } from "next/server"; +import type { NextRequest } from "next/server"; +import { loadPlugins, getPlugin } from "@clawe/plugins"; +import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; + +/** + * GET /api/tenant/status + * + * Check provisioning status for the current user's tenant. + * Dev: always returns { status: "active" }. + * Cloud: returns real ECS provisioning status. + */ +export const GET = async (request: NextRequest) => { + const { error, tenant } = await getAuthenticatedTenant(request); + if (error) return error; + + try { + await loadPlugins(); + const provisioner = getPlugin("squadhub-provisioner"); + const status = await provisioner.getProvisioningStatus(tenant._id); + + return NextResponse.json(status); + } catch (error) { + const message = error instanceof Error ? error.message : "Unknown error"; + return NextResponse.json({ error: message }, { status: 500 }); + } +}; diff --git a/apps/web/src/app/setup/provisioning/page.tsx b/apps/web/src/app/setup/provisioning/page.tsx index 8029654..a192517 100644 --- a/apps/web/src/app/setup/provisioning/page.tsx +++ b/apps/web/src/app/setup/provisioning/page.tsx @@ -10,11 +10,15 @@ import { Spinner } from "@clawe/ui/components/spinner"; import { useAuth } from "@/providers/auth-provider"; import { useApiClient } from "@/hooks/use-api-client"; +const POLL_INTERVAL_MS = 3000; +const DEFAULT_MESSAGE = "Setting up your workspace..."; + export default function ProvisioningPage() { const router = useRouter(); const { isAuthenticated } = useAuth(); const apiClient = useApiClient(); const [error, setError] = useState(null); + const [progressMessage, setProgressMessage] = useState(DEFAULT_MESSAGE); const provisioningRef = useRef(false); const tenant = useQuery( @@ -39,8 +43,38 @@ export default function ProvisioningPage() { } }, [tenant?.status, isOnboardingComplete, router]); + // Poll provisioning status for progress messages + useEffect(() => { + if (tenant?.status !== "provisioning") return; + + const poll = async () => { + try { + const { data } = await apiClient.get<{ + status: "provisioning" | "active" | "error"; + message?: string; + }>("/api/tenant/status"); + + if (data.message) { + setProgressMessage(data.message); + } + + if (data.status === "error") { + setError(data.message ?? "Provisioning failed"); + provisioningRef.current = false; + } + } catch { + // Polling errors are non-fatal — Convex subscription handles redirect + } + }; + + poll(); + const interval = setInterval(poll, POLL_INTERVAL_MS); + return () => clearInterval(interval); + }, [tenant?.status, apiClient]); + const provision = useCallback(async () => { setError(null); + setProgressMessage(DEFAULT_MESSAGE); try { await apiClient.post("/api/tenant/provision"); // Convex subscription will reactively update `tenant` → redirect fires @@ -87,7 +121,7 @@ export default function ProvisioningPage() {
-

Setting up your workspace...

+

{progressMessage}

This will only take a moment.

diff --git a/apps/web/src/lib/api/tenant-auth.ts b/apps/web/src/lib/api/tenant-auth.ts index 7e0bd7b..724cfd0 100644 --- a/apps/web/src/lib/api/tenant-auth.ts +++ b/apps/web/src/lib/api/tenant-auth.ts @@ -2,13 +2,20 @@ import { NextResponse } from "next/server"; import type { NextRequest } from "next/server"; import { ConvexHttpClient } from "convex/browser"; import { api } from "@clawe/backend"; +import type { Tenant } from "@clawe/backend/types"; + +export type AuthResult = + | { error: NextResponse; convex: null; tenant: null } + | { error: null; convex: ConvexHttpClient; tenant: Tenant }; /** * Extract the auth token and resolve the tenant for the current user. * Returns a Convex client (with auth set) and the tenant record, * or a NextResponse error if auth/tenant resolution fails. */ -export async function getAuthenticatedTenant(request: NextRequest) { +export async function getAuthenticatedTenant( + request: NextRequest, +): Promise { const convexUrl = process.env.NEXT_PUBLIC_CONVEX_URL; if (!convexUrl) { return { @@ -16,6 +23,8 @@ export async function getAuthenticatedTenant(request: NextRequest) { { error: "NEXT_PUBLIC_CONVEX_URL not configured" }, { status: 500 }, ), + convex: null, + tenant: null, }; } @@ -30,6 +39,8 @@ export async function getAuthenticatedTenant(request: NextRequest) { { error: "Missing Authorization header" }, { status: 401 }, ), + convex: null, + tenant: null, }; } @@ -44,8 +55,10 @@ export async function getAuthenticatedTenant(request: NextRequest) { { error: "No tenant found for current user" }, { status: 404 }, ), + convex: null, + tenant: null, }; } - return { convex, tenant }; + return { error: null, convex, tenant }; } diff --git a/apps/web/src/lib/squadhub/connection.ts b/apps/web/src/lib/squadhub/connection.ts index 4b7f642..f7373cd 100644 --- a/apps/web/src/lib/squadhub/connection.ts +++ b/apps/web/src/lib/squadhub/connection.ts @@ -1,8 +1,16 @@ import type { SquadhubConnection } from "@clawe/shared/squadhub"; +import type { Tenant } from "@clawe/backend/types"; -export function getConnection(): SquadhubConnection { +/** + * Get the squadhub connection for a tenant. + * If a tenant with squadhubUrl/squadhubToken is provided, uses those. + * Otherwise falls back to env vars (self-hosted / dev). + */ +export function getConnection(tenant?: Tenant | null): SquadhubConnection { return { - squadhubUrl: process.env.SQUADHUB_URL || "http://localhost:18790", - squadhubToken: process.env.SQUADHUB_TOKEN || "", + squadhubUrl: + tenant?.squadhubUrl || process.env.SQUADHUB_URL || "http://localhost:18790", + squadhubToken: + tenant?.squadhubToken || process.env.SQUADHUB_TOKEN || "", }; } diff --git a/packages/backend/convex/lib/auth.ts b/packages/backend/convex/lib/auth.ts index a2532dc..c4076f6 100644 --- a/packages/backend/convex/lib/auth.ts +++ b/packages/backend/convex/lib/auth.ts @@ -1,5 +1,6 @@ -import type { Doc, Id } from "../_generated/dataModel"; +import type { Id } from "../_generated/dataModel"; import type { MutationCtx, QueryCtx } from "../_generated/server"; +import type { User, Account } from "../types"; type ReadCtx = { db: QueryCtx["db"]; auth: QueryCtx["auth"] }; type WriteCtx = { db: MutationCtx["db"] }; @@ -135,8 +136,8 @@ export async function resolveTenantId( */ export async function ensureAccountForUser( ctx: ReadCtx & WriteCtx, - user: Doc<"users">, -): Promise> { + user: User, +): Promise { const membership = await ctx.db .query("accountMembers") .withIndex("by_user", (q) => q.eq("userId", user._id)) diff --git a/packages/backend/convex/lib/helpers.ts b/packages/backend/convex/lib/helpers.ts index 33a29aa..0b4a4ac 100644 --- a/packages/backend/convex/lib/helpers.ts +++ b/packages/backend/convex/lib/helpers.ts @@ -1,11 +1,12 @@ import type { QueryCtx } from "../_generated/server"; -import type { Doc, Id } from "../_generated/dataModel"; +import type { Id } from "../_generated/dataModel"; +import type { Agent } from "../types"; export async function getAgentBySessionKey( ctx: { db: QueryCtx["db"] }, tenantId: Id<"tenants">, sessionKey: string, -): Promise | null> { +): Promise { return await ctx.db .query("agents") .withIndex("by_tenant_sessionKey", (q) => diff --git a/packages/backend/convex/types.ts b/packages/backend/convex/types.ts index bf5ef13..ab28d1a 100644 --- a/packages/backend/convex/types.ts +++ b/packages/backend/convex/types.ts @@ -8,6 +8,23 @@ import type { Doc, Id } from "./_generated/dataModel"; +// ============================================================================= +// User & Account Types +// ============================================================================= + +/** User document */ +export type User = Doc<"users">; + +/** Account document */ +export type Account = Doc<"accounts">; + +// ============================================================================= +// Tenant Types +// ============================================================================= + +/** Tenant document */ +export type Tenant = Doc<"tenants">; + // ============================================================================= // Agent Types // ============================================================================= diff --git a/steps.md b/steps.md new file mode 100644 index 0000000..65c9a38 --- /dev/null +++ b/steps.md @@ -0,0 +1,1546 @@ +# Clawe Cloud Deployment Plan + +## Goal + +Deploy Clawe as a multi-tenant SaaS on AWS, where each user signs up, provides their Anthropic API key, and gets their own squadhub service. All infrastructure managed via CDK with full dev/prod environment separation. + +## Architecture Overview + +``` + Users (browser) + │ + ▼ + ┌──────────┐ ┌────────────┐ ┌──────────┐ + │ Route 53 │────▶│ CloudFront │────▶│ ALB │ + └──────────┘ └────────────┘ └────┬─────┘ + │ + ┌─────────────▼──────────────┐ + │ clawe-cluster-{env} │ + │ ┌────────────────────┐ │ + │ │ Web App (Fargate) │ │ + │ │ Shared, all tenants│ │ + │ └───┬────────────────┘ │ + │ │ │ + │ ┌───▼────────────────┐ │ + │ │ Watcher (Fargate) │ │ + │ │ Shared, polls all │ │ + │ └───┬────────────────┘ │ + └──────┼──────────────────────┘ + │ + ┌────────────────┐│┌────────────────┐ + │ │││ │ + ▼ ▼│▼ ▼ + ┌───────────┐ ┌─────▼─────────────────────┐ + │ Convex │ │ clawe-squadhubs-{env} │ + │ (Managed) │ │ ┌────────┐ ┌────────┐ │ + │ Multi- │ │ │SH 1 │ │SH 2 │... │ + │ tenant │ │ │Tenant A│ │Tenant B│ │ + └───────────┘ │ └───┬────┘ └───┬────┘ │ + └──────┼──────────┼─────────┘ + │ │ + ┌───────────────▼──────────▼──┐ + │ EFS │ + │ ┌──────────┐ ┌──────────┐ │ + │ │/tenant-A │ │/tenant-B │ │ + │ └──────────┘ └──────────┘ │ + └─────────────────────────────┘ + + Routing: Web App/Watcher → CloudMap DNS → Squadhub private IP + Auth: Browser: Google → Cognito JWT → user record → accountMembers → account → tenantId + Machine: SQUADHUB_TOKEN → tenant record → tenantId (CLI/watcher) + Images: ECR (shared across envs, tagged per env) +``` + +## Environment Strategy + +All CDK stacks are parameterized by environment (`dev` | `prod`): + +```bash +cdk deploy --context env=dev # Deploys to dev +cdk deploy --context env=prod # Deploys to prod +``` + +Each environment gets completely isolated resources: + +| Resource | Dev | Prod | +| ----------------------- | ----------------------------------------- | ------------------------------------------ | +| Domain | app-dev.clawe.io | app.clawe.io | +| AUTH_PROVIDER | `cognito` | `cognito` | +| Cognito User Pool | clawe-userpool-dev | clawe-userpool-prod | +| Cognito Domain | clawe-dev.auth.{region}.amazoncognito.com | clawe-prod.auth.{region}.amazoncognito.com | +| Sign-in method | Google (social login only) | Google (social login only) | +| ECS Cluster (shared) | clawe-cluster-dev | clawe-cluster-prod | +| ECS Cluster (squadhubs) | clawe-squadhubs-dev | clawe-squadhubs-prod | +| VPC | clawe-vpc-dev | clawe-vpc-prod | +| ALB | clawe-alb-dev | clawe-alb-prod | +| ECR Repos | Shared across envs | Same images, different tags | +| Convex Deployment | Separate dev deployment | Separate prod deployment | +| CloudMap Namespace | clawe-internal-dev | clawe-internal-prod | + +--- + +## Phase 1: Shared Client & Watcher Refactor + +Code-only changes. No infrastructure needed. Makes the codebase multi-tenant ready. + +### 1.1 Refactor shared squadhub client + +Update `packages/shared/src/squadhub/client.ts`: + +- [x] All exported functions take `SquadhubConnection` (`{ squadhubUrl, squadhubToken }`) as first parameter +- [x] Remove the lazy singleton `getSquadhubClient()` — create client per call +- [x] This makes the client stateless and multi-tenant safe +- [x] Apply same pattern to all functions: `saveTelegramBotToken`, `getConfig`, `patchConfig`, `listSessions`, `sessionsSend`, `sendMessage`, `cronList`, `cronAdd` +- [x] Update `pairing.ts` (`approveChannelPairingCode` takes connection param) +- [x] Export `SquadhubConnection` type from `index.ts` +- [x] Update all tests in `client.spec.ts` + +### 1.2 Refactor shared gateway client + +Update `packages/shared/src/squadhub/gateway-client.ts`: + +- [x] `GatewayClient` constructor accepts `url` and `token` as parameters (verified, no env var fallbacks) +- [x] `createGatewayClient()` requires `SquadhubConnection` as first param (no env var fallback) +- [x] `getSharedClient()` in `shared-client.ts` requires `SquadhubConnection` as first param +- [x] Web app callers pass connection explicitly + +### 1.3 Update web app server actions + +Update `apps/web/src/lib/squadhub/actions.ts`: + +- [x] Each action reads `squadhubUrl` and `squadhubToken` from env (for now — will become per-tenant later) +- [x] Passes them explicitly to shared client functions +- [x] This is the adapter layer: currently reads env, later resolves from tenant record + +### 1.4 Update web app API routes + +Update all API routes that use the squadhub client: + +- [x] `/api/squadhub/health` — pass `squadhubUrl`/`squadhubToken` explicitly +- [x] `/api/squadhub/pairing` — same +- [x] `/api/chat` — reads env at call site via `getSquadhubConfig()` +- [x] `/api/chat/history` — passes connection to `getSharedClient()` +- [x] `/api/chat/abort` — passes connection to `getSharedClient()` +- [x] `/api/business/context` — moved env reads to `getEnvConfig()` called at request time + +### 1.5 Refactor watcher — tenant-aware polling loop + +Refactor `apps/watcher/src/index.ts`: + +- [x] Watcher passes `SquadhubConnection` to all shared client calls (`sessionsSend`, `cronList`, `cronAdd`) +- [x] Connection created from existing `config.squadhubUrl`/`config.squadhubToken` +- [x] Multi-tenant polling loop (`getActiveTenants()` → iterate over tenants in delivery and cron setup) +- [x] `deliverToAgent` and `setupCrons` accept `SquadhubConnection` param +- [x] `deliveryLoop` iterates over tenants, passes tenant connection to each squadhub delivery +- [x] Backwards compatibility: `getActiveTenants()` returns single tenant from env vars +- [x] System-level auth with `WATCHER_TOKEN` → implemented in 2.7 (`validateWatcherToken` + `tenants.listActive`) +- [x] Tenant-scoped Convex calls with per-tenant `machineToken` → implemented in 2.6 (added `machineToken` to all queries) + +Current loop: + +``` +every 2s: poll notifications → deliver to SQUADHUB_URL +every 10s: check routines → trigger via SQUADHUB_URL +``` + +New loop: + +``` +every 2s: + 1. Query Convex for all active tenants (tenants.listActive, authenticated with WATCHER_TOKEN) + 2. For each tenant: + a. Query undelivered notifications using tenant's squadhubToken as machineToken + b. Deliver to tenant's squadhubUrl +every 10s: + 1. Query Convex for all active tenants (same WATCHER_TOKEN) + 2. For each tenant: + a. Check due routines using tenant's squadhubToken as machineToken + b. Trigger via tenant's squadhubUrl +``` + +Authentication: + +- System-level calls (`tenants.listActive`) use `WATCHER_TOKEN` env var +- Tenant-scoped calls (`notifications.getUndelivered`, `routines`) use each tenant's `squadhubToken` as `machineToken` (returned by `listActive`) + +For backwards compatibility during development: if no tenants table exists yet, fall back to env-based single-tenant mode. + +### 1.6 Move startup logic from watcher to provisioning route + +- [x] Remove `registerAgents()` from watcher → moved to `apps/web/src/lib/squadhub/setup.ts` +- [x] Remove `setupCrons()` from watcher → moved to `apps/web/src/lib/squadhub/setup.ts` +- [x] Remove `seedRoutines()` from watcher → moved to `apps/web/src/lib/squadhub/setup.ts` +- [x] Create `POST /api/tenant/provision` route that runs all setup (agents, crons, routines) +- [x] Watcher is now purely a polling/delivery service (no startup side effects) + +### 1.7 Update CLI for multi-tenancy + +The CLI (`packages/cli`) communicates with Convex directly via `ConvexHttpClient`. It needs to authenticate as a machine caller using the per-tenant `SQUADHUB_TOKEN`. + +- [x] CLI reads `SQUADHUB_TOKEN` from env and exports as `machineToken` +- [x] Update `packages/cli/src/client.ts` — added `query`/`mutation`/`action` wrapper functions (pass-through for now; Phase 2.6 will inject `machineToken`) +- [x] Update all 17 command handlers in `packages/cli/src/commands/` to use wrappers instead of `client.query()`/`client.mutation()` +- [x] Update all 16 test files to match new mock pattern + +### 1.8 Rebuild and test + +- [x] Rebuild shared package: `pnpm --filter @clawe/shared build` +- [x] Rebuild CLI: `pnpm --filter @clawe/cli build` +- [x] Run all tests: `pnpm test` — 155 tests passing (shared 21, cli 64, watcher 5, web 65) +- [x] Verify local docker compose still works (single-tenant mode with env vars) + +--- + +## Phase 2: Multi-Tenancy in Convex + +### 2.1 Add users table + +- [x] Add to `packages/backend/convex/schema.ts`: + +```typescript +users: defineTable({ + email: v.string(), // From Cognito JWT (immutable, unique) + name: v.optional(v.string()), // Display name + createdAt: v.number(), + updatedAt: v.number(), +}) + .index("by_email", ["email"]), +``` + +Cognito is only the authentication layer. The `users` table in Convex is the source of truth for user data. On first login, a user record is created from the Cognito JWT email. + +### 2.2 Add accounts + tenants tables + +- [x] Add `accounts` table to `packages/backend/convex/schema.ts`: + +```typescript +accounts: defineTable({ + name: v.optional(v.string()), // "Guy's Account" + createdAt: v.number(), + updatedAt: v.number(), +}), +``` + +- [x] Add `tenants` table with `accountId` (tenant belongs to an account, not a user): + +```typescript +tenants: defineTable({ + accountId: v.id("accounts"), // Account that owns this tenant + status: v.union( + v.literal("provisioning"), // Squadhub being created + v.literal("active"), // Squadhub running + v.literal("stopped"), // Squadhub stopped (future: sleep mode) + v.literal("error"), // Provisioning failed + ), + squadhubUrl: v.optional(v.string()), // Internal squadhub URL (CloudMap DNS) + squadhubToken: v.optional(v.string()), // Per-tenant squadhub token + squadhubServiceArn: v.optional(v.string()), // ECS Service ARN + efsAccessPointId: v.optional(v.string()), // EFS access point + anthropicApiKey: v.optional(v.string()), // User's Anthropic API key + settings: v.optional(v.object({ + timezone: v.optional(v.string()), + })), + createdAt: v.number(), + updatedAt: v.number(), +}) + .index("by_account", ["accountId"]) + .index("by_status", ["status"]) + .index("by_squadhubToken", ["squadhubToken"]), +``` + +### 2.3 Add accountMembers join table + +- [x] Add to `packages/backend/convex/schema.ts`: + +```typescript +accountMembers: defineTable({ + userId: v.id("users"), + accountId: v.id("accounts"), + role: v.union(v.literal("owner"), v.literal("member")), + createdAt: v.number(), +}) + .index("by_user", ["userId"]) + .index("by_account", ["accountId"]) + .index("by_user_account", ["userId", "accountId"]), +``` + +Access model: `user → accountMembers → account → tenants` + +- A user belongs to an account via `accountMembers` +- An account can own multiple tenants (future: plan-based limits) +- A tenant belongs to exactly one account +- If you're a member of an account, you can access all its tenants + +### 2.4 Add tenantId to all existing tables + +Add `tenantId: v.id("tenants")` field to every table: + +- [x] `agents` — add field + index `by_tenant: ["tenantId"]` +- [x] `tasks` — add field + index `by_tenant: ["tenantId"]` +- [x] `messages` — add field + index `by_tenant: ["tenantId"]` +- [x] `notifications` — add field + index `by_tenant: ["tenantId"]` +- [x] `activities` — add field + index `by_tenant: ["tenantId"]` +- [x] `documents` — add field + index `by_tenant: ["tenantId"]` +- [x] `businessContext` — add field + index `by_tenant: ["tenantId"]` +- [x] `channels` — add field + compound index `by_tenant_type: ["tenantId", "type"]` +- [x] `routines` — add field + index `by_tenant: ["tenantId"]` + +### 2.5 Create user and tenant resolution helpers + +Create `packages/backend/convex/lib/auth.ts`: + +Two auth paths — browser (JWT) and machine (token): + +**Browser path (web app):** + +- [x] Helper function `getUser(ctx)` — gets auth identity, extracts email, looks up `users` table by email +- [x] Helper function `getTenantIdFromJwt(ctx)` — calls `getUser(ctx)`, queries `accountMembers` by user → account → `tenants` by account, returns first tenant's `_id` + +**Machine path — per-tenant (CLI):** + +- [x] Helper function `getTenantIdFromToken(ctx, machineToken)` — queries `tenants` by `by_squadhubToken` index, returns tenant `_id` + +**Machine path — system-level (watcher):** + +- [x] Helper function `validateWatcherToken(ctx, watcherToken)` — reads `WATCHER_TOKEN` from Convex env vars, compares with provided token +- [x] Wire into watcher's `getActiveTenants()` (done in 2.7) + +**Unified resolver (for tenant-scoped functions):** + +- [x] Helper function `resolveTenantId(ctx, args)` + `resolveTenantIdMut(ctx, args)`: + 1. If `args.machineToken` is provided → `getTenantIdFromToken(ctx, args.machineToken)` + 2. If `AUTH_ENABLED=true` → `getTenantIdFromJwt(ctx)` (requires valid JWT) + 3. If `AUTH_ENABLED=false` → `getOrCreateDevTenant(ctx)` (auto-creates a default dev tenant, mutation variant only) + +Create `packages/backend/convex/users.ts`: + +- [x] `getOrCreateFromAuth(ctx)` — called on every login, creates user record if needed +- [x] `get()` — get current authenticated user +- [x] `update(name)` — update profile + +### 2.6 Update all Convex queries and mutations + +Every query/mutation needs to accept optional `machineToken` arg, call `resolveTenantId(ctx, args)`, filter reads by `tenantId`, include `tenantId` in writes. + +Example — `agents.list`: + +```typescript +// Before: +export const list = query({ + args: {}, + handler: async (ctx) => { + return await ctx.db.query("agents").collect(); + }, +}); + +// After: +export const list = query({ + args: { machineToken: v.optional(v.string()) }, + handler: async (ctx, args) => { + const tenantId = await resolveTenantId(ctx, args); + return await ctx.db + .query("agents") + .withIndex("by_tenant", (q) => q.eq("tenantId", tenantId)) + .collect(); + }, +}); +``` + +- Browser calls (web app): `useQuery(api.agents.list)` — no `machineToken`, uses JWT +- Machine calls (CLI): `client.query(api.agents.list, { machineToken })` — uses token + +Files to update: + +- [x] `convex/agents.ts` — all functions +- [x] `convex/tasks.ts` — all functions +- [x] `convex/messages.ts` — all functions +- [x] `convex/notifications.ts` — all functions +- [x] `convex/activities.ts` — all functions +- [x] `convex/documents.ts` — all functions +- [x] Remove `convex/settings.ts` — migrated `timezone` to `tenants.settings` and `onboardingComplete` to `accounts.onboardingComplete`, dropped `settings` table from schema. Created `convex/tenants.ts` with `getTimezone`, `setTimezone`. Created `convex/accounts.ts` with `isOnboardingComplete`, `completeOnboarding`. Updated callers: + - `apps/web/src/app/(dashboard)/settings/general/_components/timezone-settings.tsx` → `api.tenants.getTimezone` / `api.tenants.setTimezone` + - `apps/web/src/hooks/use-onboarding-guard.ts` → `api.accounts.isOnboardingComplete` + - `apps/web/src/app/page.tsx` → `api.accounts.isOnboardingComplete` + - `apps/web/src/app/setup/complete/page.tsx` → `api.accounts.completeOnboarding` + - `apps/web/src/app/setup/provisioning/page.tsx` → `api.accounts.isOnboardingComplete` + - `apps/watcher/src/index.ts` → `api.tenants.getTimezone` (with `machineToken`) +- [x] `convex/businessContext.ts` — all functions +- [x] `convex/channels.ts` — all functions +- [x] `convex/routines.ts` — all functions +- [x] Update watcher to pass `machineToken` in all Convex calls (`agents.list`, `notifications.getUndelivered`, `notifications.markDelivered`, `routines.getDueRoutines`, `routines.trigger`, `tenants.getTimezone`) + +### 2.7 Create tenant management functions + +Create `packages/backend/convex/tenants.ts`: + +- [x] `create(accountId)` — creates tenant with `accountId`, status "provisioning" +- [x] `getForCurrentUser()` — gets tenant for authenticated user: + 1. Call `getUser(ctx)` to get user record + 2. Query `accountMembers.by_user` to find account membership + 3. Query `tenants.by_account` to find tenant in that account + 4. Return tenant +- [x] `updateStatus(tenantId, status, squadhubUrl?, serviceArn?)` — update provisioning state +- [x] `setAnthropicKey(apiKey)` — store API key in tenant record +- [x] `listActive(watcherToken)` — get all active tenants with squadhubUrl + squadhubToken (requires valid `WATCHER_TOKEN`) +- [x] Wire watcher's `getActiveTenants()` to call `tenants.listActive` instead of env fallback (deferred from 1.5 → 2.5 → here) +- [x] Wire `validateWatcherToken` into `listActive` (from 2.5) + +--- + +## Phase 3: Authentication (Always-On Auth) + +Auth is **always on**. Two providers, one interface: + +- **Cloud**: Cognito (Google social login) — JWKS validated by Convex +- **Local / self-hosted**: NextAuth with Credentials provider — auto-login, zero config + +The `AUTH_PROVIDER` env var (`nextauth` | `cognito`) controls which provider is active. Both paths produce valid JWTs with the same claims shape. All `AUTH_ENABLED` conditionals have been eliminated. + +### 3.1 Convex Auth integration + +- [x] `packages/backend/convex/auth.config.nextauth.ts` — NextAuth `customJwt` provider (JWKS URL) +- [x] `packages/backend/convex/auth.config.cognito.ts` — Cognito provider (kept from before) +- [x] `packages/backend/convex/auth.config.ts` — Committed as NextAuth version (default for local dev). Deploy script overwrites for cloud. +- [x] `packages/backend/convex/dev-jwks/` — Committed dev RSA key pair + JWKS JSON for local JWT signing +- [x] `packages/backend/convex/lib/auth.ts` — Simplified: removed `AUTH_ENABLED` checks, dev-tenant logic, `resolveTenantIdMut`. Single `resolveTenantId` works for both queries and mutations. +- [x] `scripts/convex-deploy.sh` — Updated: uses `AUTH_PROVIDER` instead of `AUTH_ENABLED` + +### 3.2 Web app — Auth provider and login page + +Dual-provider architecture selected by `NEXT_PUBLIC_AUTH_PROVIDER`: + +- [x] `apps/web/src/lib/auth/nextauth-config.ts` — NextAuth v5 config with Credentials + RS256 JWT signing +- [x] `apps/web/src/app/api/auth/[...nextauth]/route.ts` — NextAuth API route handler +- [x] `apps/web/src/app/api/auth/jwks/route.ts` — JWKS endpoint for Convex to validate NextAuth JWTs +- [x] `apps/web/src/providers/auth-provider.tsx` — Dual provider: NextAuth (SessionProvider) or Cognito (Amplify, dynamic import). Shared `AuthContextValue` interface. `useConvexAuth` hook for ConvexProviderWithAuth. +- [x] `apps/web/src/providers/convex-provider.tsx` — Always uses `ConvexProviderWithAuth` (no conditional) +- [x] `apps/web/src/app/auth/login/page.tsx` — Email form (NextAuth, auto-login for local dev) or Google button (Cognito) +- [x] `apps/web/src/hooks/use-user-menu.ts` — Simplified: always reads from `useAuth()`, no `AUTH_ENABLED` branches +- [x] `apps/web/src/hooks/use-onboarding-guard.ts` — Simplified: always checks auth, no `AUTH_ENABLED` branches +- [x] `.env.example` + `apps/web/.env.*` — Updated for `AUTH_PROVIDER`, `NEXTAUTH_SECRET`, `NEXTAUTH_URL` + +### 3.3 Web app — Auth middleware + +- [x] Protect all routes except `/auth/login` and `/api/health` — `apps/web/src/middleware.ts` checks session cookie, allows `PUBLIC_PATHS` +- [x] Redirect unauthenticated users to `/auth/login` — middleware redirects when no `authjs.session-token` cookie +- [x] Pass tenant context to downstream routes — tenant resolved in Convex via JWT email → user → accountMembers → tenant + +### 3.4 Web app — Auth UI components + +- [x] User menu in dashboard sidebar (email, logout) — wired `signOut` from `useAuth` +- [x] Onboarding guard hook checks auth state — redirects to `/auth/login` when not authenticated + +--- + +## Phase 4: Dev & Self-Hosted Provisioning + +**Goal**: After this phase, local dev and self-hosted deployments work end-to-end. A user can sign in, get auto-provisioned, go through onboarding, use the dashboard, and the watcher operates correctly — all without any cloud infrastructure. + +Everything remaining after this phase is cloud-only (CDK, AWS, Cognito, multi-tenant squadhub infrastructure). This clean boundary allows a separate `clawe-cloud` repo to extend this codebase with cloud-specific plugins without modifying the base code. + +### 4.1 Plugin architecture (`packages/plugins/`) + +Create a `@clawe/plugins` package that provides interfaces for cloud-extensible behavior and a plugin registry. The base repo contains interfaces + dev defaults. The `clawe-cloud` repo adds `packages/cloud-plugins/` that implements these interfaces with AWS. + +**Package structure:** + +``` +packages/plugins/ +├── src/ +│ ├── index.ts # Main exports: loadPlugins, hasPlugin, getPlugin, interfaces +│ ├── registry.ts # Plugin registry internals +│ ├── interfaces/ +│ │ ├── index.ts # Re-exports all interfaces +│ │ ├── squadhub-provisioner.ts # SquadhubProvisioner interface +│ │ └── squadhub-lifecycle.ts # SquadhubLifecycle interface +│ └── defaults/ +│ ├── index.ts # Default implementations +│ ├── squadhub-provisioner.ts # DefaultSquadhubProvisioner (reads env vars) +│ └── squadhub-lifecycle.ts # DefaultSquadhubLifecycle (no-ops) +├── package.json +└── tsconfig.json +``` + +**Plugin interfaces:** + +```typescript +// interfaces/squadhub-provisioner.ts +export interface SquadhubProvisioner { + /** + * Create infrastructure for a new tenant. + * Dev: reads SQUADHUB_URL + SQUADHUB_TOKEN from env, returns immediately. + * Cloud: creates ECS service + EFS access point + CloudMap entry, waits for health. + */ + provision(params: { + tenantId: string; + accountId: string; + anthropicApiKey?: string; + convexUrl: string; + }): Promise<{ + squadhubUrl: string; + squadhubToken: string; + metadata?: Record; // Cloud: ECS ARN, EFS AP ID, etc. + }>; + + /** + * Check provisioning progress (for polling UI). + * Dev: always returns "active" (instant). + * Cloud: returns "provisioning" while ECS service is starting. + */ + getProvisioningStatus(tenantId: string): Promise<{ + status: "provisioning" | "active" | "error"; + message?: string; + }>; + + /** + * Tear down all infrastructure for a tenant. + * Dev: no-op. + * Cloud: deletes ECS service + EFS access point + CloudMap entry. + */ + deprovision(params: DeprovisionParams): Promise; +} + +// interfaces/squadhub-lifecycle.ts +export interface SquadhubLifecycle { + /** + * Restart the tenant's squadhub service (e.g., after API key change). + * Dev: no-op (user manually restarts docker). + * Cloud: forces new ECS task deployment. + */ + restart(tenantId: string): Promise; + + /** + * Stop the tenant's squadhub service. + * Dev: no-op. + * Cloud: sets ECS desiredCount to 0. + */ + stop(tenantId: string): Promise; + + /** + * Destroy tenant's squadhub resources permanently. + * Dev: no-op. + * Cloud: deletes ECS service + EFS access point + CloudMap entry. + */ + destroy(tenantId: string): Promise; + + /** + * Check health/status of the tenant's squadhub. + * Dev: returns { running: true, healthy: true }. + * Cloud: checks ECS service running count + task health. + */ + getStatus(tenantId: string): Promise<{ + running: boolean; + healthy: boolean; + }>; +} +``` + +**Registry:** + +```typescript +// registry.ts +export interface PluginMap { + "squadhub-provisioner": SquadhubProvisioner; + "squadhub-lifecycle": SquadhubLifecycle; +} + +let plugins: PluginMap; // Always set (dev defaults or cloud) +let cloudLoaded = false; + +/** + * Initialize plugins. Call once at app startup. + * Tries to load @clawe/cloud-plugins. Falls back to dev defaults. + */ +export async function loadPlugins(): Promise { + try { + const cloud = await import("@clawe/cloud-plugins"); + plugins = cloud.register(); + cloudLoaded = true; + } catch { + plugins = { + "squadhub-provisioner": new DefaultSquadhubProvisioner(), + "squadhub-lifecycle": new DefaultSquadhubLifecycle(), + }; + } +} + +/** Returns true if cloud plugins are loaded (vs dev defaults). */ +export function hasPlugin(name: keyof PluginMap): boolean { + return cloudLoaded; +} + +/** Get a plugin implementation. Always returns something (cloud or dev default). */ +export function getPlugin(name: K): PluginMap[K] { + return plugins[name]; +} +``` + +**Dev default implementations:** + +```typescript +// defaults/squadhub-provisioner.ts — DefaultSquadhubProvisioner +export class DefaultSquadhubProvisioner implements SquadhubProvisioner { + async provision(params) { + // Read from env vars — single-tenant dev mode + return { + squadhubUrl: process.env.SQUADHUB_URL || "http://localhost:18790", + squadhubToken: process.env.SQUADHUB_TOKEN || "", + }; + } + + async getProvisioningStatus() { + return { status: "active" as const }; // Instant in dev + } + + async deprovision() {} // No-op in dev +} + +// defaults/squadhub-lifecycle.ts — DefaultSquadhubLifecycle +export class DefaultSquadhubLifecycle implements SquadhubLifecycle { + async restart() {} // No-op — user manages docker + async stop() {} // No-op + async destroy() {} // No-op + async getStatus() { + return { running: true, healthy: true }; + } +} +``` + +**Consumption pattern (call sites):** + +```typescript +import { getPlugin, hasPlugin } from "@clawe/plugins"; + +// Provision route — always works (dev or cloud) +const provisioner = getPlugin("squadhub-provisioner"); +const result = await provisioner.provision({ tenantId, accountId, convexUrl }); + +// Settings UI — conditionally show restart button +if (hasPlugin("squadhub-lifecycle")) { + // Show "Restart Service" button after API key change +} +``` + +Setup: + +- [x] Create `packages/plugins/` package with interfaces, registry, and dev defaults +- [x] Add to `pnpm-workspace.yaml` (already covered by `packages/*` glob) +- [x] Build outputs: ESM (consumable by Next.js, Node, and React) +- [x] Export types and runtime functions from package entry point + +### 4.2 Dev provision route + +Update `apps/web/src/app/api/tenant/provision/route.ts`: + +Current state: Only runs app-level setup (agents, crons, routines). Doesn't create the tenant or wire connection tokens. + +Changes: + +- [x] Call `loadPlugins()` at route level (ensures registry is initialized) +- [x] Authenticate the caller via NextAuth session (import `auth` from nextauth-config) +- [x] Get user email from session → look up user in Convex +- [x] Call Convex `accounts.getOrCreateForUser()` to ensure account + membership exist +- [x] Check if account already has a tenant (`tenants.getForCurrentUser`) +- [x] If no tenant: call `tenants.create(accountId)`, then use `getPlugin("squadhub-provisioner").provision()` to get `squadhubUrl` + `squadhubToken`, update tenant with these values + `status: "active"` +- [x] Run existing app-level setup (register agents, setup crons, seed routines) — already implemented in `setupTenant()` +- [x] Return `{ ok: true, tenantId }` + +The route is idempotent — calling it when a tenant already exists is a no-op (returns existing tenant). + +**Cloud extensibility**: The provision route uses `getPlugin("squadhub-provisioner")` which returns the dev provisioner by default (reads env vars, instant). When `@clawe/cloud-plugins` is installed (in `clawe-cloud` repo), the same route automatically uses the cloud provisioner (creates ECS/EFS/CloudMap, waits for health). No file changes needed. + +### 4.3 WATCHER_TOKEN setup + +The watcher uses two tokens for Convex authentication: + +1. **`WATCHER_TOKEN`** — system-level token for `tenants.listActive()` (get all active tenants) +2. **Per-tenant `squadhubToken`** — returned by `listActive()`, used as `machineToken` for tenant-scoped queries + +Setup: + +- [x] Add `WATCHER_TOKEN` to root `.env.example` with a default dev value (e.g., `clawe-watcher-dev-token`) +- [x] Add `WATCHER_TOKEN` to root `.env` with the same default +- [x] Add `WATCHER_TOKEN` to `turbo.json` `globalPassThroughEnv` +- [x] Set `WATCHER_TOKEN` as a Convex env var — auto-synced on `pnpm dev` via `scripts/sync-convex-env.sh` (runs as background process in backend dev script, waits for local Convex backend, then sets all env vars) +- [x] Document in `.env.example`: "Must match the value set in Convex env vars" + +After the provision route (4.2) runs, the tenant record has `squadhubToken` set to `SQUADHUB_TOKEN` from env. The watcher calls `tenants.listActive(watcherToken)` → gets the tenant's `squadhubToken` → passes it as `machineToken` to all tenant-scoped Convex calls. The full chain works. + +### 4.4 Auto-provision flow in web app + +After login, the app needs to automatically provision the user if they don't have an active tenant. + +- [x] Login page (`/auth/login`) only does: authenticate → `getOrCreateFromAuth()` → redirect to `/` +- [x] Root page (`/`) checks tenant status: no active tenant → redirect to `/setup/provisioning` +- [x] Provisioning page (`/setup/provisioning`) calls `POST /api/tenant/provision`, shows loading/error UI +- [x] Onboarding guards check tenant status before checking `isOnboardingComplete` +- [x] For returning users (tenant already exists), the idempotent provision route returns immediately. + +Flow after this step: + +``` +Login → auth → getOrCreateFromAuth → redirect to / + → root page queries tenant: + → no active tenant: redirect to /setup/provisioning + → provision → redirect to /setup/welcome (or /board if onboarding complete) + → has active tenant, not onboarded: redirect to /setup + → has active tenant, onboarded: redirect to /board +``` + +### 4.5 Environment variables summary + +Root `.env.example` additions: + +```bash +# Watcher system token (must also be set as Convex env var) +WATCHER_TOKEN=clawe-watcher-dev-token +``` + +`turbo.json` `globalPassThroughEnv` additions: + +``` +WATCHER_TOKEN +``` + +Convex env vars (set via CLI): + +```bash +npx convex env set WATCHER_TOKEN clawe-watcher-dev-token +``` + +### 4.6 Verification + +After this phase, verify the complete flow: + +1. **Fresh start**: Clear Convex data, restart services +2. **Login**: `AUTO_LOGIN_EMAIL` auto-signs in (or Google OAuth if configured) +3. **Auto-provision**: User + account + tenant created, tenant has `squadhubUrl`/`squadhubToken` from env, agents registered, crons set up +4. **Onboarding**: `/setup/welcome` → business context → telegram → complete — all tenant-scoped queries resolve +5. **Dashboard**: All pages work (agents, board, settings) — data is tenant-scoped +6. **Watcher**: `WATCHER_TOKEN` validates → `listActive` returns tenant → per-tenant `machineToken` resolves → notifications delivered + +### 4.7 Dedicated provisioning screen + +Move the provision step out of the login page into a shared onboarding screen. This screen is used by all modes (dev, self-hosted, cloud). + +**Why**: Login should only authenticate. Provisioning (creating tenant, registering agents, setting up crons) is a separate concern that deserves its own screen with loading state and error handling. For cloud, this step will take longer (spinning up ECS), so having a dedicated screen is essential. For dev/self-hosted it's instant but the flow should be the same. + +**Changes:** + +- [x] Remove `apiClient.post("/api/tenant/provision")` from `apps/web/src/app/auth/login/page.tsx` +- [x] Login page only does: authenticate → `getOrCreateFromAuth()` → redirect to `/` +- [x] Root page (`/`) queries `api.tenants.getForCurrentUser`, redirects to `/setup/provisioning` if no active tenant +- [x] Create `apps/web/src/app/setup/provisioning/page.tsx`: + - Calls `POST /api/tenant/provision` on mount + - Shows loading screen: spinner + "Setting up your workspace..." + - On success: Convex subscription updates reactively → redirect fires + - On error: show error message with retry button + - If tenant already active (idempotent): redirect immediately + - Uses `useRef` to prevent double-fire in StrictMode +- [x] Update onboarding guard (`use-onboarding-guard.ts`): check tenant exists and is active before checking `isOnboardingComplete`. If no active tenant → redirect to `/setup/provisioning`. Uses `usePathname()` to avoid redirect loop. +- [x] Rename `lib/squadhub/provision.ts` → `lib/squadhub/setup.ts`, `provisionTenant()` → `setupTenant()` (app-level setup, not infrastructure provisioning) +- [x] For cloud (future): poll `GET /api/tenant/status` while status is `"provisioning"`, show progress messages ("Creating workspace...", "Starting services...", "Almost ready...") + +**Updated flow:** + +``` +Login → auth → getOrCreateFromAuth → redirect to / + +/ (root page): + → query getForCurrentUser + → null or not active → /setup/provisioning + → active, not onboarded → /setup + → active, onboarded → /board + +/setup/provisioning: + → tenant already active? → redirect out + → call POST /api/tenant/provision + → spinner while provisioning + → Convex updates reactively → redirect to /setup/welcome or /board + +/setup/* (welcome, business, telegram, complete): + → guard: no active tenant → /setup/provisioning + → guard: already onboarded → /board + +/board (dashboard): + → guard: no active tenant → /setup/provisioning + → guard: not onboarded → /setup +``` + +--- + +## Phase 5: CDK Project Setup & Base Infrastructure + +### 5.1 Initialize CDK project + +Create the CDK project in `packages/infra/` (in `clawe-cloud` repo): + +``` +packages/infra/ +├── bin/ +│ └── clawe.ts # CDK app entry point +├── lib/ +│ ├── config.ts # Environment config (dev/prod values) +│ ├── networking-stack.ts # VPC, subnets, security groups +│ ├── auth-stack.ts # Cognito +│ ├── storage-stack.ts # EFS, ECR +│ ├── shared-services-stack.ts # ECS cluster, ALB, web app, watcher +│ ├── tenant-stack.ts # Per-tenant squadhub construct (Phase 7) +│ ├── dns-stack.ts # Route53, CloudFront, ACM +│ └── monitoring-stack.ts # CloudWatch dashboards, alarms +├── cdk.json +├── tsconfig.json +└── package.json +``` + +- [x] Add `infra` to pnpm workspace — lives in `packages/infra/`, covered by `packages/*` glob +- [x] Install CDK dependencies: `aws-cdk-lib`, `constructs`, `aws-cdk`, `@types/node`, `ts-node` +- [x] Configure `cdk.json` with context defaults (`env: "dev"`) +- [x] Create `bin/clawe.ts` — CDK app entry point wiring all stacks with `--context env=dev|prod` +- [x] Create `lib/config.ts` — environment config (domain, sizing, namespace per dev/prod) +- [x] Create `lib/networking-stack.ts` — VPC, 5 security groups (ALB, web, watcher, squadhub, EFS) +- [x] Create `lib/auth-stack.ts` — Cognito User Pool with Google social login +- [x] Create `lib/storage-stack.ts` — 3 ECR repos + encrypted EFS +- [x] Create `lib/shared-services-stack.ts` — 2 ECS clusters, ALB, web app + watcher Fargate services +- [x] Create `lib/dns-stack.ts` — Route53, ACM certificate, CloudFront distribution +- [x] Create `lib/monitoring-stack.ts` — CloudWatch dashboard + 5xx alarm +- [x] Type-check passes cleanly (`tsc --noEmit`) + +### 5.2 Cognito User Pool (Google social login only) + +All implemented in `auth-stack.ts` during 5.1. Custom domain (`auth.clawe.io`) instead of prefix domain. Added deletion protection, case-insensitive sign-in, prevent user existence errors (adopted from ChartDB). Removed dead `googleClientSecret` variable; added pre-deploy SSM setup instructions as comments. + +Create Cognito User Pool: + +- [x] Cognito is only the auth layer — user data lives in Convex `users` table +- [x] No self sign-up with email/password — Google is the only sign-in method (`selfSignUpEnabled: false`) +- [x] No custom Cognito attributes needed (only standard `email` + `fullname`) + +Configure Google as federated identity provider: + +- [x] Create OAuth 2.0 Client ID in Google Cloud Console (Web application type) + - Authorized redirect URI: `https://{authDomain}/oauth2/idpresponse` +- [x] Add Google as identity provider in Cognito User Pool: + - Client ID + Client Secret from SSM (`/clawe/{env}/google-client-id`, `/clawe/{env}/google-client-secret`) + - Scopes: `openid email profile` + - Attribute mapping: Google `email` → Cognito `email`, Google `name` → Cognito `fullname` + +Configure Cognito domain: + +- [x] Custom domain: `auth.clawe.io` (prod) / `auth-dev.clawe.io` (dev) with ACM cert + Route53 alias + +Create App Client: + +- [x] No client secret (public client for SPA) — `generateSecret: false` +- [x] Auth flows: `userSrp: true` (SRP kept as fallback, Google login uses OAuth flow) +- [x] OAuth settings: + - Callback URL: `https://app.clawe.io` (prod) / `https://app-dev.clawe.io` (dev) / `http://localhost:3000` (local) + - Sign-out URL: same as callback + - OAuth scopes: `openid`, `email`, `profile` + - Supported identity providers: Google +- [x] Token validity: access 1h, id 1h, refresh 30d + +- [x] Output User Pool ID, App Client ID, and Cognito Domain as stack outputs +- [x] Store Google OAuth Client ID and Secret in SSM Parameter Store — done for both dev and prod + +### 5.3 Environment config + +- [x] Created `packages/infra/lib/config.ts` during 5.1 — maps `env` context to all environment-specific values including `authDomain`, `cloudMapNamespace`, sizing for web/watcher/squadhub, `natGateways` + +### 5.4 Networking stack + +Created `packages/infra/lib/networking-stack.ts` during 5.1: + +- [x] 2 public subnets (ALB) +- [x] 2 private subnets with NAT (ECS tasks, EFS) +- [x] 1 NAT Gateway (dev) / 2 NAT Gateways (prod) for cost vs HA tradeoff +- [x] Security groups: + - `alb-sg`: inbound 443/80 from internet + - `web-sg`: inbound from ALB only (port 3000) + - `squadhub-sg`: inbound from web-sg and watcher-sg (port 18789) + - `watcher-sg`: outbound to squadhub-sg and internet (Convex) + - `efs-sg`: inbound NFS (2049) from squadhub-sg + +### 5.5 ECR repositories + +Created in `packages/infra/lib/storage-stack.ts` during 5.1: + +- [x] `clawe/web` — Next.js web app image +- [x] `clawe/squadhub` — Squadhub (OpenClaw gateway) image +- [x] `clawe/watcher` — Watcher service image +- [x] Lifecycle policy: keep last 10 images + +### 5.6 EFS filesystem + +Created in `packages/infra/lib/storage-stack.ts` during 5.1: + +- [x] Encrypted at rest +- [x] Performance mode: generalPurpose +- [x] Throughput mode: bursting (sufficient for tens of users) +- [x] Per-tenant access points created dynamically during provisioning (Phase 8.2 — CloudSquadhubProvisioner) +- [x] Mount targets in each private subnet (created in storage-stack.ts) + +### 5.7 SSM Parameter Store secrets + +Generate and store shared secrets in SSM Parameter Store (SecureString): + +- [x] `/clawe/{env}/watcher-token` (SecureString) — created for dev and prod +- [x] `/clawe/{env}/google-client-id` (String) — created for dev and prod +- [x] `/clawe/{env}/google-client-secret` (SecureString) — created for dev and prod +- [x] Set watcher token as Convex env var — automated in deploy workflow ("Sync Convex env vars" step reads from SSM, sets via `npx convex env set`) + +### 5.8 Shared wildcard certificate + +Created `lib/certificate-stack.ts`. Single ACM wildcard certificate for all subdomains, replacing per-service certs. + +- [x] Create `lib/certificate-stack.ts` — standalone stack (deployed in `us-east-1`) +- [x] Certificate covers: `*.clawe.io` + `clawe.io` (SAN) +- [x] DNS validation via Route53 hosted zone lookup +- [x] Export certificate ARN as stack output (`clawe-wildcard-certificate-arn`) +- [x] Deploy order: certificate-stack deploys **before** auth-stack and dns-stack +- [x] Update `auth-stack.ts` — removed per-domain `AuthCertificate`, accepts shared wildcard cert as prop +- [x] Update `dns-stack.ts` — removed per-domain certificate, accepts shared wildcard cert as prop +- [x] Update `shared-services-stack.ts` — ALB HTTPS listener now uses shared wildcard cert +- [x] Update `bin/clawe.ts` — added certificate stack, passes cert to auth-stack, dns-stack, and shared-services-stack + +CloudFront → ALB now uses HTTPS (not HTTP), since ALB has the wildcard cert. + +### 5.9 GitHub Actions OIDC for AWS + +Created `lib/ci-stack.ts`. Keyless AWS auth — no long-lived credentials in GitHub. + +- [x] Create `lib/ci-stack.ts` — deployed once per account (not per environment) +- [x] Create IAM OIDC provider for GitHub (`token.actions.githubusercontent.com`) +- [x] Create IAM role `clawe-github-ci` with trust policy: + - Principal: GitHub OIDC provider + - Condition: `sub` matches `repo:getclawe/clawe-cloud:*` (only this repo can assume) +- [x] Role permissions (CDK-native approach — assumes CDK bootstrap roles for deploy): + - `sts:AssumeRole` on `cdk-*` bootstrap roles (CDK deploy delegates to these) + - ECR: full push permissions (for Docker image builds done outside CDK) + - CloudFront: `CreateInvalidation` (post-deploy cache clear) + - SSM: `GetParameter` scoped to `/clawe/*` (read env config) + - STS: `GetCallerIdentity` +- [x] Export role ARN as stack output (`clawe-github-ci-role-arn`) +- [x] Add to `bin/clawe.ts` — deployed as `ClaweCi` (no env parameterization) + +### 5.10 CI/CD deploy workflow + +Created `.github/workflows/deploy.yml`. Manual (`workflow_dispatch`) — no auto-deploy on push. + +**Deployment approach**: All ECS service updates go through CDK (`cdk deploy`), not direct `aws ecs update-service`. CDK is the single source of truth — when the image tag changes, CloudFormation detects the task definition update and triggers ECS rolling deployment automatically. This prevents drift and gives us circuit breaker + auto-rollback for free. (Pattern adopted from ChartDB's backend-node deployment.) + +- [x] Trigger: `workflow_dispatch` with inputs: + - `environment`: choice of `dev` / `prod` + - `targets`: choice of `all` / `infra` / `web` / `watcher` / `squadhub` / `convex` +- [x] Authentication: `aws-actions/configure-aws-credentials` with OIDC (role from 5.9) +- [x] Steps (conditional on `targets`): + 1. **Checkout** + pnpm + Node.js setup + 2. **Configure AWS credentials** via OIDC + 3. **Build & push Docker images** to ECR (web, watcher, squadhub — each conditional) + 4. **Tag images**: `{repo}:{env}-{git-sha}` + `{repo}:{env}-latest` + 5. **Deploy Convex** via `./scripts/convex-deploy.sh` + 6. **CDK deploy** — single command handles infra + ECS updates: + ```bash + npx cdk deploy --all --context env={env} --context imageTag={env}-{sha} + ``` + 7. **Invalidate CloudFront cache** — reads distribution ID from stack outputs + 8. **Smoke test** — retries health check for up to 3 minutes +- [x] ECS deployment settings (defined in CDK shared-services-stack): + - Circuit breaker enabled with auto-rollback on health check failure + - MaximumPercent: 200% (new tasks start before old ones stop) + - MinimumHealthyPercent: 50% (allows rolling replacement) + - Health check grace period: 120s (allows startup time) +- [x] `imageTag` CDK context — shared-services-stack reads `this.node.tryGetContext("imageTag")`, defaults to `{env}-latest` +- [x] Hardcoded in workflow: role ARN (`arn:aws:iam::985539756675:role/clawe-github-ci`), account ID, ECR registry +- [x] GitHub environment secrets configured: `CONVEX_DEPLOY_KEY` (per env: dev + prod) +- [x] GitHub environment variables configured: `CONVEX_URL` (per env: dev + prod) +- [x] Removed `convexUrl` from `config.ts` — now required CDK context (`--context convexUrl=...`), fails fast if missing + +--- + +## Phase 6: Shared Services Deployment (CDK) + +All of Phase 6 was implemented in `shared-services-stack.ts` during Phase 5.1, with deployment config (circuit breaker, rolling updates) added in 5.8/5.10. + +### 6.1 ECS Clusters + +Created in `shared-services-stack.ts`: + +**Shared services cluster:** + +- [x] Name: `clawe-cluster-{env}` +- [x] Hosts: web app + watcher +- [x] Container insights enabled + +**Squadhubs cluster:** + +- [x] Name: `clawe-squadhubs-{env}` +- [x] Hosts: per-tenant squadhub ECS Services +- [x] CloudMap namespace: `clawe-internal-{env}` (for squadhub service discovery) +- [x] Container insights enabled +- [x] Separate cluster keeps tenant workloads isolated from shared services + +### 6.2 Application Load Balancer + +Created in `shared-services-stack.ts`: + +- [x] HTTPS listener (443) with shared wildcard certificate +- [x] HTTP listener (80) — redirect to HTTPS +- [x] Target groups: + - `web-tg`: routes to web app service (default rule) +- [x] Health check: `/api/health` (interval 30s, healthy 2, unhealthy 3) + +### 6.3 Web app Fargate service + +- [x] ECS service for the Next.js app (in `shared-services-stack.ts`): + +- Task definition: + - Image: `fromEcrRepository(webRepo, imageTag)` — image tag from CDK context + - CPU: 512 (dev) / 1024 (prod) + - Memory: 1024 (dev) / 2048 (prod) + - Port: 3000 + - Environment: `NODE_ENV`, `ENVIRONMENT`, `NEXT_PUBLIC_CONVEX_URL` (from context), `NEXT_PUBLIC_COGNITO_*` (from auth stack), `NEXT_PUBLIC_APP_URL`, `AWS_REGION`, infrastructure refs for provisioning (cluster ARN, namespace ID, EFS ID, squadhub image, SG IDs, subnet IDs) + - IAM role: ECS provisioning, EFS access points, CloudMap service discovery, IAM PassRole + - Logging: `/clawe/web-{env}` (30d dev, 90d prod) +- Service: + - Desired count: 1 (dev) / 2 (prod) + - Circuit breaker with auto-rollback + - MaximumPercent: 200%, MinimumHealthyPercent: 50% + - Health check grace period: 120s + - ALB target group attachment + - [ ] Auto-scaling: target CPU 70%, min 1 / max 10 (prod only) — **to add** + +### 6.4 Watcher Fargate service + +- [x] ECS service for the shared watcher (in `shared-services-stack.ts`): + +- Task definition: + - Image: `fromEcrRepository(watcherRepo, imageTag)` — same context as web + - CPU: 256, Memory: 512 + - Environment: `CONVEX_URL` (from context), `ENVIRONMENT` + - Secrets: `WATCHER_TOKEN` from SSM (`/clawe/{env}/watcher-token`) + - IAM role: CloudMap DiscoverInstances + - Logging: `/clawe/watcher-{env}` (30d dev, 90d prod) +- Service: + - Desired count: 1 + - Circuit breaker with auto-rollback + - No ALB attachment (internal service only) + +--- + +## Phase 7: Per-Tenant Squadhub (CDK + Runtime) + +### 7.1 Base squadhub task definition (shared infrastructure in CDK) + +Phase 7.1 creates the **shared CDK infrastructure** that all per-tenant squadhub services reference. The actual per-tenant ECS task definitions and services are created at **runtime** by CloudSquadhubProvisioner (Phase 8). + +Changes in `packages/infra/lib/shared-services-stack.ts`: + +- [x] Shared squadhub execution role (`clawe-squadhub-exec-{env}`) — ECS Fargate execution role for ECR pulls + CloudWatch log writes, with `AmazonECSTaskExecutionRolePolicy` +- [x] Shared squadhub task role (`clawe-squadhub-task-{env}`) — runtime role with EFS mount/write/root access (scoped to file system ARN) + CloudMap `DiscoverInstances` +- [x] Shared squadhub log group (`/clawe/squadhub-{env}`) — tenant-specific stream prefixes set at runtime by CloudSquadhubProvisioner (30d dev, 90d prod retention) +- [x] Export role ARNs + log group to web container env vars: `SQUADHUB_EXECUTION_ROLE_ARN`, `SQUADHUB_TASK_ROLE_ARN`, `SQUADHUB_LOG_GROUP` +- [x] Tightened `iam:PassRole` scope from `resources: ["*"]` to the two specific role ARNs + +Per-tenant ECS task definition template (created at runtime by CloudSquadhubProvisioner in Phase 8): + +- Image: `{account}.dkr.ecr.{region}.amazonaws.com/clawe/squadhub:{env}-latest` +- CPU: 256 (0.25 vCPU) +- Memory: 512 (0.5 GB) +- Port: 18789 +- EFS volume mount: `/data` (access point set per-tenant at runtime) +- Health check: `wget -q --spider http://localhost:18789/health` +- IAM task role: shared `clawe-squadhub-task-{env}` (EFS + CloudMap) +- IAM execution role: shared `clawe-squadhub-exec-{env}` (ECR + logs) +- Logging: shared `/clawe/squadhub-{env}` log group with tenant-specific stream prefix + +### 7.2 CloudMap service discovery + +All CDK infrastructure for CloudMap is already in place (created in Phase 5.1 + 7.1). Per-tenant service registration happens at runtime via CloudSquadhubProvisioner (Phase 8.2). + +- [x] CloudMap private DNS namespace (`clawe-internal-{env}`) — created in shared-services-stack +- [x] Namespace ID + name exported to web container (`CLOUDMAP_NAMESPACE_ID`, `CLOUDMAP_NAMESPACE_NAME`) +- [x] Web app task role has full CloudMap permissions (`CreateService`, `DeleteService`, `RegisterInstance`, `DeregisterInstance`, `DiscoverInstances`, `GetService`, `ListInstances`) +- [x] Watcher task role has `DiscoverInstances` permission +- [x] Squadhub task role has `DiscoverInstances` permission (added in 7.1) + +Runtime (Phase 8.2): CloudSquadhubProvisioner creates per-tenant CloudMap service entries (`squadhub-{tenantId}.clawe-internal-{env}`) and associates with ECS services. Web app resolves `squadhub-{tenantId}.clawe-internal-{env}` → private IP:18789. + +### 7.3 Squadhub lifecycle API routes + +These routes use `getPlugin("squadhub-lifecycle")` from `@clawe/plugins`. In dev, they return no-ops. In cloud, `CloudSquadhubLifecycle` manages ECS services. + +- [x] `POST /api/tenant/squadhub/restart` — calls `getPlugin("squadhub-lifecycle").restart(tenantId)` +- [x] `GET /api/tenant/squadhub/status` — calls `getPlugin("squadhub-lifecycle").getStatus(tenantId)` +- [x] `DELETE /api/tenant/squadhub` — calls `getPlugin("squadhub-lifecycle").destroy(tenantId)` +- [x] Extracted shared `getAuthenticatedTenant()` helper (`lib/api/tenant-auth.ts`) — reads JWT from Authorization header, resolves tenant via Convex + +These routes live in the **base repo** and work for both dev and cloud via the plugin interface. + +--- + +## Phase 8: Cloud Tenant Provisioning + +### 8.1 Cloud plugins package (`packages/cloud-plugins/`) + +Create `@clawe/cloud-plugins` in the `clawe-cloud` repo. This package implements the `@clawe/plugins` interfaces (from Phase 4.1) with real AWS infrastructure. + +**Package structure (in `clawe-cloud` repo):** + +``` +packages/cloud-plugins/ +├── src/ +│ ├── index.ts # register() function — returns PluginMap +│ ├── squadhub-provisioner.ts # CloudSquadhubProvisioner implements SquadhubProvisioner +│ └── squadhub-lifecycle.ts # CloudSquadhubLifecycle implements SquadhubLifecycle +├── package.json # depends on @clawe/plugins (for types), aws-sdk +└── tsconfig.json +``` + +**Registration:** + +```typescript +// cloud-plugins/src/index.ts +import type { PluginMap } from "@clawe/plugins"; +import { CloudSquadhubProvisioner } from "./squadhub-provisioner"; +import { CloudSquadhubLifecycle } from "./squadhub-lifecycle"; + +export function register(): PluginMap { + return { + "squadhub-provisioner": new CloudSquadhubProvisioner(), + "squadhub-lifecycle": new CloudSquadhubLifecycle(), + }; +} +``` + +When `@clawe/cloud-plugins` is installed and `loadPlugins()` runs, the base provision route (Phase 4.2) automatically uses `CloudSquadhubProvisioner` instead of `DefaultSquadhubProvisioner`. No route file changes needed. + +- [x] Create `packages/cloud-plugins/` with `CloudSquadhubProvisioner` and `CloudSquadhubLifecycle` (stub implementations, `throw new Error` for each method) +- [x] Add `@clawe/plugins` as dependency (for interfaces) +- [x] Add `@aws-sdk/client-ecs`, `@aws-sdk/client-efs`, `@aws-sdk/client-servicediscovery` as dependencies +- [x] Already covered by `packages/*` glob in `pnpm-workspace.yaml` +- [x] Add `@clawe/cloud-plugins: "workspace:*"` to `apps/web/package.json` in `clawe-cloud` — this is the single line that activates cloud plugins (without it, `loadPlugins()` falls back to defaults) + +### 8.2 CloudSquadhubProvisioner implementation + +Implement `SquadhubProvisioner` interface with AWS infrastructure: + +**`provision()`:** + +- [x] 1. Create EFS access point for tenant (path: `/tenants/{tenantId}`) +- [x] 2. Register ECS task definition (from base template, with tenant-specific env vars): + - `SQUADHUB_TOKEN` — generated per-tenant UUID + - `ANTHROPIC_API_KEY` — from `params.anthropicApiKey` + - `CONVEX_URL` — from `params.convexUrl` + - `OPENCLAW_STATE_DIR=/data/config` + - `OPENCLAW_PORT=18789` + - `OPENCLAW_SKIP_GMAIL_WATCHER=1` +- [x] 3. Create ECS Service on `clawe-squadhubs-{env}` cluster with `desiredCount: 1` +- [x] 4. Associate with CloudMap service discovery (`squadhub-{tenantId}.clawe-internal-{env}`) +- [x] 5. Return `{ squadhubUrl, squadhubToken, metadata: { serviceArn, efsAccessPointId } }` + +**`getProvisioningStatus()`:** + +- [x] Query ECS Service running count and task health +- [x] Return `"provisioning"` while tasks are starting, `"active"` when healthy, `"error"` on failure + +**`deprovision(params: DeprovisionParams)`:** + +- [x] Delete ECS Service (force delete, stops running tasks) +- [x] Deregister task definition +- [x] Deregister CloudMap service entry +- [x] Delete EFS access point (via `params.metadata.efsAccessPointId`) +- [x] Best-effort cleanup — continues through all steps, collects errors, throws aggregate error + +**Error handling:** + +- [x] `provision()` uses compensating transactions — on failure, rolls back all created resources (best-effort) +- [x] `deprovision()` uses best-effort pattern — attempts all cleanup steps, throws aggregate error if any non-404 failures + +Environment variables for the squadhub ECS task: + +``` +SQUADHUB_TOKEN={generated-per-tenant} # Used for both: squadhub HTTP auth AND Convex machine auth +ANTHROPIC_API_KEY={from-convex-tenant-record} +CONVEX_URL={env-specific-convex-url} +OPENCLAW_STATE_DIR=/data/config +OPENCLAW_PORT=18789 +OPENCLAW_SKIP_GMAIL_WATCHER=1 +``` + +Note: `TENANT_ID` is not needed as an env var — the CLI resolves the tenant by passing `SQUADHUB_TOKEN` as `machineToken` to Convex, which looks up the tenant via the `by_squadhubToken` index. + +### 8.3 CloudSquadhubLifecycle implementation + +Implement `SquadhubLifecycle` interface with ECS operations: + +**`restart(tenantId)`:** + +- [x] Force new deployment on the tenant's ECS Service via `UpdateServiceCommand` with `forceNewDeployment: true` + +**`stop(tenantId)`:** + +- [x] Set ECS Service `desiredCount: 0` via `UpdateServiceCommand` + +**`destroy(tenantId)`:** + +- [x] Delete ECS Service (force) + deregister task definition +- [x] Delete EFS access point (looked up by `clawe:tenantId` tag) +- [x] Delete CloudMap service entry +- [x] Best-effort cleanup — continues through all steps, throws aggregate error + +**`getStatus(tenantId)`:** + +- [x] Check ECS Service running count via `DescribeServicesCommand` +- [x] Check task-level health via `ListTasksCommand` + `DescribeTasksCommand` +- [x] Return `{ running: runningCount > 0, healthy: container.healthStatus === "HEALTHY" }` + +**Refactoring:** + +- [x] Extracted shared utilities (`getEnvConfig`, `serviceName`, `isNotFoundError`, `toError`, AWS clients) to `aws-config.ts` +- [x] Updated `squadhub-provisioner.ts` to import from `aws-config.ts` + +### 8.4 Provisioning status polling + +Create `apps/web/src/app/api/tenant/status/route.ts`: + +- [x] Calls `getPlugin("squadhub-provisioner").getProvisioningStatus(tenantId)` +- [x] Returns status to client for polling +- [x] Works for both dev (instant "active") and cloud (real status) + +### 8.5 Cloud provisioning UI flow + +**Note**: The shared `/setup/provisioning` screen (Phase 4.7) already handles the full flow — loading state, error handling, and redirect. This step just ensures the cloud polling path works end-to-end: + +- [x] Verify `GET /api/tenant/status` (8.4) returns real ECS status via `CloudSquadhubProvisioner.getProvisioningStatus()` +- [x] Verify `/setup/provisioning` page polls status while `"provisioning"` and shows progress messages ("Creating workspace...", "Starting services...", "Almost ready...") +- [x] Verify redirect to `/setup/welcome` once status is `"active"` +- [x] No new UI page needed — reuses the shared screen from 4.7 + +### 8.6 Dynamic squadhub routing + +Currently the web app reads `SQUADHUB_URL` from env. This needs to become per-tenant: + +- [x] Update `apps/web/src/lib/squadhub/connection.ts`: + - `getConnection(tenant?)` — uses tenant's `squadhubUrl` + `squadhubToken` if provided + - Falls back to env vars when no tenant (self-hosted / dev) + - No cache needed — tenant data comes from the same `getAuthenticatedTenant()` query used for auth + +- [x] Update all API routes that call the squadhub (already refactored in Phase 1 to accept params): + - `/api/chat` — resolve tenant's squadhub URL before calling + - `/api/chat/history` — same + - `/api/chat/abort` — same + - `/api/squadhub/health` — same + - `/api/squadhub/pairing` — same + +- [x] Refactor `getAuthenticatedTenant` return type to clean discriminated union (`{ error, convex, tenant }`) + +### 8.7 Anthropic API key management + +Update onboarding flow: + +- [ ] Add new step in `/setup/welcome` or as a dedicated step: "Enter your Anthropic API key" +- [ ] Store key in Convex tenant record via `tenants.setAnthropicKey()` +- [ ] The provisioning route reads from Convex and passes as env var to ECS Service +- [ ] If user updates key in settings later: + 1. Update Convex tenant record + 2. Call `getPlugin("squadhub-lifecycle").restart(tenantId)` to pick up new key + +Create settings page: + +- [ ] Add "API Key" section to `/settings/general` +- [ ] Input field (masked) to view/update Anthropic API key +- [ ] "Save" triggers Convex update + `getPlugin("squadhub-lifecycle").restart(tenantId)` +- [ ] Poll `getPlugin("squadhub-lifecycle").getStatus(tenantId)` for restart progress +- [ ] In dev (`!hasPlugin("squadhub-lifecycle")`), skip restart UI — API key is an env var + +--- + +## Phase 9: Cloud Onboarding Flow Updates + +### 9.1 New onboarding step order + +``` +1. Sign in with Google (Cognito OAuth redirect) +2. Provisioning (auto — create tenant + start squadhub) +3. Anthropic API Key (new step — enter API key, stored in Convex tenant record) +4. Business Context (existing — chat with Clawe agent) +5. Telegram (optional) (existing — bot token + pairing) +6. Complete (existing) +``` + +### 9.2 API Key onboarding step + +Create `apps/web/src/app/setup/api-key/page.tsx`: + +- [ ] Input field for Anthropic API key (masked, with show/hide toggle) +- [ ] Validation: call Anthropic API to verify key is valid (lightweight models list request) +- [ ] On submit: store in Convex tenant record, call `getPlugin("squadhub-lifecycle").restart(tenantId)` +- [ ] Poll `getPlugin("squadhub-lifecycle").getStatus(tenantId)` — wait for healthy before proceeding + +### 9.3 Settings — API Key management + +Add to `/settings/general`: + +- [ ] "Anthropic API Key" section +- [ ] Masked input showing last 4 chars +- [ ] "Update" button → updates Convex + calls `getPlugin("squadhub-lifecycle").restart(tenantId)` +- [ ] Poll `getPlugin("squadhub-lifecycle").getStatus(tenantId)` for restart progress +- [ ] In dev (`!hasPlugin("squadhub-lifecycle")`), skip restart UI + +--- + +## Phase 10: DNS & CDN (CDK) + +All of Phase 10 was implemented in `dns-stack.ts` during Phase 5.1, updated in 5.8 to use shared wildcard cert. + +### 10.1 Hosted Zone lookup + +- [x] Look up existing Route53 hosted zone for `clawe.io` via `HostedZone.fromLookup` + +### 10.2 ACM Certificate + +- [x] Uses shared wildcard cert from Phase 5.8 (`*.clawe.io` + `clawe.io`), passed as prop to dns-stack + +### 10.3 CloudFront distribution + +- [x] Created in `dns-stack.ts`: + - Origin: ALB via HTTPS (wildcard cert on ALB) + - Alternate domain: `app.clawe.io` / `app-dev.clawe.io` + - Certificate: shared wildcard cert + - Cache policy: caching disabled (dynamic Next.js app) + - Origin request policy: ALL_VIEWER (forwards all headers/cookies) + - Allowed methods: ALL (GET, POST, PUT, DELETE, etc.) + - Exports `DistributionId` (used by deploy workflow for cache invalidation) + +### 10.4 Route53 record + +- [x] A record (alias) pointing `app.clawe.io` / `app-dev.clawe.io` to CloudFront distribution + +--- + +## Phase 11: Docker Images & CI/CD + +CI/CD foundation was set up in Phase 5.9–5.10. Image builds use `docker/build-push-action` with GitHub Actions layer caching. + +### 11.1 Image tagging convention + +- [x] Tagging implemented in deploy workflow (Phase 5.10): + - `clawe/web:{env}-{git-sha}` + `clawe/web:{env}-latest` + - `clawe/watcher:{env}-{git-sha}` + `clawe/watcher:{env}-latest` + - `clawe/squadhub:{env}-{git-sha}` + `clawe/squadhub:{env}-latest` +- [x] Uses `docker/metadata-action@v5` for tag/label generation +- [x] Uses `docker/build-push-action@v6` with `docker/setup-buildx-action@v3` +- [x] GitHub Actions layer caching (`cache-from/cache-to: type=gha`) per image scope +- [x] Per-tenant squadhub rolling update via `aws ecs update-service --force-new-deployment` on all services in squadhubs cluster + +### 11.2 Refine deploy workflow (future optimization) + +- [ ] Use `turbo prune --docker` in Dockerfiles for smaller images and faster builds +- [ ] Parallel jobs: build web/watcher/squadhub images in parallel +- [ ] Selective builds: only rebuild images whose source changed (based on git diff) +- [ ] Post-deploy Slack notification (optional) + +--- + +## Phase 12: Monitoring (CDK) + +Basic monitoring created in `monitoring-stack.ts` during Phase 5.1. Log groups created in `shared-services-stack.ts`. + +### 12.1 CloudWatch alarms + +- [x] ALB 5xx error rate alarm (threshold > 5 in 5 minutes) — in `monitoring-stack.ts` +- [ ] Web app: CPU > 80%, unhealthy targets +- [ ] Squadhub services: task stopped unexpectedly, health check failures +- [ ] Watcher: task stopped, log errors +- [ ] EFS: burst credit balance low + +### 12.2 CloudWatch dashboard + +- [x] ALB request count widget — in `monitoring-stack.ts` +- [x] ALB 5xx errors widget — in `monitoring-stack.ts` +- [ ] Active tenants count +- [ ] Squadhub services running / restarting +- [ ] Web app latency +- [ ] EFS usage + +### 12.3 Log groups + +- [x] `/clawe/web-{env}` — web app logs (30d dev, 90d prod) — in `shared-services-stack.ts` +- [x] `/clawe/watcher-{env}` — watcher logs (30d dev, 90d prod) — in `shared-services-stack.ts` +- [x] `/clawe/squadhub-{env}` — shared squadhub log group with per-tenant stream prefixes (Phase 7.1, 30d dev, 90d prod) + +--- + +## Implementation Order + +``` +Phase 1 (Shared client & watcher refactor) — ✅ COMPLETE + │ +Phase 2 (Multi-tenancy in Convex) — ✅ COMPLETE + │ +Phase 3 (Authentication) — ✅ COMPLETE + │ +Phase 4 (Dev & self-hosted provisioning) — ✅ COMPLETE + │ includes: @clawe/plugins package (interfaces + registry + dev defaults) + ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ + │ ↑ Dev & self-hosted complete here (base repo: clawe) + │ ↓ Everything below is cloud-only (clawe-cloud repo) + │ +Phase 5 (CDK setup + base infra) — ✅ COMPLETE (all stacks + CI/CD + SSM params) + │ +Phase 6 (Shared services CDK) — ✅ COMPLETE (done in 5.1, auto-scaling pending) + │ +Phase 7 (Per-tenant squadhub CDK) — NEXT + │ +Phase 8 (Cloud tenant provisioning) — @clawe/cloud-plugins + AWS provisioning + │ includes: CloudSquadhubProvisioner + CloudSquadhubLifecycle implementing plugin interfaces + │ +Phase 9 (Cloud onboarding updates) — API key management + │ +Phase 10 (DNS & CDN) — ✅ COMPLETE (done in 5.1) + │ +Phase 11 (CI/CD) — ✅ COMPLETE (deploy workflow + image builds done in 5.10) + │ +Phase 12 (Monitoring) — PARTIAL (basic dashboard + 5xx alarm + log groups done) +``` + +Phases 1-4: complete (base repo: clawe). +Phases 5, 6, 10, 11: complete (clawe-cloud repo) — all CDK stacks, CI/CD, DNS/CDN. +Phase 12: partial — basic monitoring done, advanced alarms/dashboards deferred. +**Next: Phase 7** (per-tenant squadhub infrastructure) → Phase 8 (cloud provisioning) → Phase 9 (onboarding). + +--- + +## Files to Create + +### Base repo (clawe) + +``` +packages/plugins/ (new — Phase 4.1) +├── src/ +│ ├── index.ts # Main exports: loadPlugins, hasPlugin, getPlugin +│ ├── registry.ts # Plugin registry internals +│ ├── interfaces/ +│ │ ├── index.ts # Re-exports all interfaces +│ │ ├── squadhub-provisioner.ts # SquadhubProvisioner interface +│ │ └── squadhub-lifecycle.ts # SquadhubLifecycle interface +│ └── defaults/ +│ ├── index.ts # Dev default implementations +│ ├── squadhub-provisioner.ts # DefaultSquadhubProvisioner (reads env vars) +│ └── squadhub-lifecycle.ts # DefaultSquadhubLifecycle (no-ops) +├── package.json +└── tsconfig.json + +apps/web/src/ +├── proxy.ts (new — renamed from middleware.ts) +├── app/auth/ +│ └── login/page.tsx (new — "Sign in with Google" button) +├── app/api/tenant/ +│ ├── provision/route.ts (new) +│ ├── status/route.ts (new — uses getPlugin("squadhub-provisioner")) +│ └── squadhub/ +│ ├── restart/route.ts (new — uses getPlugin("squadhub-lifecycle")) +│ └── status/route.ts (new — uses getPlugin("squadhub-lifecycle")) +├── providers/auth-provider.tsx (new) +├── lib/squadhub/connection.ts (updated — tenant-aware getConnection) +└── hooks/use-auth.ts (new) + +packages/backend/convex/ +├── auth.config.cognito.ts (new — Cognito auth provider template) +├── auth.config.nextauth.ts (new — NextAuth customJwt provider) +├── auth.config.ts (generated — copied from template at deploy time) +├── users.ts (new) +├── tenants.ts (new) +├── accounts.ts (new — account management) +├── lib/auth.ts (new) +├── schema.ts (modify — add tenantId everywhere) +├── agents.ts (modify — tenant filtering) +├── tasks.ts (modify — tenant filtering) +├── messages.ts (modify — tenant filtering) +├── notifications.ts (modify — tenant filtering) +├── activities.ts (modify — tenant filtering) +├── documents.ts (modify — tenant filtering) +├── settings.ts (deleted — migrated to tenants.ts) +├── businessContext.ts (modify — tenant filtering) +├── channels.ts (modify — tenant filtering) +└── routines.ts (modify — tenant filtering) + +packages/shared/src/squadhub/ +├── client.ts (modify — accept url/token params) +└── gateway-client.ts (modify — accept url/token params) + +packages/cli/src/ +├── client.ts (modify — add machineToken to all calls) +└── commands/*.ts (modify — pass machineToken) + +apps/watcher/src/ +├── index.ts (modify — multi-tenant loop) +└── config.ts (modify — remove single-tenant env vars) + +apps/web/src/lib/squadhub/ +└── actions.ts (modify — per-tenant routing) +``` + +### Cloud repo (clawe-cloud) — only new files, no base file modifications + +``` +packages/cloud-plugins/ (new — Phase 8.1) +├── src/ +│ ├── index.ts # register() → PluginMap with cloud impls +│ ├── squadhub-provisioner.ts # CloudSquadhubProvisioner implements SquadhubProvisioner +│ └── squadhub-lifecycle.ts # CloudSquadhubLifecycle implements SquadhubLifecycle +├── package.json # depends on @clawe/plugins, @aws-sdk/* +└── tsconfig.json + +packages/infra/ (new — Phase 5+) +├── bin/clawe.ts +├── lib/ +│ ├── config.ts +│ ├── certificate-stack.ts # Shared wildcard cert (Phase 5.8) +│ ├── ci-stack.ts # GitHub OIDC + CI role (Phase 5.9) +│ ├── networking-stack.ts +│ ├── auth-stack.ts +│ ├── storage-stack.ts +│ ├── shared-services-stack.ts +│ ├── tenant-stack.ts +│ ├── dns-stack.ts +│ └── monitoring-stack.ts +├── scripts/ +│ └── build-and-push.sh +├── cdk.json +├── tsconfig.json +└── package.json + +apps/web/src/app/setup/ +└── api-key/page.tsx (new — Phase 9.2, cloud onboarding step) + +.github/workflows/ +└── deploy.yml (new — Phase 5.10) +``` + +## Files to Modify + +### Base repo + +``` +apps/web/src/app/layout.tsx (add auth provider) +apps/web/src/app/(dashboard)/layout.tsx (add auth guard) +apps/web/src/app/setup/welcome/page.tsx (update step flow) +apps/web/src/app/(dashboard)/settings/ (add API key section — uses getPlugin("squadhub-lifecycle")) +apps/web/package.json (add @clawe/plugins, auth deps) +apps/web/src/lib/squadhub/connection.ts (use getPlugin("squadhub-provisioner") result instead of env vars) +apps/web/src/app/api/tenant/provision/route.ts (use getPlugin("squadhub-provisioner") for infrastructure) +pnpm-workspace.yaml (add packages/plugins) +package.json (add workspace scripts) +``` + +### Cloud repo + +``` +apps/web/package.json (add @clawe/cloud-plugins) +apps/watcher/package.json (add aws-sdk for CloudMap) +docker/squadhub/entrypoint.sh (add agent registration on first start) +pnpm-workspace.yaml (add packages/cloud-plugins, infra/) +package.json (add infra scripts) +``` From 2d92effe5859e80b89195251341fae1c88062ae9 Mon Sep 17 00:00:00 2001 From: Guy Ben-Aharon Date: Wed, 18 Feb 2026 16:53:28 +0200 Subject: [PATCH 2/5] fix --- apps/web/src/lib/squadhub/connection.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/web/src/lib/squadhub/connection.ts b/apps/web/src/lib/squadhub/connection.ts index f7373cd..9e47f40 100644 --- a/apps/web/src/lib/squadhub/connection.ts +++ b/apps/web/src/lib/squadhub/connection.ts @@ -9,8 +9,9 @@ import type { Tenant } from "@clawe/backend/types"; export function getConnection(tenant?: Tenant | null): SquadhubConnection { return { squadhubUrl: - tenant?.squadhubUrl || process.env.SQUADHUB_URL || "http://localhost:18790", - squadhubToken: - tenant?.squadhubToken || process.env.SQUADHUB_TOKEN || "", + tenant?.squadhubUrl || + process.env.SQUADHUB_URL || + "http://localhost:18790", + squadhubToken: tenant?.squadhubToken || process.env.SQUADHUB_TOKEN || "", }; } From 14dc583cd1cdd30543a5841e35a6055cdb054ef5 Mon Sep 17 00:00:00 2001 From: Guy Ben-Aharon Date: Wed, 18 Feb 2026 19:20:25 +0200 Subject: [PATCH 3/5] fix --- .../telegram-disconnect-dialog.tsx | 5 + .../web/src/app/api/squadhub/pairing/route.ts | 36 +++- apps/web/src/hooks/use-squadhub-status.ts | 4 +- apps/web/src/lib/squadhub/actions.spec.ts | 2 +- apps/web/src/lib/squadhub/actions.ts | 42 +++- docker/squadhub/Dockerfile | 7 +- docker/squadhub/entrypoint.sh | 6 +- .../squadhub/templates/config.template.json | 8 + package.json | 2 +- packages/backend/convex/_generated/api.d.ts | 2 + packages/shared/src/squadhub/client.ts | 26 +++ packages/shared/src/squadhub/index.ts | 8 +- packages/shared/src/squadhub/pairing.ts | 191 ----------------- .../squadhub-extensions/openclaw.plugin.json | 8 + packages/squadhub-extensions/package.json | 28 +++ packages/squadhub-extensions/src/index.ts | 6 + .../squadhub-extensions/src/tools/pairing.ts | 200 ++++++++++++++++++ packages/squadhub-extensions/src/types.ts | 32 +++ packages/squadhub-extensions/tsconfig.json | 14 ++ packages/squadhub-extensions/tsup.config.ts | 12 ++ pnpm-lock.yaml | 27 +++ 21 files changed, 452 insertions(+), 214 deletions(-) delete mode 100644 packages/shared/src/squadhub/pairing.ts create mode 100644 packages/squadhub-extensions/openclaw.plugin.json create mode 100644 packages/squadhub-extensions/package.json create mode 100644 packages/squadhub-extensions/src/index.ts create mode 100644 packages/squadhub-extensions/src/tools/pairing.ts create mode 100644 packages/squadhub-extensions/src/types.ts create mode 100644 packages/squadhub-extensions/tsconfig.json create mode 100644 packages/squadhub-extensions/tsup.config.ts diff --git a/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx b/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx index 0062ee8..8f72fad 100644 --- a/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx +++ b/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx @@ -6,6 +6,7 @@ import { toast } from "@clawe/ui/components/sonner"; import { api } from "@clawe/backend"; import { Button } from "@clawe/ui/components/button"; import { Spinner } from "@clawe/ui/components/spinner"; +import { removeTelegramBot } from "@/lib/squadhub/actions"; import { AlertDialog, AlertDialogContent, @@ -32,6 +33,10 @@ export const TelegramDisconnectDialog = ({ const handleDisconnect = async () => { setIsDisconnecting(true); try { + // Remove token from squadhub config + await removeTelegramBot(); + + // Update Convex status await disconnectChannel({ type: "telegram" }); toast.success("Telegram disconnected"); onOpenChange(false); diff --git a/apps/web/src/app/api/squadhub/pairing/route.ts b/apps/web/src/app/api/squadhub/pairing/route.ts index dfb6e43..5fbcafd 100644 --- a/apps/web/src/app/api/squadhub/pairing/route.ts +++ b/apps/web/src/app/api/squadhub/pairing/route.ts @@ -1,26 +1,34 @@ import { NextResponse } from "next/server"; import type { NextRequest } from "next/server"; import { - listChannelPairingRequests, - approveChannelPairingCode, + listPairingRequests, + approvePairingCode, } from "@clawe/shared/squadhub"; import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; +function parseToolText(result: { + result: { content: Array<{ text?: string }> }; +}): Record { + const text = result.result.content[0]?.text; + if (!text) return {}; + return JSON.parse(text) as Record; +} + // GET /api/squadhub/pairing?channel=telegram - List pending pairing requests export async function GET(request: NextRequest) { const auth = await getAuthenticatedTenant(request); if (auth.error) return auth.error; const channel = request.nextUrl.searchParams.get("channel") || "telegram"; - - const result = await listChannelPairingRequests(channel); + const result = await listPairingRequests(getConnection(auth.tenant), channel); if (!result.ok) { return NextResponse.json({ error: result.error.message }, { status: 500 }); } - return NextResponse.json(result.result); + const data = parseToolText(result); + return NextResponse.json({ requests: data.requests ?? [] }); } // POST /api/squadhub/pairing - Approve a pairing code @@ -39,18 +47,28 @@ export async function POST(request: NextRequest) { return NextResponse.json({ error: "Code is required" }, { status: 400 }); } - const result = await approveChannelPairingCode( + const result = await approvePairingCode( getConnection(auth.tenant), channel, code, ); if (!result.ok) { - const status = result.error.type === "not_found" ? 404 : 500; - return NextResponse.json({ error: result.error.message }, { status }); + return NextResponse.json( + { error: result.error.message }, + { status: 500 }, + ); + } + + const data = parseToolText(result); + if (!data.ok) { + return NextResponse.json( + { error: data.error || "Failed to approve pairing code" }, + { status: 404 }, + ); } - return NextResponse.json(result.result); + return NextResponse.json({ id: data.id, approved: data.approved }); } catch { return NextResponse.json( { error: "Failed to approve pairing code" }, diff --git a/apps/web/src/hooks/use-squadhub-status.ts b/apps/web/src/hooks/use-squadhub-status.ts index 242e772..bca07af 100644 --- a/apps/web/src/hooks/use-squadhub-status.ts +++ b/apps/web/src/hooks/use-squadhub-status.ts @@ -22,8 +22,8 @@ export const useSquadhubStatus = () => { return false; } }, - refetchInterval: 30000, // Check every 30 seconds - staleTime: 10000, + refetchInterval: 10000, // Check every 10 seconds + staleTime: 5000, retry: false, }); diff --git a/apps/web/src/lib/squadhub/actions.spec.ts b/apps/web/src/lib/squadhub/actions.spec.ts index 8f1b6d6..2bc5b24 100644 --- a/apps/web/src/lib/squadhub/actions.spec.ts +++ b/apps/web/src/lib/squadhub/actions.spec.ts @@ -6,7 +6,7 @@ vi.mock("@clawe/shared/squadhub", () => ({ getConfig: vi.fn(), saveTelegramBotToken: vi.fn(), probeTelegramToken: vi.fn(), - approveChannelPairingCode: vi.fn(), + approvePairingCode: vi.fn(), })); import { diff --git a/apps/web/src/lib/squadhub/actions.ts b/apps/web/src/lib/squadhub/actions.ts index ffe25e0..768f144 100644 --- a/apps/web/src/lib/squadhub/actions.ts +++ b/apps/web/src/lib/squadhub/actions.ts @@ -6,7 +6,7 @@ import { saveTelegramBotToken as saveTelegramBotTokenClient, removeTelegramBotToken as removeTelegramBotTokenClient, probeTelegramToken, - approveChannelPairingCode, + approvePairingCode as approvePairingCodeClient, } from "@clawe/shared/squadhub"; import { getConnection } from "./connection"; @@ -40,7 +40,45 @@ export async function approvePairingCode( code: string, channel: string = "telegram", ) { - return approveChannelPairingCode(getConnection(), channel, code); + const result = await approvePairingCodeClient(getConnection(), channel, code); + + if (!result.ok) { + return { + ok: false as const, + error: { type: "tool_error", message: result.error.message }, + }; + } + + // Parse the tool's text response + const text = result.result.content[0]?.text; + if (!text) { + return { + ok: false as const, + error: { type: "parse_error", message: "Empty tool response" }, + }; + } + + const data = JSON.parse(text) as { + ok: boolean; + id?: string; + approved?: boolean; + error?: string; + }; + + if (!data.ok) { + return { + ok: false as const, + error: { + type: "not_found", + message: data.error || "Invalid or expired pairing code", + }, + }; + } + + return { + ok: true as const, + result: { id: data.id, approved: data.approved }, + }; } export async function removeTelegramBot() { diff --git a/docker/squadhub/Dockerfile b/docker/squadhub/Dockerfile index 3dbc35f..147df6b 100644 --- a/docker/squadhub/Dockerfile +++ b/docker/squadhub/Dockerfile @@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ && rm -rf /var/lib/apt/lists/* -RUN npm install -g openclaw@latest +RUN npm install -g openclaw@2026.2.17 RUN mkdir -p /data/config /data/workspace @@ -24,6 +24,11 @@ RUN chmod +x /opt/clawe/scripts/*.sh COPY packages/cli/dist/clawe.js /usr/local/bin/clawe RUN chmod +x /usr/local/bin/clawe +# Copy Clawe OpenClaw extensions (plugin bundle + manifest) +COPY packages/squadhub-extensions/dist/index.js /opt/clawe/extensions/clawe/dist/index.js +COPY packages/squadhub-extensions/openclaw.plugin.json /opt/clawe/extensions/clawe/openclaw.plugin.json +COPY packages/squadhub-extensions/package.json /opt/clawe/extensions/clawe/package.json + ENV OPENCLAW_STATE_DIR=/data/config ENV OPENCLAW_PORT=18789 ENV OPENCLAW_SKIP_GMAIL_WATCHER=1 diff --git a/docker/squadhub/entrypoint.sh b/docker/squadhub/entrypoint.sh index 0e01c04..50fef3f 100644 --- a/docker/squadhub/entrypoint.sh +++ b/docker/squadhub/entrypoint.sh @@ -62,8 +62,12 @@ node /opt/clawe/scripts/pair-device.js --watch & echo "==> Starting OpenClaw gateway on port $PORT..." +# OpenClaw does a "full process restart" on certain config changes (e.g. +# adding a Telegram channel). It spawns a child process and the parent +# exits. In Docker, PID 1 exiting kills the container. We rely on +# Docker's restart policy (restart: unless-stopped) to handle this. exec openclaw gateway run \ --port "$PORT" \ - --bind 0.0.0.0 \ + --bind lan \ --token "$TOKEN" \ --allow-unconfigured diff --git a/docker/squadhub/templates/config.template.json b/docker/squadhub/templates/config.template.json index df02ef7..3c0bbd1 100644 --- a/docker/squadhub/templates/config.template.json +++ b/docker/squadhub/templates/config.template.json @@ -64,6 +64,11 @@ } ] }, + "plugins": { + "load": { + "paths": ["/opt/clawe/extensions/clawe"] + } + }, "tools": { "agentToAgent": { "enabled": true, @@ -97,6 +102,9 @@ "mode": "token", "token": "${OPENCLAW_TOKEN}" }, + "tools": { + "allow": ["gateway"] + }, "http": { "endpoints": { "chatCompletions": { diff --git a/package.json b/package.json index 412eba4..1463b1e 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "scripts": { "build": "dotenv -e .env -- turbo run build", "dev": "dotenv -e .env -- turbo run dev", - "dev:docker": "pnpm --filter @clawe/cli build && docker compose up --build squadhub", + "dev:docker": "pnpm --filter @clawe/cli build && pnpm --filter @clawe/squadhub-extensions build && docker compose up --build squadhub", "build:docker": "pnpm --filter @clawe/cli build && docker compose build --no-cache squadhub", "debug": "dotenv -e .env -- turbo run debug", "dev:web": "dotenv -e .env -- turbo run dev --filter=web", diff --git a/packages/backend/convex/_generated/api.d.ts b/packages/backend/convex/_generated/api.d.ts index 9b9ae44..86a4697 100644 --- a/packages/backend/convex/_generated/api.d.ts +++ b/packages/backend/convex/_generated/api.d.ts @@ -15,6 +15,7 @@ import type * as businessContext from "../businessContext.js"; import type * as channels from "../channels.js"; import type * as documents from "../documents.js"; import type * as lib_auth from "../lib/auth.js"; +import type * as lib_helpers from "../lib/helpers.js"; import type * as messages from "../messages.js"; import type * as notifications from "../notifications.js"; import type * as routines from "../routines.js"; @@ -37,6 +38,7 @@ declare const fullApi: ApiFromModules<{ channels: typeof channels; documents: typeof documents; "lib/auth": typeof lib_auth; + "lib/helpers": typeof lib_helpers; messages: typeof messages; notifications: typeof notifications; routines: typeof routines; diff --git a/packages/shared/src/squadhub/client.ts b/packages/shared/src/squadhub/client.ts index 0d5b156..5f21149 100644 --- a/packages/shared/src/squadhub/client.ts +++ b/packages/shared/src/squadhub/client.ts @@ -6,6 +6,7 @@ import type { SessionsListResult, GatewayHealthResult, TelegramProbeResult, + PairingRequest, } from "./types"; export type SquadhubConnection = { @@ -199,6 +200,31 @@ export async function sessionsSend( }); } +// Pairing (via clawe_pairing plugin tool) +export async function listPairingRequests( + connection: SquadhubConnection, + channel: string, +): Promise> { + return invokeTool(connection, "clawe_pairing", undefined, { + action: "list", + channel, + }); +} + +export async function approvePairingCode( + connection: SquadhubConnection, + channel: string, + code: string, +): Promise< + ToolResult<{ ok: boolean; id?: string; approved?: boolean; error?: string }> +> { + return invokeTool(connection, "clawe_pairing", undefined, { + action: "approve", + channel, + code, + }); +} + // Cron types (matching squadhub src/cron/types.ts) export type CronSchedule = | { kind: "at"; at: string } diff --git a/packages/shared/src/squadhub/index.ts b/packages/shared/src/squadhub/index.ts index 10d4321..a77caeb 100644 --- a/packages/shared/src/squadhub/index.ts +++ b/packages/shared/src/squadhub/index.ts @@ -11,6 +11,8 @@ export { sessionsSend, cronList, cronAdd, + listPairingRequests, + approvePairingCode, } from "./client"; export type { SquadhubConnection, @@ -31,12 +33,6 @@ export { GatewayClient, createGatewayClient } from "./gateway-client"; export type { GatewayClientOptions } from "./gateway-client"; export { getSharedClient } from "./shared-client"; -// Pairing -export { - listChannelPairingRequests, - approveChannelPairingCode, -} from "./pairing"; - // Types export type { AgentToolResult, diff --git a/packages/shared/src/squadhub/pairing.ts b/packages/shared/src/squadhub/pairing.ts deleted file mode 100644 index 7591345..0000000 --- a/packages/shared/src/squadhub/pairing.ts +++ /dev/null @@ -1,191 +0,0 @@ -import { promises as fs } from "fs"; -import path from "path"; -import os from "os"; -import type { - PairingRequest, - PairingListResult, - PairingApproveResult, - DirectResult, -} from "./types"; -import { getConfig, patchConfig, type SquadhubConnection } from "./client"; - -const SQUADHUB_STATE_DIR = - process.env.SQUADHUB_STATE_DIR || path.join(os.homedir(), ".squadhub"); -const CREDENTIALS_DIR = path.join(SQUADHUB_STATE_DIR, "credentials"); - -type PairingStore = { - version: 1; - requests: PairingRequest[]; -}; - -const PAIRING_PENDING_TTL_MS = 60 * 60 * 1000; // 1 hour - -function resolvePairingPath(channel: string): string { - // Sanitize channel name for filesystem - const safe = channel - .trim() - .toLowerCase() - .replace(/[\\/:*?"<>|]/g, "_"); - return path.join(CREDENTIALS_DIR, `${safe}-pairing.json`); -} - -async function readJsonFile(filePath: string, fallback: T): Promise { - try { - const raw = await fs.readFile(filePath, "utf-8"); - return JSON.parse(raw) as T; - } catch { - return fallback; - } -} - -async function writeJsonFile(filePath: string, value: unknown): Promise { - const dir = path.dirname(filePath); - await fs.mkdir(dir, { recursive: true, mode: 0o700 }); - await fs.writeFile(filePath, JSON.stringify(value, null, 2), "utf-8"); - await fs.chmod(filePath, 0o600); -} - -function isExpired(entry: PairingRequest, nowMs: number): boolean { - const createdAt = Date.parse(entry.createdAt); - if (!Number.isFinite(createdAt)) return true; - return nowMs - createdAt > PAIRING_PENDING_TTL_MS; -} - -function pruneExpiredRequests(reqs: PairingRequest[], nowMs: number) { - return reqs.filter((req) => !isExpired(req, nowMs)); -} - -export async function listChannelPairingRequests( - channel: string, -): Promise> { - try { - const filePath = resolvePairingPath(channel); - const store = await readJsonFile(filePath, { - version: 1, - requests: [], - }); - - const nowMs = Date.now(); - const requests = pruneExpiredRequests(store.requests || [], nowMs); - - return { ok: true, result: { requests } }; - } catch (error) { - return { - ok: false, - error: { - type: "read_error", - message: - error instanceof Error - ? error.message - : "Failed to read pairing requests", - }, - }; - } -} - -export async function approveChannelPairingCode( - connection: SquadhubConnection, - channel: string, - code: string, -): Promise> { - try { - if (!code) { - return { - ok: false, - error: { type: "invalid_code", message: "Pairing code is required" }, - }; - } - - const normalizedCode = code.trim().toUpperCase(); - const pairingPath = resolvePairingPath(channel); - - // Read current pairing requests (file-based - squadhub writes these) - const store = await readJsonFile(pairingPath, { - version: 1, - requests: [], - }); - - const nowMs = Date.now(); - const requests = pruneExpiredRequests(store.requests || [], nowMs); - - // Find the request with matching code - const entry = requests.find((r) => r.code.toUpperCase() === normalizedCode); - - if (!entry) { - return { - ok: false, - error: { - type: "not_found", - message: "Invalid or expired pairing code", - }, - }; - } - - // Get current config to read existing allowFrom list - const configResult = await getConfig(connection); - if (!configResult.ok) { - return { - ok: false, - error: { - type: "config_error", - message: "Failed to read current config", - }, - }; - } - - // Extract existing allowFrom list - const config = configResult.result.details.config as { - channels?: { - [key: string]: { - allowFrom?: string[]; - }; - }; - }; - const existingAllowFrom = config?.channels?.[channel]?.allowFrom ?? []; - - // Add user ID to allowFrom if not already present - if (!existingAllowFrom.includes(entry.id)) { - const patchResult = await patchConfig( - connection, - { - channels: { - [channel]: { - allowFrom: [...existingAllowFrom, entry.id], - }, - }, - }, - configResult.result.details.hash, - ); - - if (!patchResult.ok) { - return { - ok: false, - error: { - type: "config_error", - message: "Failed to update allowFrom config", - }, - }; - } - } - - // Remove from pending requests file - const remainingRequests = requests.filter((r) => r.id !== entry.id); - await writeJsonFile(pairingPath, { - version: 1, - requests: remainingRequests, - }); - - return { ok: true, result: { id: entry.id, approved: true } }; - } catch (error) { - return { - ok: false, - error: { - type: "write_error", - message: - error instanceof Error - ? error.message - : "Failed to approve pairing code", - }, - }; - } -} diff --git a/packages/squadhub-extensions/openclaw.plugin.json b/packages/squadhub-extensions/openclaw.plugin.json new file mode 100644 index 0000000..173bdf7 --- /dev/null +++ b/packages/squadhub-extensions/openclaw.plugin.json @@ -0,0 +1,8 @@ +{ + "id": "squadhub-extensions", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": {} + } +} diff --git a/packages/squadhub-extensions/package.json b/packages/squadhub-extensions/package.json new file mode 100644 index 0000000..efd8ccd --- /dev/null +++ b/packages/squadhub-extensions/package.json @@ -0,0 +1,28 @@ +{ + "name": "@clawe/squadhub-extensions", + "version": "0.1.0", + "description": "OpenClaw plugin extensions for Clawe squadhub", + "private": true, + "type": "module", + "scripts": { + "build": "tsup", + "check-types": "tsc --noEmit" + }, + "openclaw": { + "extensions": [ + "./dist/index.js" + ] + }, + "devDependencies": { + "@clawe/typescript-config": "workspace:*", + "@sinclair/typebox": "^0.34.0", + "@types/node": "^22.0.0", + "tsup": "^8.0.0", + "typescript": "^5.7.3", + "vitest": "^4.0.0" + }, + "files": [ + "dist", + "openclaw.plugin.json" + ] +} diff --git a/packages/squadhub-extensions/src/index.ts b/packages/squadhub-extensions/src/index.ts new file mode 100644 index 0000000..833f686 --- /dev/null +++ b/packages/squadhub-extensions/src/index.ts @@ -0,0 +1,6 @@ +import type { OpenClawPluginApi } from "./types"; +import { registerPairingTool } from "./tools/pairing"; + +export default function register(api: OpenClawPluginApi) { + registerPairingTool(api); +} diff --git a/packages/squadhub-extensions/src/tools/pairing.ts b/packages/squadhub-extensions/src/tools/pairing.ts new file mode 100644 index 0000000..bbf60d4 --- /dev/null +++ b/packages/squadhub-extensions/src/tools/pairing.ts @@ -0,0 +1,200 @@ +import { promises as fs } from "node:fs"; +import path from "node:path"; +import { Type } from "@sinclair/typebox"; +import type { OpenClawPluginApi, ToolResult } from "../types"; + +/** + * Pairing tool for Clawe. + * + * Exposes channel pairing operations (list pending requests, approve by code) + * as an OpenClaw tool callable via POST /tools/invoke. + * + * File layout (relative to $OPENCLAW_STATE_DIR): + * credentials/-pairing.json — pending pairing requests + * credentials/-allowFrom.json — approved sender IDs + */ + +const PAIRING_PENDING_TTL_MS = 60 * 60 * 1000; // 1 hour + +type PairingRequest = { + id: string; + code: string; + createdAt: string; + lastSeenAt: string; + meta?: Record; +}; + +type PairingStore = { + version: 1; + requests: PairingRequest[]; +}; + +type AllowFromStore = { + version: 1; + allowFrom: string[]; +}; + +function resolveStateDir(): string { + return process.env.OPENCLAW_STATE_DIR || "/data/config"; +} + +function safeChannelKey(channel: string): string { + const raw = channel.trim().toLowerCase(); + if (!raw) throw new Error("invalid pairing channel"); + const safe = raw.replace(/[\\/:*?"<>|]/g, "_").replace(/\.\./g, "_"); + if (!safe || safe === "_") throw new Error("invalid pairing channel"); + return safe; +} + +function resolvePairingPath(channel: string): string { + return path.join( + resolveStateDir(), + "credentials", + `${safeChannelKey(channel)}-pairing.json`, + ); +} + +function resolveAllowFromPath(channel: string): string { + return path.join( + resolveStateDir(), + "credentials", + `${safeChannelKey(channel)}-allowFrom.json`, + ); +} + +async function readJsonFile(filePath: string, fallback: T): Promise { + try { + const raw = await fs.readFile(filePath, "utf-8"); + return JSON.parse(raw) as T; + } catch { + return fallback; + } +} + +async function writeJsonFile(filePath: string, value: unknown): Promise { + const dir = path.dirname(filePath); + await fs.mkdir(dir, { recursive: true, mode: 0o700 }); + await fs.writeFile(filePath, JSON.stringify(value, null, 2), "utf-8"); + await fs.chmod(filePath, 0o600); +} + +function isExpired(entry: PairingRequest, nowMs: number): boolean { + const createdAt = Date.parse(entry.createdAt); + if (!Number.isFinite(createdAt)) return true; + return nowMs - createdAt > PAIRING_PENDING_TTL_MS; +} + +function pruneExpired(reqs: PairingRequest[], nowMs: number): PairingRequest[] { + return reqs.filter((r) => !isExpired(r, nowMs)); +} + +function textResult(data: unknown): ToolResult { + return { + content: [{ type: "text", text: JSON.stringify(data) }], + details: + typeof data === "object" && data !== null + ? (data as Record) + : {}, + }; +} + +function errorResult(message: string): ToolResult { + return { + content: [ + { type: "text", text: JSON.stringify({ ok: false, error: message }) }, + ], + details: { ok: false, error: message }, + }; +} + +async function listAction(channel: string): Promise { + const filePath = resolvePairingPath(channel); + const store = await readJsonFile(filePath, { + version: 1, + requests: [], + }); + const requests = pruneExpired(store.requests || [], Date.now()); + return textResult({ ok: true, requests }); +} + +async function approveAction( + channel: string, + code: string, +): Promise { + if (!code) return errorResult("Pairing code is required"); + + const normalizedCode = code.trim().toUpperCase(); + const pairingPath = resolvePairingPath(channel); + + // Read pending requests + const store = await readJsonFile(pairingPath, { + version: 1, + requests: [], + }); + const requests = pruneExpired(store.requests || [], Date.now()); + + // Find matching request + const entry = requests.find((r) => r.code.toUpperCase() === normalizedCode); + if (!entry) return errorResult("Invalid or expired pairing code"); + + // Add to allow-from store + const allowFromPath = resolveAllowFromPath(channel); + const allowFromStore = await readJsonFile(allowFromPath, { + version: 1, + allowFrom: [], + }); + const existing = allowFromStore.allowFrom || []; + if (!existing.includes(entry.id)) { + await writeJsonFile(allowFromPath, { + version: 1, + allowFrom: [...existing, entry.id], + } satisfies AllowFromStore); + } + + // Remove approved request from pending + const remaining = requests.filter((r) => r.id !== entry.id); + await writeJsonFile(pairingPath, { + version: 1, + requests: remaining, + } satisfies PairingStore); + + return textResult({ ok: true, id: entry.id, approved: true }); +} + +export function registerPairingTool(api: OpenClawPluginApi) { + api.registerTool({ + name: "clawe_pairing", + description: + "Manage channel pairing requests (list pending, approve by code)", + parameters: Type.Object({ + action: Type.Union([Type.Literal("list"), Type.Literal("approve")], { + description: 'Action to perform: "list" or "approve"', + }), + channel: Type.String({ description: 'Channel name (e.g. "telegram")' }), + code: Type.Optional( + Type.String({ description: "Pairing code (required for approve)" }), + ), + }), + async execute( + _toolCallId: string, + params: Record, + ): Promise { + const action = params.action as string; + const channel = params.channel as string; + + if (!channel) return errorResult("Channel is required"); + + if (action === "list") { + return listAction(channel); + } + + if (action === "approve") { + const code = params.code as string | undefined; + if (!code) return errorResult("Code is required for approve action"); + return approveAction(channel, code); + } + + return errorResult(`Unknown action: ${action}`); + }, + }); +} diff --git a/packages/squadhub-extensions/src/types.ts b/packages/squadhub-extensions/src/types.ts new file mode 100644 index 0000000..4d3340a --- /dev/null +++ b/packages/squadhub-extensions/src/types.ts @@ -0,0 +1,32 @@ +import type { TObject } from "@sinclair/typebox"; + +/** + * Minimal type definitions for the OpenClaw plugin API. + * Only the surface we actually use — avoids importing from openclaw. + */ + +export type ToolResult = { + content: Array<{ type: "text"; text: string }>; + details?: Record; +}; + +export type AgentTool = { + name: string; + description: string; + parameters: TObject; + execute: ( + toolCallId: string, + params: Record, + ) => Promise; +}; + +export type OpenClawPluginApi = { + id: string; + name: string; + config: Record; + pluginConfig?: Record; + registerTool: ( + tool: AgentTool, + opts?: { name?: string; optional?: boolean }, + ) => void; +}; diff --git a/packages/squadhub-extensions/tsconfig.json b/packages/squadhub-extensions/tsconfig.json new file mode 100644 index 0000000..5bb6f44 --- /dev/null +++ b/packages/squadhub-extensions/tsconfig.json @@ -0,0 +1,14 @@ +{ + "extends": "@clawe/typescript-config/base.json", + "compilerOptions": { + "module": "ESNext", + "moduleResolution": "bundler", + "types": ["node"], + "rootDir": "./src", + "outDir": "./dist", + "declaration": true, + "declarationMap": true + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "**/*.spec.ts"] +} diff --git a/packages/squadhub-extensions/tsup.config.ts b/packages/squadhub-extensions/tsup.config.ts new file mode 100644 index 0000000..d2754f4 --- /dev/null +++ b/packages/squadhub-extensions/tsup.config.ts @@ -0,0 +1,12 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: { index: "src/index.ts" }, + format: ["esm"], + target: "node22", + platform: "node", + outDir: "dist", + clean: true, + bundle: true, + noExternal: [/.*/], +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1049082..5e9e7fc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -344,6 +344,27 @@ importers: specifier: ^4.0.18 version: 4.0.18(@opentelemetry/api@1.9.0)(@types/node@22.15.3)(jiti@2.6.1)(jsdom@28.0.0)(lightningcss@1.30.2)(tsx@4.21.0) + packages/squadhub-extensions: + devDependencies: + '@clawe/typescript-config': + specifier: workspace:* + version: link:../typescript-config + '@sinclair/typebox': + specifier: ^0.34.0 + version: 0.34.48 + '@types/node': + specifier: ^22.0.0 + version: 22.15.3 + tsup: + specifier: ^8.0.0 + version: 8.5.1(jiti@2.6.1)(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.2) + typescript: + specifier: ^5.7.3 + version: 5.9.2 + vitest: + specifier: ^4.0.0 + version: 4.0.18(@opentelemetry/api@1.9.0)(@types/node@22.15.3)(jiti@2.6.1)(jsdom@28.0.0)(lightningcss@1.30.2)(tsx@4.21.0) + packages/typescript-config: {} packages/ui: @@ -2192,6 +2213,9 @@ packages: cpu: [x64] os: [win32] + '@sinclair/typebox@0.34.48': + resolution: {integrity: sha512-kKJTNuK3AQOrgjjotVxMrCn1sUJwM76wMszfq1kdU4uYVJjvEWuFQ6HgvLt4Xz3fSmZlTOxJ/Ie13KnIcWQXFA==} + '@smithy/abort-controller@4.2.8': resolution: {integrity: sha512-peuVfkYHAmS5ybKxWcfraK7WBBP0J+rkfUcbHJJKQ4ir3UAUNQI+Y4Vt/PqSzGqgloJ5O1dk7+WzNL8wcCSXbw==} engines: {node: '>=18.0.0'} @@ -4286,6 +4310,7 @@ packages: nanoid@3.3.11: resolution: {integrity: sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==} engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} @@ -7376,6 +7401,8 @@ snapshots: '@rollup/rollup-win32-x64-msvc@4.57.1': optional: true + '@sinclair/typebox@0.34.48': {} + '@smithy/abort-controller@4.2.8': dependencies: '@smithy/types': 4.12.0 From a1f52ad396703b6f0a5f4c6aff1e447f31124326 Mon Sep 17 00:00:00 2001 From: Guy Ben-Aharon Date: Wed, 18 Feb 2026 19:25:57 +0200 Subject: [PATCH 4/5] fix --- steps.md | 1546 ------------------------------------------------------ 1 file changed, 1546 deletions(-) delete mode 100644 steps.md diff --git a/steps.md b/steps.md deleted file mode 100644 index 65c9a38..0000000 --- a/steps.md +++ /dev/null @@ -1,1546 +0,0 @@ -# Clawe Cloud Deployment Plan - -## Goal - -Deploy Clawe as a multi-tenant SaaS on AWS, where each user signs up, provides their Anthropic API key, and gets their own squadhub service. All infrastructure managed via CDK with full dev/prod environment separation. - -## Architecture Overview - -``` - Users (browser) - │ - ▼ - ┌──────────┐ ┌────────────┐ ┌──────────┐ - │ Route 53 │────▶│ CloudFront │────▶│ ALB │ - └──────────┘ └────────────┘ └────┬─────┘ - │ - ┌─────────────▼──────────────┐ - │ clawe-cluster-{env} │ - │ ┌────────────────────┐ │ - │ │ Web App (Fargate) │ │ - │ │ Shared, all tenants│ │ - │ └───┬────────────────┘ │ - │ │ │ - │ ┌───▼────────────────┐ │ - │ │ Watcher (Fargate) │ │ - │ │ Shared, polls all │ │ - │ └───┬────────────────┘ │ - └──────┼──────────────────────┘ - │ - ┌────────────────┐│┌────────────────┐ - │ │││ │ - ▼ ▼│▼ ▼ - ┌───────────┐ ┌─────▼─────────────────────┐ - │ Convex │ │ clawe-squadhubs-{env} │ - │ (Managed) │ │ ┌────────┐ ┌────────┐ │ - │ Multi- │ │ │SH 1 │ │SH 2 │... │ - │ tenant │ │ │Tenant A│ │Tenant B│ │ - └───────────┘ │ └───┬────┘ └───┬────┘ │ - └──────┼──────────┼─────────┘ - │ │ - ┌───────────────▼──────────▼──┐ - │ EFS │ - │ ┌──────────┐ ┌──────────┐ │ - │ │/tenant-A │ │/tenant-B │ │ - │ └──────────┘ └──────────┘ │ - └─────────────────────────────┘ - - Routing: Web App/Watcher → CloudMap DNS → Squadhub private IP - Auth: Browser: Google → Cognito JWT → user record → accountMembers → account → tenantId - Machine: SQUADHUB_TOKEN → tenant record → tenantId (CLI/watcher) - Images: ECR (shared across envs, tagged per env) -``` - -## Environment Strategy - -All CDK stacks are parameterized by environment (`dev` | `prod`): - -```bash -cdk deploy --context env=dev # Deploys to dev -cdk deploy --context env=prod # Deploys to prod -``` - -Each environment gets completely isolated resources: - -| Resource | Dev | Prod | -| ----------------------- | ----------------------------------------- | ------------------------------------------ | -| Domain | app-dev.clawe.io | app.clawe.io | -| AUTH_PROVIDER | `cognito` | `cognito` | -| Cognito User Pool | clawe-userpool-dev | clawe-userpool-prod | -| Cognito Domain | clawe-dev.auth.{region}.amazoncognito.com | clawe-prod.auth.{region}.amazoncognito.com | -| Sign-in method | Google (social login only) | Google (social login only) | -| ECS Cluster (shared) | clawe-cluster-dev | clawe-cluster-prod | -| ECS Cluster (squadhubs) | clawe-squadhubs-dev | clawe-squadhubs-prod | -| VPC | clawe-vpc-dev | clawe-vpc-prod | -| ALB | clawe-alb-dev | clawe-alb-prod | -| ECR Repos | Shared across envs | Same images, different tags | -| Convex Deployment | Separate dev deployment | Separate prod deployment | -| CloudMap Namespace | clawe-internal-dev | clawe-internal-prod | - ---- - -## Phase 1: Shared Client & Watcher Refactor - -Code-only changes. No infrastructure needed. Makes the codebase multi-tenant ready. - -### 1.1 Refactor shared squadhub client - -Update `packages/shared/src/squadhub/client.ts`: - -- [x] All exported functions take `SquadhubConnection` (`{ squadhubUrl, squadhubToken }`) as first parameter -- [x] Remove the lazy singleton `getSquadhubClient()` — create client per call -- [x] This makes the client stateless and multi-tenant safe -- [x] Apply same pattern to all functions: `saveTelegramBotToken`, `getConfig`, `patchConfig`, `listSessions`, `sessionsSend`, `sendMessage`, `cronList`, `cronAdd` -- [x] Update `pairing.ts` (`approveChannelPairingCode` takes connection param) -- [x] Export `SquadhubConnection` type from `index.ts` -- [x] Update all tests in `client.spec.ts` - -### 1.2 Refactor shared gateway client - -Update `packages/shared/src/squadhub/gateway-client.ts`: - -- [x] `GatewayClient` constructor accepts `url` and `token` as parameters (verified, no env var fallbacks) -- [x] `createGatewayClient()` requires `SquadhubConnection` as first param (no env var fallback) -- [x] `getSharedClient()` in `shared-client.ts` requires `SquadhubConnection` as first param -- [x] Web app callers pass connection explicitly - -### 1.3 Update web app server actions - -Update `apps/web/src/lib/squadhub/actions.ts`: - -- [x] Each action reads `squadhubUrl` and `squadhubToken` from env (for now — will become per-tenant later) -- [x] Passes them explicitly to shared client functions -- [x] This is the adapter layer: currently reads env, later resolves from tenant record - -### 1.4 Update web app API routes - -Update all API routes that use the squadhub client: - -- [x] `/api/squadhub/health` — pass `squadhubUrl`/`squadhubToken` explicitly -- [x] `/api/squadhub/pairing` — same -- [x] `/api/chat` — reads env at call site via `getSquadhubConfig()` -- [x] `/api/chat/history` — passes connection to `getSharedClient()` -- [x] `/api/chat/abort` — passes connection to `getSharedClient()` -- [x] `/api/business/context` — moved env reads to `getEnvConfig()` called at request time - -### 1.5 Refactor watcher — tenant-aware polling loop - -Refactor `apps/watcher/src/index.ts`: - -- [x] Watcher passes `SquadhubConnection` to all shared client calls (`sessionsSend`, `cronList`, `cronAdd`) -- [x] Connection created from existing `config.squadhubUrl`/`config.squadhubToken` -- [x] Multi-tenant polling loop (`getActiveTenants()` → iterate over tenants in delivery and cron setup) -- [x] `deliverToAgent` and `setupCrons` accept `SquadhubConnection` param -- [x] `deliveryLoop` iterates over tenants, passes tenant connection to each squadhub delivery -- [x] Backwards compatibility: `getActiveTenants()` returns single tenant from env vars -- [x] System-level auth with `WATCHER_TOKEN` → implemented in 2.7 (`validateWatcherToken` + `tenants.listActive`) -- [x] Tenant-scoped Convex calls with per-tenant `machineToken` → implemented in 2.6 (added `machineToken` to all queries) - -Current loop: - -``` -every 2s: poll notifications → deliver to SQUADHUB_URL -every 10s: check routines → trigger via SQUADHUB_URL -``` - -New loop: - -``` -every 2s: - 1. Query Convex for all active tenants (tenants.listActive, authenticated with WATCHER_TOKEN) - 2. For each tenant: - a. Query undelivered notifications using tenant's squadhubToken as machineToken - b. Deliver to tenant's squadhubUrl -every 10s: - 1. Query Convex for all active tenants (same WATCHER_TOKEN) - 2. For each tenant: - a. Check due routines using tenant's squadhubToken as machineToken - b. Trigger via tenant's squadhubUrl -``` - -Authentication: - -- System-level calls (`tenants.listActive`) use `WATCHER_TOKEN` env var -- Tenant-scoped calls (`notifications.getUndelivered`, `routines`) use each tenant's `squadhubToken` as `machineToken` (returned by `listActive`) - -For backwards compatibility during development: if no tenants table exists yet, fall back to env-based single-tenant mode. - -### 1.6 Move startup logic from watcher to provisioning route - -- [x] Remove `registerAgents()` from watcher → moved to `apps/web/src/lib/squadhub/setup.ts` -- [x] Remove `setupCrons()` from watcher → moved to `apps/web/src/lib/squadhub/setup.ts` -- [x] Remove `seedRoutines()` from watcher → moved to `apps/web/src/lib/squadhub/setup.ts` -- [x] Create `POST /api/tenant/provision` route that runs all setup (agents, crons, routines) -- [x] Watcher is now purely a polling/delivery service (no startup side effects) - -### 1.7 Update CLI for multi-tenancy - -The CLI (`packages/cli`) communicates with Convex directly via `ConvexHttpClient`. It needs to authenticate as a machine caller using the per-tenant `SQUADHUB_TOKEN`. - -- [x] CLI reads `SQUADHUB_TOKEN` from env and exports as `machineToken` -- [x] Update `packages/cli/src/client.ts` — added `query`/`mutation`/`action` wrapper functions (pass-through for now; Phase 2.6 will inject `machineToken`) -- [x] Update all 17 command handlers in `packages/cli/src/commands/` to use wrappers instead of `client.query()`/`client.mutation()` -- [x] Update all 16 test files to match new mock pattern - -### 1.8 Rebuild and test - -- [x] Rebuild shared package: `pnpm --filter @clawe/shared build` -- [x] Rebuild CLI: `pnpm --filter @clawe/cli build` -- [x] Run all tests: `pnpm test` — 155 tests passing (shared 21, cli 64, watcher 5, web 65) -- [x] Verify local docker compose still works (single-tenant mode with env vars) - ---- - -## Phase 2: Multi-Tenancy in Convex - -### 2.1 Add users table - -- [x] Add to `packages/backend/convex/schema.ts`: - -```typescript -users: defineTable({ - email: v.string(), // From Cognito JWT (immutable, unique) - name: v.optional(v.string()), // Display name - createdAt: v.number(), - updatedAt: v.number(), -}) - .index("by_email", ["email"]), -``` - -Cognito is only the authentication layer. The `users` table in Convex is the source of truth for user data. On first login, a user record is created from the Cognito JWT email. - -### 2.2 Add accounts + tenants tables - -- [x] Add `accounts` table to `packages/backend/convex/schema.ts`: - -```typescript -accounts: defineTable({ - name: v.optional(v.string()), // "Guy's Account" - createdAt: v.number(), - updatedAt: v.number(), -}), -``` - -- [x] Add `tenants` table with `accountId` (tenant belongs to an account, not a user): - -```typescript -tenants: defineTable({ - accountId: v.id("accounts"), // Account that owns this tenant - status: v.union( - v.literal("provisioning"), // Squadhub being created - v.literal("active"), // Squadhub running - v.literal("stopped"), // Squadhub stopped (future: sleep mode) - v.literal("error"), // Provisioning failed - ), - squadhubUrl: v.optional(v.string()), // Internal squadhub URL (CloudMap DNS) - squadhubToken: v.optional(v.string()), // Per-tenant squadhub token - squadhubServiceArn: v.optional(v.string()), // ECS Service ARN - efsAccessPointId: v.optional(v.string()), // EFS access point - anthropicApiKey: v.optional(v.string()), // User's Anthropic API key - settings: v.optional(v.object({ - timezone: v.optional(v.string()), - })), - createdAt: v.number(), - updatedAt: v.number(), -}) - .index("by_account", ["accountId"]) - .index("by_status", ["status"]) - .index("by_squadhubToken", ["squadhubToken"]), -``` - -### 2.3 Add accountMembers join table - -- [x] Add to `packages/backend/convex/schema.ts`: - -```typescript -accountMembers: defineTable({ - userId: v.id("users"), - accountId: v.id("accounts"), - role: v.union(v.literal("owner"), v.literal("member")), - createdAt: v.number(), -}) - .index("by_user", ["userId"]) - .index("by_account", ["accountId"]) - .index("by_user_account", ["userId", "accountId"]), -``` - -Access model: `user → accountMembers → account → tenants` - -- A user belongs to an account via `accountMembers` -- An account can own multiple tenants (future: plan-based limits) -- A tenant belongs to exactly one account -- If you're a member of an account, you can access all its tenants - -### 2.4 Add tenantId to all existing tables - -Add `tenantId: v.id("tenants")` field to every table: - -- [x] `agents` — add field + index `by_tenant: ["tenantId"]` -- [x] `tasks` — add field + index `by_tenant: ["tenantId"]` -- [x] `messages` — add field + index `by_tenant: ["tenantId"]` -- [x] `notifications` — add field + index `by_tenant: ["tenantId"]` -- [x] `activities` — add field + index `by_tenant: ["tenantId"]` -- [x] `documents` — add field + index `by_tenant: ["tenantId"]` -- [x] `businessContext` — add field + index `by_tenant: ["tenantId"]` -- [x] `channels` — add field + compound index `by_tenant_type: ["tenantId", "type"]` -- [x] `routines` — add field + index `by_tenant: ["tenantId"]` - -### 2.5 Create user and tenant resolution helpers - -Create `packages/backend/convex/lib/auth.ts`: - -Two auth paths — browser (JWT) and machine (token): - -**Browser path (web app):** - -- [x] Helper function `getUser(ctx)` — gets auth identity, extracts email, looks up `users` table by email -- [x] Helper function `getTenantIdFromJwt(ctx)` — calls `getUser(ctx)`, queries `accountMembers` by user → account → `tenants` by account, returns first tenant's `_id` - -**Machine path — per-tenant (CLI):** - -- [x] Helper function `getTenantIdFromToken(ctx, machineToken)` — queries `tenants` by `by_squadhubToken` index, returns tenant `_id` - -**Machine path — system-level (watcher):** - -- [x] Helper function `validateWatcherToken(ctx, watcherToken)` — reads `WATCHER_TOKEN` from Convex env vars, compares with provided token -- [x] Wire into watcher's `getActiveTenants()` (done in 2.7) - -**Unified resolver (for tenant-scoped functions):** - -- [x] Helper function `resolveTenantId(ctx, args)` + `resolveTenantIdMut(ctx, args)`: - 1. If `args.machineToken` is provided → `getTenantIdFromToken(ctx, args.machineToken)` - 2. If `AUTH_ENABLED=true` → `getTenantIdFromJwt(ctx)` (requires valid JWT) - 3. If `AUTH_ENABLED=false` → `getOrCreateDevTenant(ctx)` (auto-creates a default dev tenant, mutation variant only) - -Create `packages/backend/convex/users.ts`: - -- [x] `getOrCreateFromAuth(ctx)` — called on every login, creates user record if needed -- [x] `get()` — get current authenticated user -- [x] `update(name)` — update profile - -### 2.6 Update all Convex queries and mutations - -Every query/mutation needs to accept optional `machineToken` arg, call `resolveTenantId(ctx, args)`, filter reads by `tenantId`, include `tenantId` in writes. - -Example — `agents.list`: - -```typescript -// Before: -export const list = query({ - args: {}, - handler: async (ctx) => { - return await ctx.db.query("agents").collect(); - }, -}); - -// After: -export const list = query({ - args: { machineToken: v.optional(v.string()) }, - handler: async (ctx, args) => { - const tenantId = await resolveTenantId(ctx, args); - return await ctx.db - .query("agents") - .withIndex("by_tenant", (q) => q.eq("tenantId", tenantId)) - .collect(); - }, -}); -``` - -- Browser calls (web app): `useQuery(api.agents.list)` — no `machineToken`, uses JWT -- Machine calls (CLI): `client.query(api.agents.list, { machineToken })` — uses token - -Files to update: - -- [x] `convex/agents.ts` — all functions -- [x] `convex/tasks.ts` — all functions -- [x] `convex/messages.ts` — all functions -- [x] `convex/notifications.ts` — all functions -- [x] `convex/activities.ts` — all functions -- [x] `convex/documents.ts` — all functions -- [x] Remove `convex/settings.ts` — migrated `timezone` to `tenants.settings` and `onboardingComplete` to `accounts.onboardingComplete`, dropped `settings` table from schema. Created `convex/tenants.ts` with `getTimezone`, `setTimezone`. Created `convex/accounts.ts` with `isOnboardingComplete`, `completeOnboarding`. Updated callers: - - `apps/web/src/app/(dashboard)/settings/general/_components/timezone-settings.tsx` → `api.tenants.getTimezone` / `api.tenants.setTimezone` - - `apps/web/src/hooks/use-onboarding-guard.ts` → `api.accounts.isOnboardingComplete` - - `apps/web/src/app/page.tsx` → `api.accounts.isOnboardingComplete` - - `apps/web/src/app/setup/complete/page.tsx` → `api.accounts.completeOnboarding` - - `apps/web/src/app/setup/provisioning/page.tsx` → `api.accounts.isOnboardingComplete` - - `apps/watcher/src/index.ts` → `api.tenants.getTimezone` (with `machineToken`) -- [x] `convex/businessContext.ts` — all functions -- [x] `convex/channels.ts` — all functions -- [x] `convex/routines.ts` — all functions -- [x] Update watcher to pass `machineToken` in all Convex calls (`agents.list`, `notifications.getUndelivered`, `notifications.markDelivered`, `routines.getDueRoutines`, `routines.trigger`, `tenants.getTimezone`) - -### 2.7 Create tenant management functions - -Create `packages/backend/convex/tenants.ts`: - -- [x] `create(accountId)` — creates tenant with `accountId`, status "provisioning" -- [x] `getForCurrentUser()` — gets tenant for authenticated user: - 1. Call `getUser(ctx)` to get user record - 2. Query `accountMembers.by_user` to find account membership - 3. Query `tenants.by_account` to find tenant in that account - 4. Return tenant -- [x] `updateStatus(tenantId, status, squadhubUrl?, serviceArn?)` — update provisioning state -- [x] `setAnthropicKey(apiKey)` — store API key in tenant record -- [x] `listActive(watcherToken)` — get all active tenants with squadhubUrl + squadhubToken (requires valid `WATCHER_TOKEN`) -- [x] Wire watcher's `getActiveTenants()` to call `tenants.listActive` instead of env fallback (deferred from 1.5 → 2.5 → here) -- [x] Wire `validateWatcherToken` into `listActive` (from 2.5) - ---- - -## Phase 3: Authentication (Always-On Auth) - -Auth is **always on**. Two providers, one interface: - -- **Cloud**: Cognito (Google social login) — JWKS validated by Convex -- **Local / self-hosted**: NextAuth with Credentials provider — auto-login, zero config - -The `AUTH_PROVIDER` env var (`nextauth` | `cognito`) controls which provider is active. Both paths produce valid JWTs with the same claims shape. All `AUTH_ENABLED` conditionals have been eliminated. - -### 3.1 Convex Auth integration - -- [x] `packages/backend/convex/auth.config.nextauth.ts` — NextAuth `customJwt` provider (JWKS URL) -- [x] `packages/backend/convex/auth.config.cognito.ts` — Cognito provider (kept from before) -- [x] `packages/backend/convex/auth.config.ts` — Committed as NextAuth version (default for local dev). Deploy script overwrites for cloud. -- [x] `packages/backend/convex/dev-jwks/` — Committed dev RSA key pair + JWKS JSON for local JWT signing -- [x] `packages/backend/convex/lib/auth.ts` — Simplified: removed `AUTH_ENABLED` checks, dev-tenant logic, `resolveTenantIdMut`. Single `resolveTenantId` works for both queries and mutations. -- [x] `scripts/convex-deploy.sh` — Updated: uses `AUTH_PROVIDER` instead of `AUTH_ENABLED` - -### 3.2 Web app — Auth provider and login page - -Dual-provider architecture selected by `NEXT_PUBLIC_AUTH_PROVIDER`: - -- [x] `apps/web/src/lib/auth/nextauth-config.ts` — NextAuth v5 config with Credentials + RS256 JWT signing -- [x] `apps/web/src/app/api/auth/[...nextauth]/route.ts` — NextAuth API route handler -- [x] `apps/web/src/app/api/auth/jwks/route.ts` — JWKS endpoint for Convex to validate NextAuth JWTs -- [x] `apps/web/src/providers/auth-provider.tsx` — Dual provider: NextAuth (SessionProvider) or Cognito (Amplify, dynamic import). Shared `AuthContextValue` interface. `useConvexAuth` hook for ConvexProviderWithAuth. -- [x] `apps/web/src/providers/convex-provider.tsx` — Always uses `ConvexProviderWithAuth` (no conditional) -- [x] `apps/web/src/app/auth/login/page.tsx` — Email form (NextAuth, auto-login for local dev) or Google button (Cognito) -- [x] `apps/web/src/hooks/use-user-menu.ts` — Simplified: always reads from `useAuth()`, no `AUTH_ENABLED` branches -- [x] `apps/web/src/hooks/use-onboarding-guard.ts` — Simplified: always checks auth, no `AUTH_ENABLED` branches -- [x] `.env.example` + `apps/web/.env.*` — Updated for `AUTH_PROVIDER`, `NEXTAUTH_SECRET`, `NEXTAUTH_URL` - -### 3.3 Web app — Auth middleware - -- [x] Protect all routes except `/auth/login` and `/api/health` — `apps/web/src/middleware.ts` checks session cookie, allows `PUBLIC_PATHS` -- [x] Redirect unauthenticated users to `/auth/login` — middleware redirects when no `authjs.session-token` cookie -- [x] Pass tenant context to downstream routes — tenant resolved in Convex via JWT email → user → accountMembers → tenant - -### 3.4 Web app — Auth UI components - -- [x] User menu in dashboard sidebar (email, logout) — wired `signOut` from `useAuth` -- [x] Onboarding guard hook checks auth state — redirects to `/auth/login` when not authenticated - ---- - -## Phase 4: Dev & Self-Hosted Provisioning - -**Goal**: After this phase, local dev and self-hosted deployments work end-to-end. A user can sign in, get auto-provisioned, go through onboarding, use the dashboard, and the watcher operates correctly — all without any cloud infrastructure. - -Everything remaining after this phase is cloud-only (CDK, AWS, Cognito, multi-tenant squadhub infrastructure). This clean boundary allows a separate `clawe-cloud` repo to extend this codebase with cloud-specific plugins without modifying the base code. - -### 4.1 Plugin architecture (`packages/plugins/`) - -Create a `@clawe/plugins` package that provides interfaces for cloud-extensible behavior and a plugin registry. The base repo contains interfaces + dev defaults. The `clawe-cloud` repo adds `packages/cloud-plugins/` that implements these interfaces with AWS. - -**Package structure:** - -``` -packages/plugins/ -├── src/ -│ ├── index.ts # Main exports: loadPlugins, hasPlugin, getPlugin, interfaces -│ ├── registry.ts # Plugin registry internals -│ ├── interfaces/ -│ │ ├── index.ts # Re-exports all interfaces -│ │ ├── squadhub-provisioner.ts # SquadhubProvisioner interface -│ │ └── squadhub-lifecycle.ts # SquadhubLifecycle interface -│ └── defaults/ -│ ├── index.ts # Default implementations -│ ├── squadhub-provisioner.ts # DefaultSquadhubProvisioner (reads env vars) -│ └── squadhub-lifecycle.ts # DefaultSquadhubLifecycle (no-ops) -├── package.json -└── tsconfig.json -``` - -**Plugin interfaces:** - -```typescript -// interfaces/squadhub-provisioner.ts -export interface SquadhubProvisioner { - /** - * Create infrastructure for a new tenant. - * Dev: reads SQUADHUB_URL + SQUADHUB_TOKEN from env, returns immediately. - * Cloud: creates ECS service + EFS access point + CloudMap entry, waits for health. - */ - provision(params: { - tenantId: string; - accountId: string; - anthropicApiKey?: string; - convexUrl: string; - }): Promise<{ - squadhubUrl: string; - squadhubToken: string; - metadata?: Record; // Cloud: ECS ARN, EFS AP ID, etc. - }>; - - /** - * Check provisioning progress (for polling UI). - * Dev: always returns "active" (instant). - * Cloud: returns "provisioning" while ECS service is starting. - */ - getProvisioningStatus(tenantId: string): Promise<{ - status: "provisioning" | "active" | "error"; - message?: string; - }>; - - /** - * Tear down all infrastructure for a tenant. - * Dev: no-op. - * Cloud: deletes ECS service + EFS access point + CloudMap entry. - */ - deprovision(params: DeprovisionParams): Promise; -} - -// interfaces/squadhub-lifecycle.ts -export interface SquadhubLifecycle { - /** - * Restart the tenant's squadhub service (e.g., after API key change). - * Dev: no-op (user manually restarts docker). - * Cloud: forces new ECS task deployment. - */ - restart(tenantId: string): Promise; - - /** - * Stop the tenant's squadhub service. - * Dev: no-op. - * Cloud: sets ECS desiredCount to 0. - */ - stop(tenantId: string): Promise; - - /** - * Destroy tenant's squadhub resources permanently. - * Dev: no-op. - * Cloud: deletes ECS service + EFS access point + CloudMap entry. - */ - destroy(tenantId: string): Promise; - - /** - * Check health/status of the tenant's squadhub. - * Dev: returns { running: true, healthy: true }. - * Cloud: checks ECS service running count + task health. - */ - getStatus(tenantId: string): Promise<{ - running: boolean; - healthy: boolean; - }>; -} -``` - -**Registry:** - -```typescript -// registry.ts -export interface PluginMap { - "squadhub-provisioner": SquadhubProvisioner; - "squadhub-lifecycle": SquadhubLifecycle; -} - -let plugins: PluginMap; // Always set (dev defaults or cloud) -let cloudLoaded = false; - -/** - * Initialize plugins. Call once at app startup. - * Tries to load @clawe/cloud-plugins. Falls back to dev defaults. - */ -export async function loadPlugins(): Promise { - try { - const cloud = await import("@clawe/cloud-plugins"); - plugins = cloud.register(); - cloudLoaded = true; - } catch { - plugins = { - "squadhub-provisioner": new DefaultSquadhubProvisioner(), - "squadhub-lifecycle": new DefaultSquadhubLifecycle(), - }; - } -} - -/** Returns true if cloud plugins are loaded (vs dev defaults). */ -export function hasPlugin(name: keyof PluginMap): boolean { - return cloudLoaded; -} - -/** Get a plugin implementation. Always returns something (cloud or dev default). */ -export function getPlugin(name: K): PluginMap[K] { - return plugins[name]; -} -``` - -**Dev default implementations:** - -```typescript -// defaults/squadhub-provisioner.ts — DefaultSquadhubProvisioner -export class DefaultSquadhubProvisioner implements SquadhubProvisioner { - async provision(params) { - // Read from env vars — single-tenant dev mode - return { - squadhubUrl: process.env.SQUADHUB_URL || "http://localhost:18790", - squadhubToken: process.env.SQUADHUB_TOKEN || "", - }; - } - - async getProvisioningStatus() { - return { status: "active" as const }; // Instant in dev - } - - async deprovision() {} // No-op in dev -} - -// defaults/squadhub-lifecycle.ts — DefaultSquadhubLifecycle -export class DefaultSquadhubLifecycle implements SquadhubLifecycle { - async restart() {} // No-op — user manages docker - async stop() {} // No-op - async destroy() {} // No-op - async getStatus() { - return { running: true, healthy: true }; - } -} -``` - -**Consumption pattern (call sites):** - -```typescript -import { getPlugin, hasPlugin } from "@clawe/plugins"; - -// Provision route — always works (dev or cloud) -const provisioner = getPlugin("squadhub-provisioner"); -const result = await provisioner.provision({ tenantId, accountId, convexUrl }); - -// Settings UI — conditionally show restart button -if (hasPlugin("squadhub-lifecycle")) { - // Show "Restart Service" button after API key change -} -``` - -Setup: - -- [x] Create `packages/plugins/` package with interfaces, registry, and dev defaults -- [x] Add to `pnpm-workspace.yaml` (already covered by `packages/*` glob) -- [x] Build outputs: ESM (consumable by Next.js, Node, and React) -- [x] Export types and runtime functions from package entry point - -### 4.2 Dev provision route - -Update `apps/web/src/app/api/tenant/provision/route.ts`: - -Current state: Only runs app-level setup (agents, crons, routines). Doesn't create the tenant or wire connection tokens. - -Changes: - -- [x] Call `loadPlugins()` at route level (ensures registry is initialized) -- [x] Authenticate the caller via NextAuth session (import `auth` from nextauth-config) -- [x] Get user email from session → look up user in Convex -- [x] Call Convex `accounts.getOrCreateForUser()` to ensure account + membership exist -- [x] Check if account already has a tenant (`tenants.getForCurrentUser`) -- [x] If no tenant: call `tenants.create(accountId)`, then use `getPlugin("squadhub-provisioner").provision()` to get `squadhubUrl` + `squadhubToken`, update tenant with these values + `status: "active"` -- [x] Run existing app-level setup (register agents, setup crons, seed routines) — already implemented in `setupTenant()` -- [x] Return `{ ok: true, tenantId }` - -The route is idempotent — calling it when a tenant already exists is a no-op (returns existing tenant). - -**Cloud extensibility**: The provision route uses `getPlugin("squadhub-provisioner")` which returns the dev provisioner by default (reads env vars, instant). When `@clawe/cloud-plugins` is installed (in `clawe-cloud` repo), the same route automatically uses the cloud provisioner (creates ECS/EFS/CloudMap, waits for health). No file changes needed. - -### 4.3 WATCHER_TOKEN setup - -The watcher uses two tokens for Convex authentication: - -1. **`WATCHER_TOKEN`** — system-level token for `tenants.listActive()` (get all active tenants) -2. **Per-tenant `squadhubToken`** — returned by `listActive()`, used as `machineToken` for tenant-scoped queries - -Setup: - -- [x] Add `WATCHER_TOKEN` to root `.env.example` with a default dev value (e.g., `clawe-watcher-dev-token`) -- [x] Add `WATCHER_TOKEN` to root `.env` with the same default -- [x] Add `WATCHER_TOKEN` to `turbo.json` `globalPassThroughEnv` -- [x] Set `WATCHER_TOKEN` as a Convex env var — auto-synced on `pnpm dev` via `scripts/sync-convex-env.sh` (runs as background process in backend dev script, waits for local Convex backend, then sets all env vars) -- [x] Document in `.env.example`: "Must match the value set in Convex env vars" - -After the provision route (4.2) runs, the tenant record has `squadhubToken` set to `SQUADHUB_TOKEN` from env. The watcher calls `tenants.listActive(watcherToken)` → gets the tenant's `squadhubToken` → passes it as `machineToken` to all tenant-scoped Convex calls. The full chain works. - -### 4.4 Auto-provision flow in web app - -After login, the app needs to automatically provision the user if they don't have an active tenant. - -- [x] Login page (`/auth/login`) only does: authenticate → `getOrCreateFromAuth()` → redirect to `/` -- [x] Root page (`/`) checks tenant status: no active tenant → redirect to `/setup/provisioning` -- [x] Provisioning page (`/setup/provisioning`) calls `POST /api/tenant/provision`, shows loading/error UI -- [x] Onboarding guards check tenant status before checking `isOnboardingComplete` -- [x] For returning users (tenant already exists), the idempotent provision route returns immediately. - -Flow after this step: - -``` -Login → auth → getOrCreateFromAuth → redirect to / - → root page queries tenant: - → no active tenant: redirect to /setup/provisioning - → provision → redirect to /setup/welcome (or /board if onboarding complete) - → has active tenant, not onboarded: redirect to /setup - → has active tenant, onboarded: redirect to /board -``` - -### 4.5 Environment variables summary - -Root `.env.example` additions: - -```bash -# Watcher system token (must also be set as Convex env var) -WATCHER_TOKEN=clawe-watcher-dev-token -``` - -`turbo.json` `globalPassThroughEnv` additions: - -``` -WATCHER_TOKEN -``` - -Convex env vars (set via CLI): - -```bash -npx convex env set WATCHER_TOKEN clawe-watcher-dev-token -``` - -### 4.6 Verification - -After this phase, verify the complete flow: - -1. **Fresh start**: Clear Convex data, restart services -2. **Login**: `AUTO_LOGIN_EMAIL` auto-signs in (or Google OAuth if configured) -3. **Auto-provision**: User + account + tenant created, tenant has `squadhubUrl`/`squadhubToken` from env, agents registered, crons set up -4. **Onboarding**: `/setup/welcome` → business context → telegram → complete — all tenant-scoped queries resolve -5. **Dashboard**: All pages work (agents, board, settings) — data is tenant-scoped -6. **Watcher**: `WATCHER_TOKEN` validates → `listActive` returns tenant → per-tenant `machineToken` resolves → notifications delivered - -### 4.7 Dedicated provisioning screen - -Move the provision step out of the login page into a shared onboarding screen. This screen is used by all modes (dev, self-hosted, cloud). - -**Why**: Login should only authenticate. Provisioning (creating tenant, registering agents, setting up crons) is a separate concern that deserves its own screen with loading state and error handling. For cloud, this step will take longer (spinning up ECS), so having a dedicated screen is essential. For dev/self-hosted it's instant but the flow should be the same. - -**Changes:** - -- [x] Remove `apiClient.post("/api/tenant/provision")` from `apps/web/src/app/auth/login/page.tsx` -- [x] Login page only does: authenticate → `getOrCreateFromAuth()` → redirect to `/` -- [x] Root page (`/`) queries `api.tenants.getForCurrentUser`, redirects to `/setup/provisioning` if no active tenant -- [x] Create `apps/web/src/app/setup/provisioning/page.tsx`: - - Calls `POST /api/tenant/provision` on mount - - Shows loading screen: spinner + "Setting up your workspace..." - - On success: Convex subscription updates reactively → redirect fires - - On error: show error message with retry button - - If tenant already active (idempotent): redirect immediately - - Uses `useRef` to prevent double-fire in StrictMode -- [x] Update onboarding guard (`use-onboarding-guard.ts`): check tenant exists and is active before checking `isOnboardingComplete`. If no active tenant → redirect to `/setup/provisioning`. Uses `usePathname()` to avoid redirect loop. -- [x] Rename `lib/squadhub/provision.ts` → `lib/squadhub/setup.ts`, `provisionTenant()` → `setupTenant()` (app-level setup, not infrastructure provisioning) -- [x] For cloud (future): poll `GET /api/tenant/status` while status is `"provisioning"`, show progress messages ("Creating workspace...", "Starting services...", "Almost ready...") - -**Updated flow:** - -``` -Login → auth → getOrCreateFromAuth → redirect to / - -/ (root page): - → query getForCurrentUser - → null or not active → /setup/provisioning - → active, not onboarded → /setup - → active, onboarded → /board - -/setup/provisioning: - → tenant already active? → redirect out - → call POST /api/tenant/provision - → spinner while provisioning - → Convex updates reactively → redirect to /setup/welcome or /board - -/setup/* (welcome, business, telegram, complete): - → guard: no active tenant → /setup/provisioning - → guard: already onboarded → /board - -/board (dashboard): - → guard: no active tenant → /setup/provisioning - → guard: not onboarded → /setup -``` - ---- - -## Phase 5: CDK Project Setup & Base Infrastructure - -### 5.1 Initialize CDK project - -Create the CDK project in `packages/infra/` (in `clawe-cloud` repo): - -``` -packages/infra/ -├── bin/ -│ └── clawe.ts # CDK app entry point -├── lib/ -│ ├── config.ts # Environment config (dev/prod values) -│ ├── networking-stack.ts # VPC, subnets, security groups -│ ├── auth-stack.ts # Cognito -│ ├── storage-stack.ts # EFS, ECR -│ ├── shared-services-stack.ts # ECS cluster, ALB, web app, watcher -│ ├── tenant-stack.ts # Per-tenant squadhub construct (Phase 7) -│ ├── dns-stack.ts # Route53, CloudFront, ACM -│ └── monitoring-stack.ts # CloudWatch dashboards, alarms -├── cdk.json -├── tsconfig.json -└── package.json -``` - -- [x] Add `infra` to pnpm workspace — lives in `packages/infra/`, covered by `packages/*` glob -- [x] Install CDK dependencies: `aws-cdk-lib`, `constructs`, `aws-cdk`, `@types/node`, `ts-node` -- [x] Configure `cdk.json` with context defaults (`env: "dev"`) -- [x] Create `bin/clawe.ts` — CDK app entry point wiring all stacks with `--context env=dev|prod` -- [x] Create `lib/config.ts` — environment config (domain, sizing, namespace per dev/prod) -- [x] Create `lib/networking-stack.ts` — VPC, 5 security groups (ALB, web, watcher, squadhub, EFS) -- [x] Create `lib/auth-stack.ts` — Cognito User Pool with Google social login -- [x] Create `lib/storage-stack.ts` — 3 ECR repos + encrypted EFS -- [x] Create `lib/shared-services-stack.ts` — 2 ECS clusters, ALB, web app + watcher Fargate services -- [x] Create `lib/dns-stack.ts` — Route53, ACM certificate, CloudFront distribution -- [x] Create `lib/monitoring-stack.ts` — CloudWatch dashboard + 5xx alarm -- [x] Type-check passes cleanly (`tsc --noEmit`) - -### 5.2 Cognito User Pool (Google social login only) - -All implemented in `auth-stack.ts` during 5.1. Custom domain (`auth.clawe.io`) instead of prefix domain. Added deletion protection, case-insensitive sign-in, prevent user existence errors (adopted from ChartDB). Removed dead `googleClientSecret` variable; added pre-deploy SSM setup instructions as comments. - -Create Cognito User Pool: - -- [x] Cognito is only the auth layer — user data lives in Convex `users` table -- [x] No self sign-up with email/password — Google is the only sign-in method (`selfSignUpEnabled: false`) -- [x] No custom Cognito attributes needed (only standard `email` + `fullname`) - -Configure Google as federated identity provider: - -- [x] Create OAuth 2.0 Client ID in Google Cloud Console (Web application type) - - Authorized redirect URI: `https://{authDomain}/oauth2/idpresponse` -- [x] Add Google as identity provider in Cognito User Pool: - - Client ID + Client Secret from SSM (`/clawe/{env}/google-client-id`, `/clawe/{env}/google-client-secret`) - - Scopes: `openid email profile` - - Attribute mapping: Google `email` → Cognito `email`, Google `name` → Cognito `fullname` - -Configure Cognito domain: - -- [x] Custom domain: `auth.clawe.io` (prod) / `auth-dev.clawe.io` (dev) with ACM cert + Route53 alias - -Create App Client: - -- [x] No client secret (public client for SPA) — `generateSecret: false` -- [x] Auth flows: `userSrp: true` (SRP kept as fallback, Google login uses OAuth flow) -- [x] OAuth settings: - - Callback URL: `https://app.clawe.io` (prod) / `https://app-dev.clawe.io` (dev) / `http://localhost:3000` (local) - - Sign-out URL: same as callback - - OAuth scopes: `openid`, `email`, `profile` - - Supported identity providers: Google -- [x] Token validity: access 1h, id 1h, refresh 30d - -- [x] Output User Pool ID, App Client ID, and Cognito Domain as stack outputs -- [x] Store Google OAuth Client ID and Secret in SSM Parameter Store — done for both dev and prod - -### 5.3 Environment config - -- [x] Created `packages/infra/lib/config.ts` during 5.1 — maps `env` context to all environment-specific values including `authDomain`, `cloudMapNamespace`, sizing for web/watcher/squadhub, `natGateways` - -### 5.4 Networking stack - -Created `packages/infra/lib/networking-stack.ts` during 5.1: - -- [x] 2 public subnets (ALB) -- [x] 2 private subnets with NAT (ECS tasks, EFS) -- [x] 1 NAT Gateway (dev) / 2 NAT Gateways (prod) for cost vs HA tradeoff -- [x] Security groups: - - `alb-sg`: inbound 443/80 from internet - - `web-sg`: inbound from ALB only (port 3000) - - `squadhub-sg`: inbound from web-sg and watcher-sg (port 18789) - - `watcher-sg`: outbound to squadhub-sg and internet (Convex) - - `efs-sg`: inbound NFS (2049) from squadhub-sg - -### 5.5 ECR repositories - -Created in `packages/infra/lib/storage-stack.ts` during 5.1: - -- [x] `clawe/web` — Next.js web app image -- [x] `clawe/squadhub` — Squadhub (OpenClaw gateway) image -- [x] `clawe/watcher` — Watcher service image -- [x] Lifecycle policy: keep last 10 images - -### 5.6 EFS filesystem - -Created in `packages/infra/lib/storage-stack.ts` during 5.1: - -- [x] Encrypted at rest -- [x] Performance mode: generalPurpose -- [x] Throughput mode: bursting (sufficient for tens of users) -- [x] Per-tenant access points created dynamically during provisioning (Phase 8.2 — CloudSquadhubProvisioner) -- [x] Mount targets in each private subnet (created in storage-stack.ts) - -### 5.7 SSM Parameter Store secrets - -Generate and store shared secrets in SSM Parameter Store (SecureString): - -- [x] `/clawe/{env}/watcher-token` (SecureString) — created for dev and prod -- [x] `/clawe/{env}/google-client-id` (String) — created for dev and prod -- [x] `/clawe/{env}/google-client-secret` (SecureString) — created for dev and prod -- [x] Set watcher token as Convex env var — automated in deploy workflow ("Sync Convex env vars" step reads from SSM, sets via `npx convex env set`) - -### 5.8 Shared wildcard certificate - -Created `lib/certificate-stack.ts`. Single ACM wildcard certificate for all subdomains, replacing per-service certs. - -- [x] Create `lib/certificate-stack.ts` — standalone stack (deployed in `us-east-1`) -- [x] Certificate covers: `*.clawe.io` + `clawe.io` (SAN) -- [x] DNS validation via Route53 hosted zone lookup -- [x] Export certificate ARN as stack output (`clawe-wildcard-certificate-arn`) -- [x] Deploy order: certificate-stack deploys **before** auth-stack and dns-stack -- [x] Update `auth-stack.ts` — removed per-domain `AuthCertificate`, accepts shared wildcard cert as prop -- [x] Update `dns-stack.ts` — removed per-domain certificate, accepts shared wildcard cert as prop -- [x] Update `shared-services-stack.ts` — ALB HTTPS listener now uses shared wildcard cert -- [x] Update `bin/clawe.ts` — added certificate stack, passes cert to auth-stack, dns-stack, and shared-services-stack - -CloudFront → ALB now uses HTTPS (not HTTP), since ALB has the wildcard cert. - -### 5.9 GitHub Actions OIDC for AWS - -Created `lib/ci-stack.ts`. Keyless AWS auth — no long-lived credentials in GitHub. - -- [x] Create `lib/ci-stack.ts` — deployed once per account (not per environment) -- [x] Create IAM OIDC provider for GitHub (`token.actions.githubusercontent.com`) -- [x] Create IAM role `clawe-github-ci` with trust policy: - - Principal: GitHub OIDC provider - - Condition: `sub` matches `repo:getclawe/clawe-cloud:*` (only this repo can assume) -- [x] Role permissions (CDK-native approach — assumes CDK bootstrap roles for deploy): - - `sts:AssumeRole` on `cdk-*` bootstrap roles (CDK deploy delegates to these) - - ECR: full push permissions (for Docker image builds done outside CDK) - - CloudFront: `CreateInvalidation` (post-deploy cache clear) - - SSM: `GetParameter` scoped to `/clawe/*` (read env config) - - STS: `GetCallerIdentity` -- [x] Export role ARN as stack output (`clawe-github-ci-role-arn`) -- [x] Add to `bin/clawe.ts` — deployed as `ClaweCi` (no env parameterization) - -### 5.10 CI/CD deploy workflow - -Created `.github/workflows/deploy.yml`. Manual (`workflow_dispatch`) — no auto-deploy on push. - -**Deployment approach**: All ECS service updates go through CDK (`cdk deploy`), not direct `aws ecs update-service`. CDK is the single source of truth — when the image tag changes, CloudFormation detects the task definition update and triggers ECS rolling deployment automatically. This prevents drift and gives us circuit breaker + auto-rollback for free. (Pattern adopted from ChartDB's backend-node deployment.) - -- [x] Trigger: `workflow_dispatch` with inputs: - - `environment`: choice of `dev` / `prod` - - `targets`: choice of `all` / `infra` / `web` / `watcher` / `squadhub` / `convex` -- [x] Authentication: `aws-actions/configure-aws-credentials` with OIDC (role from 5.9) -- [x] Steps (conditional on `targets`): - 1. **Checkout** + pnpm + Node.js setup - 2. **Configure AWS credentials** via OIDC - 3. **Build & push Docker images** to ECR (web, watcher, squadhub — each conditional) - 4. **Tag images**: `{repo}:{env}-{git-sha}` + `{repo}:{env}-latest` - 5. **Deploy Convex** via `./scripts/convex-deploy.sh` - 6. **CDK deploy** — single command handles infra + ECS updates: - ```bash - npx cdk deploy --all --context env={env} --context imageTag={env}-{sha} - ``` - 7. **Invalidate CloudFront cache** — reads distribution ID from stack outputs - 8. **Smoke test** — retries health check for up to 3 minutes -- [x] ECS deployment settings (defined in CDK shared-services-stack): - - Circuit breaker enabled with auto-rollback on health check failure - - MaximumPercent: 200% (new tasks start before old ones stop) - - MinimumHealthyPercent: 50% (allows rolling replacement) - - Health check grace period: 120s (allows startup time) -- [x] `imageTag` CDK context — shared-services-stack reads `this.node.tryGetContext("imageTag")`, defaults to `{env}-latest` -- [x] Hardcoded in workflow: role ARN (`arn:aws:iam::985539756675:role/clawe-github-ci`), account ID, ECR registry -- [x] GitHub environment secrets configured: `CONVEX_DEPLOY_KEY` (per env: dev + prod) -- [x] GitHub environment variables configured: `CONVEX_URL` (per env: dev + prod) -- [x] Removed `convexUrl` from `config.ts` — now required CDK context (`--context convexUrl=...`), fails fast if missing - ---- - -## Phase 6: Shared Services Deployment (CDK) - -All of Phase 6 was implemented in `shared-services-stack.ts` during Phase 5.1, with deployment config (circuit breaker, rolling updates) added in 5.8/5.10. - -### 6.1 ECS Clusters - -Created in `shared-services-stack.ts`: - -**Shared services cluster:** - -- [x] Name: `clawe-cluster-{env}` -- [x] Hosts: web app + watcher -- [x] Container insights enabled - -**Squadhubs cluster:** - -- [x] Name: `clawe-squadhubs-{env}` -- [x] Hosts: per-tenant squadhub ECS Services -- [x] CloudMap namespace: `clawe-internal-{env}` (for squadhub service discovery) -- [x] Container insights enabled -- [x] Separate cluster keeps tenant workloads isolated from shared services - -### 6.2 Application Load Balancer - -Created in `shared-services-stack.ts`: - -- [x] HTTPS listener (443) with shared wildcard certificate -- [x] HTTP listener (80) — redirect to HTTPS -- [x] Target groups: - - `web-tg`: routes to web app service (default rule) -- [x] Health check: `/api/health` (interval 30s, healthy 2, unhealthy 3) - -### 6.3 Web app Fargate service - -- [x] ECS service for the Next.js app (in `shared-services-stack.ts`): - -- Task definition: - - Image: `fromEcrRepository(webRepo, imageTag)` — image tag from CDK context - - CPU: 512 (dev) / 1024 (prod) - - Memory: 1024 (dev) / 2048 (prod) - - Port: 3000 - - Environment: `NODE_ENV`, `ENVIRONMENT`, `NEXT_PUBLIC_CONVEX_URL` (from context), `NEXT_PUBLIC_COGNITO_*` (from auth stack), `NEXT_PUBLIC_APP_URL`, `AWS_REGION`, infrastructure refs for provisioning (cluster ARN, namespace ID, EFS ID, squadhub image, SG IDs, subnet IDs) - - IAM role: ECS provisioning, EFS access points, CloudMap service discovery, IAM PassRole - - Logging: `/clawe/web-{env}` (30d dev, 90d prod) -- Service: - - Desired count: 1 (dev) / 2 (prod) - - Circuit breaker with auto-rollback - - MaximumPercent: 200%, MinimumHealthyPercent: 50% - - Health check grace period: 120s - - ALB target group attachment - - [ ] Auto-scaling: target CPU 70%, min 1 / max 10 (prod only) — **to add** - -### 6.4 Watcher Fargate service - -- [x] ECS service for the shared watcher (in `shared-services-stack.ts`): - -- Task definition: - - Image: `fromEcrRepository(watcherRepo, imageTag)` — same context as web - - CPU: 256, Memory: 512 - - Environment: `CONVEX_URL` (from context), `ENVIRONMENT` - - Secrets: `WATCHER_TOKEN` from SSM (`/clawe/{env}/watcher-token`) - - IAM role: CloudMap DiscoverInstances - - Logging: `/clawe/watcher-{env}` (30d dev, 90d prod) -- Service: - - Desired count: 1 - - Circuit breaker with auto-rollback - - No ALB attachment (internal service only) - ---- - -## Phase 7: Per-Tenant Squadhub (CDK + Runtime) - -### 7.1 Base squadhub task definition (shared infrastructure in CDK) - -Phase 7.1 creates the **shared CDK infrastructure** that all per-tenant squadhub services reference. The actual per-tenant ECS task definitions and services are created at **runtime** by CloudSquadhubProvisioner (Phase 8). - -Changes in `packages/infra/lib/shared-services-stack.ts`: - -- [x] Shared squadhub execution role (`clawe-squadhub-exec-{env}`) — ECS Fargate execution role for ECR pulls + CloudWatch log writes, with `AmazonECSTaskExecutionRolePolicy` -- [x] Shared squadhub task role (`clawe-squadhub-task-{env}`) — runtime role with EFS mount/write/root access (scoped to file system ARN) + CloudMap `DiscoverInstances` -- [x] Shared squadhub log group (`/clawe/squadhub-{env}`) — tenant-specific stream prefixes set at runtime by CloudSquadhubProvisioner (30d dev, 90d prod retention) -- [x] Export role ARNs + log group to web container env vars: `SQUADHUB_EXECUTION_ROLE_ARN`, `SQUADHUB_TASK_ROLE_ARN`, `SQUADHUB_LOG_GROUP` -- [x] Tightened `iam:PassRole` scope from `resources: ["*"]` to the two specific role ARNs - -Per-tenant ECS task definition template (created at runtime by CloudSquadhubProvisioner in Phase 8): - -- Image: `{account}.dkr.ecr.{region}.amazonaws.com/clawe/squadhub:{env}-latest` -- CPU: 256 (0.25 vCPU) -- Memory: 512 (0.5 GB) -- Port: 18789 -- EFS volume mount: `/data` (access point set per-tenant at runtime) -- Health check: `wget -q --spider http://localhost:18789/health` -- IAM task role: shared `clawe-squadhub-task-{env}` (EFS + CloudMap) -- IAM execution role: shared `clawe-squadhub-exec-{env}` (ECR + logs) -- Logging: shared `/clawe/squadhub-{env}` log group with tenant-specific stream prefix - -### 7.2 CloudMap service discovery - -All CDK infrastructure for CloudMap is already in place (created in Phase 5.1 + 7.1). Per-tenant service registration happens at runtime via CloudSquadhubProvisioner (Phase 8.2). - -- [x] CloudMap private DNS namespace (`clawe-internal-{env}`) — created in shared-services-stack -- [x] Namespace ID + name exported to web container (`CLOUDMAP_NAMESPACE_ID`, `CLOUDMAP_NAMESPACE_NAME`) -- [x] Web app task role has full CloudMap permissions (`CreateService`, `DeleteService`, `RegisterInstance`, `DeregisterInstance`, `DiscoverInstances`, `GetService`, `ListInstances`) -- [x] Watcher task role has `DiscoverInstances` permission -- [x] Squadhub task role has `DiscoverInstances` permission (added in 7.1) - -Runtime (Phase 8.2): CloudSquadhubProvisioner creates per-tenant CloudMap service entries (`squadhub-{tenantId}.clawe-internal-{env}`) and associates with ECS services. Web app resolves `squadhub-{tenantId}.clawe-internal-{env}` → private IP:18789. - -### 7.3 Squadhub lifecycle API routes - -These routes use `getPlugin("squadhub-lifecycle")` from `@clawe/plugins`. In dev, they return no-ops. In cloud, `CloudSquadhubLifecycle` manages ECS services. - -- [x] `POST /api/tenant/squadhub/restart` — calls `getPlugin("squadhub-lifecycle").restart(tenantId)` -- [x] `GET /api/tenant/squadhub/status` — calls `getPlugin("squadhub-lifecycle").getStatus(tenantId)` -- [x] `DELETE /api/tenant/squadhub` — calls `getPlugin("squadhub-lifecycle").destroy(tenantId)` -- [x] Extracted shared `getAuthenticatedTenant()` helper (`lib/api/tenant-auth.ts`) — reads JWT from Authorization header, resolves tenant via Convex - -These routes live in the **base repo** and work for both dev and cloud via the plugin interface. - ---- - -## Phase 8: Cloud Tenant Provisioning - -### 8.1 Cloud plugins package (`packages/cloud-plugins/`) - -Create `@clawe/cloud-plugins` in the `clawe-cloud` repo. This package implements the `@clawe/plugins` interfaces (from Phase 4.1) with real AWS infrastructure. - -**Package structure (in `clawe-cloud` repo):** - -``` -packages/cloud-plugins/ -├── src/ -│ ├── index.ts # register() function — returns PluginMap -│ ├── squadhub-provisioner.ts # CloudSquadhubProvisioner implements SquadhubProvisioner -│ └── squadhub-lifecycle.ts # CloudSquadhubLifecycle implements SquadhubLifecycle -├── package.json # depends on @clawe/plugins (for types), aws-sdk -└── tsconfig.json -``` - -**Registration:** - -```typescript -// cloud-plugins/src/index.ts -import type { PluginMap } from "@clawe/plugins"; -import { CloudSquadhubProvisioner } from "./squadhub-provisioner"; -import { CloudSquadhubLifecycle } from "./squadhub-lifecycle"; - -export function register(): PluginMap { - return { - "squadhub-provisioner": new CloudSquadhubProvisioner(), - "squadhub-lifecycle": new CloudSquadhubLifecycle(), - }; -} -``` - -When `@clawe/cloud-plugins` is installed and `loadPlugins()` runs, the base provision route (Phase 4.2) automatically uses `CloudSquadhubProvisioner` instead of `DefaultSquadhubProvisioner`. No route file changes needed. - -- [x] Create `packages/cloud-plugins/` with `CloudSquadhubProvisioner` and `CloudSquadhubLifecycle` (stub implementations, `throw new Error` for each method) -- [x] Add `@clawe/plugins` as dependency (for interfaces) -- [x] Add `@aws-sdk/client-ecs`, `@aws-sdk/client-efs`, `@aws-sdk/client-servicediscovery` as dependencies -- [x] Already covered by `packages/*` glob in `pnpm-workspace.yaml` -- [x] Add `@clawe/cloud-plugins: "workspace:*"` to `apps/web/package.json` in `clawe-cloud` — this is the single line that activates cloud plugins (without it, `loadPlugins()` falls back to defaults) - -### 8.2 CloudSquadhubProvisioner implementation - -Implement `SquadhubProvisioner` interface with AWS infrastructure: - -**`provision()`:** - -- [x] 1. Create EFS access point for tenant (path: `/tenants/{tenantId}`) -- [x] 2. Register ECS task definition (from base template, with tenant-specific env vars): - - `SQUADHUB_TOKEN` — generated per-tenant UUID - - `ANTHROPIC_API_KEY` — from `params.anthropicApiKey` - - `CONVEX_URL` — from `params.convexUrl` - - `OPENCLAW_STATE_DIR=/data/config` - - `OPENCLAW_PORT=18789` - - `OPENCLAW_SKIP_GMAIL_WATCHER=1` -- [x] 3. Create ECS Service on `clawe-squadhubs-{env}` cluster with `desiredCount: 1` -- [x] 4. Associate with CloudMap service discovery (`squadhub-{tenantId}.clawe-internal-{env}`) -- [x] 5. Return `{ squadhubUrl, squadhubToken, metadata: { serviceArn, efsAccessPointId } }` - -**`getProvisioningStatus()`:** - -- [x] Query ECS Service running count and task health -- [x] Return `"provisioning"` while tasks are starting, `"active"` when healthy, `"error"` on failure - -**`deprovision(params: DeprovisionParams)`:** - -- [x] Delete ECS Service (force delete, stops running tasks) -- [x] Deregister task definition -- [x] Deregister CloudMap service entry -- [x] Delete EFS access point (via `params.metadata.efsAccessPointId`) -- [x] Best-effort cleanup — continues through all steps, collects errors, throws aggregate error - -**Error handling:** - -- [x] `provision()` uses compensating transactions — on failure, rolls back all created resources (best-effort) -- [x] `deprovision()` uses best-effort pattern — attempts all cleanup steps, throws aggregate error if any non-404 failures - -Environment variables for the squadhub ECS task: - -``` -SQUADHUB_TOKEN={generated-per-tenant} # Used for both: squadhub HTTP auth AND Convex machine auth -ANTHROPIC_API_KEY={from-convex-tenant-record} -CONVEX_URL={env-specific-convex-url} -OPENCLAW_STATE_DIR=/data/config -OPENCLAW_PORT=18789 -OPENCLAW_SKIP_GMAIL_WATCHER=1 -``` - -Note: `TENANT_ID` is not needed as an env var — the CLI resolves the tenant by passing `SQUADHUB_TOKEN` as `machineToken` to Convex, which looks up the tenant via the `by_squadhubToken` index. - -### 8.3 CloudSquadhubLifecycle implementation - -Implement `SquadhubLifecycle` interface with ECS operations: - -**`restart(tenantId)`:** - -- [x] Force new deployment on the tenant's ECS Service via `UpdateServiceCommand` with `forceNewDeployment: true` - -**`stop(tenantId)`:** - -- [x] Set ECS Service `desiredCount: 0` via `UpdateServiceCommand` - -**`destroy(tenantId)`:** - -- [x] Delete ECS Service (force) + deregister task definition -- [x] Delete EFS access point (looked up by `clawe:tenantId` tag) -- [x] Delete CloudMap service entry -- [x] Best-effort cleanup — continues through all steps, throws aggregate error - -**`getStatus(tenantId)`:** - -- [x] Check ECS Service running count via `DescribeServicesCommand` -- [x] Check task-level health via `ListTasksCommand` + `DescribeTasksCommand` -- [x] Return `{ running: runningCount > 0, healthy: container.healthStatus === "HEALTHY" }` - -**Refactoring:** - -- [x] Extracted shared utilities (`getEnvConfig`, `serviceName`, `isNotFoundError`, `toError`, AWS clients) to `aws-config.ts` -- [x] Updated `squadhub-provisioner.ts` to import from `aws-config.ts` - -### 8.4 Provisioning status polling - -Create `apps/web/src/app/api/tenant/status/route.ts`: - -- [x] Calls `getPlugin("squadhub-provisioner").getProvisioningStatus(tenantId)` -- [x] Returns status to client for polling -- [x] Works for both dev (instant "active") and cloud (real status) - -### 8.5 Cloud provisioning UI flow - -**Note**: The shared `/setup/provisioning` screen (Phase 4.7) already handles the full flow — loading state, error handling, and redirect. This step just ensures the cloud polling path works end-to-end: - -- [x] Verify `GET /api/tenant/status` (8.4) returns real ECS status via `CloudSquadhubProvisioner.getProvisioningStatus()` -- [x] Verify `/setup/provisioning` page polls status while `"provisioning"` and shows progress messages ("Creating workspace...", "Starting services...", "Almost ready...") -- [x] Verify redirect to `/setup/welcome` once status is `"active"` -- [x] No new UI page needed — reuses the shared screen from 4.7 - -### 8.6 Dynamic squadhub routing - -Currently the web app reads `SQUADHUB_URL` from env. This needs to become per-tenant: - -- [x] Update `apps/web/src/lib/squadhub/connection.ts`: - - `getConnection(tenant?)` — uses tenant's `squadhubUrl` + `squadhubToken` if provided - - Falls back to env vars when no tenant (self-hosted / dev) - - No cache needed — tenant data comes from the same `getAuthenticatedTenant()` query used for auth - -- [x] Update all API routes that call the squadhub (already refactored in Phase 1 to accept params): - - `/api/chat` — resolve tenant's squadhub URL before calling - - `/api/chat/history` — same - - `/api/chat/abort` — same - - `/api/squadhub/health` — same - - `/api/squadhub/pairing` — same - -- [x] Refactor `getAuthenticatedTenant` return type to clean discriminated union (`{ error, convex, tenant }`) - -### 8.7 Anthropic API key management - -Update onboarding flow: - -- [ ] Add new step in `/setup/welcome` or as a dedicated step: "Enter your Anthropic API key" -- [ ] Store key in Convex tenant record via `tenants.setAnthropicKey()` -- [ ] The provisioning route reads from Convex and passes as env var to ECS Service -- [ ] If user updates key in settings later: - 1. Update Convex tenant record - 2. Call `getPlugin("squadhub-lifecycle").restart(tenantId)` to pick up new key - -Create settings page: - -- [ ] Add "API Key" section to `/settings/general` -- [ ] Input field (masked) to view/update Anthropic API key -- [ ] "Save" triggers Convex update + `getPlugin("squadhub-lifecycle").restart(tenantId)` -- [ ] Poll `getPlugin("squadhub-lifecycle").getStatus(tenantId)` for restart progress -- [ ] In dev (`!hasPlugin("squadhub-lifecycle")`), skip restart UI — API key is an env var - ---- - -## Phase 9: Cloud Onboarding Flow Updates - -### 9.1 New onboarding step order - -``` -1. Sign in with Google (Cognito OAuth redirect) -2. Provisioning (auto — create tenant + start squadhub) -3. Anthropic API Key (new step — enter API key, stored in Convex tenant record) -4. Business Context (existing — chat with Clawe agent) -5. Telegram (optional) (existing — bot token + pairing) -6. Complete (existing) -``` - -### 9.2 API Key onboarding step - -Create `apps/web/src/app/setup/api-key/page.tsx`: - -- [ ] Input field for Anthropic API key (masked, with show/hide toggle) -- [ ] Validation: call Anthropic API to verify key is valid (lightweight models list request) -- [ ] On submit: store in Convex tenant record, call `getPlugin("squadhub-lifecycle").restart(tenantId)` -- [ ] Poll `getPlugin("squadhub-lifecycle").getStatus(tenantId)` — wait for healthy before proceeding - -### 9.3 Settings — API Key management - -Add to `/settings/general`: - -- [ ] "Anthropic API Key" section -- [ ] Masked input showing last 4 chars -- [ ] "Update" button → updates Convex + calls `getPlugin("squadhub-lifecycle").restart(tenantId)` -- [ ] Poll `getPlugin("squadhub-lifecycle").getStatus(tenantId)` for restart progress -- [ ] In dev (`!hasPlugin("squadhub-lifecycle")`), skip restart UI - ---- - -## Phase 10: DNS & CDN (CDK) - -All of Phase 10 was implemented in `dns-stack.ts` during Phase 5.1, updated in 5.8 to use shared wildcard cert. - -### 10.1 Hosted Zone lookup - -- [x] Look up existing Route53 hosted zone for `clawe.io` via `HostedZone.fromLookup` - -### 10.2 ACM Certificate - -- [x] Uses shared wildcard cert from Phase 5.8 (`*.clawe.io` + `clawe.io`), passed as prop to dns-stack - -### 10.3 CloudFront distribution - -- [x] Created in `dns-stack.ts`: - - Origin: ALB via HTTPS (wildcard cert on ALB) - - Alternate domain: `app.clawe.io` / `app-dev.clawe.io` - - Certificate: shared wildcard cert - - Cache policy: caching disabled (dynamic Next.js app) - - Origin request policy: ALL_VIEWER (forwards all headers/cookies) - - Allowed methods: ALL (GET, POST, PUT, DELETE, etc.) - - Exports `DistributionId` (used by deploy workflow for cache invalidation) - -### 10.4 Route53 record - -- [x] A record (alias) pointing `app.clawe.io` / `app-dev.clawe.io` to CloudFront distribution - ---- - -## Phase 11: Docker Images & CI/CD - -CI/CD foundation was set up in Phase 5.9–5.10. Image builds use `docker/build-push-action` with GitHub Actions layer caching. - -### 11.1 Image tagging convention - -- [x] Tagging implemented in deploy workflow (Phase 5.10): - - `clawe/web:{env}-{git-sha}` + `clawe/web:{env}-latest` - - `clawe/watcher:{env}-{git-sha}` + `clawe/watcher:{env}-latest` - - `clawe/squadhub:{env}-{git-sha}` + `clawe/squadhub:{env}-latest` -- [x] Uses `docker/metadata-action@v5` for tag/label generation -- [x] Uses `docker/build-push-action@v6` with `docker/setup-buildx-action@v3` -- [x] GitHub Actions layer caching (`cache-from/cache-to: type=gha`) per image scope -- [x] Per-tenant squadhub rolling update via `aws ecs update-service --force-new-deployment` on all services in squadhubs cluster - -### 11.2 Refine deploy workflow (future optimization) - -- [ ] Use `turbo prune --docker` in Dockerfiles for smaller images and faster builds -- [ ] Parallel jobs: build web/watcher/squadhub images in parallel -- [ ] Selective builds: only rebuild images whose source changed (based on git diff) -- [ ] Post-deploy Slack notification (optional) - ---- - -## Phase 12: Monitoring (CDK) - -Basic monitoring created in `monitoring-stack.ts` during Phase 5.1. Log groups created in `shared-services-stack.ts`. - -### 12.1 CloudWatch alarms - -- [x] ALB 5xx error rate alarm (threshold > 5 in 5 minutes) — in `monitoring-stack.ts` -- [ ] Web app: CPU > 80%, unhealthy targets -- [ ] Squadhub services: task stopped unexpectedly, health check failures -- [ ] Watcher: task stopped, log errors -- [ ] EFS: burst credit balance low - -### 12.2 CloudWatch dashboard - -- [x] ALB request count widget — in `monitoring-stack.ts` -- [x] ALB 5xx errors widget — in `monitoring-stack.ts` -- [ ] Active tenants count -- [ ] Squadhub services running / restarting -- [ ] Web app latency -- [ ] EFS usage - -### 12.3 Log groups - -- [x] `/clawe/web-{env}` — web app logs (30d dev, 90d prod) — in `shared-services-stack.ts` -- [x] `/clawe/watcher-{env}` — watcher logs (30d dev, 90d prod) — in `shared-services-stack.ts` -- [x] `/clawe/squadhub-{env}` — shared squadhub log group with per-tenant stream prefixes (Phase 7.1, 30d dev, 90d prod) - ---- - -## Implementation Order - -``` -Phase 1 (Shared client & watcher refactor) — ✅ COMPLETE - │ -Phase 2 (Multi-tenancy in Convex) — ✅ COMPLETE - │ -Phase 3 (Authentication) — ✅ COMPLETE - │ -Phase 4 (Dev & self-hosted provisioning) — ✅ COMPLETE - │ includes: @clawe/plugins package (interfaces + registry + dev defaults) - ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ - │ ↑ Dev & self-hosted complete here (base repo: clawe) - │ ↓ Everything below is cloud-only (clawe-cloud repo) - │ -Phase 5 (CDK setup + base infra) — ✅ COMPLETE (all stacks + CI/CD + SSM params) - │ -Phase 6 (Shared services CDK) — ✅ COMPLETE (done in 5.1, auto-scaling pending) - │ -Phase 7 (Per-tenant squadhub CDK) — NEXT - │ -Phase 8 (Cloud tenant provisioning) — @clawe/cloud-plugins + AWS provisioning - │ includes: CloudSquadhubProvisioner + CloudSquadhubLifecycle implementing plugin interfaces - │ -Phase 9 (Cloud onboarding updates) — API key management - │ -Phase 10 (DNS & CDN) — ✅ COMPLETE (done in 5.1) - │ -Phase 11 (CI/CD) — ✅ COMPLETE (deploy workflow + image builds done in 5.10) - │ -Phase 12 (Monitoring) — PARTIAL (basic dashboard + 5xx alarm + log groups done) -``` - -Phases 1-4: complete (base repo: clawe). -Phases 5, 6, 10, 11: complete (clawe-cloud repo) — all CDK stacks, CI/CD, DNS/CDN. -Phase 12: partial — basic monitoring done, advanced alarms/dashboards deferred. -**Next: Phase 7** (per-tenant squadhub infrastructure) → Phase 8 (cloud provisioning) → Phase 9 (onboarding). - ---- - -## Files to Create - -### Base repo (clawe) - -``` -packages/plugins/ (new — Phase 4.1) -├── src/ -│ ├── index.ts # Main exports: loadPlugins, hasPlugin, getPlugin -│ ├── registry.ts # Plugin registry internals -│ ├── interfaces/ -│ │ ├── index.ts # Re-exports all interfaces -│ │ ├── squadhub-provisioner.ts # SquadhubProvisioner interface -│ │ └── squadhub-lifecycle.ts # SquadhubLifecycle interface -│ └── defaults/ -│ ├── index.ts # Dev default implementations -│ ├── squadhub-provisioner.ts # DefaultSquadhubProvisioner (reads env vars) -│ └── squadhub-lifecycle.ts # DefaultSquadhubLifecycle (no-ops) -├── package.json -└── tsconfig.json - -apps/web/src/ -├── proxy.ts (new — renamed from middleware.ts) -├── app/auth/ -│ └── login/page.tsx (new — "Sign in with Google" button) -├── app/api/tenant/ -│ ├── provision/route.ts (new) -│ ├── status/route.ts (new — uses getPlugin("squadhub-provisioner")) -│ └── squadhub/ -│ ├── restart/route.ts (new — uses getPlugin("squadhub-lifecycle")) -│ └── status/route.ts (new — uses getPlugin("squadhub-lifecycle")) -├── providers/auth-provider.tsx (new) -├── lib/squadhub/connection.ts (updated — tenant-aware getConnection) -└── hooks/use-auth.ts (new) - -packages/backend/convex/ -├── auth.config.cognito.ts (new — Cognito auth provider template) -├── auth.config.nextauth.ts (new — NextAuth customJwt provider) -├── auth.config.ts (generated — copied from template at deploy time) -├── users.ts (new) -├── tenants.ts (new) -├── accounts.ts (new — account management) -├── lib/auth.ts (new) -├── schema.ts (modify — add tenantId everywhere) -├── agents.ts (modify — tenant filtering) -├── tasks.ts (modify — tenant filtering) -├── messages.ts (modify — tenant filtering) -├── notifications.ts (modify — tenant filtering) -├── activities.ts (modify — tenant filtering) -├── documents.ts (modify — tenant filtering) -├── settings.ts (deleted — migrated to tenants.ts) -├── businessContext.ts (modify — tenant filtering) -├── channels.ts (modify — tenant filtering) -└── routines.ts (modify — tenant filtering) - -packages/shared/src/squadhub/ -├── client.ts (modify — accept url/token params) -└── gateway-client.ts (modify — accept url/token params) - -packages/cli/src/ -├── client.ts (modify — add machineToken to all calls) -└── commands/*.ts (modify — pass machineToken) - -apps/watcher/src/ -├── index.ts (modify — multi-tenant loop) -└── config.ts (modify — remove single-tenant env vars) - -apps/web/src/lib/squadhub/ -└── actions.ts (modify — per-tenant routing) -``` - -### Cloud repo (clawe-cloud) — only new files, no base file modifications - -``` -packages/cloud-plugins/ (new — Phase 8.1) -├── src/ -│ ├── index.ts # register() → PluginMap with cloud impls -│ ├── squadhub-provisioner.ts # CloudSquadhubProvisioner implements SquadhubProvisioner -│ └── squadhub-lifecycle.ts # CloudSquadhubLifecycle implements SquadhubLifecycle -├── package.json # depends on @clawe/plugins, @aws-sdk/* -└── tsconfig.json - -packages/infra/ (new — Phase 5+) -├── bin/clawe.ts -├── lib/ -│ ├── config.ts -│ ├── certificate-stack.ts # Shared wildcard cert (Phase 5.8) -│ ├── ci-stack.ts # GitHub OIDC + CI role (Phase 5.9) -│ ├── networking-stack.ts -│ ├── auth-stack.ts -│ ├── storage-stack.ts -│ ├── shared-services-stack.ts -│ ├── tenant-stack.ts -│ ├── dns-stack.ts -│ └── monitoring-stack.ts -├── scripts/ -│ └── build-and-push.sh -├── cdk.json -├── tsconfig.json -└── package.json - -apps/web/src/app/setup/ -└── api-key/page.tsx (new — Phase 9.2, cloud onboarding step) - -.github/workflows/ -└── deploy.yml (new — Phase 5.10) -``` - -## Files to Modify - -### Base repo - -``` -apps/web/src/app/layout.tsx (add auth provider) -apps/web/src/app/(dashboard)/layout.tsx (add auth guard) -apps/web/src/app/setup/welcome/page.tsx (update step flow) -apps/web/src/app/(dashboard)/settings/ (add API key section — uses getPlugin("squadhub-lifecycle")) -apps/web/package.json (add @clawe/plugins, auth deps) -apps/web/src/lib/squadhub/connection.ts (use getPlugin("squadhub-provisioner") result instead of env vars) -apps/web/src/app/api/tenant/provision/route.ts (use getPlugin("squadhub-provisioner") for infrastructure) -pnpm-workspace.yaml (add packages/plugins) -package.json (add workspace scripts) -``` - -### Cloud repo - -``` -apps/web/package.json (add @clawe/cloud-plugins) -apps/watcher/package.json (add aws-sdk for CloudMap) -docker/squadhub/entrypoint.sh (add agent registration on first start) -pnpm-workspace.yaml (add packages/cloud-plugins, infra/) -package.json (add infra scripts) -``` From ece7ff80741074fb3924e0ddf0a07b64d4620ad6 Mon Sep 17 00:00:00 2001 From: Guy Ben-Aharon Date: Wed, 18 Feb 2026 19:37:57 +0200 Subject: [PATCH 5/5] fix --- .../telegram-disconnect-dialog.tsx | 4 +-- apps/web/src/app/api/chat/abort/route.spec.ts | 15 ++--------- .../src/app/api/chat/history/route.spec.ts | 15 ++--------- apps/web/src/app/api/chat/route.spec.ts | 15 ++--------- .../web/src/app/api/squadhub/pairing/route.ts | 25 +++++++++---------- apps/web/src/lib/squadhub/actions.ts | 18 ++++--------- apps/web/src/test/mock-tenant-auth.ts | 21 ++++++++++++++++ packages/shared/src/squadhub/client.ts | 17 +++++++++++++ packages/shared/src/squadhub/index.ts | 4 +-- packages/shared/src/squadhub/types.ts | 14 ----------- 10 files changed, 64 insertions(+), 84 deletions(-) create mode 100644 apps/web/src/test/mock-tenant-auth.ts diff --git a/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx b/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx index 8f72fad..1770598 100644 --- a/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx +++ b/apps/web/src/app/(dashboard)/settings/integrations/_components/telegram-disconnect-dialog.tsx @@ -55,8 +55,8 @@ export const TelegramDisconnectDialog = ({ Your bot{" "} {botUsername && @{botUsername}}{" "} - will stop receiving messages. The bot token will remain saved and - you can reconnect anytime. + will stop receiving messages. You can reconnect anytime by adding a + new bot token. diff --git a/apps/web/src/app/api/chat/abort/route.spec.ts b/apps/web/src/app/api/chat/abort/route.spec.ts index b1f150a..58a09a7 100644 --- a/apps/web/src/app/api/chat/abort/route.spec.ts +++ b/apps/web/src/app/api/chat/abort/route.spec.ts @@ -1,20 +1,9 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { NextRequest } from "next/server"; +import { mockTenantAuth } from "@/test/mock-tenant-auth"; import { POST } from "./route"; -// Mock tenant auth -vi.mock("@/lib/api/tenant-auth", () => ({ - getAuthenticatedTenant: vi.fn(async () => ({ - error: null, - convex: {}, - tenant: { - _id: "test-tenant-id", - squadhubUrl: "http://localhost:18790", - squadhubToken: "test-token", - status: "active", - }, - })), -})); +vi.mock("@/lib/api/tenant-auth", () => mockTenantAuth); // Mock the shared client const mockRequest = vi.fn(); diff --git a/apps/web/src/app/api/chat/history/route.spec.ts b/apps/web/src/app/api/chat/history/route.spec.ts index 114b853..ff656b5 100644 --- a/apps/web/src/app/api/chat/history/route.spec.ts +++ b/apps/web/src/app/api/chat/history/route.spec.ts @@ -1,20 +1,9 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { NextRequest } from "next/server"; +import { mockTenantAuth } from "@/test/mock-tenant-auth"; import { GET } from "./route"; -// Mock tenant auth -vi.mock("@/lib/api/tenant-auth", () => ({ - getAuthenticatedTenant: vi.fn(async () => ({ - error: null, - convex: {}, - tenant: { - _id: "test-tenant-id", - squadhubUrl: "http://localhost:18790", - squadhubToken: "test-token", - status: "active", - }, - })), -})); +vi.mock("@/lib/api/tenant-auth", () => mockTenantAuth); // Mock the shared client const mockRequest = vi.fn(); diff --git a/apps/web/src/app/api/chat/route.spec.ts b/apps/web/src/app/api/chat/route.spec.ts index 8fe1596..c4d28e1 100644 --- a/apps/web/src/app/api/chat/route.spec.ts +++ b/apps/web/src/app/api/chat/route.spec.ts @@ -1,20 +1,9 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { NextRequest } from "next/server"; +import { mockTenantAuth } from "@/test/mock-tenant-auth"; import { POST } from "./route"; -// Mock tenant auth -vi.mock("@/lib/api/tenant-auth", () => ({ - getAuthenticatedTenant: vi.fn(async () => ({ - error: null, - convex: {}, - tenant: { - _id: "test-tenant-id", - squadhubUrl: "http://localhost:18790", - squadhubToken: "test-token", - status: "active", - }, - })), -})); +vi.mock("@/lib/api/tenant-auth", () => mockTenantAuth); // Mock the AI SDK vi.mock("@ai-sdk/openai", () => ({ diff --git a/apps/web/src/app/api/squadhub/pairing/route.ts b/apps/web/src/app/api/squadhub/pairing/route.ts index 5fbcafd..5d05476 100644 --- a/apps/web/src/app/api/squadhub/pairing/route.ts +++ b/apps/web/src/app/api/squadhub/pairing/route.ts @@ -3,18 +3,11 @@ import type { NextRequest } from "next/server"; import { listPairingRequests, approvePairingCode, + parseToolText, } from "@clawe/shared/squadhub"; import { getAuthenticatedTenant } from "@/lib/api/tenant-auth"; import { getConnection } from "@/lib/squadhub/connection"; -function parseToolText(result: { - result: { content: Array<{ text?: string }> }; -}): Record { - const text = result.result.content[0]?.text; - if (!text) return {}; - return JSON.parse(text) as Record; -} - // GET /api/squadhub/pairing?channel=telegram - List pending pairing requests export async function GET(request: NextRequest) { const auth = await getAuthenticatedTenant(request); @@ -27,8 +20,8 @@ export async function GET(request: NextRequest) { return NextResponse.json({ error: result.error.message }, { status: 500 }); } - const data = parseToolText(result); - return NextResponse.json({ requests: data.requests ?? [] }); + const data = parseToolText<{ requests?: unknown[] }>(result); + return NextResponse.json({ requests: data?.requests ?? [] }); } // POST /api/squadhub/pairing - Approve a pairing code @@ -60,10 +53,16 @@ export async function POST(request: NextRequest) { ); } - const data = parseToolText(result); - if (!data.ok) { + const data = parseToolText<{ + ok: boolean; + id?: string; + approved?: boolean; + error?: string; + }>(result); + + if (!data?.ok) { return NextResponse.json( - { error: data.error || "Failed to approve pairing code" }, + { error: data?.error || "Failed to approve pairing code" }, { status: 404 }, ); } diff --git a/apps/web/src/lib/squadhub/actions.ts b/apps/web/src/lib/squadhub/actions.ts index 768f144..dc85a51 100644 --- a/apps/web/src/lib/squadhub/actions.ts +++ b/apps/web/src/lib/squadhub/actions.ts @@ -7,6 +7,7 @@ import { removeTelegramBotToken as removeTelegramBotTokenClient, probeTelegramToken, approvePairingCode as approvePairingCodeClient, + parseToolText, } from "@clawe/shared/squadhub"; import { getConnection } from "./connection"; @@ -49,28 +50,19 @@ export async function approvePairingCode( }; } - // Parse the tool's text response - const text = result.result.content[0]?.text; - if (!text) { - return { - ok: false as const, - error: { type: "parse_error", message: "Empty tool response" }, - }; - } - - const data = JSON.parse(text) as { + const data = parseToolText<{ ok: boolean; id?: string; approved?: boolean; error?: string; - }; + }>(result); - if (!data.ok) { + if (!data?.ok) { return { ok: false as const, error: { type: "not_found", - message: data.error || "Invalid or expired pairing code", + message: data?.error || "Invalid or expired pairing code", }, }; } diff --git a/apps/web/src/test/mock-tenant-auth.ts b/apps/web/src/test/mock-tenant-auth.ts new file mode 100644 index 0000000..b19a9b7 --- /dev/null +++ b/apps/web/src/test/mock-tenant-auth.ts @@ -0,0 +1,21 @@ +import { vi } from "vitest"; + +/** + * Shared mock for `@/lib/api/tenant-auth` used across API route tests. + * Import this before the route under test to set up the mock. + * + * Usage: + * vi.mock("@/lib/api/tenant-auth", () => mockTenantAuth); + */ +export const mockTenantAuth = { + getAuthenticatedTenant: vi.fn(async () => ({ + error: null, + convex: {}, + tenant: { + _id: "test-tenant-id", + squadhubUrl: "http://localhost:18790", + squadhubToken: "test-token", + status: "active", + }, + })), +}; diff --git a/packages/shared/src/squadhub/client.ts b/packages/shared/src/squadhub/client.ts index 5f21149..9416f25 100644 --- a/packages/shared/src/squadhub/client.ts +++ b/packages/shared/src/squadhub/client.ts @@ -58,6 +58,23 @@ async function invokeTool( } } +/** + * Parse the text payload from a tool invoke result. + * Tool results wrap their data as JSON inside `content[0].text`. + */ +export function parseToolText>( + result: ToolResult, +): T | null { + if (!result.ok) return null; + const text = result.result.content[0]?.text; + if (!text) return null; + try { + return JSON.parse(text) as T; + } catch { + return null; + } +} + export async function checkHealth( connection: SquadhubConnection, ): Promise> { diff --git a/packages/shared/src/squadhub/index.ts b/packages/shared/src/squadhub/index.ts index a77caeb..6832bbc 100644 --- a/packages/shared/src/squadhub/index.ts +++ b/packages/shared/src/squadhub/index.ts @@ -13,6 +13,7 @@ export { cronAdd, listPairingRequests, approvePairingCode, + parseToolText, } from "./client"; export type { SquadhubConnection, @@ -37,7 +38,6 @@ export { getSharedClient } from "./shared-client"; export type { AgentToolResult, ToolResult, - DirectResult, ConfigGetResult, ConfigPatchResult, Session, @@ -46,8 +46,6 @@ export type { GatewayHealthResult, TelegramProbeResult, PairingRequest, - PairingListResult, - PairingApproveResult, } from "./types"; // Gateway Types diff --git a/packages/shared/src/squadhub/types.ts b/packages/shared/src/squadhub/types.ts index 3ed726d..0f231bd 100644 --- a/packages/shared/src/squadhub/types.ts +++ b/packages/shared/src/squadhub/types.ts @@ -14,11 +14,6 @@ export type ToolResult = | { ok: true; result: AgentToolResult } | { ok: false; error: { type: string; message: string } }; -// DirectResult for operations that don't go through squadhub tools -export type DirectResult = - | { ok: true; result: T } - | { ok: false; error: { type: string; message: string } }; - export type ConfigGetResult = { config: Record; hash: string; @@ -69,12 +64,3 @@ export type PairingRequest = { lastSeenAt: string; meta?: Record; }; - -export type PairingListResult = { - requests: PairingRequest[]; -}; - -export type PairingApproveResult = { - id: string; - approved: boolean; -};