A transformer-based LLM built from scratch with Effect. Inspired by RustGPT.
npm i effect-gpt
bun install
bun run dev # train + generate via cli
bun test # run test suite

A complete LLM implementation including:
- Tokenization — BPE-style text preprocessing
- Transformer Architecture — embeddings, multi-head attention, feed-forward layers, layer norm
- Training — cross-entropy loss, backpropagation, Adam optimizer with gradient clipping
- Inference — greedy decoding for text generation
This project leverages Effect's robust ecosystem to bring systems-programming discipline to TypeScript:
- Service-Based Architecture — Uses `Context` and `Layer` to keep the core model pure and make testing deterministic by swapping implementations.
- Type-Safe Errors — Implements `Data.TaggedError` for a precise, union-based error system, ensuring all failure cases are handled explicitly.
- Lazy Streaming — Utilizes `Stream` for high-performance, backpressured data loading and batching during training.
- Resource Management — Uses `Scope` to guarantee that file handles and fibers are always cleaned up correctly.
- Declarative Concurrency — Leverages Effect's runtime to manage parallel preprocessing and training loops.
Check out our blog post: Building a transformer-based LLM with Effect.
src/
├── tensor/ # Tensor2D, matmul, softmax, layer norm
├── model/ # embeddings, attention, transformer blocks, forward/backward
├── training/ # loss, gradients, Adam optimizer, training loop
├── tokenize/ # text → tokens
├── vocab/ # vocabulary management
├── services/ # Effect services (Random, Logger, Metrics)
└── cli/ # command-line interface
import { Effect, Layer } from "effect"
import {
  LLM,
  Vocab,
  tokenize,
  SeededRandomLive,
  SeedLayer,
  ConsoleLoggerLive,
  NoOpMetricsLive
} from "effect-gpt"

// Build a tiny model: a 50-token vocabulary over a two-word corpus.
const vocab = Vocab.fromCorpus("hello world", 50)
const config = { vocabSize: vocab.size, embedDim: 32, numHeads: 2, numLayers: 2, seqLen: 16 }

// Initialize the model and run a single forward pass over the prompt.
const program = Effect.gen(function* () {
  const llm = yield* LLM.init(config)
  const tokens = tokenize(vocab, "hello")
  const output = yield* llm.forward(tokens)
  console.log("Output shape:", output.rows, "x", output.cols)
})

// Service wiring: seeded RNG makes weight init deterministic;
// logging goes to the console and metrics are discarded.
const live = Layer.mergeAll(
  SeedLayer(42),
  SeededRandomLive,
  ConsoleLoggerLive,
  NoOpMetricsLive
)
Effect.runPromise(program.pipe(Effect.provide(live)))

import { T2D, TensorOps, seeded } from "effect-gpt"
// Deterministic RNG — the same seed always produces the same tensors
const gen = seeded(123)

// Allocate random-normal tensors
const a = T2D.randn(gen, 3, 4)
const weights = T2D.randn(gen, 4, 2)

// Matrix multiplication: (3×4)·(4×2) → 3×2
const product = TensorOps.matmul(a, weights)

// Element-wise operations on `a`
const halved = TensorOps.scale(a, 0.5)
const doubled = TensorOps.add(a, a)

// Row-wise softmax
const probs = TensorOps.softmaxRows(a)

import { Effect, Stream, Layer } from "effect"
import {
  LLM,
  Vocab,
  trainStream,
  Adam,
  SeededRandomLive,
  SeedLayer,
  ConsoleLoggerLive,
  InMemoryMetricsLive
} from "effect-gpt"

const trainingProgram = Effect.gen(function* () {
  // Small model over a pangram corpus.
  const vocab = Vocab.fromCorpus("the quick brown fox jumps over the lazy dog", 100)
  const config = { vocabSize: vocab.size, embedDim: 64, numHeads: 4, numLayers: 2, seqLen: 32 }
  const llm = yield* LLM.init(config)
  const optimizer = Adam.create(0.001)

  // Stream-based training with backpressure; each batch pairs input
  // token ids with their next-token targets.
  const batches = Stream.fromIterable([
    { input: [1, 2, 3, 4], target: [2, 3, 4, 5] },
    { input: [5, 6, 7, 8], target: [6, 7, 8, 9] }
  ])
  yield* trainStream(llm, optimizer, batches, { epochs: 10 })
})

// Service wiring: deterministic RNG plus in-memory metrics collection.
const live = Layer.mergeAll(
  SeedLayer(42),
  SeededRandomLive,
  ConsoleLoggerLive,
  InMemoryMetricsLive
)
Effect.runPromise(trainingProgram.pipe(Effect.provide(live)))

import { Effect, Layer } from "effect"
import {
  Logger,
  Random,
  Metrics,
  log,
  next,
  counter,
  NullLoggerLive,
  SeededRandomLive,
  InMemoryMetricsLive
} from "effect-gpt"

// Use built-in services
const program = Effect.gen(function* () {
  yield* log("info", "Starting training...")
  const rand = yield* next() // random float [0, 1)
  const trainCounter = yield* counter("batches_processed")
  trainCounter.inc()
})

// Swap implementations for testing
const testLayer = Layer.mergeAll(
  NullLoggerLive, // silent logging
  SeededRandomLive, // deterministic RNG
  InMemoryMetricsLive // capture metrics
)

MIT