Skip to content

Commit fc059a8

Browse files
committed
docs: improve comments for clarity and consistency
1 parent 6821750 commit fc059a8

File tree

1 file changed

+58
-32
lines changed

1 file changed

+58
-32
lines changed

the-tiny-lua-compiler.lua

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ end
3232
-- Constructs a Prefix Tree (Trie) for efficient operator matching.
3333
--
3434
-- This structure solves the "Longest Prefix Match" problem. When the tokenizer
35-
-- encounters a character like `.`, it needs to decide if it's a Dot (`.`),
36-
-- a Concat (`..`), or Vararg (`...`).
35+
-- encounters a character like `>`, it needs to decide if it's a standalone `>`,
36+
-- or part of a longer operator like `>=`.
3737
--
3838
-- Instead of complex lookahead logic, we traverse this tree. If we can travel
3939
-- deeper (e.g., from `>` to `=`), we continue. If we stop, we know we've
@@ -141,16 +141,14 @@ Tokenizer.CONFIG = {
141141
})
142142
}
143143

144-
--[[
145-
Character Classification Tables (Pre-computed)
146-
147-
Tokenization is the tightest loop in the compiler, running once per character.
148-
Calling Lua's pattern matcher (`string.match`) inside this loop is too slow.
149-
150-
Instead, we pre-calculate the classification for every possible byte (0-255).
151-
Checking `if PATTERNS.DIGIT[char]` becomes a simple array lookup, which is
152-
orders of magnitude faster than regex matching.
153-
--]]
144+
-- Character Classification Tables (Pre-computed)
145+
--
146+
-- Tokenization is the tightest loop in the compiler, running once per character.
147+
-- Calling Lua's pattern matcher (`string.match`) inside this loop is too slow.
148+
--
149+
-- Instead, we pre-calculate the classification for every possible byte (0-255).
150+
-- Checking `if PATTERNS.DIGIT[char]` becomes a simple array lookup, which is
151+
-- orders of magnitude faster than pattern matching.
154152
Tokenizer.PATTERNS = {
155153
SPACE = makePatternLookup("%s"), -- Whitespace
156154
DIGIT = makePatternLookup("%d"), -- 0-9
@@ -2417,8 +2415,7 @@ function CodeGenerator:splitTableElements(elements)
24172415
return implicitElems, explicitElems
24182416
end
24192417

2420-
-- Used in methods like `processAssignmentStatement` to set
2421-
-- the value of a register to another register's value.
2418+
-- Sets the value of a variable or table index from a register.
24222419
function CodeGenerator:setRegisterValue(node, copyFromRegister)
24232420
local nodeKind = node.kind
24242421

@@ -2430,11 +2427,13 @@ function CodeGenerator:setRegisterValue(node, copyFromRegister)
24302427
-- OP_MOVE [A, B] R(A) := R(B)
24312428
self:emitInstruction("MOVE", variableRegister, copyFromRegister)
24322429
elseif variableType == "Upvalue" then
2430+
local upvalueIndex = self:findOrCreateUpvalue(variableName)
24332431
-- OP_SETUPVAL [A, B] UpValue[B] := R(A)
2434-
self:emitInstruction("SETUPVAL", copyFromRegister, self:findOrCreateUpvalue(variableName))
2432+
self:emitInstruction("SETUPVAL", copyFromRegister, upvalueIndex)
24352433
elseif variableType == "Global" then
2434+
local constantIndex = self:findOrCreateConstant(variableName)
24362435
-- OP_SETGLOBAL [A, Bx] Gbl[Kst(Bx)] := R(A)
2437-
self:emitInstruction("SETGLOBAL", copyFromRegister, self:findOrCreateConstant(variableName))
2436+
self:emitInstruction("SETGLOBAL", copyFromRegister, constantIndex)
24382437
end
24392438
return
24402439
elseif nodeKind == "IndexExpression" then
@@ -2661,16 +2660,19 @@ function CodeGenerator:processVariable(node, register)
26612660
local varType = node.variableType -- "Local" / "Upvalue" / "Global"
26622661

26632662
if varType == "Local" then
2663+
-- Local variables are stored in the function's stack (registers).
26642664
local variable = self:findVariableRegister(varName)
26652665

26662666
-- OP_MOVE [A, B] R(A) := R(B)
26672667
self:emitInstruction("MOVE", register, variable)
26682668
elseif varType == "Global" then
2669+
-- Globals are stored in the global environment table.
26692670
local constantIndex = self:findOrCreateConstant(varName)
26702671

26712672
-- OP_GETGLOBAL [A, Bx] R(A) := Gbl[Kst(Bx)]
26722673
self:emitInstruction("GETGLOBAL", register, constantIndex)
26732674
elseif varType == "Upvalue" then
2675+
-- Upvalues are stored in the function's upvalue list.
26742676
local upvalueIndex = self:findOrCreateUpvalue(varName)
26752677

26762678
-- OP_GETUPVAL [A, B] R(A) := UpValue[B]
@@ -2685,12 +2687,15 @@ function CodeGenerator:processFunctionCall(node, register, resultRegisters)
26852687
local arguments = node.arguments
26862688
local isMethodCall = node.isMethodCall
26872689

2690+
-- Method calls (e.g., `obj:method()`) need special handling.
2691+
-- We need to load the `self` parameter (the table) into the
2692+
-- register before the function call, so we use the `SELF` instruction.
26882693
if isMethodCall then
26892694
local calleeExpressionIndex = callee.index
26902695
local calleeExpressionBase = callee.base
26912696

26922697
self:processExpressionNode(calleeExpressionBase, register)
2693-
self:allocateRegisters(1) -- Used for `self` arg, will get free'd later.
2698+
self:allocateRegisters(1) -- Used for implicit `self` argument.
26942699

26952700
local calleeIndexRegister = self:processConstantOrExpression(calleeExpressionIndex)
26962701

@@ -2796,22 +2801,26 @@ function CodeGenerator:processLocalFunctionDeclaration(node)
27962801
end
27972802

27982803
-- NOTE:
2799-
-- According to the Lua 5.1 assignment semantics (https://www.lua.org/manual/5.1/manual.html#2.4.3),
2800-
-- when there are a variable used in both sides of the assignment, the right-hand side expressions
2801-
-- should be evaluated first, and then assigned to the left-hand side variables. This means
2802-
-- that this code will incorrectly throw an error if compiled with our current implementation:
2804+
-- According to the Lua 5.1 assignment semantics, when there are variables
2805+
-- used on both sides of the assignment, the right-hand side expressions
2806+
-- should be evaluated first, and then assigned to the left-hand side
2807+
-- variables. This means that this code will incorrectly throw an error
2808+
-- if compiled with our current implementation:
28032809
-- ```lua
28042810
-- local a, b = {}, 2
28052811
-- a[b], b = 10, 20
28062812
-- assert(a[2] == 10 and b == 20)
28072813
-- ```
2808-
-- I haven't found an easy way to implement this behavior yet, so for now, we will leave it as is.
2814+
-- I haven't found an easy way to implement this behavior yet, so for now,
2815+
-- we will leave it as is.
2816+
--
2817+
-- Reference: https://www.lua.org/manual/5.1/manual.html#2.4.3
28092818
function CodeGenerator:processAssignmentStatement(node)
28102819
local lvalues = node.lvalues
28112820
local expressions = node.expressions
28122821

28132822
local variableBaseRegister = self.stackSize - 1
2814-
local lvalueRegisterCount = self:processExpressionList(expressions, #lvalues)
2823+
local lvalueRegisterCount = self:processExpressionList(expressions, #lvalues)
28152824

28162825
for index, lvalue in ipairs(lvalues) do
28172826
local lvalueRegister = variableBaseRegister + index
@@ -2825,35 +2834,46 @@ function CodeGenerator:processIfStatement(node)
28252834
local clauses = node.clauses
28262835
local elseClause = node.elseClause
28272836

2828-
local jumpToEndPCs = {}
2829-
local lastClause = clauses[#clauses]
2830-
local previousJumpPC = nil
2837+
local jumpToEndPCs = {}
2838+
local lastClause = clauses[#clauses]
28312839

2840+
-- Process all 'if' and 'elseif' clauses first.
28322841
for _, clause in ipairs(clauses) do
28332842
local condition = clause.condition
28342843
local body = clause.body
28352844

28362845
local conditionRegister = self:processExpressionNode(condition)
28372846

28382847
-- OP_TEST [A, C] if not (R(A) <=> C) then pc++
2848+
-- If the condition is false, fall through to the next instruction
2849+
-- (which is always a jump to the next clause); otherwise, skip it.
28392850
self:emitInstruction("TEST", conditionRegister, 0, 0)
28402851
self:freeRegisters(1) -- Free conditionRegister
28412852

2842-
previousJumpPC = self:emitJump()
2853+
-- This is the jump that occurs if the condition is false.
2854+
-- We will patch it later to jump to the next clause.
2855+
local conditionIsFalseJumpPC = self:emitJump()
28432856
self:processBlockNode(body)
28442857

2845-
local isLastClause = (clause == lastClause)
2846-
if not isLastClause or elseClause then
2858+
local isLastClause = (clause == lastClause)
2859+
local shouldJumpToEnd = not isLastClause or elseClause
2860+
if shouldJumpToEnd then
28472861
local jumpToEndPC = self:emitJump()
28482862
table.insert(jumpToEndPCs, jumpToEndPC)
28492863
end
2850-
self:patchJumpToHere(previousJumpPC)
2864+
2865+
-- Patch the condition false jump to here.
2866+
self:patchJumpToHere(conditionIsFalseJumpPC)
28512867
end
28522868

2869+
-- Is there an 'else' clause to process?
2870+
-- NOTE: The last clause's condition-false jump has already been patched
2871+
-- to land here, so we don't need to patch anything.
28532872
if elseClause then
28542873
self:processBlockNode(elseClause)
28552874
end
28562875

2876+
-- Patch all jumps at the end of clauses to here.
28572877
self:patchJumpsToHere(jumpToEndPCs)
28582878
end
28592879

@@ -2862,7 +2882,7 @@ function CodeGenerator:processForGenericStatement(node)
28622882
local expressions = node.expressions
28632883
local body = node.body
28642884

2865-
local baseStackSize = self.stackSize
2885+
local baseRegister = self.stackSize
28662886
local expressionRegisters = self:processExpressionList(expressions, 3)
28672887
self:declareLocalVariables(iterators)
28682888

@@ -2874,7 +2894,10 @@ function CodeGenerator:processForGenericStatement(node)
28742894

28752895
-- OP_TFORLOOP [A, C] R(A+3), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2))
28762896
-- if R(A+3) ~= nil then R(A+2)=R(A+3) else pc++
2877-
self:emitInstruction("TFORLOOP", baseStackSize, 0, #iterators)
2897+
-- Skips next instruction if there are no more values.
2898+
self:emitInstruction("TFORLOOP", baseRegister, 0, #iterators)
2899+
2900+
-- Emit jump back to the start of the loop if there are more values.
28782901
self:emitJumpBack(loopStartPC)
28792902
end)
28802903
self:undeclareVariables(iterators)
@@ -2891,10 +2914,13 @@ function CodeGenerator:processForNumericStatement(node)
28912914
local startRegister = self:processExpressionNode(startExpr)
28922915
self:processExpressionNode(limitExpr)
28932916
local stepRegister = self:allocateRegisters(1)
2917+
2918+
-- Is there a step expression?
28942919
if stepExpr then
28952920
self:processExpressionNode(stepExpr, stepRegister)
28962921
else
28972922
-- OP_LOADK [A, Bx] R(A) := Kst(Bx)
2923+
-- Default step is 1.
28982924
self:emitInstruction("LOADK", stepRegister, self:findOrCreateConstant(1))
28992925
end
29002926

0 commit comments

Comments
 (0)