3232-- Constructs a Prefix Tree (Trie) for efficient operator matching.
3333--
3434-- This structure solves the "Longest Prefix Match" problem. When the tokenizer
35- -- encounters a character like `. `, it needs to decide if it's a Dot (`.`) ,
36- -- a Concat (`..`), or Vararg (`...`) .
35+ -- encounters a character like `>`, it needs to decide if it's a standalone `>`,
36+ -- or part of a longer operator like `>=`.
3737--
3838-- Instead of complex lookahead logic, we traverse this tree. If we can travel
3939-- deeper (e.g., from `.` to `.`), we continue. If we stop, we know we've
@@ -141,16 +141,14 @@ Tokenizer.CONFIG = {
141141 })
142142}
143143
144- --[[
145- Character Classification Tables (Pre-computed)
146-
147- Tokenization is the tightest loop in the compiler, running once per character.
148- Calling Lua's pattern matcher (`string.match`) inside this loop is too slow.
149-
150- Instead, we pre-calculate the classification for every possible byte (0-255).
151- Checking `if PATTERNS.DIGIT[char]` becomes a simple array lookup, which is
152- orders of magnitude faster than regex matching.
153- --]]
144+ -- Character Classification Tables (Pre-computed)
145+ --
146+ -- Tokenization is the tightest loop in the compiler, running once per character.
147+ -- Calling Lua's pattern matcher (`string.match`) inside this loop is too slow.
148+ --
149+ -- Instead, we pre-calculate the classification for every possible byte (0-255).
150+ -- Checking `if PATTERNS.DIGIT[char]` becomes a simple array lookup, which is
151+ -- orders of magnitude faster than Lua pattern matching.
154152Tokenizer .PATTERNS = {
155153 SPACE = makePatternLookup (" %s" ), -- Whitespace
156154 DIGIT = makePatternLookup (" %d" ), -- 0-9
@@ -2417,8 +2415,7 @@ function CodeGenerator:splitTableElements(elements)
24172415 return implicitElems , explicitElems
24182416end
24192417
2420- -- Used in methods like `processAssignmentStatement` to set
2421- -- the value of a register to another register's value.
2418+ -- Sets the value of a variable or table index from a register.
24222419function CodeGenerator :setRegisterValue (node , copyFromRegister )
24232420 local nodeKind = node .kind
24242421
@@ -2430,11 +2427,13 @@ function CodeGenerator:setRegisterValue(node, copyFromRegister)
24302427 -- OP_MOVE [A, B] R(A) := R(B)
24312428 self :emitInstruction (" MOVE" , variableRegister , copyFromRegister )
24322429 elseif variableType == " Upvalue" then
2430+ local upvalueIndex = self :findOrCreateUpvalue (variableName )
24332431 -- OP_SETUPVAL [A, B] UpValue[B] := R(A)
2434- self :emitInstruction (" SETUPVAL" , copyFromRegister , self : findOrCreateUpvalue ( variableName ) )
2432+ self :emitInstruction (" SETUPVAL" , copyFromRegister , upvalueIndex )
24352433 elseif variableType == " Global" then
2434+ local constantIndex = self :findOrCreateConstant (variableName )
24362435 -- OP_SETGLOBAL [A, Bx] Gbl[Kst(Bx)] := R(A)
2437- self :emitInstruction (" SETGLOBAL" , copyFromRegister , self : findOrCreateConstant ( variableName ) )
2436+ self :emitInstruction (" SETGLOBAL" , copyFromRegister , constantIndex )
24382437 end
24392438 return
24402439 elseif nodeKind == " IndexExpression" then
@@ -2661,16 +2660,19 @@ function CodeGenerator:processVariable(node, register)
26612660 local varType = node .variableType -- "Local" / "Upvalue" / "Global"
26622661
26632662 if varType == " Local" then
2663+ -- Local variables are stored in the function's stack (registers).
26642664 local variable = self :findVariableRegister (varName )
26652665
26662666 -- OP_MOVE [A, B] R(A) := R(B)
26672667 self :emitInstruction (" MOVE" , register , variable )
26682668 elseif varType == " Global" then
2669+ -- Globals are stored in the global environment table.
26692670 local constantIndex = self :findOrCreateConstant (varName )
26702671
26712672 -- OP_GETGLOBAL [A, Bx] R(A) := Gbl[Kst(Bx)]
26722673 self :emitInstruction (" GETGLOBAL" , register , constantIndex )
26732674 elseif varType == " Upvalue" then
2675+ -- Upvalues are stored in the function's upvalue list.
26742676 local upvalueIndex = self :findOrCreateUpvalue (varName )
26752677
26762678 -- OP_GETUPVAL [A, B] R(A) := UpValue[B]
@@ -2685,12 +2687,15 @@ function CodeGenerator:processFunctionCall(node, register, resultRegisters)
26852687 local arguments = node .arguments
26862688 local isMethodCall = node .isMethodCall
26872689
2690+ -- Method calls (e.g., `obj:method()`) need special handling.
2691+ -- We need to load the `self` parameter (the table) into the
2692+ -- register before the function call, so we use the `SELF` instruction.
26882693 if isMethodCall then
26892694 local calleeExpressionIndex = callee .index
26902695 local calleeExpressionBase = callee .base
26912696
26922697 self :processExpressionNode (calleeExpressionBase , register )
2693- self :allocateRegisters (1 ) -- Used for `self` arg, will get free'd later .
2698+ self :allocateRegisters (1 ) -- Used for implicit `self` argument .
26942699
26952700 local calleeIndexRegister = self :processConstantOrExpression (calleeExpressionIndex )
26962701
@@ -2796,22 +2801,26 @@ function CodeGenerator:processLocalFunctionDeclaration(node)
27962801end
27972802
27982803-- NOTE:
2799- -- According to the Lua 5.1 assignment semantics (https://www.lua.org/manual/5.1/manual.html#2.4.3),
2800- -- when there are a variable used in both sides of the assignment, the right-hand side expressions
2801- -- should be evaluated first, and then assigned to the left-hand side variables. This means
2802- -- that this code will incorrectly throw an error if compiled with our current implementation:
2804+ -- According to the Lua 5.1 assignment semantics, when there are variables
2805+ -- used in both sides of the assignment, the right-hand side expressions
2806+ -- should be evaluated first, and then assigned to the left-hand side
2807+ -- variables. This means that this code will incorrectly throw an error
2808+ -- if compiled with our current implementation:
28032809-- ```lua
28042810-- local a, b = {}, 2
28052811-- a[b], b = 10, 20
28062812-- assert(a[2] == 10 and b == 20)
28072813-- ```
2808- -- I haven't found an easy way to implement this behavior yet, so for now, we will leave it as is.
2814+ -- I haven't found an easy way to implement this behavior yet, so for now,
2815+ -- we will leave it as is.
2816+ --
2817+ -- Reference: https://www.lua.org/manual/5.1/manual.html#2.4.3
28092818function CodeGenerator :processAssignmentStatement (node )
28102819 local lvalues = node .lvalues
28112820 local expressions = node .expressions
28122821
28132822 local variableBaseRegister = self .stackSize - 1
2814- local lvalueRegisterCount = self :processExpressionList (expressions , # lvalues )
2823+ local lvalueRegisterCount = self :processExpressionList (expressions , # lvalues )
28152824
28162825 for index , lvalue in ipairs (lvalues ) do
28172826 local lvalueRegister = variableBaseRegister + index
@@ -2825,35 +2834,46 @@ function CodeGenerator:processIfStatement(node)
28252834 local clauses = node .clauses
28262835 local elseClause = node .elseClause
28272836
2828- local jumpToEndPCs = {}
2829- local lastClause = clauses [# clauses ]
2830- local previousJumpPC = nil
2837+ local jumpToEndPCs = {}
2838+ local lastClause = clauses [# clauses ]
28312839
2840+ -- Process all 'if' and 'elseif' clauses first.
28322841 for _ , clause in ipairs (clauses ) do
28332842 local condition = clause .condition
28342843 local body = clause .body
28352844
28362845 local conditionRegister = self :processExpressionNode (condition )
28372846
28382847 -- OP_TEST [A, C] if not (R(A) <=> C) then pc++
2848+ -- If the condition is truthy, TEST skips the next instruction;
2849+ -- otherwise execution falls through to it (the jump to the next clause).
28392850 self :emitInstruction (" TEST" , conditionRegister , 0 , 0 )
28402851 self :freeRegisters (1 ) -- Free conditionRegister
28412852
2842- previousJumpPC = self :emitJump ()
2853+ -- This is the jump that occurs if the condition is false.
2854+ -- We will patch it later to jump to the next clause.
2855+ local conditionIsFalseJumpPC = self :emitJump ()
28432856 self :processBlockNode (body )
28442857
2845- local isLastClause = (clause == lastClause )
2846- if not isLastClause or elseClause then
2858+ local isLastClause = (clause == lastClause )
2859+ local shouldJumpToEnd = not isLastClause or elseClause
2860+ if shouldJumpToEnd then
28472861 local jumpToEndPC = self :emitJump ()
28482862 table.insert (jumpToEndPCs , jumpToEndPC )
28492863 end
2850- self :patchJumpToHere (previousJumpPC )
2864+
2865+ -- Patch the condition false jump to here.
2866+ self :patchJumpToHere (conditionIsFalseJumpPC )
28512867 end
28522868
2869+ -- Is there an 'else' clause to process?
2870+ -- NOTE: The last clause's false-condition jump has already been
2871+ -- patched to land here, so there is nothing more to patch.
28532872 if elseClause then
28542873 self :processBlockNode (elseClause )
28552874 end
28562875
2876+ -- Patch all jumps at the end of clauses to here.
28572877 self :patchJumpsToHere (jumpToEndPCs )
28582878end
28592879
@@ -2862,7 +2882,7 @@ function CodeGenerator:processForGenericStatement(node)
28622882 local expressions = node .expressions
28632883 local body = node .body
28642884
2865- local baseStackSize = self .stackSize
2885+ local baseRegister = self .stackSize
28662886 local expressionRegisters = self :processExpressionList (expressions , 3 )
28672887 self :declareLocalVariables (iterators )
28682888
@@ -2874,7 +2894,10 @@ function CodeGenerator:processForGenericStatement(node)
28742894
28752895 -- OP_TFORLOOP [A, C] R(A+3), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2))
28762896 -- if R(A+3) ~= nil then R(A+2)=R(A+3) else pc++
2877- self :emitInstruction (" TFORLOOP" , baseStackSize , 0 , # iterators )
2897+ -- Skips next instruction if there are no more values.
2898+ self :emitInstruction (" TFORLOOP" , baseRegister , 0 , # iterators )
2899+
2900+ -- Emit jump back to the start of the loop if there are more values.
28782901 self :emitJumpBack (loopStartPC )
28792902 end )
28802903 self :undeclareVariables (iterators )
@@ -2891,10 +2914,13 @@ function CodeGenerator:processForNumericStatement(node)
28912914 local startRegister = self :processExpressionNode (startExpr )
28922915 self :processExpressionNode (limitExpr )
28932916 local stepRegister = self :allocateRegisters (1 )
2917+
2918+ -- Is there a step expression?
28942919 if stepExpr then
28952920 self :processExpressionNode (stepExpr , stepRegister )
28962921 else
28972922 -- OP_LOADK [A, Bx] R(A) := Kst(Bx)
2923+ -- Default step is 1.
28982924 self :emitInstruction (" LOADK" , stepRegister , self :findOrCreateConstant (1 ))
28992925 end
29002926
0 commit comments