Commit db2da13

gastonpontis and samlaf authored
fix(node) Altda failover to ethda should keep finalizing l2 chain (#316)
* test(altda): add a test to make sure the altda node keeps finalizing even after failover to ethda. Currently it does not, as shown by the failing test TestAltDA_FinalizationAfterEthDAFailover.
* fix(damgr): fix the ethda failover finalization stall bug. Weiwei from Polymer found this bug and proposed a solution; this is an alternative solution which seems simpler, though we are not 100% sure of its soundness.
* fix: damgr_test doesn't compile
* chore: add more logs to damgr and altda_data_source
* docs(altda_test): fix typo

Co-authored-by: Samuel Laferriere <samlaf92@gmail.com>
1 parent 5b60e1a commit db2da13

8 files changed: +179, -45 lines

op-alt-da/damgr.go

Lines changed: 11 additions & 2 deletions
@@ -117,8 +117,15 @@ func (d *DA) OnFinalizedHeadSignal(f HeadSignalFn) {
 func (d *DA) updateFinalizedHead(l1Finalized eth.L1BlockRef) {
     d.l1FinalizedHead = l1Finalized
     // Prune the state to the finalized head
-    d.state.Prune(l1Finalized.ID())
-    d.finalizedHead = d.state.lastPrunedCommitment
+    lastPrunedCommIncBlock := d.state.Prune(l1Finalized.ID())
+    d.log.Debug("updateFinalizedHead", "currFinalizedHead", d.finalizedHead.Number, "lastPrunedCommIncBlock", lastPrunedCommIncBlock.Number, "l1Finalized", l1Finalized.Number)
+    // If a commitment was pruned, set the finalized head to that commitment's inclusion block
+    // When no commitments are left to be pruned (one example is if we have failed over to ethda)
+    // then updateFinalizedFromL1 becomes the main driver of the finalized head.
+    // Note that updateFinalizedFromL1 is only called when d.state.NoCommitments() is true.
+    if lastPrunedCommIncBlock != (eth.L1BlockRef{}) {
+        d.finalizedHead = lastPrunedCommIncBlock
+    }
 }

 // updateFinalizedFromL1 updates the finalized head based on the challenge window.
@@ -133,6 +140,7 @@ func (d *DA) updateFinalizedFromL1(ctx context.Context, l1 L1Fetcher) error {
     if err != nil {
         return err
     }
+    d.log.Debug("updateFinalizedFromL1", "currFinalizedHead", d.finalizedHead.Number, "newFinalizedHead", ref.Number, "l1FinalizedHead", d.l1FinalizedHead.Number, "challengeWindow", d.cfg.ChallengeWindow)
     d.finalizedHead = ref
     return nil
 }
@@ -413,6 +421,7 @@ func (d *DA) fetchChallengeLogs(ctx context.Context, l1 L1Fetcher, block eth.Blo
     }
     for _, log := range rec.Logs {
         if log.Address == d.cfg.DAChallengeContractAddress && len(log.Topics) > 0 && log.Topics[0] == ChallengeStatusEventABIHash {
+            d.log.Info("found challenge event", "block", block.Number, "log", log.Index)
             logs = append(logs, log)
         }
     }
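
For orientation, here is a minimal, self-contained Go sketch of the two finalization drivers described in the comments above. It is not code from this commit: blockRef, daState, onL1Finalized and onNoCommitments are simplified stand-ins for eth.L1BlockRef, the DA manager, updateFinalizedHead and updateFinalizedFromL1. The point is the interplay the fix relies on: the commitment-driven path only moves the finalized head when something was actually pruned, and the L1-driven path (finalized L1 minus the challenge window) takes over once no commitments are tracked, e.g. after failover to ethda.

package main

import "fmt"

// blockRef stands in for eth.L1BlockRef; only the number matters here.
type blockRef struct{ Number uint64 }

type daState struct {
    finalizedHead   blockRef
    challengeWindow uint64
}

// onL1Finalized mirrors updateFinalizedHead after this fix: only move the head
// to the last pruned commitment's inclusion block if something was pruned.
func (d *daState) onL1Finalized(lastPruned blockRef) {
    if lastPruned != (blockRef{}) {
        d.finalizedHead = lastPruned
    }
    // Otherwise leave finalizedHead alone; the L1-driven path below takes over.
}

// onNoCommitments mirrors updateFinalizedFromL1: when no commitments are
// tracked (e.g. after failover to ethda), finalize up to l1Finalized - challengeWindow.
func (d *daState) onNoCommitments(l1Finalized blockRef) {
    if l1Finalized.Number < d.challengeWindow {
        return
    }
    d.finalizedHead = blockRef{Number: l1Finalized.Number - d.challengeWindow}
}

func main() {
    d := &daState{challengeWindow: 16}

    // AltDA mode: a commitment included at block 5 is pruned once finalized.
    d.onL1Finalized(blockRef{Number: 5})
    fmt.Println("altda mode:", d.finalizedHead.Number) // 5

    // After failover to ethda nothing is pruned, so the zero value comes back and,
    // with this fix, the head is not overwritten (before the fix it was pinned to
    // the stale last-pruned value)...
    d.onL1Finalized(blockRef{})
    // ...and the L1-driven path keeps advancing it instead.
    d.onNoCommitments(blockRef{Number: 40})
    fmt.Println("after failover:", d.finalizedHead.Number) // 24
}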

op-alt-da/damgr_test.go

Lines changed: 14 additions & 14 deletions
@@ -53,12 +53,12 @@ func TestFinalization(t *testing.T) {
     require.NoError(t, state.ExpireCommitments(bID(8)))
     require.Empty(t, state.commitments)

-    state.Prune(bID(bn1))
-    require.Equal(t, eth.L1BlockRef{}, state.lastPrunedCommitment)
-    state.Prune(bID(7))
-    require.Equal(t, eth.L1BlockRef{}, state.lastPrunedCommitment)
-    state.Prune(bID(8))
-    require.Equal(t, l1Ref(bn1), state.lastPrunedCommitment)
+    lastPrunedCommitment := state.Prune(bID(bn1))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(7))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(8))
+    require.Equal(t, l1Ref(bn1), lastPrunedCommitment)

     // Track a commitment, challenge it, & then resolve it
     c2 := RandomCommitment(rng)
@@ -83,12 +83,12 @@ func TestFinalization(t *testing.T) {
     require.Empty(t, state.challenges)

     // Now finalize everything
-    state.Prune(bID(20))
-    require.Equal(t, l1Ref(bn1), state.lastPrunedCommitment)
-    state.Prune(bID(28))
-    require.Equal(t, l1Ref(bn1), state.lastPrunedCommitment)
-    state.Prune(bID(32))
-    require.Equal(t, l1Ref(bn2), state.lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(20))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(28))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(32))
+    require.Equal(t, l1Ref(bn2), lastPrunedCommitment)
 }

 // TestExpireChallenges expires challenges and prunes the state for longer windows
@@ -175,8 +175,8 @@ func TestDAChallengeDetached(t *testing.T) {
     require.ErrorIs(t, err, ErrReorgRequired)

     // pruning finalized block is safe. It should not prune any commitments yet.
-    state.Prune(bID(1))
-    require.Equal(t, eth.L1BlockRef{}, state.lastPrunedCommitment)
+    lastPrunedCommitment := state.Prune(bID(1))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)

     // Perform reorg back to bn2
     state.ClearCommitments()

op-alt-da/damock.go

Lines changed: 6 additions & 0 deletions
@@ -48,6 +48,8 @@ func (c *MockDAClient) DeleteData(key []byte) error {
     return c.store.Delete(key)
 }

+// DAErrFaker is a DA client that can be configured to return errors on GetInput
+// and SetInput calls.
 type DAErrFaker struct {
     Client *MockDAClient

@@ -109,6 +111,10 @@ func (d *AltDADisabled) AdvanceL1Origin(ctx context.Context, l1 L1Fetcher, block
 // - request latencies, to mimic a DA service with slow responses
 //   (eg. eigenDA with 10 min batching interval).
 // - response status codes, to mimic a DA service that is down.
+//
+// We use this FakeDaServer as opposed to the DAErrFaker client in the op-e2e altda system tests
+// because the batcher service only has a constructor to build from CLI flags (no dependency injection),
+// meaning the da client is built from an rpc url config instead of being injected.
 type FakeDAServer struct {
     *DAServer
     putRequestLatency time.Duration

op-alt-da/dastate.go

Lines changed: 16 additions & 13 deletions
@@ -52,15 +52,14 @@ func challengeKey(comm CommitmentData, inclusionBlockNumber uint64) string {
 // In the special case of a L2 reorg, challenges are still tracked but commitments are removed.
 // This will allow the altDA fetcher to find the expired challenge.
 type State struct {
-    commitments          []Commitment          // commitments where the challenge/resolve period has not expired yet
-    expiredCommitments   []Commitment          // commitments where the challenge/resolve period has expired but not finalized
-    challenges           []*Challenge          // challenges ordered by L1 inclusion
-    expiredChallenges    []*Challenge          // challenges ordered by L1 inclusion
-    challengesMap        map[string]*Challenge // challenges by serialized comm + block number for easy lookup
-    lastPrunedCommitment eth.L1BlockRef        // the last commitment to be pruned
-    cfg                  Config
-    log                  log.Logger
-    metrics              Metricer
+    commitments        []Commitment          // commitments where the challenge/resolve period has not expired yet
+    expiredCommitments []Commitment          // commitments where the challenge/resolve period has expired but not finalized
+    challenges         []*Challenge          // challenges ordered by L1 inclusion
+    expiredChallenges  []*Challenge          // challenges ordered by L1 inclusion
+    challengesMap      map[string]*Challenge // challenges by serialized comm + block number for easy lookup
+    cfg                Config
+    log                log.Logger
+    metrics            Metricer
 }

 func NewState(log log.Logger, m Metricer, cfg Config) *State {
@@ -207,15 +206,18 @@ func (s *State) ExpireChallenges(origin eth.BlockID) {
 }

 // Prune removes challenges & commitments which have an expiry block number beyond the given block number.
-func (s *State) Prune(origin eth.BlockID) {
+// It returns the last pruned commitment's inclusion block number, or eth.L1BlockRef{} if no commitments were pruned.
+func (s *State) Prune(origin eth.BlockID) eth.L1BlockRef {
     // Commitments rely on challenges, so we prune commitments first.
-    s.pruneCommitments(origin)
+    lastPrunedCommIncBlock := s.pruneCommitments(origin)
     s.pruneChallenges(origin)
+    return lastPrunedCommIncBlock
 }

 // pruneCommitments removes commitments which have are beyond a given block number.
 // It will remove commitments in order of inclusion until it finds a commitment which is not beyond the given block number.
-func (s *State) pruneCommitments(origin eth.BlockID) {
+func (s *State) pruneCommitments(origin eth.BlockID) eth.L1BlockRef {
+    var lastPrunedCommIncBlock eth.L1BlockRef
     for len(s.expiredCommitments) > 0 {
         c := s.expiredCommitments[0]
         challenge, ok := s.GetChallenge(c.data, c.inclusionBlock.Number)
@@ -236,8 +238,9 @@ func (s *State) pruneCommitments(origin eth.BlockID) {
         s.expiredCommitments = s.expiredCommitments[1:]

         // Record the latest inclusion block to be returned
-        s.lastPrunedCommitment = c.inclusionBlock
+        lastPrunedCommIncBlock = c.inclusionBlock
     }
+    return lastPrunedCommIncBlock
 }

 // pruneChallenges removes challenges which have are beyond a given block number.
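
A side note on the new return convention (an illustrative sketch, not code from this commit): eth.L1BlockRef is a struct of comparable fields, so its zero value can serve as a "nothing was pruned" sentinel, and callers can test it directly with != or ==, exactly as updateFinalizedHead does in damgr.go above. The ref type and prune function below are hypothetical stand-ins.

package main

import "fmt"

// ref is a stand-in for eth.L1BlockRef: all fields are comparable, so the
// zero value ref{} can be compared against directly.
type ref struct {
    Hash   [32]byte
    Number uint64
}

// prune is a stand-in for State.Prune: it returns the inclusion block of the
// last pruned commitment, or the zero value if nothing was pruned.
func prune(pruned []ref) ref {
    var last ref
    for _, r := range pruned {
        last = r
    }
    return last
}

func main() {
    if last := prune(nil); last == (ref{}) {
        fmt.Println("nothing pruned: keep the current finalized head")
    }
    if last := prune([]ref{{Number: 42}}); last != (ref{}) {
        fmt.Println("advance finalized head to inclusion block", last.Number)
    }
}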

op-e2e/actions/altda/altda_test.go

Lines changed: 111 additions & 14 deletions
@@ -1,6 +1,7 @@
 package altda

 import (
+    "log/slog"
     "math/big"
     "math/rand"
     "testing"
@@ -49,6 +50,12 @@ type L2AltDA struct {

 type AltDAParam func(p *e2eutils.TestParams)

+func WithLogLevel(level slog.Level) AltDAParam {
+    return func(p *e2eutils.TestParams) {
+        p.LogLevel = level
+    }
+}
+
 func NewL2AltDA(t helpers.Testing, params ...AltDAParam) *L2AltDA {
     p := &e2eutils.TestParams{
         MaxSequencerDrift: 40,
@@ -57,11 +64,12 @@ func NewL2AltDA(t helpers.Testing, params ...AltDAParam) *L2AltDA {
         L1BlockTime:       12,
         UseAltDA:          true,
         AllocType:         config.AllocTypeAltDA,
+        LogLevel:          log.LevelDebug,
     }
     for _, apply := range params {
         apply(p)
     }
-    log := testlog.Logger(t, log.LvlDebug)
+    log := testlog.Logger(t, p.LogLevel)

     dp := e2eutils.MakeDeployParams(t, p)
     sd := e2eutils.Setup(t, dp, helpers.DefaultAlloc)
@@ -75,14 +83,13 @@ func NewL2AltDA(t helpers.Testing, params ...AltDAParam) *L2AltDA {
     engine := helpers.NewL2Engine(t, log, sd.L2Cfg, jwtPath)
     engCl := engine.EngineClient(t, sd.RollupCfg)

-    storage := &altda.DAErrFaker{Client: altda.NewMockDAClient(log)}
-
     l1F, err := sources.NewL1Client(miner.RPCClient(), log, nil, sources.L1ClientDefaultConfig(sd.RollupCfg, false, sources.RPCKindBasic))
     require.NoError(t, err)

     altDACfg, err := sd.RollupCfg.GetOPAltDAConfig()
     require.NoError(t, err)

+    storage := &altda.DAErrFaker{Client: altda.NewMockDAClient(log)}
     daMgr := altda.NewAltDAWithStorage(log, altDACfg, storage, &altda.NoopMetrics{})

     sequencer := helpers.NewL2Sequencer(t, log, l1F, miner.BlobStore(), daMgr, engCl, sd.RollupCfg, 0)
@@ -177,6 +184,34 @@ func (a *L2AltDA) ActNewL2Tx(t helpers.Testing) {
     a.lastCommBn = a.miner.L1Chain().CurrentBlock().Number.Uint64()
 }

+// ActNewL2TxFinalized sends a new L2 transaction, submits a batch containing it to L1
+// and finalizes the L1 and L2 chains (including advancing enough to clear the altda challenge window).
+//
+// TODO: understand why (notation is l1unsafe/l1safe/l1finalized-l2unsafe/l2safe/l2finalized):
+//   - the first call advances heads by (0/0/17-71/71/1)
+//   - second call advances by 0/0/17-204/204/82,
+//   - but all subsequent calls advance status by exactly 0/0/17-204/204/204.
+//
+// 17 makes sense because challengeWindow=16 and we create 1 extra block before that,
+// and 204 L2blocks = 17 L1blocks * 12 L2blocks/L1block (L1blocktime=12s, L2blocktime=1s)
+func (a *L2AltDA) ActNewL2TxFinalized(t helpers.Testing) {
+    // Include a new l2 batcher transaction, submitting an input commitment to the l1.
+    a.ActNewL2Tx(t)
+    // Create ChallengeWindow empty blocks so the above batcher blocks can finalize (can't be challenged anymore)
+    a.ActL1Blocks(t, a.altDACfg.ChallengeWindow)
+    // Finalize the L1 chain and the L2 chain (by draining all events and running through derivation pipeline)
+    // TODO: understand why we need to drain the pipeline before AND after actL1Finalized
+    a.sequencer.ActL2PipelineFull(t)
+    a.ActL1Finalized(t)
+    a.sequencer.ActL2PipelineFull(t)
+
+    // Uncomment the below code to observe the behavior described in the TODO above
+    // syncStatus := a.sequencer.SyncStatus()
+    // a.log.Info("Sync status after ActNewL2TxFinalized",
+    //     "unsafeL1", syncStatus.HeadL1.Number, "safeL1", syncStatus.SafeL1.Number, "finalizedL1", syncStatus.FinalizedL1.Number,
+    //     "unsafeL2", syncStatus.UnsafeL2.Number, "safeL2", syncStatus.SafeL2.Number, "finalizedL2", syncStatus.FinalizedL2.Number)
+}
+
 func (a *L2AltDA) ActDeleteLastInput(t helpers.Testing) {
     require.NoError(t, a.storage.Client.DeleteData(a.lastComm))
 }
@@ -363,7 +398,7 @@ func TestAltDA_ChallengeResolved(gt *testing.T) {
 }

 // DA storage service goes offline while sequencer keeps making blocks. When storage comes back online, it should be able to catch up.
-func TestAltDA_StorageError(gt *testing.T) {
+func TestAltDA_StorageGetError(gt *testing.T) {
     t := helpers.NewDefaultTesting(gt)
     harness := NewL2AltDA(t)

@@ -528,19 +563,20 @@ func TestAltDA_Finalization(gt *testing.T) {
     t := helpers.NewDefaultTesting(gt)
     a := NewL2AltDA(t)

-    // build L1 block #1
+    // Notation everywhere below is l1unsafe/l1safe/l1finalized-l2unsafe/l2safe/l2finalized
+    // build L1 block #1: 0/0/0-0/0/0 -> 1/1/0-0/0/0
     a.ActL1Blocks(t, 1)
     a.miner.ActL1SafeNext(t)

-    // Fill with l2 blocks up to the L1 head
+    // Fill with l2 blocks up to the L1 head: 1/1/0:0/0/0 -> 1/1/0:1/1/0
     a.sequencer.ActL1HeadSignal(t)
     a.sequencer.ActBuildToL1Head(t)

     a.sequencer.ActL2PipelineFull(t)
     a.sequencer.ActL1SafeSignal(t)
     require.Equal(t, uint64(1), a.sequencer.SyncStatus().SafeL1.Number)

-    // add L1 block #2
+    // add L1 block #2: 1/1/0:1/1/0 -> 2/2/1:2/1/0
     a.ActL1Blocks(t, 1)
     a.miner.ActL1SafeNext(t)
     a.miner.ActL1FinalizeNext(t)
@@ -552,7 +588,7 @@
     a.sequencer.ActL1FinalizedSignal(t)
     a.sequencer.ActL1SafeSignal(t)

-    // commit all the l2 blocks to L1
+    // commit all the l2 blocks to L1: 2/2/1:2/1/0 -> 3/2/1:2/1/0
     a.batcher.ActSubmitAll(t)
     a.miner.ActL1StartBlock(12)(t)
     a.miner.ActL1IncludeTx(a.dp.Addresses.Batcher)(t)
@@ -561,31 +597,31 @@
     // verify
     a.sequencer.ActL2PipelineFull(t)

-    // fill with more unsafe L2 blocks
+    // fill with more unsafe L2 blocks: 3/2/1:2/1/0 -> 3/2/1:3/1/0
     a.sequencer.ActL1HeadSignal(t)
     a.sequencer.ActBuildToL1Head(t)

-    // submit those blocks too, block #4
+    // submit those blocks too, block #4: 3/2/1:3/1/0 -> 4/2/1:3/1/0
     a.batcher.ActSubmitAll(t)
     a.miner.ActL1StartBlock(12)(t)
     a.miner.ActL1IncludeTx(a.dp.Addresses.Batcher)(t)
     a.miner.ActL1EndBlock(t)

-    // add some more L1 blocks #5, #6
+    // add some more L1 blocks #5, #6: 4/2/1:3/1/0 -> 6/2/1:3/1/0
     a.miner.ActEmptyBlock(t)
     a.miner.ActEmptyBlock(t)

-    // and more unsafe L2 blocks
+    // and more unsafe L2 blocks: 6/2/1:3/1/0 -> 6/2/1:6/1/0
     a.sequencer.ActL1HeadSignal(t)
     a.sequencer.ActBuildToL1Head(t)

-    // move safe/finalize markers: finalize the L1 chain block with the first batch, but not the second
+    // move safe/finalize markers: 6/2/1:6/1/0 -> 6/4/3:6/1/0
     a.miner.ActL1SafeNext(t)     // #2 -> #3
     a.miner.ActL1SafeNext(t)     // #3 -> #4
     a.miner.ActL1FinalizeNext(t) // #1 -> #2
     a.miner.ActL1FinalizeNext(t) // #2 -> #3

-    // L1 safe and finalized as expected
+    // L1 safe and finalized as expected:
     a.sequencer.ActL2PipelineFull(t)
     a.sequencer.ActL1FinalizedSignal(t)
     a.sequencer.ActL1SafeSignal(t)
@@ -607,3 +643,64 @@
     // given 12s l1 time and 1s l2 time, l2 should be 12 * 3 = 36 blocks finalized
     require.Equal(t, uint64(36), a.sequencer.SyncStatus().FinalizedL2.Number)
 }
+
+// This test tests altDA -> ethDA -> altDA finalization behavior, simulating a temp altDA failure.
+func TestAltDA_FinalizationAfterEthDAFailover(gt *testing.T) {
+    t := helpers.NewDefaultTesting(gt)
+    // we only print critical logs to be able to see the statusLogs
+    harness := NewL2AltDA(t, WithLogLevel(log.LevelDebug))
+
+    // We first call this twice because the first 2 times are irregular.
+    // See ActNewL2TxFinalized's TODO comment.
+    harness.ActNewL2TxFinalized(t)
+    harness.ActNewL2TxFinalized(t)
+
+    // ActNewL2TxFinalized advances L1 by (1+ChallengeWindow)L1 blocks, and there are 12 L2 blocks per L1 block.
+    diffL2Blocks := (1 + harness.altDACfg.ChallengeWindow) * 12
+
+    for i := 0; i < 5; i++ {
+        ssBefore := harness.sequencer.SyncStatus()
+        harness.ActNewL2TxFinalized(t)
+        ssAfter := harness.sequencer.SyncStatus()
+        // Finalized head should advance normally in altda mode
+        require.Equal(t, ssBefore.FinalizedL2.Number+diffL2Blocks, ssAfter.FinalizedL2.Number)
+    }
+
+    // We swap out altda batcher for ethda batcher
+    harness.batcher.ActAltDAFailoverToEthDA(t)
+
+    for i := 0; i < 3; i++ {
+        ssBefore := harness.sequencer.SyncStatus()
+        harness.ActNewL2TxFinalized(t)
+        if i == 0 {
+            // TODO: figure out why we need to act twice for the first time after failover.
+            // I think it's because the L1 driven finalizedHead is set to L1FinalizedHead-ChallengeWindow (see damgr.go updateFinalizedFromL1),
+            // so it trails behind by an extra challenge_window when we switch over to ethDA.
+            harness.ActNewL2TxFinalized(t)
+        }
+        ssAfter := harness.sequencer.SyncStatus()
+        // Even after failover, the finalized head should continue advancing normally
+        require.Equal(t, ssBefore.FinalizedL2.Number+diffL2Blocks, ssAfter.FinalizedL2.Number)
+    }
+
+    // Revert back to altda batcher (simulating that altda's temporary outage is resolved)
+    harness.batcher.ActAltDAFallbackToAltDA(t)

+    for i := 0; i < 3; i++ {
+        ssBefore := harness.sequencer.SyncStatus()
+        harness.ActNewL2TxFinalized(t)
+        ssAfter := harness.sequencer.SyncStatus()
+
+        // Even after fallback to altda, the finalized head should continue advancing normally
+        if i == 0 {
+            // This is the opposite as the altda->ethda direction. In this case, the first time we fallback to altda,
+            // the finalized head will advance by 2*diffL2Blocks: in ethda mode when driven by L1 finalization,
+            // the head is set to L1FinalizedHead-ChallengeWindow. After sending an altda commitment, the finalized head
+            // is now driven by the finalization of the altda commitment.
+            require.Equal(t, ssBefore.FinalizedL2.Number+2*diffL2Blocks, ssAfter.FinalizedL2.Number)
+        } else {
+            require.Equal(t, ssBefore.FinalizedL2.Number+diffL2Blocks, ssAfter.FinalizedL2.Number)
+        }
+
+    }
+}
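
As a quick sanity check on the block math in the comments above (a standalone sketch, using the values stated in this diff: a ChallengeWindow of 16 L1 blocks, 12 s L1 block time, 1 s L2 block time):

package main

import "fmt"

func main() {
    const (
        challengeWindow = 16 // L1 blocks, as stated in the ActNewL2TxFinalized TODO comment
        l2PerL1         = 12 // 12s L1 block time / 1s L2 block time
    )
    // Each ActNewL2TxFinalized call adds 1 batcher block plus ChallengeWindow
    // empty L1 blocks, so a steady-state call should advance the finalized heads by:
    diffL1Blocks := 1 + challengeWindow
    diffL2Blocks := diffL1Blocks * l2PerL1
    fmt.Println(diffL1Blocks, diffL2Blocks) // 17 204
}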
