Skip to content

Commit 2a0a3f5

Browse files
committed
ipld/unixfs/io: add WithMaxLinks() to directories
The new option effectively sets a maximum width for directories. Currently, on a dynamic directory, switching between Basic and HAMT is controlled by HAMTShardingSize, and the width of the HAMT shards by DefaultShardWidth. When WithMaxLinks() is specified, the switch is additionally triggered when the number of links exceeds the limit. In that case, MaxLinks is used as the ShardWidth. The directory can only be converted back to a BasicDirectory when the total number of links is below the limit. Backwards compatibility is kept and tests have been added. Note that when MaxLinks is set to a high number, automatic conversion to HAMT can still happen before MaxLinks is reached, if the estimated directory size is above 256KiB (as before).
1 parent 23dacad commit 2a0a3f5

File tree

2 files changed

+288
-54
lines changed

2 files changed

+288
-54
lines changed

ipld/unixfs/io/directory.go

Lines changed: 192 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,39 @@ type Directory interface {
6969

7070
// GetCidBuilder returns the CID Builder used.
7171
GetCidBuilder() cid.Builder
72+
73+
// SetMaxLinks sets the max width of the Directory.
74+
SetMaxLinks(n int)
75+
}
76+
77+
type DirectoryOption func(Directory)
78+
79+
// WithMaxLinks establishes the max number of links allowed for a directory:
80+
//
81+
// - On Dynamic directories using a BasicDirectory, it can trigger conversion
82+
// to HAMT when set and exceeded. The subsequent HAMT nodes will use MaxLinks
83+
// as ShardWidth. Conversion can happen too based on HAMTShardingSize.
84+
//
85+
// - On Dynamic directories using a HAMTDirectory, it can trigger conversion
86+
// to BasicDirectory when the number of links is below MaxLinks (and
87+
// HAMTShardingSize allows).
88+
//
89+
// - On Basic directories, it causes an error when adding more than MaxLinks
90+
// children.
91+
//
92+
// - On HAMT directories, it sets the ShardWidth, otherwise DefaultShardWidth
93+
// is used.
94+
func WithMaxLinks(n int) DirectoryOption {
95+
return func(d Directory) {
96+
d.SetMaxLinks(n)
97+
}
98+
}
99+
100+
// WithCidBuilder sets the CidBuilder for new Directories.
101+
func WithCidBuilder(cb cid.Builder) DirectoryOption {
102+
return func(d Directory) {
103+
d.SetCidBuilder(cb)
104+
}
72105
}
73106

74107
// TODO: Evaluate removing `dserv` from this layer and providing it in MFS.
@@ -96,6 +129,11 @@ type BasicDirectory struct {
96129
// (We maintain this value up to date even if the HAMTShardingSize is off
97130
// since potentially the option could be activated on the fly.)
98131
estimatedSize int
132+
totalLinks int
133+
134+
// opts
135+
// maxNumberOfLinks. If set, can trigger conversion to HAMT directory.
136+
maxLinks int
99137
}
100138

101139
// HAMTDirectory is the HAMT implementation of `Directory`.
@@ -104,30 +142,82 @@ type HAMTDirectory struct {
104142
shard *hamt.Shard
105143
dserv ipld.DAGService
106144

145+
// opts
146+
maxLinks int
147+
cidBuilder cid.Builder
148+
107149
// Track the changes in size by the AddChild and RemoveChild calls
108150
// for the HAMTShardingSize option.
109151
sizeChange int
152+
totalLinks int
110153
}
111154

112-
func newEmptyBasicDirectory(dserv ipld.DAGService) *BasicDirectory {
113-
return newBasicDirectoryFromNode(dserv, format.EmptyDirNode())
155+
func newEmptyBasicDirectory(dserv ipld.DAGService, opts ...DirectoryOption) *BasicDirectory {
156+
return newBasicDirectoryFromNode(dserv, format.EmptyDirNode(), opts...)
114157
}
115158

116-
func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode) *BasicDirectory {
159+
func newBasicDirectoryFromNode(dserv ipld.DAGService, node *mdag.ProtoNode, opts ...DirectoryOption) *BasicDirectory {
117160
basicDir := new(BasicDirectory)
118161
basicDir.node = node
119162
basicDir.dserv = dserv
120163

164+
for _, o := range opts {
165+
o(basicDir)
166+
}
167+
121168
// Scan node links (if any) to restore estimated size.
122-
basicDir.computeEstimatedSize()
169+
basicDir.computeEstimatedSizeAndTotalLinks()
123170

124171
return basicDir
125172
}
126173

127-
// NewDirectory returns a Directory implemented by DynamicDirectory
128-
// containing a BasicDirectory that can be converted to a HAMTDirectory.
129-
func NewDirectory(dserv ipld.DAGService) Directory {
130-
return &DynamicDirectory{newEmptyBasicDirectory(dserv)}
174+
func newEmptyHAMTDirectory(dserv ipld.DAGService, sizeChange int, opts ...DirectoryOption) (*HAMTDirectory, error) {
175+
dir := new(HAMTDirectory)
176+
dir.dserv = dserv
177+
dir.sizeChange = sizeChange
178+
dir.maxLinks = 0
179+
180+
for _, opt := range opts {
181+
opt(dir)
182+
}
183+
184+
// If user did not set maxLinks, we leave it unset, in case this
185+
// folder is converted to basic folder at some point.
186+
maxLinks := DefaultShardWidth
187+
if dir.maxLinks > 0 {
188+
maxLinks = dir.maxLinks
189+
}
190+
191+
shard, err := hamt.NewShard(dir.dserv, maxLinks)
192+
if err != nil {
193+
return nil, err
194+
}
195+
shard.SetCidBuilder(dir.cidBuilder)
196+
dir.shard = shard
197+
198+
return dir, nil
199+
}
200+
201+
func newHAMTDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (*HAMTDirectory, error) {
202+
dir := new(HAMTDirectory)
203+
dir.dserv = dserv
204+
dir.sizeChange = 0
205+
206+
shard, err := hamt.NewHamtFromDag(dserv, node)
207+
if err != nil {
208+
return nil, err
209+
}
210+
dir.shard = shard
211+
dir.totalLinks = len(node.Links())
212+
213+
return dir, nil
214+
}
215+
216+
// NewDirectory returns a Directory implemented by DynamicDirectory containing
217+
// a BasicDirectory that automatically converts to and from a HAMTDirectory
218+
// based on HAMTShardingSize and MaxLinks (when set).
219+
func NewDirectory(dserv ipld.DAGService, opts ...DirectoryOption) Directory {
220+
return &DynamicDirectory{newEmptyBasicDirectory(dserv, opts...)}
131221
}
132222

133223
// ErrNotADir implies that the given node was not a unixfs directory
@@ -150,21 +240,26 @@ func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, err
150240
case format.TDirectory:
151241
return &DynamicDirectory{newBasicDirectoryFromNode(dserv, protoBufNode.Copy().(*mdag.ProtoNode))}, nil
152242
case format.THAMTShard:
153-
shard, err := hamt.NewHamtFromDag(dserv, node)
243+
hamtDir, err := newHAMTDirectoryFromNode(dserv, node)
154244
if err != nil {
155245
return nil, err
156246
}
157-
return &DynamicDirectory{&HAMTDirectory{shard, dserv, 0}}, nil
247+
return &DynamicDirectory{hamtDir}, nil
158248
}
159249

160250
return nil, ErrNotADir
161251
}
162252

163-
func (d *BasicDirectory) computeEstimatedSize() {
253+
func (d *BasicDirectory) SetMaxLinks(n int) {
254+
d.maxLinks = n
255+
}
256+
257+
func (d *BasicDirectory) computeEstimatedSizeAndTotalLinks() {
164258
d.estimatedSize = 0
165259
// err is just breaking the iteration and we always return nil
166260
_ = d.ForEachLink(context.TODO(), func(l *ipld.Link) error {
167261
d.addToEstimatedSize(l.Name, l.Cid)
262+
d.totalLinks++
168263
return nil
169264
})
170265
// ForEachLink will never fail traversing the BasicDirectory
@@ -181,7 +276,7 @@ func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) {
181276
// Something has gone very wrong. Log an error and recompute the
182277
// size from scratch.
183278
log.Error("BasicDirectory's estimatedSize went below 0")
184-
d.computeEstimatedSize()
279+
d.computeEstimatedSizeAndTotalLinks()
185280
}
186281
}
187282

@@ -219,7 +314,15 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node
219314
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
220315
}
221316

222-
return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil
317+
switchShardingSize := d.estimatedSize+operationSizeChange >= HAMTShardingSize
318+
switchMaxLinks := false
319+
// We should switch if we have reached maxLinks and a new link is being
320+
// added and maxLinks is valid for shardWidth.
321+
if nodeToAdd != nil && entryToRemove == nil && validShardWidth(d.maxLinks) &&
322+
(d.totalLinks+1) > d.maxLinks {
323+
switchMaxLinks = true
324+
}
325+
return switchShardingSize || switchMaxLinks, nil
223326
}
224327

225328
// addLinkChild adds the link as an entry to this directory under the given
@@ -231,12 +334,20 @@ func (d *BasicDirectory) addLinkChild(ctx context.Context, name string, link *ip
231334
if err != nil && err != os.ErrNotExist {
232335
return err
233336
}
337+
if err == nil { // existed
338+
d.totalLinks--
339+
}
340+
341+
if d.maxLinks > 0 && d.totalLinks+1 > d.maxLinks {
342+
return errors.New("BasicDirectory: cannot add child: maxLinks reached")
343+
}
234344

235345
err = d.node.AddRawLink(name, link)
236346
if err != nil {
237347
return err
238348
}
239349
d.addToEstimatedSize(name, link.Cid)
350+
d.totalLinks++
240351
return nil
241352
}
242353

@@ -304,6 +415,7 @@ func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error {
304415

305416
// The name actually existed so we should update the estimated size.
306417
d.removeFromEstimatedSize(link.Name, link.Cid)
418+
d.totalLinks--
307419

308420
return d.node.RemoveNodeLink(name)
309421
// GetNodeLink didn't return ErrLinkNotFound so this won't fail with that
@@ -321,30 +433,38 @@ func (d *BasicDirectory) GetCidBuilder() cid.Builder {
321433
}
322434

323435
// switchToSharding returns a HAMT implementation of this directory.
324-
func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, error) {
325-
hamtDir := new(HAMTDirectory)
326-
hamtDir.dserv = d.dserv
327-
328-
shard, err := hamt.NewShard(d.dserv, DefaultShardWidth)
436+
func (d *BasicDirectory) switchToSharding(ctx context.Context, opts ...DirectoryOption) (*HAMTDirectory, error) {
437+
hamtDir, err := newEmptyHAMTDirectory(d.dserv, 0, opts...)
329438
if err != nil {
330439
return nil, err
331440
}
332-
shard.SetCidBuilder(d.node.CidBuilder())
333-
hamtDir.shard = shard
334441

335442
for _, lnk := range d.node.Links() {
336443
err = hamtDir.shard.SetLink(ctx, lnk.Name, lnk)
337444
if err != nil {
338445
return nil, err
339446
}
447+
hamtDir.totalLinks++
340448
}
341449

342450
return hamtDir, nil
343451
}
344452

345453
// SetCidBuilder implements the `Directory` interface.
346454
func (d *HAMTDirectory) SetCidBuilder(builder cid.Builder) {
347-
d.shard.SetCidBuilder(builder)
455+
d.cidBuilder = builder
456+
if d.shard != nil {
457+
d.shard.SetCidBuilder(builder)
458+
}
459+
}
460+
461+
// SetMaxLinks sets the max number of links in a HAMTDirectory. It overrides
462+
// DefaultShardWidth when present. Needs to be a power of two (shard entry
463+
// size) and multiple of 8 (bitfield size).
464+
func (d *HAMTDirectory) SetMaxLinks(maxLinks int) {
465+
if validShardWidth(maxLinks) {
466+
d.maxLinks = maxLinks
467+
}
348468
}
349469

350470
// AddChild implements the `Directory` interface.
@@ -358,6 +478,9 @@ func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node)
358478
d.removeFromSizeChange(oldChild.Name, oldChild.Cid)
359479
}
360480
d.addToSizeChange(name, nd.Cid())
481+
if oldChild == nil {
482+
d.totalLinks++
483+
}
361484
return nil
362485
}
363486

@@ -396,6 +519,7 @@ func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error {
396519

397520
if oldChild != nil {
398521
d.removeFromSizeChange(oldChild.Name, oldChild.Cid)
522+
d.totalLinks--
399523
}
400524

401525
return nil
@@ -412,9 +536,10 @@ func (d *HAMTDirectory) GetCidBuilder() cid.Builder {
412536
}
413537

414538
// switchToBasic returns a BasicDirectory implementation of this directory.
415-
func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, error) {
416-
basicDir := newEmptyBasicDirectory(d.dserv)
417-
basicDir.SetCidBuilder(d.GetCidBuilder())
539+
func (d *HAMTDirectory) switchToBasic(ctx context.Context, opts ...DirectoryOption) (*BasicDirectory, error) {
540+
// needsToSwitchToBasicDir checks that d.maxLinks is appropriate. No check is
541+
// performed here.
542+
basicDir := newEmptyBasicDirectory(d.dserv, opts...)
418543

419544
err := d.ForEachLink(ctx, func(lnk *ipld.Link) error {
420545
err := basicDir.addLinkChild(ctx, lnk.Name, lnk)
@@ -472,14 +597,32 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string
472597
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
473598
}
474599

475-
if d.sizeChange+operationSizeChange >= 0 {
476-
// We won't have reduced the HAMT net size.
477-
return false, nil
600+
// We must switch if size and maxlinks are below threshold
601+
canSwitchSize := false
602+
// Directory size reduced, perhaps below limit.
603+
if d.sizeChange+operationSizeChange < 0 {
604+
canSwitchSize, err = d.sizeBelowThreshold(ctx, operationSizeChange)
605+
if err != nil {
606+
return false, err
607+
}
608+
}
609+
610+
canSwitchMaxLinks := true
611+
if d.maxLinks > 0 {
612+
total := d.totalLinks
613+
if nodeToAdd != nil && entryToRemove == nil {
614+
total++
615+
} else if nodeToAdd == nil && entryToRemove != nil {
616+
total--
617+
}
618+
if total > d.maxLinks {
619+
// prevent switching as we would end up with too many links
620+
canSwitchMaxLinks = false
621+
}
478622
}
479623

480-
// We have reduced the directory size, check if went below the
481-
// HAMTShardingSize threshold to trigger a switch.
482-
return d.sizeBelowThreshold(ctx, operationSizeChange)
624+
return canSwitchSize && canSwitchMaxLinks, nil
625+
483626
}
484627

485628
// Evaluate directory size and a future sizeChange and check if it will be below
@@ -554,7 +697,7 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No
554697
}
555698

556699
if switchToBasic {
557-
basicDir, err := hamtDir.switchToBasic(ctx)
700+
basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks), WithCidBuilder(hamtDir.GetCidBuilder()))
558701
if err != nil {
559702
return err
560703
}
@@ -578,7 +721,14 @@ func (d *DynamicDirectory) AddChild(ctx context.Context, name string, nd ipld.No
578721
if !switchToHAMT {
579722
return basicDir.AddChild(ctx, name, nd)
580723
}
581-
hamtDir, err = basicDir.switchToSharding(ctx)
724+
725+
maxLinks := DefaultShardWidth
726+
// Verify that our maxLinks is usable for ShardWidth (power of 2, multiple of 8)
727+
if validShardWidth(basicDir.maxLinks) {
728+
maxLinks = basicDir.maxLinks
729+
}
730+
731+
hamtDir, err = basicDir.switchToSharding(ctx, WithMaxLinks(maxLinks), WithCidBuilder(basicDir.GetCidBuilder()))
582732
if err != nil {
583733
return err
584734
}
@@ -608,14 +758,23 @@ func (d *DynamicDirectory) RemoveChild(ctx context.Context, name string) error {
608758
return hamtDir.RemoveChild(ctx, name)
609759
}
610760

611-
basicDir, err := hamtDir.switchToBasic(ctx)
761+
// We have not removed the element that violates MaxLinks, so we have to +1 the limit. We -1 below.
762+
basicDir, err := hamtDir.switchToBasic(ctx, WithMaxLinks(hamtDir.maxLinks+1), WithCidBuilder(hamtDir.GetCidBuilder()))
612763
if err != nil {
613764
return err
614765
}
615766
err = basicDir.RemoveChild(ctx, name)
616767
if err != nil {
617768
return err
618769
}
770+
771+
basicDir.SetMaxLinks(hamtDir.maxLinks - 1)
619772
d.Directory = basicDir
620773
return nil
621774
}
775+
776+
// validShardWidth verifies that the given number is positive, a power of 2
777+
// and a multiple of 8.
778+
func validShardWidth(n int) bool {
779+
return n > 0 && (n&(n-1)) == 0 && n%8 == 0
780+
}

0 commit comments

Comments
 (0)