Skip to content

Commit eb62685

Browse files
committed
add S3 minimum part size defined by the user (vitessio#17171)
Signed-off-by: Renan Rangel <rrangel@slack-corp.com>
1 parent ad51403 commit eb62685

File tree

8 files changed

+150
-20
lines changed

8 files changed

+150
-20
lines changed

examples/operator/operator.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,9 @@ spec:
511511
maxLength: 256
512512
pattern: ^[^\r\n]*$
513513
type: string
514+
minPartSize:
515+
format: int64
516+
type: integer
514517
region:
515518
minLength: 1
516519
type: string
@@ -1351,6 +1354,9 @@ spec:
13511354
maxLength: 256
13521355
pattern: ^[^\r\n]*$
13531356
type: string
1357+
minPartSize:
1358+
format: int64
1359+
type: integer
13541360
region:
13551361
minLength: 1
13561362
type: string
@@ -3898,6 +3904,9 @@ spec:
38983904
maxLength: 256
38993905
pattern: ^[^\r\n]*$
39003906
type: string
3907+
minPartSize:
3908+
format: int64
3909+
type: integer
39013910
region:
39023911
minLength: 1
39033912
type: string
@@ -5247,6 +5256,9 @@ spec:
52475256
maxLength: 256
52485257
pattern: ^[^\r\n]*$
52495258
type: string
5259+
minPartSize:
5260+
format: int64
5261+
type: integer
52505262
region:
52515263
minLength: 1
52525264
type: string

go.mod

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ require (
7171
golang.org/x/mod v0.17.0 // indirect
7272
golang.org/x/net v0.28.0
7373
golang.org/x/oauth2 v0.22.0
74-
golang.org/x/sys v0.24.0
74+
golang.org/x/sys v0.25.0
7575
golang.org/x/term v0.23.0
7676
golang.org/x/text v0.17.0 // indirect
7777
golang.org/x/time v0.6.0
@@ -96,6 +96,7 @@ require (
9696
github.com/aws/aws-sdk-go-v2/service/s3 v1.60.1
9797
github.com/aws/smithy-go v1.20.4
9898
github.com/bndr/gotabulate v1.1.2
99+
github.com/dustin/go-humanize v1.0.1
99100
github.com/gammazero/deque v0.2.1
100101
github.com/google/safehtml v0.1.0
101102
github.com/hashicorp/go-cleanhttp v0.5.2
@@ -151,9 +152,8 @@ require (
151152
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
152153
github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect
153154
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
154-
github.com/dustin/go-humanize v1.0.1 // indirect
155-
github.com/ebitengine/purego v0.6.1 // indirect
156-
github.com/fatih/color v1.16.0 // indirect
155+
github.com/ebitengine/purego v0.8.1 // indirect
156+
github.com/fatih/color v1.18.0 // indirect
157157
github.com/felixge/httpsnoop v1.0.4 // indirect
158158
github.com/go-logr/logr v1.4.2 // indirect
159159
github.com/go-logr/stdr v1.2.2 // indirect

go.sum

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,8 @@ github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUn
152152
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
153153
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
154154
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
155-
github.com/ebitengine/purego v0.6.1 h1:sjN8rfzbhXQ59/pE+wInswbU9aMDHiwlup4p/a07Mkg=
156-
github.com/ebitengine/purego v0.6.1/go.mod h1:ah1In8AOtksoNK6yk5z1HTJeUkC1Ez4Wk2idgGslMwQ=
155+
github.com/ebitengine/purego v0.8.1 h1:sdRKd6plj7KYW33EH5As6YKfe8m9zbN9JMrOjNVF/BE=
156+
github.com/ebitengine/purego v0.8.1/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
157157
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
158158
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
159159
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -163,8 +163,8 @@ github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
163163
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
164164
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
165165
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
166-
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
167-
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
166+
github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
167+
github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
168168
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
169169
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
170170
github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk=
@@ -682,8 +682,8 @@ golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b/go.mod h1:oPkhp1MJrh7nUepCBc
682682
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
683683
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
684684
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
685-
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
686-
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
685+
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
686+
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
687687
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
688688
golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
689689
golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=

go/flags/endtoend/vtbackup.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ Flags:
196196
--remote_operation_timeout duration time to wait for a remote operation (default 15s)
197197
--restart_before_backup Perform a mysqld clean/full restart after applying binlogs, but before taking the backup. Only makes sense to work around xtrabackup bugs.
198198
--s3_backup_aws_endpoint string endpoint of the S3 backend (region must be provided).
199+
--s3_backup_aws_min_partsize int Minimum part size to use, defaults to 5MiB but can be increased due to the dataset size. (default 5242880)
199200
--s3_backup_aws_region string AWS region to use. (default "us-east-1")
200201
--s3_backup_aws_retries int AWS request retries. (default -1)
201202
--s3_backup_force_path_style force the s3 path style.

go/flags/endtoend/vtctld.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Flags:
110110
--purge_logs_interval duration how often try to remove old logs (default 1h0m0s)
111111
--remote_operation_timeout duration time to wait for a remote operation (default 15s)
112112
--s3_backup_aws_endpoint string endpoint of the S3 backend (region must be provided).
113+
--s3_backup_aws_min_partsize int Minimum part size to use, defaults to 5MiB but can be increased due to the dataset size. (default 5242880)
113114
--s3_backup_aws_region string AWS region to use. (default "us-east-1")
114115
--s3_backup_aws_retries int AWS request retries. (default -1)
115116
--s3_backup_force_path_style force the s3 path style.

go/flags/endtoend/vttablet.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ Flags:
312312
--restore_from_backup_ts string (init restore parameter) if set, restore the latest backup taken at or before this timestamp. Example: '2021-04-29.133050'
313313
--retain_online_ddl_tables duration How long should vttablet keep an old migrated table before purging it (default 24h0m0s)
314314
--s3_backup_aws_endpoint string endpoint of the S3 backend (region must be provided).
315+
--s3_backup_aws_min_partsize int Minimum part size to use, defaults to 5MiB but can be increased due to the dataset size. (default 5242880)
315316
--s3_backup_aws_region string AWS region to use. (default "us-east-1")
316317
--s3_backup_aws_retries int AWS request retries. (default -1)
317318
--s3_backup_force_path_style force the s3 path style.

go/vt/mysqlctl/s3backupstorage/s3.go

Lines changed: 60 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ import (
4444
"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
4545
"github.com/aws/aws-sdk-go-v2/service/s3"
4646
"github.com/aws/aws-sdk-go-v2/service/s3/types"
47+
transport "github.com/aws/smithy-go/endpoints"
4748
"github.com/aws/smithy-go/middleware"
49+
"github.com/dustin/go-humanize"
4850
"github.com/spf13/pflag"
4951

5052
"vitess.io/vitess/go/vt/concurrency"
@@ -54,6 +56,11 @@ import (
5456
"vitess.io/vitess/go/vt/servenv"
5557
)
5658

59+
const (
60+
sseCustomerPrefix = "sse_c:"
61+
MaxPartSize = 1024 * 1024 * 1024 * 5 // 5GiB - limited by AWS https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
62+
)
63+
5764
var (
5865
// AWS API region
5966
region string
@@ -83,6 +90,11 @@ var (
8390

8491
// path component delimiter
8592
delimiter = "/"
93+
94+
// minimum part size
95+
minPartSize int64
96+
97+
ErrPartSize = errors.New("minimum S3 part size must be between 5MiB and 5GiB")
8698
)
8799

88100
func registerFlags(fs *pflag.FlagSet) {
@@ -95,6 +107,7 @@ func registerFlags(fs *pflag.FlagSet) {
95107
fs.BoolVar(&tlsSkipVerifyCert, "s3_backup_tls_skip_verify_cert", false, "skip the 'certificate is valid' check for SSL connections.")
96108
fs.StringVar(&requiredLogLevel, "s3_backup_log_level", "LogOff", "determine the S3 loglevel to use from LogOff, LogDebug, LogDebugWithSigning, LogDebugWithHTTPBody, LogDebugWithRequestRetries, LogDebugWithRequestErrors.")
97109
fs.StringVar(&sse, "s3_backup_server_side_encryption", "", "server-side encryption algorithm (e.g., AES256, aws:kms, sse_c:/path/to/key/file).")
110+
fs.Int64Var(&minPartSize, "s3_backup_aws_min_partsize", manager.MinUploadPartSize, "Minimum part size to use, defaults to 5MiB but can be increased due to the dataset size.")
98111
}
99112

100113
func init() {
@@ -108,7 +121,22 @@ type logNameToLogLevel map[string]aws.ClientLogMode
108121

109122
var logNameMap logNameToLogLevel
110123

111-
const sseCustomerPrefix = "sse_c:"
124+
type endpointResolver struct {
125+
r s3.EndpointResolverV2
126+
endpoint *string
127+
}
128+
129+
func (er *endpointResolver) ResolveEndpoint(ctx context.Context, params s3.EndpointParameters) (transport.Endpoint, error) {
130+
params.Endpoint = er.endpoint
131+
return er.r.ResolveEndpoint(ctx, params)
132+
}
133+
134+
func newEndpointResolver() *endpointResolver {
135+
return &endpointResolver{
136+
r: s3.NewDefaultEndpointResolverV2(),
137+
endpoint: &endpoint,
138+
}
139+
}
112140

113141
type iClient interface {
114142
manager.UploadAPIClient
@@ -161,17 +189,13 @@ func (bh *S3BackupHandle) AddFile(ctx context.Context, filename string, filesize
161189
return nil, fmt.Errorf("AddFile cannot be called on read-only backup")
162190
}
163191

164-
// Calculate s3 upload part size using the source filesize
165-
partSizeBytes := manager.DefaultUploadPartSize
166-
if filesize > 0 {
167-
minimumPartSize := float64(filesize) / float64(manager.MaxUploadParts)
168-
// Round up to ensure large enough partsize
169-
calculatedPartSizeBytes := int64(math.Ceil(minimumPartSize))
170-
if calculatedPartSizeBytes > partSizeBytes {
171-
partSizeBytes = calculatedPartSizeBytes
172-
}
192+
partSizeBytes, err := calculateUploadPartSize(filesize)
193+
if err != nil {
194+
return nil, err
173195
}
174196

197+
bh.bs.params.Logger.Infof("Using S3 upload part size: %s", humanize.IBytes(uint64(partSizeBytes)))
198+
175199
reader, writer := io.Pipe()
176200
bh.waitGroup.Add(1)
177201

@@ -212,6 +236,32 @@ func (bh *S3BackupHandle) AddFile(ctx context.Context, filename string, filesize
212236
return writer, nil
213237
}
214238

239+
// calculateUploadPartSize is a helper to calculate the part size, taking into consideration the minimum part size
240+
// passed in by an operator.
241+
func calculateUploadPartSize(filesize int64) (partSizeBytes int64, err error) {
242+
// Calculate s3 upload part size using the source filesize
243+
partSizeBytes = manager.DefaultUploadPartSize
244+
if filesize > 0 {
245+
minimumPartSize := float64(filesize) / float64(manager.MaxUploadParts)
246+
// Round up to ensure large enough partsize
247+
calculatedPartSizeBytes := int64(math.Ceil(minimumPartSize))
248+
if calculatedPartSizeBytes > partSizeBytes {
249+
partSizeBytes = calculatedPartSizeBytes
250+
}
251+
}
252+
253+
if minPartSize != 0 && partSizeBytes < minPartSize {
254+
if minPartSize > MaxPartSize || minPartSize < manager.MinUploadPartSize { // 5GiB and 5MiB respectively
255+
return 0, fmt.Errorf("%w, currently set to %s",
256+
ErrPartSize, humanize.IBytes(uint64(minPartSize)),
257+
)
258+
}
259+
partSizeBytes = int64(minPartSize)
260+
}
261+
262+
return
263+
}
264+
215265
// EndBackup is part of the backupstorage.BackupHandle interface.
216266
func (bh *S3BackupHandle) EndBackup(ctx context.Context) error {
217267
if bh.readOnly {

go/vt/mysqlctl/s3backupstorage/s3_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,3 +328,68 @@ func TestWithParams(t *testing.T) {
328328
assert.NotNil(t, s3.transport.DialContext)
329329
assert.NotNil(t, s3.transport.Proxy)
330330
}
331+
332+
func TestCalculateUploadPartSize(t *testing.T) {
333+
originalMinimum := minPartSize
334+
defer func() { minPartSize = originalMinimum }()
335+
336+
tests := []struct {
337+
name string
338+
filesize int64
339+
minimumPartSize int64
340+
want int64
341+
err error
342+
}{
343+
{
344+
name: "minimum - 10 MiB",
345+
filesize: 1024 * 1024 * 10, // 10 MiB
346+
minimumPartSize: 1024 * 1024 * 5, // 5 MiB
347+
want: 1024 * 1024 * 5, // 5 MiB,
348+
err: nil,
349+
},
350+
{
351+
name: "below minimum - 10 MiB",
352+
filesize: 1024 * 1024 * 10, // 10 MiB
353+
minimumPartSize: 1024 * 1024 * 8, // 8 MiB
354+
want: 1024 * 1024 * 8, // 8 MiB,
355+
err: nil,
356+
},
357+
{
358+
name: "above minimum - 1 TiB",
359+
filesize: 1024 * 1024 * 1024 * 1024, // 1 TiB
360+
minimumPartSize: 1024 * 1024 * 5, // 5 MiB
361+
want: 109951163, // ~104 MiB
362+
err: nil,
363+
},
364+
{
365+
name: "below minimum - 1 TiB",
366+
filesize: 1024 * 1024 * 1024 * 1024, // 1 TiB
367+
minimumPartSize: 1024 * 1024 * 200, // 200 MiB
368+
want: 1024 * 1024 * 200, // 200 MiB
369+
err: nil,
370+
},
371+
{
372+
name: "below S3 limits - 5 MiB",
373+
filesize: 1024 * 1024 * 3, // 3 MiB
374+
minimumPartSize: 1024 * 1024 * 4, // 4 MiB
375+
want: 1024 * 1024 * 5, // 5 MiB - should always return the minimum
376+
err: nil,
377+
},
378+
{
379+
name: "above S3 limits - 5 GiB",
380+
filesize: 1024 * 1024 * 1024 * 1024, // 1 TiB
381+
minimumPartSize: 1024 * 1024 * 1024 * 6, // 6 GiB
382+
want: 0,
383+
err: ErrPartSize,
384+
},
385+
}
386+
387+
for _, tt := range tests {
388+
t.Run(tt.name, func(t *testing.T) {
389+
minPartSize = tt.minimumPartSize
390+
partSize, err := calculateUploadPartSize(tt.filesize)
391+
require.ErrorIs(t, err, tt.err)
392+
require.Equal(t, tt.want, partSize)
393+
})
394+
}
395+
}

0 commit comments

Comments
 (0)