-
Notifications
You must be signed in to change notification settings - Fork 8
Use varint length field for last_path encoding to support longer GCP object names #72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
9a55228
db7e7b9
9f691e6
33b526c
d398e28
dea553b
9ce5d17
df160ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,28 +91,50 @@ static FileList listFiles(final PluginTask task) { | |
|
|
||
| // String nextToken = base64Encode(0x0a + ASCII character according to utf8EncodeLength position+ filePath); | ||
| static String base64Encode(final String path) { | ||
| byte[] lengthVarint; | ||
| byte[] encoding; | ||
| byte[] utf8 = path.getBytes(StandardCharsets.UTF_8); | ||
| LOG.debug("path string: {} ,path length:{} \" + ", path, utf8.length); | ||
|
|
||
| int utf8EncodeLength = utf8.length; | ||
| if (utf8EncodeLength >= 128) { | ||
| if (utf8EncodeLength >= 65_535) { | ||
| throw new ConfigException(String.format("last_path '%s' is too long to encode. Please try to reduce its length", path)); | ||
| } | ||
|
|
||
| encoding = new byte[utf8.length + 2]; | ||
| lengthVarint = encodeVarint(utf8EncodeLength); | ||
| encoding = new byte[1 + lengthVarint.length + utf8.length]; | ||
| encoding[0] = 0x0a; | ||
|
|
||
| // for example: 60 -> '<' | ||
| char temp = (char) utf8EncodeLength; | ||
| encoding[1] = (byte) temp; | ||
| System.arraycopy(utf8, 0, encoding, 2, utf8.length); | ||
| System.arraycopy(lengthVarint, 0, encoding, 1, lengthVarint.length); | ||
| System.arraycopy(utf8, 0, encoding, 1 + lengthVarint.length, utf8.length); | ||
|
|
||
| final String s = Base64.getEncoder().encodeToString(encoding); | ||
| LOG.debug("last_path(base64 encoded): {}", s); | ||
| return s; | ||
| } | ||
|
|
||
| // see: https://protobuf.dev/programming-guides/encoding/#varints | ||
| private static byte[] encodeVarint(int value) | ||
|
||
| { | ||
| // utf8EncodeLength.length is up to 65535, so 2 bytes are enough for buffer | ||
| byte[] buffer = new byte[2]; | ||
| int pos = 0; | ||
| while (true) { | ||
| int bits = value & 0x7F; | ||
| value >>>= 7; | ||
| if (value != 0) { | ||
| buffer[pos++] = (byte) (bits | 0x80); | ||
| } | ||
| else { | ||
| buffer[pos++] = (byte) bits; | ||
| break; | ||
| } | ||
| } | ||
| byte[] result = new byte[pos]; | ||
| System.arraycopy(buffer, 0, result, 0, pos); | ||
| return result; | ||
| } | ||
|
|
||
| private static void printBucketInfo(final Storage client, final String bucket) { | ||
| // get Bucket | ||
| Storage.BucketGetOption fields = Storage.BucketGetOption.fields( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's understandable that this `base64Encode` method can handle 65536 bytes at most by itself (regardless of the limitation for `task.getLastPath()`), but it is a little bit confusing for code readers that it has a different limitation from `task.getLastPath()`.
Can you align this with `task.getLastPath()` and leave a comment to explain why it is limited by that number?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aligned byte limit to 1024 bytes and added a comment.
db7e7b9