Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
package io.trino.execution.buffer;

import com.google.common.collect.AbstractIterator;
import io.airlift.compress.v3.xxhash.XxHash3Hasher;
import io.airlift.compress.v3.xxhash.XxHash3Native;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceInput;
import io.airlift.slice.SliceOutput;
Expand All @@ -39,6 +41,8 @@

public final class PagesSerdeUtil
{
// Per-thread reusable native XXH3 hasher. Creating a native hasher is comparatively
// expensive, so each thread keeps one and reset()s it before use instead of
// allocating a fresh instance per checksum computation.
// NOTE(review): ThreadLocal-held native handles are never explicitly closed; relies
// on the hasher being safe to reclaim with its thread — confirm against aircompressor docs.
private static final ThreadLocal<XxHash3Hasher> HASHERS = ThreadLocal.withInitial(XxHash3Native::newHasher);

private PagesSerdeUtil() {}

static final int SERIALIZED_PAGE_POSITION_COUNT_OFFSET = 0;
Expand Down Expand Up @@ -77,11 +81,24 @@ static Page readRawPage(int positionCount, SliceInput input, BlockEncodingSerde

public static long calculateChecksum(List<Slice> pages)
{
XxHash64 hash = new XxHash64();
for (Slice page : pages) {
hash.update(page);
int size = pages.stream().mapToInt(Slice::length).sum();
long checksum;
if (size > 16384) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that for small slices the cost of crossing into native code outweighs the benefit of the faster hashing function

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could the magic live in aircompressor ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, if we cannot move this switch there, the code as it stands now absolutely requires code comment

XxHash3Hasher hasher = HASHERS.get();
hasher.reset();
for (Slice page : pages) {
hasher.update(page.byteArray(), page.byteArrayOffset(), page.length());
}
checksum = hasher.digest();
}
long checksum = hash.hash();
else {
XxHash64 hasher = new XxHash64();
for (Slice page : pages) {
hasher.update(page);
}
checksum = hasher.hash();
}

// Since NO_CHECKSUM is assigned a special meaning, it is not a valid checksum.
if (checksum == NO_CHECKSUM) {
return checksum + 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
*/
package io.trino.operator.join;

import io.airlift.slice.XxHash64;
import io.airlift.compress.v3.xxhash.XxHash3Hasher;
import io.airlift.compress.v3.xxhash.XxHash3Native;
import io.trino.spi.Page;

import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -63,11 +65,10 @@ public PositionLinks create(List<JoinFilterFunction> searchFunctions)
@Override
public long checksum()
{
long hash = 0;
for (int positionLink : positionLinks) {
hash = XxHash64.hash(hash, positionLink);
try (XxHash3Hasher hasher = XxHash3Native.newHasher()) {
hasher.update(MemorySegment.ofArray(positionLinks));
return hasher.digest();
}
return hash;
}
};
}
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@
<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor-v3</artifactId>
<version>3.4</version>
<version>3.5</version>
</dependency>

<dependency>
Expand Down